git subrepo clone https://git.savannah.gnu.org/git/lightning.git deps/lightning
author Paul Cercueil <paul@crapouillou.net>
Thu, 30 Jan 2020 15:33:44 +0000 (12:33 -0300)
committer Paul Cercueil <paul@crapouillou.net>
Sat, 8 Feb 2020 14:44:52 +0000 (11:44 -0300)
subrepo:
  subdir:   "deps/lightning"
  merged:   "b0b8eb5"
upstream:
  origin:   "https://git.savannah.gnu.org/git/lightning.git"
  branch:   "master"
  commit:   "b0b8eb5"
git-subrepo:
  version:  "0.4.1"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "a04d8c2"

223 files changed:
deps/lightning/.gitattributes [new file with mode: 0644]
deps/lightning/.gitignore [new file with mode: 0644]
deps/lightning/.gitrepo [new file with mode: 0644]
deps/lightning/AUTHORS [new file with mode: 0644]
deps/lightning/COPYING [new file with mode: 0644]
deps/lightning/COPYING.DOC [new file with mode: 0644]
deps/lightning/COPYING.LESSER [new file with mode: 0644]
deps/lightning/ChangeLog [new file with mode: 0644]
deps/lightning/Makefile.am [new file with mode: 0644]
deps/lightning/NEWS [new file with mode: 0644]
deps/lightning/README [new file with mode: 0644]
deps/lightning/THANKS [new file with mode: 0644]
deps/lightning/TODO [new file with mode: 0644]
deps/lightning/check/3to2.ok [new file with mode: 0644]
deps/lightning/check/3to2.tst [new file with mode: 0644]
deps/lightning/check/Makefile.am [new file with mode: 0644]
deps/lightning/check/add.ok [new file with mode: 0644]
deps/lightning/check/add.tst [new file with mode: 0644]
deps/lightning/check/align.ok [new file with mode: 0644]
deps/lightning/check/align.tst [new file with mode: 0644]
deps/lightning/check/all.tst [new file with mode: 0644]
deps/lightning/check/allocai.ok [new file with mode: 0644]
deps/lightning/check/allocai.tst [new file with mode: 0644]
deps/lightning/check/allocar.ok [new file with mode: 0644]
deps/lightning/check/allocar.tst [new file with mode: 0644]
deps/lightning/check/alu.inc [new file with mode: 0644]
deps/lightning/check/alu_add.ok [new file with mode: 0644]
deps/lightning/check/alu_add.tst [new file with mode: 0644]
deps/lightning/check/alu_and.ok [new file with mode: 0644]
deps/lightning/check/alu_and.tst [new file with mode: 0644]
deps/lightning/check/alu_com.ok [new file with mode: 0644]
deps/lightning/check/alu_com.tst [new file with mode: 0644]
deps/lightning/check/alu_div.ok [new file with mode: 0644]
deps/lightning/check/alu_div.tst [new file with mode: 0644]
deps/lightning/check/alu_lsh.ok [new file with mode: 0644]
deps/lightning/check/alu_lsh.tst [new file with mode: 0644]
deps/lightning/check/alu_mul.ok [new file with mode: 0644]
deps/lightning/check/alu_mul.tst [new file with mode: 0644]
deps/lightning/check/alu_neg.ok [new file with mode: 0644]
deps/lightning/check/alu_neg.tst [new file with mode: 0644]
deps/lightning/check/alu_or.ok [new file with mode: 0644]
deps/lightning/check/alu_or.tst [new file with mode: 0644]
deps/lightning/check/alu_rem.ok [new file with mode: 0644]
deps/lightning/check/alu_rem.tst [new file with mode: 0644]
deps/lightning/check/alu_rsb.ok [new file with mode: 0644]
deps/lightning/check/alu_rsb.tst [new file with mode: 0644]
deps/lightning/check/alu_rsh.ok [new file with mode: 0644]
deps/lightning/check/alu_rsh.tst [new file with mode: 0644]
deps/lightning/check/alu_sub.ok [new file with mode: 0644]
deps/lightning/check/alu_sub.tst [new file with mode: 0644]
deps/lightning/check/alu_xor.ok [new file with mode: 0644]
deps/lightning/check/alu_xor.tst [new file with mode: 0644]
deps/lightning/check/alux_add.ok [new file with mode: 0644]
deps/lightning/check/alux_add.tst [new file with mode: 0644]
deps/lightning/check/alux_sub.ok [new file with mode: 0644]
deps/lightning/check/alux_sub.tst [new file with mode: 0644]
deps/lightning/check/bp.ok [new file with mode: 0644]
deps/lightning/check/bp.tst [new file with mode: 0644]
deps/lightning/check/branch.ok [new file with mode: 0644]
deps/lightning/check/branch.tst [new file with mode: 0644]
deps/lightning/check/call.ok [new file with mode: 0644]
deps/lightning/check/call.tst [new file with mode: 0644]
deps/lightning/check/carg.c [new file with mode: 0644]
deps/lightning/check/carry.ok [new file with mode: 0644]
deps/lightning/check/carry.tst [new file with mode: 0644]
deps/lightning/check/ccall.c [new file with mode: 0644]
deps/lightning/check/check.arm.sh [new file with mode: 0755]
deps/lightning/check/check.arm.swf.sh [new file with mode: 0755]
deps/lightning/check/check.arm4.swf.sh [new file with mode: 0755]
deps/lightning/check/check.nodata.sh [new file with mode: 0755]
deps/lightning/check/check.sh [new file with mode: 0755]
deps/lightning/check/check.swf.sh [new file with mode: 0755]
deps/lightning/check/check.x87.nodata.sh [new file with mode: 0755]
deps/lightning/check/check.x87.sh [new file with mode: 0755]
deps/lightning/check/clobber.ok [new file with mode: 0644]
deps/lightning/check/clobber.tst [new file with mode: 0644]
deps/lightning/check/ctramp.c [new file with mode: 0644]
deps/lightning/check/cva_list.c [new file with mode: 0644]
deps/lightning/check/cvt.ok [new file with mode: 0644]
deps/lightning/check/cvt.tst [new file with mode: 0644]
deps/lightning/check/divi.ok [new file with mode: 0644]
deps/lightning/check/divi.tst [new file with mode: 0644]
deps/lightning/check/fib.ok [new file with mode: 0644]
deps/lightning/check/fib.tst [new file with mode: 0644]
deps/lightning/check/float.ok [new file with mode: 0644]
deps/lightning/check/float.tst [new file with mode: 0644]
deps/lightning/check/fop_abs.ok [new file with mode: 0644]
deps/lightning/check/fop_abs.tst [new file with mode: 0644]
deps/lightning/check/fop_sqrt.ok [new file with mode: 0644]
deps/lightning/check/fop_sqrt.tst [new file with mode: 0644]
deps/lightning/check/hton.ok [new file with mode: 0644]
deps/lightning/check/hton.tst [new file with mode: 0644]
deps/lightning/check/jmpr.ok [new file with mode: 0644]
deps/lightning/check/jmpr.tst [new file with mode: 0644]
deps/lightning/check/ldst.inc [new file with mode: 0644]
deps/lightning/check/ldsti.ok [new file with mode: 0644]
deps/lightning/check/ldsti.tst [new file with mode: 0644]
deps/lightning/check/ldstr-c.ok [new file with mode: 0644]
deps/lightning/check/ldstr-c.tst [new file with mode: 0644]
deps/lightning/check/ldstr.ok [new file with mode: 0644]
deps/lightning/check/ldstr.tst [new file with mode: 0644]
deps/lightning/check/ldstxi-c.ok [new file with mode: 0644]
deps/lightning/check/ldstxi-c.tst [new file with mode: 0644]
deps/lightning/check/ldstxi.ok [new file with mode: 0644]
deps/lightning/check/ldstxi.tst [new file with mode: 0644]
deps/lightning/check/ldstxr-c.ok [new file with mode: 0644]
deps/lightning/check/ldstxr-c.tst [new file with mode: 0644]
deps/lightning/check/ldstxr.ok [new file with mode: 0644]
deps/lightning/check/ldstxr.tst [new file with mode: 0644]
deps/lightning/check/lightning.c [new file with mode: 0644]
deps/lightning/check/nodata.c [new file with mode: 0644]
deps/lightning/check/put.ok [new file with mode: 0644]
deps/lightning/check/put.tst [new file with mode: 0644]
deps/lightning/check/qalu.inc [new file with mode: 0644]
deps/lightning/check/qalu_div.ok [new file with mode: 0644]
deps/lightning/check/qalu_div.tst [new file with mode: 0644]
deps/lightning/check/qalu_mul.ok [new file with mode: 0644]
deps/lightning/check/qalu_mul.tst [new file with mode: 0644]
deps/lightning/check/range.ok [new file with mode: 0644]
deps/lightning/check/range.tst [new file with mode: 0644]
deps/lightning/check/ranger.ok [new file with mode: 0644]
deps/lightning/check/ranger.tst [new file with mode: 0644]
deps/lightning/check/ret.ok [new file with mode: 0644]
deps/lightning/check/ret.tst [new file with mode: 0644]
deps/lightning/check/rpn.ok [new file with mode: 0644]
deps/lightning/check/rpn.tst [new file with mode: 0644]
deps/lightning/check/run-test [new file with mode: 0755]
deps/lightning/check/self.c [new file with mode: 0644]
deps/lightning/check/setcode.c [new file with mode: 0644]
deps/lightning/check/stack.ok [new file with mode: 0644]
deps/lightning/check/stack.tst [new file with mode: 0644]
deps/lightning/check/tramp.ok [new file with mode: 0644]
deps/lightning/check/tramp.tst [new file with mode: 0644]
deps/lightning/check/va_list.ok [new file with mode: 0644]
deps/lightning/check/va_list.tst [new file with mode: 0644]
deps/lightning/check/varargs.ok [new file with mode: 0644]
deps/lightning/check/varargs.tst [new file with mode: 0644]
deps/lightning/configure.ac [new file with mode: 0644]
deps/lightning/doc/.cvsignore [new file with mode: 0644]
deps/lightning/doc/.gitignore [new file with mode: 0644]
deps/lightning/doc/Makefile.am [new file with mode: 0644]
deps/lightning/doc/body.texi [new file with mode: 0644]
deps/lightning/doc/fact.c [new file with mode: 0644]
deps/lightning/doc/ifib.c [new file with mode: 0644]
deps/lightning/doc/incr.c [new file with mode: 0644]
deps/lightning/doc/lightning.texi [new file with mode: 0644]
deps/lightning/doc/printf.c [new file with mode: 0644]
deps/lightning/doc/rfib.c [new file with mode: 0644]
deps/lightning/doc/rpn.c [new file with mode: 0644]
deps/lightning/doc/version.texi [new file with mode: 0644]
deps/lightning/include/Makefile.am [new file with mode: 0644]
deps/lightning/include/lightning.h.in [new file with mode: 0644]
deps/lightning/include/lightning/Makefile.am [new file with mode: 0644]
deps/lightning/include/lightning/jit_aarch64.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_alpha.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_arm.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_hppa.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_ia64.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_mips.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_ppc.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_private.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_riscv.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_s390.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_sparc.h [new file with mode: 0644]
deps/lightning/include/lightning/jit_x86.h [new file with mode: 0644]
deps/lightning/lib/Makefile.am [new file with mode: 0644]
deps/lightning/lib/jit_aarch64-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_aarch64-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_aarch64-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_aarch64.c [new file with mode: 0644]
deps/lightning/lib/jit_alpha-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_alpha-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_alpha-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_alpha.c [new file with mode: 0644]
deps/lightning/lib/jit_arm-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_arm-swf.c [new file with mode: 0644]
deps/lightning/lib/jit_arm-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_arm-vfp.c [new file with mode: 0644]
deps/lightning/lib/jit_arm.c [new file with mode: 0644]
deps/lightning/lib/jit_disasm.c [new file with mode: 0644]
deps/lightning/lib/jit_hppa-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_hppa-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_hppa-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_hppa.c [new file with mode: 0644]
deps/lightning/lib/jit_ia64-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_ia64-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_ia64-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_ia64.c [new file with mode: 0644]
deps/lightning/lib/jit_memory.c [new file with mode: 0644]
deps/lightning/lib/jit_mips-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_mips-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_mips-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_mips.c [new file with mode: 0644]
deps/lightning/lib/jit_names.c [new file with mode: 0644]
deps/lightning/lib/jit_note.c [new file with mode: 0644]
deps/lightning/lib/jit_ppc-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_ppc-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_ppc-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_ppc.c [new file with mode: 0644]
deps/lightning/lib/jit_print.c [new file with mode: 0644]
deps/lightning/lib/jit_rewind.c [new file with mode: 0644]
deps/lightning/lib/jit_riscv-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_riscv-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_riscv-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_riscv.c [new file with mode: 0644]
deps/lightning/lib/jit_s390-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_s390-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_s390-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_s390.c [new file with mode: 0644]
deps/lightning/lib/jit_size.c [new file with mode: 0644]
deps/lightning/lib/jit_sparc-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_sparc-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_sparc-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_sparc.c [new file with mode: 0644]
deps/lightning/lib/jit_x86-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_x86-sse.c [new file with mode: 0644]
deps/lightning/lib/jit_x86-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_x86-x87.c [new file with mode: 0644]
deps/lightning/lib/jit_x86.c [new file with mode: 0644]
deps/lightning/lib/lightning.c [new file with mode: 0644]
deps/lightning/lightning.pc.in [new file with mode: 0644]
deps/lightning/m4/.gitkeep [new file with mode: 0644]
deps/lightning/size.c [new file with mode: 0644]

diff --git a/deps/lightning/.gitattributes b/deps/lightning/.gitattributes
new file mode 100644 (file)
index 0000000..e8495d5
--- /dev/null
@@ -0,0 +1 @@
+ChangeLog merge=merge-changelog
diff --git a/deps/lightning/.gitignore b/deps/lightning/.gitignore
new file mode 100644 (file)
index 0000000..62ca42a
--- /dev/null
@@ -0,0 +1,33 @@
++*
+autom4te.cache
+aclocal.m4
+depcomp
+INSTALL
+Makefile
+Makefile.in
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+configure
+install-sh
+libtool
+lightning-*.tar.*
+ltmain.sh
+missing
+size
+stamp-h1
+test-driver
+check/.deps
+doc/.deps
+lib/.deps
+m4/libtool.m4
+m4/lt~obsolete.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+doc/mdate-sh
+doc/texinfo.tex
+lightning.pc
diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo
new file mode 100644 (file)
index 0000000..bb1106e
--- /dev/null
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
+;
+[subrepo]
+       remote = https://git.savannah.gnu.org/git/lightning.git
+       branch = master
+       commit = b0b8eb5e856c0d29053dc842e1919a2eb58c8cda
+       parent = 819f3dfc11f81f58cb52bd7b1f7cc5025791af62
+       method = merge
+       cmdver = 0.4.1
diff --git a/deps/lightning/AUTHORS b/deps/lightning/AUTHORS
new file mode 100644 (file)
index 0000000..2097c63
--- /dev/null
@@ -0,0 +1,14 @@
+Paulo Cesar Pereira de Andrade <pcpa@gnu.org>
+
+Paolo Bonzini <bonzini@gnu.org>
+
+PPC assembler by Ian Piumarta <piumarta@inria.fr>
+
+i386 assembler by Ian Piumarta <piumarta@inria.fr>
+and Gwenole Beauchesne <gb.public@free.fr>
+
+x86-64 backend by Matthew Flatt <mflatt@cs.utah.edu>
+
+Major PPC contributions by Laurent Michel <ldm@thorgal.homelinux.org>
+
+Major SPARC contributions by Ludovic Courtes <ludo@chbouib.org>
diff --git a/deps/lightning/COPYING b/deps/lightning/COPYING
new file mode 100644 (file)
index 0000000..4432540
--- /dev/null
@@ -0,0 +1,676 @@
+
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                      TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                    END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
diff --git a/deps/lightning/COPYING.DOC b/deps/lightning/COPYING.DOC
new file mode 100644 (file)
index 0000000..1a86456
--- /dev/null
@@ -0,0 +1,355 @@
+               GNU Free Documentation License
+                  Version 1.1, March 2000
+
+ Copyright (C) 2000  Free Software Foundation, Inc.
+     51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+0. PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+written document "free" in the sense of freedom: to assure everyone
+the effective freedom to copy and redistribute it, with or without
+modifying it, either commercially or noncommercially.  Secondarily,
+this License preserves for the author and publisher a way to get
+credit for their work, while not being considered responsible for
+modifications made by others.
+
+This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense.  It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does.  But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book.  We recommend this License
+principally for works whose purpose is instruction or reference.
+
+
+1. APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work that contains a
+notice placed by the copyright holder saying it can be distributed
+under the terms of this License.  The "Document", below, refers to any
+such manual or work.  Any member of the public is a licensee, and is
+addressed as "you".
+
+A "Modified Version" of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A "Secondary Section" is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject.  (For example, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.)  The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The "Invariant Sections" are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License.
+
+The "Cover Texts" are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License.
+
+A "Transparent" copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, whose contents can be viewed and edited directly and
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters.  A copy made in an otherwise Transparent file
+format whose markup has been designed to thwart or discourage
+subsequent modification by readers is not Transparent.  A copy that is
+not "Transparent" is called "Opaque".
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML designed for human modification.  Opaque formats include
+PostScript, PDF, proprietary formats that can be read and edited only
+by proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML produced by some word processors for output
+purposes only.
+
+The "Title Page" means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page.  For works in
+formats which do not have any title page as such, "Title Page" means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+
+2. VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License.  You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute.  However, you may accept
+compensation in exchange for copies.  If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+
+3. COPYING IN QUANTITY
+
+If you publish printed copies of the Document numbering more than 100,
+and the Document's license notice requires Cover Texts, you must enclose
+the copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover.  Both covers must also clearly and legibly identify
+you as the publisher of these copies.  The front cover must present
+the full title with all words of the title equally prominent and
+visible.  You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a publicly-accessible computer-network location containing a complete
+Transparent copy of the Document, free of added material, which the
+general network-using public has access to download anonymously at no
+charge using public-standard network protocols.  If you use the latter
+option, you must take reasonably prudent steps, when you begin
+distribution of Opaque copies in quantity, to ensure that this
+Transparent copy will remain thus accessible at the stated location
+until at least one year after the last time you distribute an Opaque
+copy (directly or through your agents or retailers) of that edition to
+the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+
+4. MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it.  In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+   from that of the Document, and from those of previous versions
+   (which should, if there were any, be listed in the History section
+   of the Document).  You may use the same title as a previous version
+   if the original publisher of that version gives permission.
+B. List on the Title Page, as authors, one or more persons or entities
+   responsible for authorship of the modifications in the Modified
+   Version, together with at least five of the principal authors of the
+   Document (all of its principal authors, if it has less than five).
+C. State on the Title page the name of the publisher of the
+   Modified Version, as the publisher.
+D. Preserve all the copyright notices of the Document.
+E. Add an appropriate copyright notice for your modifications
+   adjacent to the other copyright notices.
+F. Include, immediately after the copyright notices, a license notice
+   giving the public permission to use the Modified Version under the
+   terms of this License, in the form shown in the Addendum below.
+G. Preserve in that license notice the full lists of Invariant Sections
+   and required Cover Texts given in the Document's license notice.
+H. Include an unaltered copy of this License.
+I. Preserve the section entitled "History", and its title, and add to
+   it an item stating at least the title, year, new authors, and
+   publisher of the Modified Version as given on the Title Page.  If
+   there is no section entitled "History" in the Document, create one
+   stating the title, year, authors, and publisher of the Document as
+   given on its Title Page, then add an item describing the Modified
+   Version as stated in the previous sentence.
+J. Preserve the network location, if any, given in the Document for
+   public access to a Transparent copy of the Document, and likewise
+   the network locations given in the Document for previous versions
+   it was based on.  These may be placed in the "History" section.
+   You may omit a network location for a work that was published at
+   least four years before the Document itself, or if the original
+   publisher of the version it refers to gives permission.
+K. In any section entitled "Acknowledgements" or "Dedications",
+   preserve the section's title, and preserve in the section all the
+   substance and tone of each of the contributor acknowledgements
+   and/or dedications given therein.
+L. Preserve all the Invariant Sections of the Document,
+   unaltered in their text and in their titles.  Section numbers
+   or the equivalent are not considered part of the section titles.
+M. Delete any section entitled "Endorsements".  Such a section
+   may not be included in the Modified Version.
+N. Do not retitle any existing section as "Endorsements"
+   or to conflict in title with any Invariant Section.
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant.  To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section entitled "Endorsements", provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version.  Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity.  If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+
+5. COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy.  If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections entitled "History"
+in the various original documents, forming one section entitled
+"History"; likewise combine any sections entitled "Acknowledgements",
+and any sections entitled "Dedications".  You must delete all sections
+entitled "Endorsements."
+
+
+6. COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+
+7. AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, does not as a whole count as a Modified Version
+of the Document, provided no compilation copyright is claimed for the
+compilation.  Such a compilation is called an "aggregate", and this
+License does not apply to the other self-contained works thus compiled
+with the Document, on account of their being thus compiled, if they
+are not themselves derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one quarter
+of the entire aggregate, the Document's Cover Texts may be placed on
+covers that surround only the Document within the aggregate.
+Otherwise they must appear on covers around the whole aggregate.
+
+
+8. TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections.  You may include a
+translation of this License provided that you also include the
+original English version of this License.  In case of a disagreement
+between the translation and the original English version of this
+License, the original English version will prevail.
+
+
+9. TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License.  Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License.  However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+
+10. FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time.  Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.  See
+http://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License "or any later version" applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation.  If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+
+ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+      Copyright (c)  YEAR  YOUR NAME.
+      Permission is granted to copy, distribute and/or modify this document
+      under the terms of the GNU Free Documentation License, Version 1.1
+      or any later version published by the Free Software Foundation;
+      with the Invariant Sections being LIST THEIR TITLES, with the
+      Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+      A copy of the license is included in the section entitled "GNU
+      Free Documentation License".
+
+If you have no Invariant Sections, write "with no Invariant Sections"
+instead of saying which ones are invariant.  If you have no
+Front-Cover Texts, write "no Front-Cover Texts" instead of
+"Front-Cover Texts being LIST"; likewise for Back-Cover Texts.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
diff --git a/deps/lightning/COPYING.LESSER b/deps/lightning/COPYING.LESSER
new file mode 100644 (file)
index 0000000..fc8a5de
--- /dev/null
@@ -0,0 +1,165 @@
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions. 
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version. 
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog
new file mode 100644 (file)
index 0000000..76cac91
--- /dev/null
@@ -0,0 +1,4148 @@
+2020-01-23 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Add a proper fix to the condition of considering
+       a register dead at entry of a block when it modifies the register
+       !after! a branch to a target where it is live.
+       The correction is just to split blocks on branches. It uses an
+       extra label per branch, but makes following the code simpler and
+       avoids costly searches.
+
+2020-01-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct a special condition of a register
+       only assigned in a block, and incorrectly marked as dead before
+       a jump where the register is live.
+
+2019-10-04 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, include/lightning/jit_private.h,
+       include/lightning/jit_x86.h, lib/jit_x86-cpu.c, lib/jit_x86-sz.c,
+       lib/jit_x86.c: Correct issues with MinGW64 that defines _WIN32
+       and needs long long for jit_word_t.
+
+2019-09-16 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Do not add registers that are never modified
+       to the set of registers to scan for live range, which might
+       consume a lot of cpu time, doing nothing.
+
+2019-09-16 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+
+       * include/lightning/jit_x86.h, lib/jit_x86.c: Correct x86_64
+       backend, made %r12 a callee-save register as dictated by the
+       System V AMD64 ABI.
+
+2019-09-16 Paulo Andrade <pcpa@gnu.org>
+
+       * Makefile.am: Do not force CFLAGS for the get_jit_size target.
+       * check/lightning.c: Drop __ppc__ check.
+       * include/lightning.h.in: Drop __ppc__ check. Add new
+       jit_flag_vararg flag, for special case in powerpc 32 bit using
+       the SYSV abi, and need to pass an extra argument during code
+       generation.
+       * include/lightning/jit_ppc.c: Drop __ppc__ check. Remove the
+       ABI_ELFv2 macro, as it now directly checks for the _CALL_ELF
+       value when/if appropriate.
+       * include/lightning/jit_private.h: Drop __ppc__ check. Check for
+       _CALL_AIXDESC to define extra data to handle function
+       descriptors.
+       * lib/jit_ppc-cpu.c: Update to check for _CALL_SYSV; assume
+       !_CALL_SYSV == (_CALL_AIX || _CALL_LINUX). Significant code
+       rework for the SYSV abi.
+       * lib/jit_ppc-sz.c: Update for the SYSV abi.
+       * lib/jit_ppc.c: Update for the SYSV abi. Add matching va_list
+       type. Drop __ppc__ check.
+       * lib/jit_size.c: Drop __ppc__ check.
+       * lib/lightning.c: Drop __ppc__ check. Add proper check for
+       __powerpc__ and _CALL_AIXDESC to manage function descriptors.
+       * lib/size.c: Update to drop __ppc__ check and SYSV abi.
+
+2019-08-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Use JALR to get the same effect as JR, as
+       in mips32r6 JR generates an illegal instruction. Thanks to
+       Bruno Haible for providing a patch and the information, reported at
+       https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925129
+       * THANKS: update.
+
+2019-08-29 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+
+       * include/lightning/jit_private.h: Move definition of offsetof
+       from the public header file here.
+
+       * configure.ac, include/Makefile.am, include/lightning.h,
+       include/lightning.h.in: Generate lightning.h from lightning.h.in
+       and remove the dependence on config.h from the public header file.
+
+2019-06-04 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_riscv.h, lib/jit_riscv-cpu.c,
+       lib/jit_riscv-fpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c:
+       Implement riscv port. Only 64 bit Linux supported. Built on
+       Fedora 28 image.
+
+       * check/all.tst, check/float.tst, configure.ac, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_private.h,
+       lib/Makefile.am, lib/jit_disasm.c, lib/jit_size.c, lib/lightning.c:
+       Minor updates for the new riscv port.
+
+2019-06-04 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_alpha.c, lib/jit_ia64.c, lib/jit_mips.c, lib/jit_sparc.c:
+       Correct assertion of _jitc->regarg after emitting an instruction.
+       jit_carry may be set, but not an argument to the current instruction.
+
+2019-06-01 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct assertion on jit_unget_reg when the
+       argument is jit_carry, and jit_carry was not used in the
+       instruction.
+
+2019-06-01 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Remove no longer needed
+       setmask field of jit_block_t and blockmask from jit_compiler_t.
+
+       * lib/lightning.c: Rework of register live and unknown state
+       information during jit generation. It no longer recurses nor
+       does dangerous bit unset of registers in unknown state. The
+       only pitfall known, that must be taken care now is that jmpr
+       (or jmpi to not a jit node) is treated as a function call, as
+       otherwise it would need to consider all registers live, and
+       spill/reload during all jit generation.
+
+2018-12-28 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_disasm.c: Release bfd handle. Thanks for patch to
+       Marc Nieper-Wißkirchen.
+
+2018-08-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_disasm.c: Add hints to select hppa disassembler.
+
+       * lib/jit_hppa-cpu.c: Correct address of vastart when all
+       argument registers were used as non vararg arguments.
+
+       * lib/jit_hppa-fpu.c: Disable load/store of rv,ix,rb where
+       rv is the value, ix is a register or integer offset and rb
+       is a base register. These should be better tested, as they do
+       not work on all environments (fail on qemu-hppa).
+
+2018-04-20 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Add new register classes to
+       flag float registers and double only registers, required for sparc64
+       where only low 32 bit fpr registers can be used for single precision
+       operations.
+       Add new 128 bit jit_regset_t type for sparc64 register set.
+
+       * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc.
+
+       * lib/lightning.c: Update for new jit_regset_t required for sparc64.
+
+2018-02-26 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, include/lightning.h: Add the new jit_va_push
+       interface. That should be called when passing a va_list to a C
+       function. This is required because on Alpha a va_list is passed
+       by value, and lightning does not know about data types, so, cannot
+       understand it is pushing a va_list as argument.
+
+       * lib/jit_names.c, lib/lightning.c: Minor changes for the new
+       jit_code_va_push.
+
+       * check/cva_list.c: Update only test case using jit_va_push, to
+       pass a va_list to a C function.
+
+       * doc/body.texi: Better documentation of the varargs interface.
+
+       * lib/jit_alpha.c, lib/jit_alpha-cpu.c: Update to properly push a
+       C va_list and correctly calculate varargs offset.
+
+       * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-sz.c,
+       lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+       lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+       lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_s390-sz.c, lib/jit_s390.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-sz.c, lib/jit_x86.c:
+       Update for the new jit_va_push interface.
+
+2018-02-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_alpha-cpu.c: Always set t12 to the address of the
+       current function, to properly work on all systems. Previously
+       the shortcut did only work on Tru64. For Linux and glibc the
+       change is required.
+
+2018-02-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c:
+       Correct wrong logic in usage of jit_live in jit_retr. The
+       problem is that if a temporary is required during epilog,
+       the return register might be allocated, so, jit_live must always
+       be used.
+
+2018-01-31 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Avoid deep recursions when computing live
+       register ranges.
+
+2018-01-31 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Correct frame size and varargs
+       initialization for the n32 abi.
+       * lib/jit_mips.c, lib/jit_mips-fpu.c: Correct 32 bit abis
+       in big-endian.
+
+2017-09-13 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Add check for binutils 2.29 prototype to the
+       disassembler function.
+       * lib/jit_disasm.c: Adapt for binutils 2.29 change.
+
+2017-06-09 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/lightning.c: Add a
+       second pass from start when computing register live ranges.
+       This should be used temporarily, and is required for certain
+       loop constructs, with several consecutive blocks not referencing
+       a live register.
+
+2016-05-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct wrong movr simplification,
+       remove no longer needed code to set return registers live
+       and update live register set when reaching a label boundary,
+       but do not descend if the block has been already visited.
+       The latter needs some tuning for complex code generation, where
+       it will still have issues.
+
+2015-11-30 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Change documentation to no longer say
+       it is a variant of the Fibonacci sequence, and document
+       a proper implementation.
+       Thanks to Jon Arintok for pointing out that the Fibonacci
+       sequence generation was incorrect. It was documented, but
+       still confusing.
+
+       * check/fib.tst, check/fib.ok, check/bp.tst, check/bp.ok,
+       doc/ifib.c, doc/rbif.c: Implement a proper Fibonacci
+       sequence implementation.
+
+2015-07-03 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Correct definition of htonr_ul.
+       Correct prolog/epilog/va* routines to work on o64 abi.
+
+       * lib/jit_mips-fpu.c: Correct load of double literal
+       argument when not using a data buffer.
+       Remove alignment correction in vaarg_d if using the
+       new mips abi.
+
+       * lib/jit_mips.c: Correct code to allow creating variadic
+       jit functions when using the new mips abi.
+
+       * lib/jit_rewind.c: Minor adjust for rewind when using
+       the new mips abi, if there are varargs arguments in
+       registers.
+
+2015-06-06 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c: Search backward for the last output
+       register used, otherwise would stop too early if a float
+       argument used the slot.
+       Correct offset of first va_list argument, and use proper
+       va_list abi.
+
+       * lib/jit_ia64-fpu.c: Add new functions to move a gpr
+       to a fpr register, to counterpart the ones that move a
+       fpr to a gpr. These are required to properly implement
+       jit_getarg*_{f,d} on complex prototypes, or variadic
+       jit functions.
+
+       * lib/jit_ia64-sz.c: Update for support to jit variadic
+       functions.
+
+       * lib/jit_ia64.c: Implement proper abi for variadic
+       jit functions.
+
+2015-06-04 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_rewind.c: New file implementing generic functions
+       to "rewind", or rewrite IR code sequences.
+
+       * include/lightning.h: Add several new codes, that previously
+       were a function call, that would synthesize the operation.
+       Now, there is a code for the operation, and a new flag to
+       know an operation is synthesized.
+
+       * include/lightning/jit_private.h: Add several new macros to
+       help construct synthesized IR code sequences.
+
+       * lib/Makefile.am: Update for lib/jit_rewind.c.
+
+       * lib/jit_disasm.c: Update for a small rework on jit_node_t,
+       so that --enable-devel-disassembler does not need a change
+       in the layout of jit_node_t.
+
+       * lib/jit_names.c: Update for the new codes.
+
+       * lib/jit_print.c: Update to print more readable output, and
+       flag synthesized IR code sequences.
+
+       * lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+       lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_x86-sz.c,
+       lib/jit_x86.c: Update for new synthesized IR code sequences.
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c,
+       lib/jit_ppc.c, lib/jit_mips-cpu.c, lib/jit_mips-fpu.c,
+       lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_s390-fpu.c,
+       lib/jit_s390-sz.c, lib/jit_s390.c: Update for new synthesized
+       IR code sequences and correct bugs in the initial varargs
+       implementation support.
+
+       * lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_hppa-sz.c,
+       lib/jit_hppa.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c: Add generic, untested
+       support for the new synthesized IR code sequences. Known
+       most likely broken right now, and should be corrected once
+       access to these hosts is available.
+
+       * lib/lightning.c: Update for new IR codes, and add support
+       for not yet existing instructions that change third argument.
+
+       * size.c: Change to use different tables for LE and BE PowerPC.
+       Correct a wrong endif for x32.
+
+2015-05-25 Paulo Andrade <pcpa@gnu.org>
+
+       * check/cva_list.c: New file implementing a test to ensure
+       the value returned by jit_va_start is a valid C va_list.
+
+       * check/va_list.ok: New simple helper file, as now the
+       va_list.tst test is enabled.
+
+       * check/va_list.tst: Rewritten for an extensive variadic
+       jit functions test.
+
+       * check/Makefile.am: Update for the new tests.
+
+       * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c,
+       lib/jit_arm.c: Correct broken software float in a previous
+       commit. Note that the hard float abi implementation is known
+       broken at this time, for special cases involving variadic
+       functions, and should be corrected next.
+
+       * lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Correct
+       the jit_va_list_t semantics to match C va_list.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/Makefile.am: Bump library major. This is a preparation
+       for a rework that was due for quite some time, but that is
+       now required to properly implement variadic jit functions.
+       The rework is mainly required to know at prolog parsing, if
+       a function is variadic or not. This will benefit a few
+       backends, and is mandatory for the hard float arm abi.
+       The rework was already planned for quite some time, to
+       be able to use a variable stack framesize, and for leaf
+       functions optimization where applicable.
+       The change will be source compatible, but will change
+       some internals, and jit_code_t values, as some new will
+       be added.
+       The only behavior change is that, jit_arg_register_p may
+       change return value on hard float arm abi, if called before
+       or after jit_ellipsis. Common sense anyway, would say to
+       make that call after jit_ellipsis, but documentation
+       should be updated for it.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: Correct base
+       aarch64 varargs code.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Clearly run check if clang is the system
+       compiler.
+
+2015-05-20 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, lib/jit_sparc.c:
+       Add base support to jit vararg functions to the sparc backend.
+
+2015-05-20 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, lib/jit_alpha.c:
+       Add base support to jit vararg functions to the alpha backend.
+
+2015-05-19 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, lib/jit_hppa.c:
+       Add base support to jit vararg functions to the hppa backend.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c:
+       Add base support to jit vararg functions to the ia64 backend.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-fpu.c, lib/jit_ia64.c: Correct movi_d_w
+       and movi_f_w implementation to work when not using a
+       data buffer. This causes the check varargs.tst to
+       work when passing "-d" to the lightning test tool.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64.c: Implement inline assembly cache flush,
+       required on multiprocessor systems.
+
+2015-05-06 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c:
+       Add base support to jit vararg functions to the mips backend.
+       Currently only supported on the o32 abi, until access to a
+       n32 system is arranged.
+
+2015-05-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c:
+       Add base support to jit vararg functions to the PowerPC backend.
+
+2015-05-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_s390-cpu.c, lib/jit_s390-fpu.c, lib/jit_s390.c:
+       Add base support to jit vararg functions to the s390 backend.
+
+2015-05-01 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c,
+       lib/jit_arm.c: Add base support to jit vararg
+       functions to the arm backend.
+
+2015-04-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c,
+       lib/jit_aarch64.c: Add base support to jit vararg
+       functions to the aarch64 backend.
+
+2015-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_names.c, lib/lightning.c: Add initial support
+       for the new jit_va_start, jit_va_arg, jit_va_arg_d, and
+       jit_va_end interfaces. The jit_va_start call is supposed
+       to return a va_list compatible pointer, but not yet
+       decided if it will be "declared" stdarg compatible,
+       as for now only x86 support has been added (and should
+       be compatible), but issues may arise on other backends.
+
+       * check/lightning.c: Add wrappers to call the new jit_va_*
+       interfaces.
+
+       * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
+       jit_va_* for x86.
+
+       * lib/jit_x86-sz.c: Add fields, but not yet fully updated,
+       as this is an intermediate commit.
+
+       * lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+       lib/jit_alpha-sz.c, lib/jit_alpha.c,
+       lib/jit_arm-sz.c, lib/jit_arm.c,
+       lib/jit_hppa-sz.c, lib/jit_hppa.c,
+       lib/jit_ia64-sz.c, lib/jit_ia64.c,
+       lib/jit_mips-sz.c, lib/jit_mips.c,
+       lib/jit_ppc-sz.c, lib/jit_ppc.c,
+       lib/jit_s390-sz.c, lib/jit_s390.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c: Prepare for the
+       new jit_va_* interfaces. Not yet implemented, and will
+       cause an assertion if used.
+
+       * check/va_list.tst: Simple early test case, that works
+       on x86_64, x32, ix86, cygwin, and cygwin64.
+
+2015-02-17 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_aarch64-cpu.c, lib/jit_aarch64.c,
+       lib/jit_alpha-cpu.c, lib/jit_alpha.c,
+       lib/jit_arm-cpu.c, lib/jit_arm.c,
+       lib/jit_hppa-cpu.c, lib/jit_hppa.c,
+       lib/jit_ia64-cpu.c, lib/jit_ia64.c,
+       lib/jit_mips-cpu.c, lib/jit_mips.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc.c,
+       lib/jit_s390-cpu.c, lib/jit_s390.c,
+       lib/jit_sparc-cpu.c, lib/jit_sparc.c,
+       lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
+       jit_allocar(offs, size) interface, that receives
+       two integer registers arguments, allocates space
+       dynamically in the stack, returns the offset in
+       the first argument, and uses the second argument
+       for the size in bytes of the memory to be allocated.
+
+       * check/allocar.ok, check/allocar.tst: New files
+       implementing test cases for the new jit_allocar
+       interface.
+
+       * check/Makefile.am, check/lightning.c: Update for
+       the new test case and interface.
+
+       * doc/body.texi: Add documentation of the new
+       interface.
+
+2015-02-17 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_x86.h, lib/jit_x86-cpu.c,
+       lib/jit_x86-x87.c: No longer make st(7) available.
+       Need to keep one x87 slot empty to avoid exceptions.
+       This has the side effect of no longer needing the
+       hackish emms instruction before a function call.
+
+2015-02-16 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Remove the jit_regno_patch bitfield
+       register fields before actual emit, as it is only really
+       used before emit, otherwise, on special conditions it
+       may consider live registers as dead during code emit.
+
+2015-02-15 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+       Correct encoding of ldxr* stxr* in the x32 abi. If the
+       displacement register is negative, it would generate
+       a 64 bit instruction with a 32 bit unsigned displacement.
+
+       * check/ranger.tst, check/ranger.ok: New files, implementing
+       a test case for negative loads and stores. This is range.tst
+       converted to use registers instead of immediate offsets.
+
+       * check/Makefile.am: Update for the new test case.
+
+2015-02-07 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_size.c: Preventively use at least 144 bytes
+       if JIT_INSTR_MAX is less than it. The logic is not
+       guaranteed to be 100% precise, it is mostly heuristics
+       to allocate a buffer with as close as possible size,
+       but a wrong value may cause code generation to write
+       past the end of the buffer.
+
+2015-02-03 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct the reason the bug in
+       simplify_stxi was not triggered before, it was due to
+       incorrectly resetting the value->code field, what was
+       causing it to never properly optimize:
+               stxi Im0 Rb0 Rt0
+               ldxi Rt1 Rb1 Im1
+       when Rb0 == Rb1, Rt0 == Rt1 and Im0 == Im1
+       There was another possible issue, that has been also
+       addressed in this commit, that would be the case of
+       Rbn == Rtn, where no redundancy removal is possible.
+
+2015-02-03 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct wrong check in simplify_stxi.
+       The test was incorrectly comparing the target register
+       and the displacement offset. This was a time bomb bug,
+       that would trigger in code like:
+               stxi Im0 Rb0 Rt0
+               stxi Im1 Rb1 Rt1
+       if Rb0 == Rb1 && Rt0 == Rt1 && Im0 == Rt1, that is,
+       the wrong check was Im0 == Rt1, instead of the supposed
+       Im0 == Im1 (that was what the code meant to do). It
+       was removing the second stxi assuming it was redundantly
+       generated, as that is a not uncommon pattern on
+       translators generating jit.
+
+2015-02-02 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, include/lightning/jit_private.h,
+       lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+       lib/jit_disasm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c,
+       lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c: Add a new
+       --enable-devel-disassembler option, that should be used
+       during development, or lightning debug. This option
+       intermixes previous jit_print and jit_disassemble
+       output, making it easier to visualize what lightning
+       call was used, and what code was generated.
+
+2015-01-31 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-cpu.c, lib/jit_arm.c: Only limit to 24 bit
+       displacement non conditional jump in the same jit_state_t.
+
+2015-01-19 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Reorder documentation, making jit_frame
+       and jit_tramp the lightning response to the need of
+       trampolines, continuations and tail call optimizations.
+       A pseudo code example of a factorial function was added.
+       Also added a section for description of the available
+       predicates.
+
+       * doc/fact.c: New file, implementing a simple example of
+       a translation of a trivial, recursive, tail call optimization
+       into lightning calls. This is the conversion to functional C
+       code of the example in doc/body.texi.
+
+       * doc/Makefile.am: Update for the new test case.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/jit_aarch64.c,
+       lib/jit_alpha.c, lib/jit_arm-vfp.c, lib/jit_arm.c,
+       lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c,
+       lib/jit_ppc.c, lib/jit_s390.c, lib/jit_sparc.c,
+       lib/jit_x86.c: Add the new jit_arg_register_p predicate.
+       The predicate is expected to be used to know if an
+       argument is in a register, what would need special
+       handling if code that can overwrite non callee save
+       registers is executed.
+
+       * check/carg.c: New test case to check consistency and
+       expected usage of jit_arg_register_p.
+
+       * check/Makefile.am: Update for new test case.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_aarch64.h,
+       include/lightning/jit_alpha.h,
+       include/lightning/jit_arm.h,
+       include/lightning/jit_hppa.h,
+       include/lightning/jit_mips.h,
+        include/lightning/jit_ppc.h,
+       include/lightning/jit_s390.h,
+       include/lightning/jit_sparc.h,
+       include/lightning/jit_x86.h,
+       lib/jit_aarch64.c, lib/jit_alpha.c,
+       lib/jit_arm.c, lib/jit_hppa.c,
+       lib/jit_ia64.c, lib/jit_mips.c,
+       lib/jit_ppc.c, lib/jit_s390.c,
+       lib/jit_sparc.c, lib/jit_x86.c: Remove jit_arg_reg_p and
+       jit_arg_f_reg_p from a public header, and define it only
+       on port specific files where an integer offset is used
+       to qualify an argument identifier. Exported code expects
+       an opaque pointer (but of jit_node_t* type) to "qualify"
+       an argument identifier.
+       This patch, and the code review/simplification done during
+       it also corrected some bugs:
+       o Inconsistent jit_arg_d value of double argument after 3
+         integer arguments in arm for jit_functions; tested, C
+         functions were being properly called.
+       o Inconsistent use of getarg_{f,d} and putarg*_{f,d} on
+         s390 (32-bit) that happened to not have a proper test
+         case, as it would only happen for jit functions, and
+         tested, called C functions had proper arguments.
+       o Corrected a "last minute" correction that did not go
+         to the committed version, and would not compile on hppa,
+         due to bad _jit_putargi_d prototype definition.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Correct wrong/outdated information for
+       hton*, pusharg* and ret*, and add missing documentation
+       for rsb*, qmul*, qdvi* and putarg*.
+
+2015-01-15 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, lib/jit_disasm.c: Rewrite workaround
+       to apparent problem to initialize powerpc disassembler.
+
+2015-01-15 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/jit_aarch64.c,
+       lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+       lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c:
+       Implement jit_putarg*. It works as a mix of jit_getarg*
+       and jit_pusharg*, in the way that the first argument is
+       a register or immediate, and the second is a pointer
+       returned by jit_arg*. The use of the interface is to change
+       values of arguments to the current jit function.
+
+       * check/put.ok, check/put.tst: New test cases exercising
+       the new jit_putarg* interface.
+
+       * check/Makefile.am, check/lightning.c: Update for the
+       new test case and interface.
+
+2015-01-08 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_s390.h, lib/jit_s390-cpu.c,
+       lib/jit_s390-fpu.c, lib/jit_s390-sz.c, lib/jit_s390.c:
+       Renamed s390x* files to s390*.
+
+       * check/float.tst, check/lightning.c, configure.ac,
+       include/lightning.h, include/lightning/Makefile.am,
+       lib/Makefile.am, lib/jit_s390.c, lib/jit_size.c,
+       lib/lightning.c: Update for renamed files.
+
+2015-01-08 Paulo Andrade <pcpa@gnu.org>
+
+        * include/lightning.h, include/lightning/jit_private.h,
+        include/lightning/jit_s390x.h, lib/jit_disasm.c,
+        lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
+        lib/jit_s390x.c, lib/jit_size.c, lib/lightning.c:
+       Add support for generating jit for s390 32 bit. This change
+       also removed %f15 from the list of temporary fpr registers;
+       it was not being used, but if it were, it would corrupt the
+       stack frame because the spill address would overwrite grp
+       offsets.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Correct some endianness issues
+       on the powerpc le backend.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c: Add mcrxr instruction emulation,
+       as this instruction has been phased out, and should be
+       implemented as a kernel trap.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Better check for need to flush constants
+       before the pool being no longer reachable.
+
+2014-12-25 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h: Split jit_htonr in the new 3 interfaces
+       jit_htonr_us, jit_htonr_ui and jit_htonr_ul, the latter only
+       available on 64 bit. The plain/untyped jit_htonr macro call
+       maps to the wordsize one.
+       * lib/jit_aarch64-cpu.c,  lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+       lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c,
+       lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c,
+       lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+       lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c,
+       lib/jit_s390x-cpu.c, lib/jit_s390x-sz.c, lib/jit_s390x.c,
+       lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c,
+       lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c:
+       Update backends for the new jit_htonr*.
+       * check/lightning.c, lib/jit_names.c, lib/lightning.c:
+       Update for the new jit_htonr* interfaces.
+       * check/Makefile.am: Update for new test cases.
+       * check/hton.ok, check/hton.tst: New test cases.
+
+2014-12-24 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, include/lightning/jit_x86.h,
+       lib/jit_disasm.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
+       lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c,
+       size.c: Implement support for the x32 abi. Built and
+       tested on Gentoo default/linux/amd64/13.0/x32 profile.
+
+2014-12-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_names.c: Add missing rsbi_f and rsbi_d strings.
+
+2014-12-21 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Call __clear_cache for every page.
+       This should only be required for older boards or
+       toolchain setup, but has been reported to be required
+       for lightning at some point.
+
+2014-12-21 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Correct check to guard overflow of index
+       of constants from program counter.
+
+2014-11-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Remove an optimization to callee save
+       registers that may incorrectly remove a jit_movr under
+       special conditions.
+
+2014-11-20 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c,
+       lib/jit_ppc.c: Add initial powerpc le support.
+
+2014-11-20 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_disasm.c: Change thumb or arm disassemble based on
+       jit code before disassembly.
+
+       * lib/jit_arm-cpu.c: Correct reversed arguments to LDRD and
+       STRD instructions, and correct checking for support of those.
+
+       * lib/jit_arm-swf.c: Correct wrong use of LDRD and STRD and
+       only use those if the register is even.
+
+       * check/check.arm.swf.sh, check/check.arm4.swf.sh: New files
+       to test LDRD and STRD, as well as the alternate code path
+       when those are not available, in the .arm4. test case.
+
+       * check/Makefile.am: Update for the new test cases.
+
+2014-11-08 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_aarch64.c,
+       lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+       lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c:
+       Implement a private jit_flush call, that flushes
+       the cache, if applicable, aligning down to the
+       previous and up to the next page boundary.
+
+2014-11-08 Paulo Andrade <pcpa@gnu.org>
+
+       * check/ctramp.c: New file. It just repeats the test
+       of tramp.tst, but using two jit_state_t, what should
+       test possible issues with two contexts, and also validate
+       jit_tramp works on backends with function descriptions.
+
+       * check/Makefile.am: Update for new test case.
+
+2014-11-03 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_mips.h: Do not make the t9 register
+       JIT_R11 (or JIT_R7 for n32 or n64 abi) available. Previously
+       it caused problems if one expects it to not be changed in a
+       function call. For example, calling a jit function, where it
+       really does not need to be changed.
+
+2014-10-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+       lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c: Add an
+       assertion to all code generation "drivers" to ensure
+       _jitc->regarg is empty or in an expected state, after
+       translation of a lightning instruction to native code.
+       This change was a brute force test to find out other cases
+       of a temporary not being released (like was happening with
+       _bmsi and _bmci on x86), but no other case was found,
+       after running make check, with assertions enabled, on all
+       backends.
+
+2014-10-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c: Correct a register allocation leak in
+       _bmsi and _bmci.
+
+2014-10-25 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_disasm.c: Do not cause a fatal error if init_jit
+       fails in the jit_init_debug call.
+
+2014-10-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64.c, lib/jit_ppc.c: Correct handling of function
+       descriptor when first prolog is a jit_tramp prolog. The
+       test case was using the same jit_context_t, so was not
+       triggering this condition.
+
+       * lib/jit_ppc-cpu.c: Properly handle jump displacements that
+       do not fit in 24 bits on powerpc. This required changing from previous
+       "mtlr reg, blr" to "mtctr reg, bctr" to properly handle
+       the logic to "hide" function descriptors, but that would
+       also be required as the proper jit_jmpr when/if implementing
+       optimizations to leaf functions (was working with blr because
+       it is saved/reloaded in prolog/epilog).
+
+2014-10-21 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add three predicates
+       to query information about labels. jit_forward_p(label)
+       will return non zero if the label is "forward", that is
+       need a call to jit_link(label), jit_indirect_p(label)
+       that returns non zero if the label was created with the
+       jit_indirect() call, and jit_target_p(label) that will
+       return non zero if there is at least one jump patched
+       to land at that label.
+
+2014-10-18 Paulo Andrade <pcpa@gnu.org>
+
+       * check/range.ok, check/range.tst: New test case designed
+       to catch incorrect code generation, usually due to incorrect
+       test of immediate size. The test checks a large amount of
+       encodings in "power of two" boundaries. This test exercises
+       a significant amount of code paths that were previously not
+       tested.
+
+       * check/Makefile.am: Add range test to make check target.
+
+       * lib/jit_aarch64-cpu.c: Correct wrong address calculation
+       for stxi_c, stxi_s, stxi_i and stxi_l when the offset is
+       too large.
+
+        * lib/jit_mips-fpu.c: Correct wrong size test to check if
+       an immediate can be encoded in a float or double store.
+
+       * lib/jit_s390x-cpu.c: Correct inverted encoding to stxi_s
+       when the offset cannot be encoded, and fallbacks to an
+       alternate encoding in 2 instructions.
+
+2014-10-17 Paulo Andrade <pcpa@gnu.org>
+
+       * check/alu_rsb.ok, check/alu_rsb.tst: New files implementing
+       tests for jit_rsb*.
+
+       * check/Makefile.am, check/lightning.c, include/lightning.h,
+       lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, lib/jit_aarch64-sz.c,
+       lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c,
+       lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c,
+       lib/jit_arm-swf.c, lib/jit_arm-sz.c, lib/jit_arm-vfp.c,
+       lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c,
+       lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c,
+       lib/jit_ia64-fpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips-sz.c,
+       lib/jit_mips.c, lib/jit_names.c, lib/jit_ppc-cpu.c,
+       lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c,
+       lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
+       lib/jit_s390x.c, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c,
+       lib/jit_x86-sse.c, lib/jit_x86-sz.c, lib/jit_x86-x87.c,
+       lib/jit_x86.c, lib/lightning.c: Implement jit_rsb*. This
+       was a missing lightning 1.x interface, that on most
+       backends is synthesized, but on a few backends (hppa and ia64),
+       it can generate better code as on those there is, or the
+       only instruction with an immediate is in "rsb" format
+       (left operand).
+
+2014-10-17 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_names.c: New file with single definition of string
+       representation of lightning IR codes.
+
+       * size.c: Modified to append the code name in a C comment
+       after the maximum instruction size.
+
+       * lib/jit_print.c: Minor change to not duplicate jit_names.c
+       contents.
+
+       * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_arm-sz.c,
+       lib/jit_hppa-sz.c, lib/jit_ia64-sz.c, lib/jit_mips-sz.c,
+       lib/jit_ppc-sz.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c,
+       lib/jit_x86-sz.c: Rewritten to add string representation of
+       IR codes in a C comment.
+
+2014-10-14 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c,
+       lib/jit_hppa-cpu.c, lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c,
+       lib/jit_sparc-cpu.c: Implement or correct the internal
+       nop(count) call that receives an argument that tells the
+       modulo bytes to align the code for the next instruction.
+
+       * include/lightning.h, lib/lightning.c, lib/jit_aarch64.c,
+       lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c,
+       lib/jit_x86.c: Implement the new jit_align() call that receives
+       an argument, that tells the modulo, in bytes, to align the
+       next instruction. In most backends the only value that makes
+       a difference is a value that matches sizeof(void*), as all
+       other values usually are already automatically aligned in
+       labels, but not guaranteed to be aligned at word size bytes.
+
+       * check/align.ok, check/align.tst: New files, implementing
+       a simple test for the new jit_align() interface.
+
+       * check/Makefile.am, check/lightning.c, lib/jit_aarch64-sz.c,
+       lib/jit_alpha-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c,
+       lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c,
+       lib/jit_print.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c,
+       lib/jit_x86-sz.c: Update for the new jit_code_align code and
+       the jit_align() interface.
+
+2014-10-13 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/jit_size.c, size.c: Use a
+       symbolic value for the last IR code.
+
+2014-10-12 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c,
+       lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c,
+       lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c,
+       lib/jit_x86-cpu.c, lib/lightning.c: Implement the new
+       jit_frame and jit_tramp interfaces, that allow writing
+       trampoline like calls, where a single dispatcher jit buffer
+       is written, and later other jit buffers are created, with
+       the same stack frame layout as the dispatcher. This is the
+       logic that GNU Smalltalk used in lightning 1.x, and is required
+       to make a sane port for lightning 2.x.
+
+       * lib/jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp,
+       and also correct wrong encoding for B4 instructions, that
+       implement jmpr, as well as correct reverse logic in _jmpr,
+       that was moving the branch register to the jump register,
+       and not vice-versa.
+       Also, if a stack frame is to be assumed, always assume it may
+       call a function with up to 8 arguments, regardless of the
+       hint frame argument.
+
+       * lib/jit_arm.c: Add a new must_align_p() interface to ensure
+       function prologs are always aligned. This condition was
+       previously always true, somewhat by accident, but with
+       jit_tramp it is not guaranteed.
+
+       * lib/jit_ia64-cpu.c, lib/jit_ppc.c: Add minor special handling
+       required to implement jit_tramp, where a function descriptor
+       should not be added before a prolog, as jit_tramp means omit
+       prolog.
+
+       * check/lightning.c: Update test driver for the new interfaces.
+
+       * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add
+       a simple test and example of the jit_frame and jit_tramp
+       usage implementing a simple Fibonacci function using a
+       simulation of an interpreter stack and how it would handle
+       state in language specific variables.
+
+       * doc/body.texi: Add documentation for jit_frame and
+       jit_tramp.
+
+2014-09-29 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+       lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c,
+       lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c,
+       lib/jit_x86.c, lib/lightning.c: Allow jit_jmpi on a
+       target that is not a node. This may lead to hard to
+       debug code generation, but is a required feature for
+       certain generators, like the ones that used lightning
+       1.2x. Note that previously, but not really well
+       documented, it was instructed to use:
+       jit_movi(rn, addr); jit_jmpr(rn);
+       but now, plain:
+       jit_patch_abs(jit_jmpi(), addr);
+       should also work.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-sz.c: Generate information about instruction
+       lengths for more precise calculation of buffer size on
+       Windows x64. This change is specially important because
+       the maximum instruction length is larger than other
+       systems, what could cause an out of bounds write on
+       special conditions without this update.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Add workaround to conflicting global
+       optind variable in cygwin binutils that have an internal
+       getopt* implementation.
+
+       * lib/jit_x86-cpu.c: Add a simple define ffsl ffs if building
+       for 32 bit and there is no ffsl function.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+        * check/lightning.c: Add a hopefully temporary kludge to not use
+       sprintf and sscanf returned by dlsym. This is required to pass
+       the varargs test.
+
+        * include/lightning/jit_private.h: Use symbolic name for first
+       integer register argument, as this is different in sysv and
+       win64 abi.
+
+        * include/lightning/jit_x86.h: Add conditionals and definitions
+       for Windows x64 (under __CYGWIN__ preprocessor conditional).
+
+        * lib/jit_x86-cpu.c: Correct one instruction encoding bug, that
+       was working by accident. Only use rax to rdx for some byte
+       operations to work on compatibility mode (that is, to generate
+       the proper encoding, instead of actually generating encoding
+       for high byte registers, e.g. %bh).
+       Add proper prolog and epilog for windows x64.
+
+        * lib/jit_x86-sse.c: Correct a swapped rex prefix for float
+       operations.
+
+        * lib/jit_x86.c: Adjust to support Windows x64 abi.
+
+       * check/check.x87.nodata.sh: New file, previously used but that
+       was missing from git.
+
+2014-09-07 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Mark all registers advertised as live, as
+       per jit_callee_save_p as live whenever reaching a jump that
+       cannot be tracked. This is a rethink of the previous commit,
+       and is a better approach, otherwise there would not be much
+       sense on relying on jit_callee_save_p if it could not be
+       trusted.
+
+       * check/jmpr.tst, check/jmpr.ok: New files implementing a very
+       simple test case, that would actually cause an assertion on
+       code before the change to only mark as live when reaching a
+       jump that could not be tracked, the actually advertised as callee
+       save registers.
+
+       * check/Makefile.am: Update for new jmpr test case.
+
+2014-09-01 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Do not mark all registers in unknown state
+       as live on jit_jmpr, or jit_jmpi to an absolute address. Instead,
+       treat it as a function call, and only consider JIT_Vn registers
+       as possibly live.
+
+2014-08-29 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Add a proper info menu entry for
+       GNU lightning.
+
+       * doc/version.texi: Regenerate.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c,
+       lib/jit_arm-cpu.c, lib/jit_arm-vfp.c,
+       lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c,
+       lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-fpu.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c,
+       lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c,
+       lib/jit_s390x.c, lib/jit_sparc-cpu.c,
+       lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
+       lib/jit_x86-x87.c: Review generation of all branch
+       instructions and always add the jit_class_nospill
+       bitfield for temporary registers that cannot be spilled
+       because the reload would be after a conditional jump; the
+       patch only adds an extra assertion. These conditions do
+       not happen on documented lightning usage, but can happen
+       if one uses the not exported jit_get_reg and jit_unget_reg
+       calls and cause enough register starvation.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_alpha.c: Correct wrong bitmask of most argument
+       float register arguments, that were being set as callee
+       save instead of argument registers class.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-sz.c: Regenerate table of known maximum
+       instruction sizes for the software float fallback,
+       that implements "virtual" float registers in the stack
+       and operations as calls to libgcc.
+
+       * size.c: Correct typo in the generated jit_arm-sz.c file.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+        * include/lightning/jit_alpha.h, lib/jit_alpha-cpu.c,
+        lib/jit_alpha-fpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c:
+       New files implementing a lightning Alpha port. Thanks
+       to Trent Nelson and snakebit.net staff for providing access
+       to an Alpha system.
+
+        * check/float.tst, check/lightning.c, configure.ac,
+        include/lightning.h, include/lightning/Makefile.am,
+        include/lightning/jit_private.h, lib/Makefile.am,
+        lib/jit_disasm.c, lib/jit_size.c, lib/lightning.c:
+       Minor changes to adapt for the new Alpha port.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Always mark JIT_RET and JIT_FRET as
+       live in a function epilog. This is required because
+       on some ports a complex sequence, allocating one or more
+       registers, may be required to jump from a ret* to the
+       epilog, and the lightning api does not have annotations
+       to know if a function returns a value, or the type of
+       the return value.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Change the correct live bitmask of
+       return registers after a function call in jit_update.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Change assertions to have an int
+       result and correct a bad bit mask assertion.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64.c: Correct bad setup for assertion
+       of consistency before a patch.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Correct typo in the jit_bmsr
+       implementation that was using the wrong test result
+       register.
+
+2014-07-28 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_memory.c: Do not call free on NULL pointers.
+
+       * include/lightning/jit_private.h, lib/jit_note.c,
+       lib/lightning.c: Add a wrapper to memcpy and memmove
+       to not actually call those functions with a zero size
+       argument, and likely also a null src or dst.
+
+2014-07-27 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_disasm.c,
+       lib/lightning.c: Remove the global jit_progname variable.
+       It was being only used in jit_init_debug, that is called
+       from init_jit, so, just pass an argument.
+
+2014-07-27 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Add note that jit_set_memory_functions
+       should be called before init_jit, because init_jit
+       itself may call the memory wrappers.
+
+2014-04-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Do not get confused with default settings
+       if /proc is not mounted on Linux specific code path.
+
+2014-04-09 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+       include/lightning/jit_hppa.h, include/lightning/jit_ia64.h,
+       include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+       include/lightning/jit_private.h, include/lightning/jit_s390x.h,
+       include/lightning/jit_sparc.h, include/lightning/jit_x86.h:
+       Do not add jit_regset_t, JIT_RA0, and JIT_FA0 to the installed
+       header file. These types and definitions are supposed to be
+       only used internally.
+
+2014-04-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-cpu.c: Only adjust stack pointer in prolog if
+       stack space is needed, that is, do not emit a nop instruction
+       subtracting zero from the stack pointer.
+
+2014-04-04 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_disasm.c: Correct a crash in the doc/printf example
+       on arm due to releasing the data_info information in
+       jit_clear_state. This is a special case for arm only, and
+       actually, only armv5 or older uses the data_info buffer,
+       or when forcing arm instruction set mode despite thumb
+       being available.
+
+2014-03-12 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Write detailed description and examples for
+       jit_get_memory_functions, jit_set_memory_functions,
+       jit_get_code, jit_set_code, jit_get_data and jit_set_data.
+
+2014-03-12 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/lightning.c: Implement the new jit_set_data() interface,
+       and the new jit_get_data() helper. Like jit_set_code(),
+       jit_realize() should be called before jit_set_data().
+       The most common usage should be jit_set_data(JIT_DISABLE_DATA
+       | JIT_DISABLE_NOTE), to force synthesize any float/double
+       constant in the stack and not generate any debug information.
+
+       * lib/jit_note.c: Minor change to debug note generation as
+       now it uses an alternate temporary data buffer during constants
+       and debug generation to accommodate the possibility of the user
+       setting an alternate data buffer.
+
+       * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c,
+       lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c,
+       lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data.
+
+       * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c,
+       lib/jit_s390x-sz.c: Update for several instructions that now
+       have a different maximum length due to jit_set_data.
+
+       * lib/jit_mips-fpu.c: Implement jit_set_data, but missing
+       validation on n32 and n64 abis (and/or big endian).
+
+       * lib/jit_mips-sz.c: Update for changes in o32.
+
+       * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing
+       validation on Darwin PPC.
+
+       * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and
+       64 bit.
+
+       * lib/jit_ia64-fpu.c: Implement untested jit_set_data.
+
+       * TODO: Add note to list ports that were not tested for the
+       new jit_set_data() feature, due to no longer having access
+       to them.
+
+       * check/nodata.c: New file implementing a simple test exercising
+       several different conditions created by jit_set_data().
+
+       * check/check.nodata.sh: New file implementing a wrapper
+       over the existing *.tst files, that runs all tests without
+       using a data buffer for constants; only meaningful (and
+       enabled) on architectures that used to store float/double
+       constants on a read only data buffer.
+
+       * configure.ac, check/Makefile.am: Update for the new test
+       cases.
+
+       * check/lightning.c: Implement the new "-d" option that
+       sets an internal flag to call jit_set_data() to disable
+       constants and debug, that is, using only a pure code
+       buffer.
+
+2014-03-11 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/lightning.c: Implement the new jit_set_code() interface,
+       that allows instructing lightning to use an alternate code
+       buffer. The new jit_realize() function should be called
+       before jit_set_code(), and usually call jit_get_code()
+       to query the amount of bytes expected to be required for
+       the code.
+
+       * lib/jit_size.c: Minor update to have less chances of
+       miscalculating the code buffer by starting the counter
+       with the size of the longest instruction instead of zero,
+       as code emit fails if at any moment less than the longest
+       instruction bytes are available.
+
+       * check/setcode.c: New file implementing some basic tests
+       of the new jit_set_code() interface.
+
+       * check/Makefile.am: Update for newer test case.
+
+2014-03-06 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add the new
+       jit_indirect() call, that returns a special label node,
+       and tells lightning that the label may be the target of
+       an indirect jump.
+
+       * doc/body.texi: Document the new jit_indirect() call, and
+       add examples of different ways to create labels and branches.
+
+2014-02-23 Paulo Andrade <pcpa@gnu.org>
+
+       *  lib/jit_x86.c: Rewrite previous patch to inline save/restore
+       because clobbering %ebx in x86 is treated as an error
+       (jit_x86.c:239:5: error: PIC register clobbered by 'ebx' in 'asm').
+
+2014-02-19 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86.c: Rewrite incorrect inline assembly that could
+       truncate a variable in a callee save register. Now it simply
+       tells gcc that the register is clobbered, instead of using a
+       *32 bit* swap with a temporary variable. The problem only
+       happens when compiling with optimization.
+
+2014-02-19 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+       include/lightning/jit_hppa.h, include/lightning/jit_ia64.h,
+       include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+       include/lightning/jit_s390x.h, include/lightning/jit_sparc.h,
+       include/lightning/jit_x86.h: Change jit_regset_t to an
+       unsigned type, to allow safe right shift.
+
+       * lib/lightning.c: Rewrite jit_regset_scan1 to allow easier
+       compiler optimization.
+
+2013-12-03 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-x87.c: Correct wrong optimization when
+       loading the log(2) constant.
+
+2013-12-03 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c: Use the emms instruction before
+       calling any function. This is particularly important
+       when using c99 complex functions as it can easily
+       overflow the x87 stack due to the way lightning uses
+       the x87 stack as a flat register file.
+
+2013-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-x87.c: Correct wrong code generation due
+       to comparing the base and not the value register with
+       %st(0) in stxi_f.
+
+2013-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-x87.c, lib/jit_x86.c: Use 8 bytes aligned
+       stack offset for float/double x87 to/from sse move.
+
+2013-11-27 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, lib/jit_arm-swf.c, lib/jit_arm.c: Add
+       changes that should at least allow building lightning
+       on Apple iOS7.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c: Correct wrong shortcut for ldxi_l with
+       a zero offset, that was calling ldr_i instead of ldr_l.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_arm.h, lib/jit_arm-cpu.c: Do not use
+       by default load/store instructions that map to ldrt/strt.
+       There is already the long displacement version for positive
+       offsets, and when using a (shorter) negative offset it does
+       not map to ldrt/strt. At least on qemu strt may cause
+       reproducible, but unexpected SIGILL.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-vfp.c: Correct wrong load/store offset
+       calculation when the displacement is constant but too
+       large to use an instruction with an immediate offset.
+
+2013-10-07 Paulo Andrade <pcpa@gnu.org>
+
+       * check/self.c: Extend tests to validate jit_callee_save_p
+       does not cause an assertion on valid arguments, and test
+       extra registers defined on some backends.
+
+       * configure.ac: Do not ignore environment CFLAGS when
+       checking whether to test runtime configurable options,
+       like use x87 when sse2 is available, arm instruction set
+       instead of thumb, etc.
+
+       * include/lightning/jit_arm.h: Correct wrong jit_f macro
+       definition.
+
+       * include/lightning/jit_ia64.h, include/lightning/jit_ppc.h: 
+       Correct wrong jit_r macro definition.
+
+       * lib/jit_x86-x87.c, lib/jit_x86.c: Actually use the
+       reserved stack space for integer to/from float conversion.
+       The stack space was also changed to ensure it is 8 bytes
+       aligned. Also, for Solaris x86 in 32 bit mode, an alternate
+       truncr_d was implemented because for some reason it is
+       failing with SIGILL if using the "fisttpl" instructions,
+       that must be available on p6 or newer, but for the sake of
+       making all tests pass, implement a 486 or newer sequence
+       if "sun" is defined.
+
+2013-10-03 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+       lib/jit_mips-sz.c, lib/jit_mips.c, size: Build and
+       pass all test cases on Irix big endian mips using
+       the 64 bit abi.
+
+2013-10-02 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_mips.h: Add proper mips abi detection.
+
+2013-09-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_print.c: Do not crash if calling jit_print from
+       gdb before actually emitting code.
+
+       * lib/lightning.c: Correct misplaced check for already
+       visited blocks on conditional branches, which was preventing
+       proper merging of live bit masks of forward blocks.
+
+2013-09-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c: Correct not properly tested case of using
+       %r12 as index register, which was causing an invalid assertion.
+       %r12 is mapped to the "extra" JIT_R3 register, and test cases
+       only test "standard" lightning registers.
+
+2013-09-28 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64.c: Minor change to force collecting the maximum
+       instruction length in the --enable-devel-get-jit-size build
+       mode. The actual generated file did not change because the
+       sampling was large enough that it had already collected proper
+       information in the previously slightly buggy code (not forcing
+       a sync of the instructions that could be combined).
+
+2013-09-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Correct build when disassembler is
+       disabled.
+
+2013-09-25 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct some
+       off by one range checks (that were only accepting values
+       one less than the maximum allowed) and an invalid test
+       condition check that was forcing it to always use
+       indirect jumps even when reachable with an immediate
+       displacement.
+
+2013-09-24 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c,
+       lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c,
+       lib/jit_s390x-sz.c, lib/jit_size.c, lib/jit_sparc-sz.c,
+       lib/jit_x86-sz.c: New files implementing static tables
+       with longest known instructions length generated to match
+       a lightning instruction. These tables should make it very
+       unlikely to ever miscalculate the size of a code buffer,
+       or at least to miscalculate it by too much.
+
+       * lib/jit_size.c: New file that aids to either collect
+       jit code size information, or use the information depending
+       on build options.
+
+       * size.c: New helper file that parses input for, and creates,
+       an initial jit_$arch-sz.c file, that needs some minor edits
+       for arches with multiple configurations.
+
+       * configure.ac, Makefile.am: Add the new, devel mode only
+       --enable-devel-get-jit-size configure option, that sets
+       compile time flags to collect jit code size information,
+       that will be used as input for the "noinst size program".
+
+       * lib/jit_aarch64.c, lib/jit_arm.c, lib/jit_disasm.c,
+       lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_memory.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c,
+       lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c: Minor
+       changes for the --enable-devel-get-jit-size build mode,
+       as well as the "production build mode" with jit code
+       size information.
+
+2013-09-14 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add the new
+       jit_pointer_p interface, that returns a boolean value
+       telling if the pointer argument is inside the jit
+       code buffer. This is useful to avoid the need to add
+       extra labels and calls to jit_address to figure bounds
+       of code buffer, and still keep internal data private.
+
+2013-09-13 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_note.c: Change the code argument of jit_get_note
+       to a jit_pointer_t and make jit_get_note a public interface.
+       It was intended to be so from the start, as a way to map an
+       offset in the code to a function name, file name and line
+       number mapping.
+
+2013-09-11 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Correct reversed arguments in example of
+       usage in a (possibly) multi threaded, multiple jit_state_t
+       environments.
+
+       * include/lightning/jit_arm.h, include/lightning/jit_private.h,
+       lib/jit_arm-cpu.c, lib/jit_arm.c: Make a previously
+       undocumented global state private to the related jit_state_t
+       generating code.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+       * check/self.c, check/self.ok: New files implementing simple
+       consistency check assertions. At first, validate that some
+       macros that use values from different sources agree.
+
+       * check/Makefile.am: Update for the new test case.
+
+       * include/lightning.h,  lib/lightning.c: Add the new
+       jit_callee_save_p() call, that is intended to be used when
+       writing complex code using lightning, so that one does not
+       need to verify what backend is being used, or have access to
+       private data, to query if a register is callee save or not;
+       on several backends the scratch registers are actually callee
+       save.
+
+       * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+       include/lightning/jit_hppa.h, include/lightning/jit_mips.h,
+       include/lightning/jit_ppc.h, include/lightning/jit_sparc.h,
+       include/lightning/jit_x86.h: Add an explicit definition for
+       JIT_R3-JIT_Rn, JIT_V3-JIT_Vn and JIT_F6-JIT_Fn when applicable.
+       This allows one to write code based on "#if defined(JIT_XN)"
+       and therefore, not need to check what is the current backend
+       or have access to private data structures. This is particularly
+       useful when writing virtual machines with several specialized,
+       global registers.
+
+       * lib/jit_ia64.c: Properly flag the callee save general
+       purpose registers as such, so that jit_callee_save_p() works
+       as intended.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, configure.ac: Conditionally use the
+       code written to workaround a bug in the Hercules emulator,
+       as isnan and isinf are not available at least on HP-UX ia64.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_s390x-cpu.c: Spill/reload correct callee save
+       float registers.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_hppa-cpu.c: Correct code to call a function stored
+       in a register or a patched function address.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c: Correct incorrect logic when restoring
+       the value of the "r2" callee save register.
+
+2013-08-29 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-cpu.c, lib/jit_arm.c: Correct wrong test and update
+       of the thumb offset information, when checking if needing to
+       patch a jump from arm to thumb mode. The problem would happen when
+       remapping the code buffer, and the new address being lower than
+       the previous one.
+
+2013-08-26 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Extend FreeBSD test to also handle NetBSD.
+
+       * lib/jit_x86-cpu.c: Correct wrongly defined offset type of
+       ldxi_ui. Problem detected when building on NetBSD.
+
+       * lib/lightning.c: Adjust code to handle NetBSD mremap,
+       where arguments do not match Linux mremap.
+
+2013-08-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc.c: Correct C sequence point problem miscalculating
+       the actual function address in a function descriptor. Problem
+       happens with gcc 4.8.1 at least.
+
+2013-08-11 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_s390x-cpu.c: Correct code checking if immediate
+       fits instruction, but using the negated value.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_s390x.h, lib/jit_s390x-cpu.c,
+       lib/jit_s390x-fpu.c, lib/jit_s390x.c: New files
+       implementing the new s390x port.
+
+       * configure.ac, include/lightning.h,
+       include/lightning/Makefile.am,
+       include/lightning/jit_private.h,
+       lib/Makefile.am, lib/jit_disasm.c, lib/lightning.c:
+       Minor adaptation for the new s390x backend.
+
+       * check/float.tst: Update for the s390x result of
+       truncating +Inf to integer.
+
+       * check/qalu_mul.tst: Add extra test cases to better test
+       high word of signed multiplication as the result is
+       adjusted from unsigned multiplication on s390x.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Do not assume casting a double NaN or
+       Inf to float will produce the expected float NaN or Inf.
+       This is not true at least under s390x.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+       * check/check.arm.sh, check/check.sh, check/check.swf.sh,
+       check/check.x87.sh: Properly check test programs output,
+       not just rely on the test program self testing the results
+       and not crashing.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_aarch64.c: Remove unused macros left from cut&paste
+       of jit_arm.c.
+
+2013-07-16 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c,
+       lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files
+       implementing the new aarch64 port, as a new architecture,
+       not as an expansion of the existing armv[4-7] port.
+
+       * check/lightning.c: Add aarch64 support and a small
+       change to recognize character constants as immediate
+       values.
+
+       * check/float.tst: Add aarch64 preprocessor conditionals
+       to select proper expected value when converting [+-]Inf
+       and NaN to integer.
+
+       * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes
+       to better match the new aarch64 files.
+
+       * configure.ac, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_private.h,
+       lib/Makefile.am, lib/lightning.c: Minor adjustments
+       for the aarch64 port.
+
+2013-07-08 Paulo Andrade <pcpa@gnu.org>
+
+       * NEWS, THANKS, configure.ac, doc/version.texi: Update for
+       the 1.99a second alpha release.
+
+2013-06-25 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips.c: Correct cut&paste error that caused wrong
+       stack offset calculation for double arguments in stack in
+       the o32 abi.
+       Correct typo in the __LITTLE_ENDIAN macro name, that came
+       from cut&paste error in the original typo in lib/jit_ppc.c.
+
+       * lib/jit_ia64.c, lib/jit_ppc.c: Correct typo in the
+       __LITTLE_ENDIAN macro name.
+
+2013-06-22 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, configure.ac, include/lightning.h,
+       lib/lightning.c: Add tests and quirks to build/detect
+       and/or work on Irix.
+
+       * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+       lib/jit_mips-fpu.c, lib/jit_mips.c: Adapt code to run
+       in big endian mips, using the n32 abi.
+
+2013-06-18 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h: Minor extra preprocessor testing
+       to "detect" byte order on x86 solaris, that now builds
+       and passes all test cases.
+
+2013-06-18 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_sparc-cpu.c: Correct compiler warning of value
+       used before assignment. The usage is bogus as the api
+       requires always patching jumps, but the random value used
+       could cause an assertion due to invalid displacement.
+
+       * lib/jit_sparc.c: Always load and store double arguments
+       in stack as 2 float loads or stores, for safety, as unaligned
+       access is not allowed in Sparc Solaris.
+
+2013-06-14 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Force -mlp64 to CFLAGS on HP-UX ia64 port.
+       It is the only supported mode, and expects gcc as C compiler.
+
+       * include/lightning.h, lib/jit_ia64-cpu.c, lib/jit_ia64.c:
+       Correct ia64 port to work on HP-UX that runs it in big endian
+       mode.
+
+2013-06-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_hppa.c: Sanitize the cache synchronization inline
+       assembly code that was doing twice the work and redundantly
+       flushing the end address every loop iteration.
+
+2013-06-09 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, check/Makefile.am, doc/Makefile.am: Do not
+       explicitly link to -ldl, but instead autodetect the library
+       with dlopen, dlsym, etc.
+
+       * check/lightning.c: Add workaround to apparently buggy
+       getopt in HP-UX that sets optind to the wrong index, and
+       use RTLD_NEXT on HP-UX instead of RTLD_DEFAULT to dlsym
+       global symbols.
+
+       * include/lightning.h: Rework definitions of wordsize and
+       byte order to detect proper values on HP-UX.
+
+       * lib/lightning.c: Minor correction to use MAP_ANONYMOUS
+       instead of MAP_ANON on HP-UX.
+
+       * lib/jit_hppa.c: Float arguments must be passed on integer
+       registers on HP-UX, not only for varargs functions.
+         Add code to properly clear instruction cache. This was
+       not required on Debian hppa port, but may have been working
+       by accident.
+
+       * lib/jit_hppa-cpu.c: Follow pattern of HP-UX binaries and
+       use bve,n instead of bv,n to return from functions.
+
+       * lib/jit_hppa-fpu.c: For some reason "fst? frX,rX,(rY)" did
+       not work on the tested computer (HP-UX B.11.23 U 9000/785 HP-UX)
+       so the code was changed, at first for __hpux only to add the
+       base and offset register and use the instruction with an
+       immediate (zero) offset.
+
+2013-06-07 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, lib/jit_disasm.c, lib/jit_ppc-cpu.c,
+       lib/jit_ppc-fpu.c, lib/jit_ppc.c, include/lightning.h,
+       include/lightning/jit_ppc.h, include/lightning/jit_private.h:
+       Adapt code to work on 32 bit AIX ppc using gcc. Most changes
+       are basically to adapt the elf64 logic to 32 bit, as it does
+       not use the same convention of 32 bit Darwin ppc.
+
+       * check/stack.tst: Add a fake memcpy function to the test
+       case if running under AIX, as it is not available to dlsym.
+
+       * configure.ac: Check for getopt.h header, not available in
+       AIX.
+
+2013-06-01 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_hppa.h, lib/jit_hppa-cpu.c,
+       lib/jit_hppa-fpu.c, lib/jit_hppa.c: New files implementing
+       the hppa port. Built on Debian Linux PA-RISC 2.0, 32 bit.
+
+       * check/float.tst: Add preprocessor for hppa expected
+       values when converting NaN and +-Inf to an integer.
+
+       * check/ldst.inc: Ensure double load/store tests use an
+       8 byte aligned address by default.
+
+       * lib/lightning.c: Correct a bug found during tests in
+       the new port, where qmul* and qdiv* were not properly
+       setting one of the result registers as modified in the
+       function, which would be a problem if the only "write"
+       usage were the qmul* or qdiv*.
+
+       * check/varargs.tst, check/varargs.ok: Add one extra
+       interleaved integer/double test to validate proper code
+       generation in the extra case.
+
+       * check/lightning.c, configure.ac, include/lightning.h,
+       include/lightning/Makefile.am,
+       include/lightning/jit_private.h, lib/Makefile.am,
+       lib/jit_disasm.c: Update for the hppa port.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * check/varargs.tst: Correct misplaced .align directive
+       that was causing the double buffer to not be aligned at
+       8 bytes.
+       * lib/jit_ia64-cpu.c:
+         Properly implement abi for excess arguments passed on
+       stack.
+         Simplify load/store with immediate displacement argument
+       with zero value.
+         Simplify some calls to "subi" changing to "addi" with
+       a negative argument.
+         Remove some #if 0'ed code, that could be useful in
+       special conditions, but the most useful one would be
+       to "optimize" "static" jit functions, but for the sake
+       of simplicity, jit functions are implemented in a way
+       that can be passed back to C code as C function pointers.
+         Add an attribute to prototypes of several unused functions.
+       These functions are defined for the sake of implementing all
+       Itanium documented instructions, but a significant amount of
+       them is not used by lightning.
+       * lib/jit_ia64-fpu.c: Simplify load/store with zero immediate
+       displacement and add unused attribute for functions not used
+       by lightning, but required to provide macros implementing all
+       Itanium documented instructions.
+       * lib/jit_ia64.c: Update for the properly implemented abi
+       for stack arguments.
+       * lib/lightning.c: Mark an unused function as such.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c:
+         Correct immediate range check of integer comparisons when
+       inverting arguments.
+         Correct gei_u that was not decrementing immediate when
+       inverting arguments.
+         Correct b?add* and b?sub* that were not properly updating
+       the result register.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c: Correct wrong mapping of 2 instructions
+       in "M-, stop, M-, stop" translation, that was ignoring the
+       last stop (implemented as a nop I- stop).
+
+       * lib/jit_ia64-fpu.c: Properly implement fnorm.s and fnorm.d,
+       as well as the proper integer to float or double conversion.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c: Correct bogus implementation of ldr_T
+       for signed integers, that was using ld1.s, ld2.s and ld4.s.
+       The ".s" stands for speculative load, not sign extend.
+
+       * lib/jit_ia64-fpu.c: Correct bogus implementation of ldxr_T
+       for float and double. The third (actually, second) argument
+       is indeed added to the base register, but the base register
+       is modified. The actual M7 implementation was already correct,
+       just the ldxr_f and ldxr_d implementation that was kept in
+       a prototype state, misinterpreting what M7 does.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c: Correct X2 pattern matching by preventing
+       it to attempt to require a stop between the L and the X
+       instruction; that is, check the registers and predicates
+       before emitting the L instruction, not after.
+
+       * lib/jit_ia64-fpu.c: Slightly simplify and correct
+       divr_f and divr_d implementation.
+
+       * check/lightning.c: Add __ia64__ preprocessor define
+       on Itanium. 
+
+       * check/alu.inc, check/clobber.tst, check/float.tst: Define
+       several macros conditionally to __ia64__. This is required
+       because __ia64__ jit generation can use way too much memory,
+       due to not implementing instruction reordering to avoid
+       as much as possible "stops", which causes way too many nops
+       to be generated, as well as the fact that division and
+       remainder require function calls, and float division
+       requires significant code to implement.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h: Add new backend specific movr_w_d,
+       movr_d_w and movi_d_w codes as helpers to ia64 varargs
+       functions arguments.
+
+       * lib/jit_ia64-cpu.c:
+         Correct wrong encoding of A5 small integers.
+         Correct define of "mux" instruction modifiers.
+         Correct ordering of arguments and predicates of cmp_xy
+       implementation with immediate arguments; like most other
+       codes with an immediate, the immediate is the second, not
+       the third argument.
+
+       * lib/jit_ia64-fpu.c: Actual implementation of the code
+       to move to/from gpr to/from fpr, to implement varargs abi.
+
+       * lib/jit_ia64.c: Make fpr argument registers not allocatable
+       as temporaries, no need for the extra checks when there are
+       plenty of registers.
+
+       * lib/jit_print.c, lib/lightning.c: Minor updates for the
+       new movr_w_d, movr_d_w and movi_d_w codes.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct code to
+       also insert a stop to break an instruction group if a
+       register is written more than once in the same group.
+       This may happen if a register is argument and result of
+       some lightning call (not a real instruction). The most
+       common case should be code in the pattern:
+               movl rn=largenum
+               ...
+               mov rn=smallnum
+       where "rn" would end up holding "largenum".
+       But the problem possibly could happen in other circumstances.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+       lib/jit_ia64-fpu.c, lib/jit_ia64.c:
+         Relocate JIT_Rn registers to the local registers, as, like
+       float registers, div/rem and sqrt are implemented as function
+       calls, and may overwrite non saved scratch registers.
+         Change patch_at to receive a jit_code_t instead of a
+       jit_node_t, so that it is easier to "inline" patches when
+       some instruction requires complex code to implement, e.g.
+       uneq and ltgt.
+         Correct arguments to FMA and FMA like instructions that,
+       due to a cut&paste error were passing the wrong argument
+       to the related F- implementation function.
+         Rewrite ltgt to return the proper result if one (or both)
+       of the arguments is unordered.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_ia64.h, include/lightning/jit_private.h,
+       lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c,
+       lib/lightning.c: Rework code to detect need of a "stop" to
+       also handle predicates, as if a predicate is written, it
+       cannot be read in the same instruction group.
+         Use a single jit_regset_t variable for all registers when
+       checking need for a stop (increment value by 128 for
+       float registers).
+         Correct wrong "subi" implementation, as the code executed
+       is r0=im-r1, not r0=r1-im.
+         Use standard lightning 6 fpr registers, and rework to
+       use callee save float registers, that may be spill/reloaded
+       in prolog/epilog. This is required because some jit
+       instructions implementations need to call functions; currently
+       integer div/mod and float sqrt, what may change the value of
+       scratch float registers.
+         Rework point of "sync" of branches that need to return a
+       patch'able address, because the need for a "stop" before a
+       predicate read causes all branches to be the instruction
+       in slot 0, as there is no template to "stop" and branch
+       in the same instruction "bundle".
+
+2013-04-25 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+       lib/jit_ia64-fpu.c, lib/jit_ia64.c: New files implementing
+       the basic infrastructure of an Itanium port. The code
+       compiles and can generate jit for basic hello world like
+       functions.
+
+       * check/lightning.c, configure.ac, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_private.h,
+       lib/Makefile.am, lib/lightning.c: Update for the Itanium
+       port.
+
+       * lib/jit_mips-cpu.c, lib/jit_mips.c: Correct typo and
+       make the jit_carry register local to the jit_state_t.
+       This matches code reviewed in the Itanium port, that
+       should use the same base logic to handle carry/borrow.
+
+2013-04-10 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_arm.c,
+       lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_ppc-cpu.c,
+       lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c,
+       lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c,
+       lib/lightning.c: Change all jit_regset macros to take
+       a pointer argument, to avoid structure copies when
+       adding a port to an architecture with more than 64
+       registers.
+
+2013-04-08 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c, lib/jit_ppc.c: Do not rely on __clear_cache
+       aligning to the next page boundary the end argument. It may
+       actually truncate it.
+
+2013-03-29 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_memory.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c,
+       lib/lightning.c: Do not start over jit generation if can grow
+       the code buffer with mremap without moving the base pointer.
+
+2013-03-29 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_memory.c: Implement a simple memory allocation wrapper
+       to allow overriding calls to malloc/calloc/realloc/free, as well
+       as ensuring all memory containing pointers is zero or points to
+       allocated memory.
+
+       * include/lightning.h, include/lightning/jit_private.h: Definitions
+       for the memory allocation wrapper.
+
+       * lib/Makefile.am: Update for new jit_memory.c file.
+
+       * lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_note.c,
+       lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c:
+       Use the new memory allocation wrapper code.
+
+2013-03-22 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, include/lightning/jit_private.h, lib/lightning.c:
+       Remove dependency on gmp. Only a simple bitmap was required, and
+       that was not enough reason to force linking to gmp and possible
+       complications caused by it.
+
+2013-03-10 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h: Add check for __powerpc__ defined
+       in Linux, while Darwin defines __ppc__.
+
+       * include/lightning/jit_ppc.h: Adjust register definitions
+       for Darwin 32 bit and Linux 64 bit ppc usage and/or ABI.
+
+       * include/lightning/jit_private.h: Add proper check for
+       Linux __powerpc__ and a data definition for a workaround
+       to properly handle code that starts with a jump to a "main"
+       label.
+
+       * lib/jit_disasm.c: Add extra disassembler initialization
+       for __powerpc64__.
+
+       * lib/jit_ppc-cpu.c: Add extra macros and functions, and
+       correct/adapt previous ones to handle powerpc64.
+
+       * lib/jit_ppc-fpu.c: Adapt for 64 bit wordsize. Basically
+       add conversion from/to int32/int64 and proper handling of
+       load/store offsets too large for 32 bit.
+
+       * lib/jit_ppc.c: Add calls to 64 bit codes and adaptation
+       for the PowerPC 64 bit Linux ABI.
+
+       * lib/jit_arm.c, lib/jit_mips.c, lib/jit_sparc.c, lib/jit_x86.c,
+       lib/lightning.c: Correct off by one error when restarting jit
+       of a function due to finding too late that needs to spill/reload
+       some register. Problem was found by accident on a very special
+       condition during PowerPC 64 code adaptation.
+
+2013-03-08 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Add missing ppc preprocessor definition.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.tst: Comment out the int to negative infinity
+       test in mips for the moment because not all Loongson agrees
+       on the result.
+
+       * lib/jit_disasm.c: Add a test instead of an assertion
+       when loading symbols for disassembly due to a failure with
+       a simple binutils build in Debian mipsel64.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_arm-cpu.c,
+       lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips-cpu.c,
+       lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc-cpu.c,
+       lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c,
+       lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c,
+       lib/lightning.c: Add an extra structure for data storage
+       during jit generation, and release it after generating
+       jit, to reduce a bit memory usage, and also to make it
+       easier to understand what data is available during
+       jit runtime.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Make data and code buffer readonly.
+
+2013-02-20 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Fool proof validate the examples of what
+       an assembly-language programmer would write and correct the
+       wrong sparc example.
+
+2013-02-19 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Add back the SPARC code generation example.
+
+2013-02-19 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Remove state flag to work with partial
+       sparc port, by just disassembling if there was incomplete
+       code generation.
+
+       * jit_sparc-cpu.c: Correct wrong range check for immediate
+       integer constants (off by one bit shift).
+         Correct macro implementing equivalent "rd %y, rd" assembly.
+         Implement qmul* and qdiv*.
+
+       * jit_sparc.c: Update for qmul* and qdiv* and remove logic
+       to handle incomplete code generation during sparc port.
+
+2013-02-18 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.tst: Add sparc to list of known NaN and +-Inf
+       to integer conversion.
+
+       * check/lightning.c: Define __sparc__ to preprocessor in
+       the sparc backend.
+
+       * include/lightning/jit_private.h: Correct wrong definition
+       of emit_stxi_d, that has lived for a long time, but would
+       cause problems whenever needing to spill/reload a float
+       register.
+
+       * include/lightning/jit_sparc.h: Can only use %g2,%g3,%g4
+       for scratch variables, as other "global" registers are
+       reserved for the system, e.g. libc.
+         Reorder float register naming to make it easier to
+       access odd float registers, so that generating code for
+       pusharg and getarg is easier for the IR.
+
+       * lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c: Update to match
+       new code in jit_sparc-cpu.c. It must call jit_get_reg
+       with jit_class_nospill if using the register to move
+       an unconditional branch address to it, as the reload
+       will not happen (actually could happen in the delay
+       slot...)
+
+       * lib/jit_sparc-cpu.c: Correct wrong macro definition for
+       ldxr_s.
+         Properly implement div* and implement rem. Div* needs
+       to use the y register, and rem* needs to be synthesized.
+         Correct b?sub* macro definitions.
+
+       * lib/jit_sparc-fpu.c: Correct reversed float to/from double
+       conversion.
+         Correct wrong jit_get_reg call asking for a gpr and then
+       using the fpr with that number.
+         Correct wrong branch displacement computation for
+       conditional branches.
+
+       * lib/jit_sparc.c: Correct getarg_d and pushargi_d implementation.
+         Add rem* entries to the switch converting IR to machine code.
+
+       * lib/lightning.c: Correct a problem detected when adding
+       the jit_class_nospill flag to jit_get_reg, that was caused
+       when having a branch to an "epilog" node, what would cause
+       the code to think all registers in unknown state were live,
+       while in truth, all registers in unknown state in the
+       "just after return" point are actually dead.
+
+2013-02-17 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c,
+       lib/jit_sparc-fpu.c, lib/jit_sparc.c: New files implementing
+       the basic framework of the sparc port.
+
+       * configure.ac, include/lightning.h, include/lightning/Makefile.am,
+       include/lightning/jit_private.h, lib/jit_disasm.c: Update
+       for the sparc port framework.
+
+       * lib/jit_mips.c: Correct reversed retr/reti logic.
+
+       * lib/jit_ppc.c: Correct misspelled __LITTLE_ENDIAN.
+
+       * lib/lightning.c: Always do byte hashing in hash_data, because
+       the logic to "compress" strings causes large pointers to not
+       be guaranteed aligned at 4 byte boundaries.
+         Update for the sparc port framework.
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Correct jit_pushargi_f in the arm hardfp abi.
+       Most of the logic uses even numbered register numbers, so that
+       a float and a double can be used in the same register, but
+       the abi requires packing the float arguments, so jit_pushargi_f
+       needs to allocate a temporary register to modify only the
+       proper register argument (or be very smart to push two
+       immediate arguments if applicable).
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Implement the new
+       jit_clear_state and jit_destroy_state calls. jit_clear_state
+       releases all memory not required during jit execution; that
+       is, leaves only the mmap'ed data and code buffers allocated.
+       jit_destroy_state releases the mmap'ed buffers as well as
+       the jit_state_t object itself, that holds pointers to the
+       code and data buffers, as well as annotation pointers (for
+       disassembly or backtrace) in the data buffer.
+
+       * lib/jit_note.c: Correct invalid vector offset access.
+
+       * check/ccall.c, check/lightning.c, doc/ifib.c, doc/incr.c,
+       doc/printf.c, doc/rfib.c, doc/rpn.c: Use the new jit_clear_state
+       and jit_destroy_state calls, to demonstrate the new code to
+       release all jit memory.
+
+       * doc/body.texi: Add basic documentation and usage description
+       of jit_clear_state and jit_destroy_state.
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h, lib/jit_note.c, lib/lightning.c:
+         Store all annotation information in the mmap'ed area reserved for
+       read only data. This adds code to not allocate memory for jit_note_t
+       objects, and to relocate jit_line_t objects and its contents after
+       calculating annotation information. The jit_line_t objects are
+       relocated because it is not possible to always calculate before
+       hand data layout because note information may be extended or
+       redundant entries removed, as well as allowed to be added in
+       non sequential order.
+         A bug was also corrected in _jit_set_note, that was causing it
+       to allocate new jit_line_t objects when not needed. It was still
+       working correctly, but allocating way more memory than required.
+
+2013-02-05 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add the new jit_live code
+       to explicitly mark a register as live. It is required to avoid
+       assuming functions always return a value in the gpr and fpr return
+       register, and to avoid the need of some very specialized codes
+       that vary too much from backend to backend, to instruct the
+       optimization code the return register is live.
+
+       * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c,
+       lib/jit_x86.c: Update for the new jit_live code.
+
+       * check/ret.ok, check/ret.tst: New files implementing a simple
+       test case that would previously fail at least in ix86/x86_64.
+
+       * check/Makefile.am: Update for new "ret" test case.
+
+2013-02-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Validate and correct
+       problems in the qmul and qdiv ppc implementation.
+
+2013-02-04 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_mips-cpu.c,
+       lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c,
+       lib/jit_x86-cpu.c, lib/jit_x86.c, lib/lightning.c:
+       Implement the new qmul and qdiv instructions that return signed
+       and unsigned lo/hi multiplication result and div/rem division result.
+       These should be useful for jit translation of code that needs to
+       know if a multiplication overflows (no branch opcode added) or if
+       a division is exact (easy check if remainder is zero).
+
+       * check/lightning.c, lib/jit_print.c, check/Makefile.am,
+       check/all.tst: Update for the new qmul and qdiv instructions.
+
+       * check/qalu.inc, check/qalu_div.ok, check/qalu_div.tst,
+       check/qalu_mul.ok, check/qalu_mul.tst: New files implementing
+       simple test cases for qmul and qdiv.
+
+2013-01-30 Paulo Andrade <pcpa@gnu.org>
+
+       * doc/body.texi: Correct "jmpi" description that incorrectly
+       told it was possible to pass any address as jump target. The
+       only way to do that is "movi+jmpr".
+
+2013-01-30 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c: Correct undefined behavior code.
+       http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56143
+
+2013-01-29 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Use AC_CONFIG_HEADERS instead of AC_CONFIG_HEADER
+       to have HAVE_CONFIG_H defined with latest aclocal.
+
+       * include/lightning/jit_private.h, lib/lightning.c: Add new
+       abstraction to use a heuristic to calculate amount of space
+       required for jit generation, and code to reallocate buffer if
+       it was miscalculated.
+
+       * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c:
+       Update to use new code to estimate and resize of required buffer
+       for jit code.
+
+       * lib/jit_x86-cpu.c: Minor cosmetic change to avoid adding a
+       non required rex prefix when calling a function pointer stored
+       in a register.
+
+2013-01-24 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: "make debug" target should pass only
+       the main test tool program as argument for running gdb
+
+       * configure.ac: Add the --enable-assertions options.
+
+       * doc/Makefile.am, doc/body.texi, doc/lightning.texi:
+       Major rewrite of the documentation to match the current
+       implementation.
+
+       * doc/version.texi: Automatic date update.
+
+       * doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c:
+       Implementation of the documentation examples, that are also
+       compiled during a normal build.
+
+       * doc/p-lightning.texi, doc/porting.texi, doc/toc.texi,
+       doc/u-lightning.texi, doc/using.texi: These files were
+       renamed in the documentation rewrite, as the documentation
+       was significantly trimmed due to full removal of the porting
+       chapters. Better porting documentation should be added but
+       for the moment the documentation not matching the
+       implementation was just removed.
+
+2013-01-18 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_note.c: Correct bounds check and wrong code keeping
+       a pointer that could be changed after a realloc call.
+
+2013-01-18 Paulo Andrade <pcpa@gnu.org>
+
+       * check/3to2.tst, check/add.tst, check/allocai.tst, check/bp.tst,
+       check/call.tst, check/ccall.c, check/clobber.tst, check/divi.tst,
+       check/fib.tst, check/ldsti.tst, check/ldstr-c.tst, check/ldstr.tst,
+       check/ldstxi-c.tst, check/ldstxi.tst, check/ldstxr-c.tst,
+       check/ldstxr.tst, check/lightning.c, check/rpn.tst, check/stack.tst,
+       check/varargs.tst, include/lightning.h,
+       include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_disasm.c,
+       lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc.c, lib/jit_print.c,
+       lib/jit_x86.c, lib/lightning.c: Extend the "jit_note" abstraction
+       with the new "jit_name" call, that receives a string argument, and
+       should usually be called to mark boundaries of functions of code
+       generating jit (that is, it is not expected that the language
+       generating jit map its functions to jit functions).
+
+2013-01-17 Paulo Andrade <pcpa@gnu.org>
+
+       * check/add.tst, check/allocai.tst, check/bp.tst, check/divi.tst,
+       check/fib.tst, check/lightning.c, include/lightning/jit_arm.h,
+       include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+       include/lightning/jit_private.h, include/lightning/jit_x86.h:
+       Make JIT_RET, JIT_FRET and JIT_SP private. These should not be
+       used in any operations due to frequently having special
+       constraints (usually JIT_FRET). JIT_FP must be made available
+       because it must be used as the base register to access stack
+       space allocated with jit_allocai.
+
+2013-01-14 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add an extra align
+       argument to the jit_data call (that should be made private),
+       so that it should not align strings at 8 bytes.
+         Correct the jit_note call to include the null ending byte
+       when adding label/note names to the "jit data section".
+
+2013-01-11 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_note.c: New file implementing a simple string+integer
+       annotation, that should be used to map filename and line number
+       to offsets in the generated jit.
+
+       * include/lightning.h, lib/lightning.c: Update for the new
+       note code.
+         Add an extra mandatory argument to init_jit, that is used
+       as argument to bfd_openr.
+         Change from generic void* to char* the argument to jit_note
+       and add an extra integer argument, to map to filename and
+       line number.
+
+       * check/ccall.c, check/lightning.c, include/lightning/jit_private.h,
+       lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_print.c, lib/jit_x86.c, lib/Makefile.am: Update for the
+       new annotation code.
+
+       * configure.ac, check/Makefile.am: Update to work with latest
+       automake.
+
+2013-01-09 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/jit_arm.c, jit_mips-fpu.c,
+       lib/jit_mips.c, lib/jit_print.c, lib/jit_x86.c, lib/lightning.c:
+       Remove the jit_code_getarg_{f,d} and jit_code_pusharg{i,r}_{f,d}
+       calls, replacing them with the new, internal only, jit_movr_w_f,
+       jit_mov{r,i}_f_w, jit_movr_ww_d, and jit_mov{i,r}_d_ww, that
+       better describe the operation being done, and allow removing
+       the hackish code to detect special conditions for arm when
+       moving from/to vfp from/to a gpr register pair.
+       Rename jit_code_retval_{f,d} to jit_code_x86_retval_{f,d} as
+       it is specific to 32 bit x86, and used to move abi return
+       value in x87 register to a sse register.
+
+2013-01-05 Paulo Andrade <pcpa@gnu.org>
+
+       * check/ccall.c, check/ccall.ok: New test case to validate
+       interleaved calls from/to C code and jit.
+
+       * check/Makefile.am: Update for the new ccall test case.
+
+       * include/lightning.h, lib/lightning.c: Add the new jit_address
+       call that returns the real/final address of a "note" in the
+       generated jit. It requires a jit_node_t as returned by the
+       jit_note call, and is only valid after calling jit_emit.
+         Add an intermediate solution to properly handle arm
+       soft and softfp modes that move a double to an integer register
+       pair. Currently it just adds extra tests for the condition,
+       but the proper solution should be to have extra lightning
+       codes for these conditions, codes which should be only used
+       by the backends that need it, and merged with the existing
+       jit_pusharg*_{f,d}.
+
+       * include/lightning/jit_private.h: Add new jit_state_t flag
+       to know it finished jit_emit, so that calls to jit_address
+       are valid.
+
+       * lib/jit_mips.c: Correct abi implementation so that the
+       new ccall test case passes. Major problem was using
+       _jit->function.self.arg{i,f} as boolean values, but that
+       would cause lightning.c:patch_registers() to incorrectly
+       assume only one register was used as argument when calling
+       jit_regarg_p(); _jit->function.self.arg{i,f} must be the
+       number of registers used as arguments (in all backends).
+
+       * lib/jit_x86.c: Add workaround, by marking %rax as used,
+       to a special condition, when running out of registers and the
+       allocator trying to spill and reload %rax, but %rax was used
+       as a pointer to a function, what would cause the reload to
+       destroy the return value. This condition can be better
+       generalized, but the current solution is good enough.
+
+       * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c, lib/jit_ppc.c:
+       Rewrite logic to handle arguments, as the original code was
+       written based on a SysV pdf about the generic powerpc ABI,
+       what did "invent" a new abi for the previous test cases, but
+       failed in the new ccall test in Darwin PPC. Now it properly
+       handles 13 float registers for arguments, as well as proper
+       computation of stack offsets when running out of registers
+       for arguments.
+
+2013-01-02 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.tst: Correct test case to match ppc also
+       converting positive infinity to 0x7fffffff.
+
+       * lib/jit_arm-swf.c: Correct typos with double underscores.
+
+       * lib/lightning.c: Correct remaining wrong reverse jump logic.
+
+2012-12-29 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Correct both, wrong and confusing logic
+       to compute the reverse of a jump. Now it properly matches
+       C semantics for "eq" (==) and "ne" (!=) and correct computation
+       of reverse of "uneq" as "gt".
+
+       * check/branch.tst: Update "ne" float branch check that
+       previously happened to be wrongly tested with a NaN argument.
+
+2012-12-29 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.ok, check/float.tst: New test cases implementing
+       extensive validation of float comparison and branch code
+       generation as well as integer conversion, involving NaN and
+       [+-]Inf.
+
+       * lib/jit_arm-swf.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+       Correct bugs found by new float test case.
+
+       * lib/jit_x86.c: Correct cut&paste error added in commit to
+       convert jit_arg* return value to a jit_node_t*, that would
+       cause it to not properly handle double arguments in ix86.
+
+       * check/Makefile.am: Update for the new test case.
+
+2012-12-28 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c, include/lightning.h, lib/jit_arm.c,
+       lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c, lib/jit_x86.c,
+       lib/lightning.c: Change return value of jit_arg{,_f,_d} to
+       a jit_node_t* object, that should be used as argument to
+       jit_getarg_{c,uc,s,us,i,ui,l,f,d}. This just requires changing
+       from jit_int32_t to jit_pointer_t (or jit_node_t*) the "handle"
+       for the getarg calls, with the benefit that it makes it easy
+       to implement patching of the stack address of non register
+       arguments, this way allowing to implement variable size stack
+       frames if applicable; useful if there are too many registers and
+       jit functions use only a few callee save registers.
+
+2012-12-27 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c, lib/jit_mips-cpu.c, lib/jit_mips.c: Correct
+       regressions when patching jit_calli for a forward function.
+
+       * lib/jit_ppc-cpu.c: Correct wrong arguments to ANDI opcode
+       in jit_getarg_u{c,s} implementation.
+
+2012-12-23 Paulo Andrade <pcpa@gnu.org>
+
+       * check/call.ok, check/call.tst: New test cases to validate
+       simple typed argument and return values in function calls.
+
+       * check/lightning.c: Properly handle jit_movi of labels for
+       backward and forward code labels.
+
+       * check/Makefile.am: Update for new test case.
+
+2012-12-23 Paulo Andrade <pcpa@gnu.org>
+
+       * check/carry.ok, check/carry.tst: New test case to validate
+       carry condition handling.
+
+       * check/Makefile.am: Update for new test case.
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Implement logic for
+       jit_htonr for big endian, so that ppc (big endian) passes the
+       new clobber.tst test case.
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm.c: Correct use of wrong argument offset
+       variable in armv7l or float/double argument for varargs
+       function in armv7hl.
+         Correct jit_getarg* logic in software float mode to
+       match expected behavior in other backends, that is, if
+       a function is not called, it is safe to use a few lightning
+       calls before a next jit_getarg* call, as done in the test
+       case check/stack.tst. The proper solution should be to
+       extend the parser in lib/lightning.c to check if there is
+       some float operation that will call some (libgcc?) function,
+       but software float arm should be a very uncommon backend for
+       lightning, so, just load the already in place arguments
+       saved to stack, assuming the register argument was clobbered
+       (what should not be the case most times...).
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+       * check/clobber.ok, check/clobber.tst: New test case doing
+       extensive validation tests to ensure registers not used in
+       an operation are not clobbered.
+
+       * check/Makefile.am: Update for new test case.
+
+2012-12-21 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/lightning.c: Partially rewrite/revert code to compute
+       initial register live state at the start of a basic block.
+       The original logic was corrupted when adding optimizations
+       to do as few computations as possible in jit_update. The
+       reglive field must be always a known set of live registers
+       at the start of a basic block. The value that was incorrect
+       was the regmask field, that must be the set of registers
+       that are in unknown state, because they are not known live,
+       neither set (or possibly not set) in the basic block, and
+       *must* store the state at the start of the basic block.
+
+2012-12-20 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_ppc.h: Correct mismatch of JIT_F{1,5}
+       with enum codes, that were correct, and returned by jit_f().
+
+       * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: Properly
+       implement and better describe values when generating stack
+       frames.
+
+2012-12-18 Paulo Andrade <pcpa@gnu.org>
+
+       * check/stack.ok, check/stack.tst: New files to test data
+       integrity on a deep chain of stack frames.
+
+       * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
+       lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
+       lib/jit_x86.c, lib/jit_x86-cpu.c: Calculate _jit->function->stack
+       in the emit stage, otherwise it will calculate it wrong if
+       need to jit_allocai space to spill registers.
+
+       * lib/lightning.c: Correct wrong offset when updating the
+       "current" jit function pointer in the code that may need to
+       allocate stack space to spill registers.
+
+       * check/lightning.c: Correct off by one data space check.
+
+       * check/Makefile.am: Update for new test case.
+
+2012-12-17 Paulo Andrade <pcpa@gnu.org>
+
+       * check/fop_abs.ok, check/fop_abs.tst, check/fop_sqrt.ok,
+       check/fop_sqrt.tst: New files implementing simple test cases
+       for the extra float operations.
+
+       * check/Makefile.am: Update for new test cases.
+
+       * check/alu.inc: Add an extra macro to check for unordered
+       equality on tests where it is expected to use NaN as an
+       argument.
+
+       * check/lightning.c: Minor change for proper/common argument
+       syntax handling omitting arguments to options.
+
+2012-12-17 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Automatically generate pattern list
+       of tests with alternate jit generation options. This should
+       prevent typos and needing to change multiple places after
+       a change.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+       * check/lightning.c: Remove the ".cpu name value" syntax,
+       as it was not able to do proper changes before the jit
+       internal data structure was initialized. Now it supports
+       several getopt options to force using different jit
+       generation options, effectively replacing the previous
+       syntax.
+
+       * check/run-test: Add simple extra logic to handle differently
+       named test scripts, used to test things like x87 coprocessor
+       in ix86, and arm instruction set or software float in armv7l.
+
+       * configure.ac: Add some AC_RUN_IFELSE calls to figure at
+       compile time if can test different code generation options,
+       and update Makefile generation accordingly.
+
+       * check/Makefile.am, lib/jit_arm.c, lib/jit_x86.c: Update to
+       properly work with the test tool updating the jit_cpu global
+       information.
+
+       * check/check.arm.sh, check/check.swf.sh, check/check.x87.sh:
+       New wrapper files passing -mthumb=0, -mvfp=0 and -mx87=1 to
+       the test tool, if applicable, so that it can validate alternate
+       code generation options on test hosts that support them.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-x87.c, lib/jit_x86.c: Correct test cases in ix86
+       when using the x87 coprocessor instead of sse2+.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c,
+       lib/lightning.c: Make jit_ellipsis implementation not
+       backend specific. It is not intended to handle va_list
+       like objects at runtime, as jit_arg* and jit_getarg*
+       return constant values resolved at parse time, so, effectively
+       it is not possible to create printf like jit functions, as
+       there is no va_start, va_arg, va_end, etc, abstraction. This
+       limitation should be kept for the sake of making new ports
+       easier.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/lightning.c: Add two extra wrapper
+       functions to avoid need for excess pointer to/from word casts.
+
+       * check/lightning.c: Only need for pointer to/from word cast
+       now is jit_movi, update accordingly.
+
+2012-12-13 Paulo Andrade <pcpa@gnu.org>
+
+       * check/varargs.ok, check/varargs.tst: New test cases implementing
+       simple varargs calls with a large amount of arguments to exercise
+       excess arguments on stack.
+
+       * include/lightning.h: Include config.h if HAVE_CONFIG_H is
+       defined.
+
+       * lib/jit_arm.c: Allocate a fpr register, not a gpr one for
+       temporary when pushing varargs arguments in the stack.
+
+       * lib/jit_arm-swf.c: Correct code changing the wrong offset
+       in jit_absr_d and jit_negr_d in software float.
+
+       * lib/jit_mips.c: Correct calculation of offsets of arguments
+       on stack.
+
+       * lib/jit_ppc.c: Correct bogus logic for "next" offset of arguments
+       on stack and adjust for fixed offset of stack arguments.
+
+2012-12-12 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning.h, lib/jit_arm.c, lib/jit_mips.c,
+       lib/jit_ppc.c, lib/jit_x86.c, lib/lightning.c: Change jit_prepare
+       to no longer receive an argument. If receiving an argument, it
+       should be an ABI specifier, not a boolean if varargs or not,
+       and add the new jit_ellipsis call, to specify where the
+       ellipsis is in the C prototype of the function being called.
+       Note that currently it is not supported to define varargs
+       functions and it will be ignored if calling jit_ellipsis not
+       in a prepare/finish* block, but this should be addressed.
+
+       * check/allocai.tst, check/alu_add.tst, check/alu_and.tst,
+       check/alu_com.tst, check/alu_div.tst, check/alu_lsh.tst,
+       check/alu_mul.tst, check/alu_neg.tst, check/alu_or.tst,
+       check/alu_rem.tst, check/alu_rsh.tst, check/alu_sub.tst,
+       check/alu_xor.tst, check/alux_add.tst, check/alux_sub.tst,
+       check/bp.tst, check/branch.tst, check/cvt.tst, check/divi.tst,
+       check/fib.tst, check/ldsti.tst, check/ldstr-c.tst,
+       check/ldstr.tst, check/ldstxi-c.tst, check/ldstxi.tst,
+       check/ldstxr-c.tst, check/ldstxr.tst, check/rpn.tst,
+       check/lightning.c: Update for the change to jit_prepare and
+       addition of jit_ellipsis.
+
+2012-12-11 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc-cpu.c: Make movr a function that checks arguments
+       so that other code can safely assume it is a noop if src and dst
+       are the same register.
+         Implement rem{r,i}{,_u} as a div{,u}/mul/sub.
+         Correct ANDIS, ORIS and XORIS calls to cast the argument to
+       unsigned before the shift to avoid an assertion if the argument
+       had the topmost bit set.
+         Implement lshi, rshi and rshi_u as functions to test for a
+       zero argument, that would otherwise trigger an assertion when
+       computing the shift value.
+         Do a simple implementation of bm{s,c}{r,i} with a temporary,
+       "andr" of arguments and jump based on comparison with zero.
+         Correct typo in ldxi_c.
+
+       * lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*.
+
+       * lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer
+       arguments in registers. If calling a varargs function and
+       passing a float or double argument, also either store the
+       value in the stack or in integer registers, as varargs functions
+       do not fetch it from float registers.
+         Add "case" for new functions and incorrectly missing ones.
+         Call libgcc's __clear_cache, that should know what to do
+       if the hardware needs flushing cache before execution.
+
+       * lib/lightning.c: Do a simple/trivial logic in jit_regset_scan1,
+       that should make it easier for the compiler to optimize it, and
+       that also corrects the previously wrong code for big endian, and
+       that was causing problems in ppc due to not saving all callee save
+       registers as it was not "finding" them in the regset due to the
+       little endian assumption bug.
+
+2012-12-11 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Only default to using the builtin disassembler
+       if on GNU/Linux. This should be temporary, due to requiring
+       /proc/self/exe.
+         Correctly check $target_cpu for powerpc.
+
+       * include/lightning/jit_ppc.h: Correctly implement jit_v_num.
+
+       * include/lightning/jit_private.h: Declare proper prototype
+       for jit_init_debug and jit_finish_debug.
+
+       * lib/jit_ppc-cpu.c: Remove code to save/restore callee save
+       float registers, as it is not required since those float
+       registers are not usable currently.
+         Change prolog and epilog generation to, at least comparing
+       code, match what gcc generates in "gcc -O0", but it is still
+       failing in Darwin PPC, apparently due to the __clear_cache
+       call not being enough, as frequently it will also fail to
+       execute, and the code buffer is all zeroes.
+
+       * lib/lightning.c: Do not fail in jit_regset_scan1 calls due
+       to passing 64 as argument on computers with 64 registers.
+
+2012-12-10 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Correct all current test cases.
+         Call the "xori" not the "XORI" macro for jit_xori implementation,
+       as the XORI macro handles only 16 bit unsigned values.
+         Call the "movr" macro, not the "movi" macro in the special
+       case of adding or subtracting zero.
+         Use the proper temporary register in the jit_andr implementation.
+
+2012-12-09 Paulo Andrade <pcpa@gnu.org>
+
+       * check/alu.inc, check/alu_add.ok, check/alu_add.tst,
+       check/alu_and.ok, check/alu_and.tst, check/alu_com.ok,
+       check/alu_com.tst, check/alu_div.ok, check/alu_div.tst,
+       check/alu_lsh.ok, check/alu_lsh.tst, check/alu_mul.ok,
+       check/alu_mul.tst, check/alu_neg.ok, check/alu_neg.tst,
+       check/alu_or.ok, check/alu_or.tst, check/alu_rem.ok,
+       check/alu_rem.tst, check/alu_rsh.ok, check/alu_rsh.tst,
+       check/alu_sub.ok, check/alu_sub.tst, check/alu_xor.ok,
+       check/alu_xor.tst, check/alux_add.ok, check/alux_add.tst,
+       check/alux_sub.ok, check/alux_sub.tst, check/branch.ok,
+       check/branch.tst: New test cases for arithmetic and branch
+       tests.
+
+       * check/Makefile.am: Update for new test cases.
+
+       * include/lightning/jit_private.h: Make the jit_reg_free_p
+       macro shared by all backends. Previously was added for the
+       arm backend, but is useful in the x86_64 backend when checking
+       state of "special purpose register".
+       Also add the new jit_class_named register class, that must be
+       or'ed with the register value if calling jit_get_reg expecting
+       a specific value, because the specific register value may be
+       zero, that previously was treated as no register requested.
+
+       * lib/jit_arm-cpu.c: Correct argument order for T2_MVN.
+
+       * lib/jit_arm-swf.c: Call the proper function for double
+       divide. The "software float" implementation just calls
+       libgcc functions.
+
+       * lib/jit_arm.c: Return float/double values in the float
+       register if using the hard float ABI.
+
+       * lib/jit_x86-cpu.c: Change the can_sign_extend_int_p macro
+       to not include -0x80000000L, because there is code that
+       "abuses" it and thinks it can negate the immediate value
+       after calling that macro.
+         Correct implementation of jit_subi that had a wrong code
+       patch logic doing subtraction with reversed arguments.
+         Correct REX prefix calculation in the jit_muli implementation.
+         Correct logic to get/unget %*ax and %*dx registers in divremr
+       and divremi.
+         Correct divremi that was using the symbolic, unique %*ax
+       value in one place (not using the _REGNO name suffix).
+         Correct cut&paste error causing it to use "xor" instead of
+       "or" in one code path of the jit_ori implementation.
+         Correct several flaws when clobbering registers and/or when
+       one of the arguments was %*cx in the rotshr wrapper function
+       implementing most shift operations.
+
+       * lib/lightning.c: No longer expect that the backend be smart
+       enough to know what to do when asking for a named register
+       if that register is already an argument or is live. It fails
+       if it is an argument, or if register is live, fails if cannot
+       spill.
+         No longer incorrectly assume that eqr_{f,d} and ltgtr_{f,d} are
+       safe to inverse value tests in jump thread optimization.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new
+       "cvt" test case to test conversion from/to int/float types.
+
+       * check/lightning.c: Only define truncr_{f,d}_l in 64 bit mode.
+
+       * include/lightning.h: Correct typo that caused it to define
+       jit_truncr_{f,d}_l in 32 bit mode.
+
+       * lib/jit_arm-cpu.c: Avoid assertion failure in the signed/unsigned
+       extend opcodes generation as it shares an interface for 3 argument
+       opcode generation.
+
+       * lib/jit_x86-cpu.c: Correct wrong argument passed to
+       jit_unget_reg in the andi implementation and wrong byte
+       unsigned extend code generation.
+
+       * lib/jit_x86-sse.c: Correct conversion from "word" to float or
+       double as is dependent on wordsize.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * check/ldstr-c.ok, check/ldstr-c.tst, check/ldstxi-c.ok,
+       check/ldstxi-c.tst, check/ldstxr-c.ok, check/ldstxr-c.tst:
+       New test case files testing load clobbering the base and/or
+       index register.
+
+       * check/ldst.inc: New file with common definition for all the
+       ldst* test cases.
+
+       * check/Makefile.am, check/ldsti.tst, check/ldstr.tst,
+       check/ldstxi.tst, check/ldstxr.tst: Update for new common
+       definitions file and new register clobber ldst tests.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-fpu.c: Correct wrong register order in stxr_{f,d}
+       in the mips backend.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_arm-vfp.c: Correct regression found in armv7l with
+       latest test cases.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * check/ldstxi.tst, check/ldstxr.tst: Correct wrong argument
+       order for 32 bit mode tests.
+
+       * configure.ac: Correct check for ix86 target_cpu.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+       * check/ldstr.ok, check/ldstr.tst, check/ldsti.ok,
+       check/ldsti.tst, check/ldstxr.ok, check/ldstxr.tst,
+       check/ldstxi.ok, check/ldstxi.tst:
+       New test case files exercising a very large amount of
+       register combinations to verify load/store implementation.
+
+       * check/Makefile.am: Update for new test cases.
+
+       * lib/jit_x86-cpu.c: Correct wrong argument order when
+       computing REX prefix for {ld,st}r_T codes.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-fpu.c, lib/jit_mips.c: Implement missing mips
+       jit_sqrtr_{f,d} codes.
+
+       * check/all.tst, include/lightning.h, lib/jit_print.c: Change
+       declaration order and call order in all.tst of {add,sub}c and
+       {add,sub}x. *c must be called before to set the carry and *x
+       second to use the carry and keep it set. The wrong call order
+       was causing all.tst to fail in mips, where a register is
+       allocated to keep a global carry state.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+       lib/jit_mips-fpu.c, lib/jit_mips.c: Correct float/double
+       argument handling and make the mips backend pass the initial
+       test cases.
+
+       * include/lightning.h, lib/jit_print.c, lib/lightning.c:
+       Add extra enum values for argument handling functions that
+       could not be abstracted to the current codes, that is, when
+       float values need to move from/to gpr from/to fpr. It would
+       be more tempting to add such primitives, but they would have
+       wordsize limitations, and it is not expected to add codes
+       with one gpr argument for 64 bit and two for 32 bit.
+
+       * lib/jit_ppc.c: Check _jit->function before calling jit_epilog()
+       to avoid a runtime exception.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_mips.h, lib/jit_mips.c: Update to
+       make the mips backend compile in a qemu image.
+
+       * lib/jit_ppc.c: Minor adaptations to help in having the
+       ppc backend compilable.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, include/lightning/jit_private.h, lib/jit_arm-cpu.c,
+       lib/jit_arm-swf.c, lib/jit_arm.c, check/Makefile.am: Correct
+       implementation of the arm backend port to build and pass the
+       current test cases. Tested on armv7 with softfp abi.
+
+       * lib/jit_disasm.c: Rename and change prototype of static
+       disassemble function as in the arm backend it is required
+       to access state information stored in the jit_state_t object.
+
+       * check/3to2.tst, check/add.tst: Correct test case code assuming
+       JIT_R0 and JIT_RET are the same, and even if they are the same,
+       the logic was incorrect because it must always call jit_retval*
+       to fetch a function call return before any other instruction.
+       The arm backend has a special condition if jit_retval is not
+       called, because "r0" is not JIT_R0, but is JIT_RET and *also*
+       the first argument for a called function, so JIT_RET must be
+       only used as an argument to jit_retval.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+       * check/all.tst, check/lightning.c: Only declare or use 64 bit
+       interfaces on 64 bit builds.
+
+       * check/fib.tst: Use simpler logic to not need preprocessor
+       conditionals for 32 or 64 bit.
+
+       * include/lightning.h: Only declare 64 bit macros on a 64 bit
+       build. Code using lightning must know about wordsize and the
+       jit generation limitations, also, this way it generates a
+       compile time failure, not a runtime assertion.
+
+       * include/lightning/jit_x86.h: Correct typo in macro name.
+
+       * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
+       lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c, 
+       lib/jit_x86.c, lib/jit_x86-cpu.c: Correct wrong code to get
+       current jit function pointer.
+
+       * lib/lightning.c: Move call to the simplify() optimization
+       to after register liveness is known. Previous code did work
+       by accident but now with proper test cases the problem was
+       noticed.
+
+       * lib/jit_disasm.c: Always cast bfd_vma to long long when
+       passing it as printf argument.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac, check/Makefile.am, check/check.sh,
+       doc/Makefile.am, include/lightning/Makefile.am,
+       lib/Makefile.am: Correct make distcheck.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_ppc.c: Assign copyright ownership to FSF.
+
+       * lib/jit_x86-cpu.c: Correct integer multiplication that was
+       generating code with reversed register arguments.
+
+       * check/rpn.ok, check/rpn.tst: New test case file.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+       Actually change copyright owner to FSF as advertised.
+
+       *  lib/jit_arm-cpu.c,  lib/jit_arm-swf.c,
+       lib/jit_arm-vfp.c, lib/jit_arm.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: New
+       files implementing initial code for different jit backends.
+
+       * include/lightning/jit_private.h: Add extra field to the
+       private jit_patch_t type, required by the arm port.
+
+       * lib/Makefile.am: Update for the new backend implementation
+       files.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Add proper "make clean" rule and missing
+       check.sh to EXTRA_DIST.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * .gitignore: Update pattern of ignored files.
+
+       * check/Makefile.am: Add rule to build liblightning.la dependency
+       in case of running "make check" before building the library.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * lightning/Makefile.am, lightning/asm-common.h,
+       lightning/core-common.h, lightning/fp-common.h,
+       lightning/funcs-common.h, lightning/i386/Makefile.frag,
+       lightning/i386/asm-32.h, lightning/i386/asm-64.h,
+       lightning/i386/asm.h, lightning/i386/core-32.h,
+       lightning/i386/core-64.h, lightning/i386/core.h,
+       lightning/i386/fp-32.h, lightning/i386/fp-64.h,
+       lightning/i386/fp.h, lightning/i386/funcs.h,
+       lightning/ppc/asm.h, lightning/ppc/core.h,
+       lightning/ppc/fp.h, lightning/ppc/funcs.h,
+       lightning/sparc/asm.h, lightning/sparc/core.h,
+       lightning/sparc/fp.h, lightning/sparc/funcs.h:
+       Removed. The core logic is used in the new code, and new mips
+       and arm ports will be added. At first, sparc will not be
+       supported as it has not yet been ported to the new engine.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * tests/Makefile.am, tests/3to2.c, tests/3to2.ok, tests/add.c,
+       tests/add.ok, tests/allocai.c, tests/allocai.ok, tests/bp.c,
+       tests/bp.ok, tests/divi.c, tests/divi.ok, tests/fib.c, tests/fib.ok,
+       tests/fibdelay.c, tests/fibdelay.ok, tests/fibit.c, tests/fibit.ok,
+       tests/funcfp.c, tests/funcfp.ok, tests/incr.c, tests/incr.ok,
+       tests/ldst.c, tests/ldst.ok, tests/ldxi.c, tests/ldxi.ok,
+       tests/modi.c, tests/modi.ok, tests/movi.c, tests/movi.ok,
+       tests/printf.c, tests/printf.ok, tests/printf2.c, tests/printf2.ok,
+       tests/ret.c, tests/ret.ok, tests/rpn.c, tests/rpn.ok, tests/rpnfp.c,
+       tests/rpnfp.ok, tests/sete.c, tests/sete.ok, tests/testfp.c,
+       tests/testfp.ok, tests/run-test: Removed previous test suite, in
+       favor of a newer one in the check subdirectory.
+
+       * check/3to2.ok, check/3to2.tst, check/add.ok, check/add.tst,
+       check/allocai.ok, check/allocai.tst, check/bp.ok, check/bp.tst,
+       check/divi.ok, check/divi.tst, check/fib.ok, check/fib.tst:
+       New sample input for the new test program, loosely matching
+       several of the previous test cases.
+
+       * check/Makefile.am: New test suite makefile.
+
+       * check/check.sh, check/run-test: New wrapper files for the
+       new test suite.
+
+       * check/lightning.c: New file. The main driver of the new test
+       suite, that compiles to a parser of a very simple assembly like
+       language, generates jit and executes it.
+
+       * check/all.tst: New file. A generic debug and sample test file
+       with a directive to prevent it from being executed, and useful to
+       read disassembly of all possible instructions, using a fixed set
+       of registers.
+
+       * include/Makefile.am, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_arm.h,
+       include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+       include/lightning/jit_private.h, include/lightning/jit_x86.h,
+       lib/Makefile.am, lib/jit_disasm.c, lib/jit_print.c,
+       lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c,
+       lib/jit_x86.c, lib/lightning.c: New files. These files are
+       written from scratch, only by <pcpa@gnu.org>, and have now
+       copyright assignment to the FSF. This is the core of the new
+       lightning rework. Previously it was integrated in code with
+       a garbage collector and several custom types like vectors and
+       hash tables, so this first code merge with lightning converts
+       that code into a library extracting only the jit bits, and at
+       first only for x86_64 GNU/Linux.
+
+       * lightning.h, m4/lightning.m4: Removed. These are no longer
+       required in the new lightning code.
+
+       * .gitignore, Makefile.am, configure.ac: Update for the new
+       lightning code.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+       * .cvsignore: Removed for extra cleanup.
+
+       * build-aux: Rename directory to m4.
+
+       * m4: Renamed to "default" name and for consistency with merge
+       with code rework to be imported in lightning.
+
+       * .gitignore, configure.ac, Makefile.am, doc/Makefile.am:
+       Update for build-aux to m4 rename.
+
+2012-12-01 Paulo Andrade <pcpa@gnu.org>
+
+       * opcode/Makefile.am, opcode/Makefile.in, opcode/ansidecl.h,
+       opcode/bfd.h, opcode/dis-asm.h, opcode/dis-buf.c, opcode/disass.c,
+       opcode/i386-dis.c, opcode/i386.h, opcode/ppc-dis.c, opcode/ppc-opc.c,
+       opcode/ppc.h, opcode/sparc-dis.c, opcode/sparc-opc.c, opcode/sparc.h,
+       opcode/sysdep.h: Removed. Do not bundle GNU binutils files.
+
+       * aclocal.m4, configure, Makefile.in, config.h.in, doc/Makefile.in,
+       lightning/Makefile.in, tests/Makefile.in: Removed. Do not maintain
+       autogenerated files that also generate too much diff noise when
+       regenerated in git.
+
+       * build-aux/help2man, build-aux/texinfo.tex, build-aux/texi2dvi:
+       Removed. Build environment must have an up to date version from
+       upstream installed.
+
+       * build-aux/config.guess, build-aux/config.sub, build-aux/depcomp,
+       build-aux/install-sh, build-aux/mdate-sh, build-aux/missing: Removed.
+       Do not maintain a copy of automake files in git. Release tarballs
+       must use an up to date version.
+
+       * lightningize.in, doc/lightningize.1: Removed. Do not encourage
+       bundling lightning in other packages. It should use a system package
+       or a proper third party subdirectory.
+
+       * INSTALL: Removed. Autoreconf removes it and creates a symlink
+       when regenerating files, so, avoid conflicts in git and let
+       automake create the symlink.
+
+       * .gitignore: Add INSTALL and autogenerated files.
+
+       * configure.ac, Makefile.am: Update for removal of opcode subdir,
+       auto generated files and lightningize.
+
+       * tests/Makefile.am, tests/3to2.c, tests/add.c, tests/bp.c,
+       tests/fib.c, tests/fibdelay.c, tests/fibit.c, tests/funcfp.c,
+       tests/incr.c, tests/printf.c, tests/rpn.c, tests/rpnfp.c,
+       tests/sete.c, tests/testfp.c: Update for removal of opcode subdir.
+
+       * doc/Makefile.am: Update for removal of lightningize.
+
+       * configure.ac, lightning/ppc/funcs.h, lightning/sparc/funcs.h,
+       lightning/i386/fp.h, lightning/i386/core.h, lightning/i386/asm.h,
+       tests/3to2.c, tests/add.c, tests/bp.c, tests/fib.c, tests/fibdelay.c,
+       tests/fibit.c, tests/funcfp.c, tests/incr.c, tests/printf.c,
+       tests/rpn.c, tests/rpnfp.c, tests/sete.c, tests/testfp.c:
+       Remove LIGHTNING_CROSS, it is half supported and incomplete.
+
+       * tests/3to2.c, tests/funcfp.c, tests/rpnfp.c: Remove preprocessor
+       check on JIT_FPR. If no hardware registers are available, the backend
+       must provide an alternative for software float.
+
+       * lightning/ppc/core.h, lightning/sparc/core.h, tests/Makefile.am:
+       Remove JIT_NEED_PUSH_POP. It is absolutely not trivial to implement
+       properly on some backends due to stack alignment constraints, and
+       whenever it is required, using jit_allocai and using a properly
+       aligned stack vector, or a heap buffer, is better.
+
+       * tests/push-pop.c, tests/push-pop.ok: Removed due to
+       JIT_NEED_PUSH_POP no longer available.
+
+2011-02-28  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Add jit_add{c,x}{i,r}_l, jit_mulr_{l,ul}_,
+       fix jit_mul{i,r}_{l,ul}.
+
+2010-08-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-64.h: Return patch address from jit_bXYr_{f,d}.
+       Reported by Paulo César Pereira de Andrade.
+       * lightning/ppc/fp.h: Likewise.
+       * lightning/sparc/fp.h: Implement FP branches.
+
+2010-08-18  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-64.h: Fix jp in jit_bner_{f,d}.
+
+2010-08-18  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-32.h: Fix -D_ASM_SAFETY compilation.
+       Reported by Paulo César Pereira de Andrade.
+
+2010-08-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/ldst.c: Update.
+       * tests/Makefile.am: Use -ffloat-store to compile it.
+
+2010-08-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
+       jit_ldxr_s): Move...
+       * lightning/i386/core-32.h: ... here.
+       * lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
+       jit_ldxr_s): Use movsbq and movswq.
+
+2010-08-10  Paulo César Pereira de Andrade <pcpa@mandriva.com.br>
+
+       * lightning/i386/core-32.h (jit_replace): Use MOVLrr, not MOVLir.
+       (jit_movbrm): Check index register as well.
+       * lightning/i386/fp-64.h: Add jit_extr_f_d and jit_extr_d_f.
+       * lightning/fp-common.h: Add jit_extr_f_d and jit_extr_d_f.
+
+2010-07-28  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/Makefile.am: Add ldst test.
+       * tests/Makefile.in: Regenerate.
+       * tests/ldst.c: New.
+       * tests/ldst.ok: New.
+
+2010-07-28  Paolo Bonzini  <bonzini@gnu.org>
+
+       * THANKS: Add Paulo Cesar Pereira de Andrade.
+       * doc/porting.texi: Fix ordering of arguments in jit_stxi.
+       * lightning/i386/core-32.h (jit_replace): Remove cmp argument.
+       * lightning/i386/fp-64.h (jit_movi_f): Fix.
+
+2010-07-26  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-32.h (jit_replace): Move here (removed
+       2009-03-01).
+
+2010-07-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * build-aux/lightning.m4: Always set and replace lightning_frag.
+       * Makefile.in: Regenerate.
+       * aclocal.m4: Regenerate.
+       * config.h.in: Regenerate.
+       * configure: Regenerate.
+       * doc/Makefile.in: Regenerate.
+       * doc/lightningize.1: Regenerate.
+       * doc/version.texi: Regenerate.
+       * lightning/Makefile.in: Regenerate.
+       * opcode/Makefile.in: Regenerate.
+       * tests/Makefile.in: Regenerate.
+
+2009-03-01  Paolo Bonzini  <bonzini@gnu.org>
+
+        * lightning/i386/core-64.h: Use Mike's macros for x86-64 too.
+        * lightning/i386/core.h: Remove jit_replace.
+
+       2009-02-27  Mike Spivey  <mike@comlab.ox.ac.uk>
+
+        * lightning/i386/core.h: Rewrite shift-handling macros.
+        * lightning/fp-common.h: Fix jit_extr_{f_d,d_f}.
+
+2009-02-17  Mike Spivey  <mike@comlab.ox.ac.uk>
+
+       * lightning/i386/core.h: Fix blunder in operand order.
+
+2009-02-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-32.h: Another fix to jit_fp_btest.
+
+2009-02-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/fp-common.h: Define double branches if missing.
+       * lightning/i386/asm.h: Define JC and JNC mnemonics.
+       * lightning/i386/fp-32.h: Fix jit_fp_btest.  All reported
+       by Mike Spivey.
+
+2008-10-09  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h (jit_flush_code): Subtract 1 from end.
+       Reported by Eli Barzilay and Matthew Flatt.
+
+2008-08-23  Nix  <nix@esperi.org.uk>
+
+       * lightning/i386/Makefile.frag: fp-32.h and fp-64.h are target files.
+
+2008-07-02  Laurent Michel  <ldm@engr.uconn.edu>
+
+       * lightning/ppc/funcs.h (jit_flush_code): modified the computation
+       of start/end. The pointer arithmetic was done without casting. It
+       prevented compilation with recent gcc versions. 
+       * lightning/ppc/core.h (jit_pushr_i): The offset for the store was
+       incorrect. Should have been 4 bytes below SP (not above).
+       * lightning/ppc/core.h (jit_popr_i): The offset for the load was 
+       incorrect. Should have been 0 (not +8). 
+
+2008-06-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-64.h: Forward IMULQir to IMULQirr,
+       fix REXQ order for IMULQirr.
+
+2008-06-17  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: Fix _rN vs. _rR.
+
+2008-06-16  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: Use jit_save in jit_replace.  Move JIT_R
+       definition...
+       * lightning/i386/core-32.h: ... here; define jit_save so that
+       the core.h has no effect on the 32-bit backend.
+       * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+       place outgoing arguments in the right spot from the beginning,
+       define jit_save, fix jit_reg8/jit_reg16.
+
+2008-06-15  Paolo Bonzini  <bonzini@gnu.org>
+
+        * lightning/i386/core-64.h: Rewrite argument passing to
+       support up to 6 arguments and generate less code.
+
+2008-06-14  Laurent Michel  <ldm@thorgal.homelinux.org>
+
+       * lightning/i386/core-64.h (jit_movi_l): When the operand is 0,
+       the XOR should be on a quadword.
+       * lightning/i386/core-64.h (jit_prolog): Keep 16-byte stack
+       alignment.
+       (jit_ret): Always use LEAVE.
+
+2008-06-13  Laurent Michel  <ldm@thorgal.homelinux.org>
+
+       * lightning/i386/core-64.h: Add (void) casts for C++ compatibility.
+       * lightning/i386/asm.h: Likewise.
+
+2008-06-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: Move JIT_V definition...
+       * lightning/i386/core-32.h: ... here.
+       * lightning/i386/core-64.h: ... and here.  Avoid dancing between
+       RSI/RDI and R12/R13, and place JIT_V1/JIT_V2 in R12/R13.
+
+2008-06-11  Paolo Bonzini  <bonzini@gnu.org>
+
+       * build-aux/lightning.m4: Adjust LIGHTNING_BACKENDS, don't
+       use suffix support to distinguish i386/x86_64.
+       * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES
+       to distribute *-32.h and *-64.h files now.
+       * lightning/i386/asm-i386: Moved to...
+       * lightning/i386/asm.h: Include the appropriate subtarget file.
+       * lightning/i386/core-i386: Moved to...
+       * lightning/i386/core.h: Include the appropriate subtarget file.
+       * lightning/i386/fp.h: New, include the appropriate subtarget file.
+       * lightning/i386/asm-32: Do not include asm-i386.h.
+       * lightning/i386/asm-64.h: Likewise.
+       * lightning/i386/core-32: Do not include core-i386.h.
+       * lightning/i386/core-64.h: Likewise.
+       * lightning/Makefile.am: Adjust for renamed files.
+
+       * configure.ac: Define LIGHTNING_TARGET here.
+       * opcode/disass.c: Change list of valid LIGHTNING_TARGET values.
+
+       * lightningize.in: Robustify against missing subtarget files.
+
+2008-06-11  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-32.h: Use MOVLir instead of jit_movi_l
+       to implement jit_movi_p.
+
+2008-06-11  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-32.h: Use separate __APPLE__ and SysV
+       prolog/ret macros.  Subtract 12 bytes in __APPLE__ case to
+       keep stack aligned, and always use LEAVE in the epilog.
+
+2008-06-11  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-i386.h: Fix C++ incompatibility.
+
+2008-06-10  Laurent Michel  <ldm@engr.uconn.edu>
+
+       * lightning/i386/core-i386.h: Fix jit_replace8 for
+       case when one of the operands is _EAX.
+
+2008-05-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/run-test: Avoid CRLF issues on mingw.
+
+2008-03-21  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Fix jit_{ld,st}{,x}i_{i,l}.
+       Remove jit_ld{,x}i_ul.
+       * lightning/core-common.h: Make jit_ld{,x}{i,r}_ul
+       always a synonym of the _l variant.
+       * doc/porting.texi: Document this.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Fix uses of jit_qop_.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Add boolean operations.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-64.h: Add LEAQmr.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Misc bugfixes.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-i386.c: Remove jit_ldr_i, jit_ldxr_i.
+       * lightning/i386/core-32.h: Add jit_ldr_i, jit_ldxr_i.
+       * lightning/i386/core-64.h: Add jit_ld{r,xr,i,xi}_{ui,l,ul};
+       move jit_ldr_i, jit_ldxr_i, jit_str_l, jit_stxr_l with others.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/asm-common.h: Add _s32P.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Implement long mul/div/mod.
+
+2008-03-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Cast memory address to long for JCCim.
+
+2008-03-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/asm-common.h: Add underscores around __unused__
+       attribute.
+
+2008-03-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/core.h: Avoid some "value computed is not used"
+       warnings.
+       * lightning/tests/allocai.c: Silence other warnings.
+
+2008-03-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightningize.in: Fix some problems (not all).
+
+2008-03-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-32.h: Avoid some "value computed is not used"
+       warnings; reported by Sam Steingold.
+
+2008-03-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-32.h: Fix stxr_c(_EAX, _EBX, _ESI).
+
+2008-02-13  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-32.h: Avoid redefinition of _r1, reported by
+       Sam Steingold.
+       * lightning/i386/asm-64.h: Likewise.
+
+2008-02-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Don't define _VOID, reported
+       by Reini Urban.
+
+2008-02-03  Paolo Bonzini  <bonzini@gnu.org>
+
+       * build-aux/lightning.m4: Add --with-lightning-prefix option, suggested
+       by Sam Steingold.
+
+2008-01-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-64.h: Use CALLsr, not CALLLsr.
+
+2008-01-13  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-i386.h: Move jit_calli and jit_callr...
+       * lightning/i386/core-32.h: ... here.
+       * lightning/i386/core-64.h: Redefine them.
+
+2008-01-05  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-32.h: Fix sub(a,0,a).
+       * lightning/tests/3to2.c: Add new testcases.
+       * lightning/tests/3to2.ok: Add new testcases.
+
+2008-01-02  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp-32.h: Fix sub(a,b,a) with a ~= JIT_FPR0.
+       * lightning/tests/3to2.c: New.
+       * lightning/tests/3to2.ok: New.
+
+2007-11-07  Paolo Bonzini  <bonzini@gnu.org>
+
+       * opcode/Makefile.am: Fix AM_CPPFLAGS.
+
+2007-08-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-i386.h: Improve encoding of set* instructions.
+       * lightning/i386/core-64.h: Fix jit_bra_l.
+       * tests/sete.c: New.
+       * tests/sete.ok: New.
+
+2007-06-29  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/bp.c: Upgrade to GPL/LGPLv3.
+       * lightning/i386/asm-32.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/asm-64.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/core-32.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/core-64.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/fp-64.h: Upgrade to GPL/LGPLv3.
+       * lightning/sparc/asm.h: Upgrade to GPL/LGPLv3.
+       * lightning/sparc/core.h: Upgrade to GPL/LGPLv3.
+       * lightning/sparc/fp.h: Upgrade to GPL/LGPLv3.
+       * lightning/sparc/funcs.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/asm-i386.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/core-i386.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/fp-32.h: Upgrade to GPL/LGPLv3.
+       * lightning/i386/funcs.h: Upgrade to GPL/LGPLv3.
+       * lightning/ppc/asm.h: Upgrade to GPL/LGPLv3.
+       * lightning/ppc/core.h: Upgrade to GPL/LGPLv3.
+       * lightning/ppc/fp.h: Upgrade to GPL/LGPLv3.
+       * lightning/ppc/funcs.h: Upgrade to GPL/LGPLv3.
+       * lightning.h: Upgrade to GPL/LGPLv3.
+       * tests/add.c: Upgrade to GPL/LGPLv3.
+       * tests/fib.c: Upgrade to GPL/LGPLv3.
+       * tests/testfp.c: Upgrade to GPL/LGPLv3.
+       * tests/fibdelay.c: Upgrade to GPL/LGPLv3.
+       * tests/fibit.c: Upgrade to GPL/LGPLv3.
+       * tests/funcfp.c: Upgrade to GPL/LGPLv3.
+       * tests/incr.c: Upgrade to GPL/LGPLv3.
+       * tests/printf.c: Upgrade to GPL/LGPLv3.
+       * tests/printf2.c: Upgrade to GPL/LGPLv3.
+       * tests/rpn.c: Upgrade to GPL/LGPLv3.
+       * tests/rpnfp.c: Upgrade to GPL/LGPLv3.
+       * lightning/asm-common.h: Upgrade to GPL/LGPLv3.
+       * lightning/core-common.h: Upgrade to GPL/LGPLv3.
+       * lightning/fp-common.h: Upgrade to GPL/LGPLv3.
+       * lightning/funcs-common.h: Upgrade to GPL/LGPLv3.
+       * opcode/dis-buf.c: Upgrade to GPL/LGPLv3.
+       * opcode/disass.c: Upgrade to GPL/LGPLv3.
+       * opcode/i386-dis.c: Upgrade to GPL/LGPLv3.
+       * opcode/sparc-dis.c: Upgrade to GPL/LGPLv3.
+       * opcode/sparc-opc.c: Upgrade to GPL/LGPLv3.
+       * lightningize.in: Upgrade to GPL/LGPLv3.
+       * opcode/bfd.h: Upgrade to GPL/LGPLv3.
+       * opcode/i386.h: Upgrade to GPL/LGPLv3.
+       * opcode/sparc.h: Upgrade to GPL/LGPLv3.
+
+2007-01-26  Thomas Girard  <thomas.g.girard@free.fr>
+
+       * lightning/Makefile.am: Add clean-local target.
+
+2006-12-02  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Add CVTTS?2SIL.
+       * lightning/i386/asm-64.h: Add CVTTS?2SIQ.
+       * lightning/i386/fp-64.h: Use it.
+
+       * lightning/Makefile.am: Place files in nodist_lightning_HEADERS.
+
+2006-11-23  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/core-common.h: Add casts in "*i_p" variants.
+       * lightning/i386/asm-32.h: Add _r1.
+       * lightning/i386/asm-64.h: Likewise, and add SSE instructions.
+       * lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole.
+       Use short form for 16-bit AX instructions.  Remove _r1.
+       * lightning/i386/core-64.h: Add FP ABI support in its infancy.
+       * lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d...
+       * lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d...
+       * lightning/i386/fp-32.h: ... here.
+       * lightning/i386/fp-64.h: Write the code.
+       * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
+       
+2006-11-22  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Move x86-64 instructions...
+       * lightning/i386/asm-64.h: ... here.
+       * lightning/i386/fp-32.h: Fix bugs worked around in froofyJIT.
+       Add JIT_FPRET.
+       * lightning/sparc/fp.h: Likewise.
+       * lightning/ppc/fp.h: Likewise.
+       * lightning/fp-common.h: Adjust for JIT_FPRET.
+       * tests/funcfp.c: Adjust for JIT_FPRET.
+       * tests/rpnfp.c: Adjust for JIT_FPRET.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h:  Add an underscore to macros without
+       a parameter.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core-i386.h: Move jit_movip, jit_check8, jit_reg8,
+       jit_reg16, jit_movbrm...
+       * lightning/i386/core-32.h: ... here.
+       * lightning/i386/core-64.h: Redefine them.  Fix other bugs.
+
+       * tests/printf.c: Do not do a varargs call.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Check in rewrite from Basilisk II.
+       * lightning/i386/asm-32.h: Adjust.
+       * lightning/i386/asm-64.h: Adjust.
+       * lightning/i386/fp-32.h: Adjust.
+
+       * lightning/i386/core-32.h: Adjust.  Add jit_{ld,ldx,st,stx}i*.
+       * lightning/i386/core-64.h: Adjust.  Add jit_{ld,ldx,st,stx}i*.
+       * lightning/i386/core-i386.h: Adjust. Remove these patterns.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm-i386.h: Merge 64-bit cleanliness changes from
+       mzscheme.
+       Add SSE.
+       * lightning/i386/asm-64.h: Likewise.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+           Ludovic Courtes  <ludo@chbouib.org>
+
+       * lightning/i386/core-32.h: Disable jit_push and jit_pop if stack not
+       needed.
+       * lightning/i386/core-64.h: Disable jit_push and jit_pop if stack not
+       needed.
+       * lightning/sparc/core.h: Merge final implementation of jit_pushr and
+       jit_popr.
+       * lightning/ppc/core.h: Fix implementation of jit_pushr and jit_popr to
+       work (more or less) across function calls.
+
+       * tests/push-pop.c, tests/push-pop.ok: New test.
+       * tests/Makefile.am: Run it.
+
+2006-11-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/asm-common.h: Make 64-bit safe.
+       * lightning/i386/funcs.h: Make 64-bit safe.
+
+       * lightning/i386/asm-64.h: More merge from mzscheme.
+       * lightning/i386/asm-i386.h: More merge from mzscheme.
+       * lightning/i386/core-32.h: More merge from mzscheme.
+       * lightning/i386/core-64.h: More merge from mzscheme.
+       * lightning/i386/core-i386.h: More merge from mzscheme.
+
+       * tests/rpnfp.c, tests/testfp.c, tests/funcfp.c: Skip if no
+       floating-point support.
+
+2006-11-04  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/rpn.c: Remove pushr/popr.
+
+2006-11-04  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
+       * lightning/ppc/funcs.h: Store frame size into _jitl.  Store R1 before
+       the STMW, so that the offset is unchanged when we patch the STMW.
+       * lightning/i386/core.h: Define JIT_FP to be EBP.
+       * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
+       epilog if jit_allocai was used.
+       * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
+       epilog if jit_allocai was used.
+
+2006-11-04  Ludovic Courtes  <ludo@chbouib.org>
+
+       * lightning/sparc/core.h: Implement jit_allocai.
+       * tests/allocai.c: New.
+       * tests/Makefile.am: Point to new tests.
+
+2006-11-03  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.
+       "AND." does signed comparisons.
+
+2006-10-31  Paolo Bonzini  <bonzini@gnu.org>
+
+       * doc/porting.texi: Rename JIT_FP to JIT_AP.
+       * lightning/core-common.h: Likewise.
+       * lightning/i386/core-i386.h: Likewise.
+       * lightning/fp-common.h: Provide default versions of jit_getarg_[fd].
+       * lightning/i386/fp-32.h: Don't provide jit_getarg_[fd].
+       * lightning/ppc/fp.h: Likewise.
+
+2006-10-31  Ludovic Courtes  <ludo@chbouib.org>
+
+        * doc/using.texi (The instruction set): Clarified the use of `JIT_RET' and
+        documented `jit_retval'.
+        * tests/ret.c (generate_function_proxy): After `jit_finish', use
+        `jit_retval_i' to move FUNC's return value into the correct register.
+
+2006-10-31  Paolo Bonzini  <bonzini@gnu.org>
+           Ludovic Courtes  <ludo@chbouib.org>
+
+       * tests/divi.c, tests/divi.ok, tests/movi.c, tests/movi.ok: New.
+       * tests/ldxi.c: Ensure large pointer is generated.
+       * tests/Makefile.am: Point to new tests.
+       * lightning.h: Include funcs-common.h before funcs.h.
+       * lightning/sparc/core.h: Fix bugs in modi/divi.
+
+2006-10-30  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/Makefile.am: Use "ln -sf".
+       * lightning/core-common.h: Define jit_negr_l if necessary.
+
+2006-10-30  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm.h (MOVS*, MOVZ*): Use correct _r[124] macros.
+
+2006-10-29  Paolo Bonzini  <bonzini@gnu.org>
+
+       * configure.ac: Use lightning.m4 macros.
+       * lightning.m4: Refactor to use common code in configure.ac.  Move...
+       * build-aux/lightning.m4: ... here.
+       * lightningize.in: Support suffixes.
+       * opcode/disass.in: Adapt to changes in configure.ac.
+
+       * lightning/ppc/funcs.h: Use __APPLE__ instead of _CALL_DARWIN.
+       * lightning/i386/core-32.h: Likewise.
+
+2006-10-26  Paolo Bonzini  <bonzini@gnu.org>
+
+       * configure.ac: Fix compilation test.
+       * lightning/Makefile.am: Symlink LIGHTNING_TARGET_FILES in
+       non-distribution mode.
+       * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES.
+
+2006-10-26  Paolo Bonzini  <bonzini@gnu.org>
+
+       * configure.ac: Subst cpu.
+       * lightning/core-common.h: Make tests pass on i386.
+       * lightning/i386/asm-32.h: Make tests pass on i386.
+       * lightning/i386/asm-64.h: Make tests pass on i386.
+       * lightning/i386/asm-i386.h: Make tests pass on i386.
+       * lightning/i386/core-32.h: Make tests pass on i386.
+       * lightning/i386/core-64.h: Make tests pass on i386.
+       * lightning/i386/core-i386.h: Make tests pass on i386.
+       * tests/Makefile.am: Include files from cpu directory.
+
+2006-10-26  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm.h: Move to asm-i386.h
+       * lightning/i386/asm-32.h: New, from Matthew Flatt.
+       * lightning/i386/asm-64.h: New, from Matthew Flatt.
+       * lightning/i386/core.h: Move to core-i386.h
+       * lightning/i386/core-32.h: New, from Matthew Flatt.
+       * lightning/i386/core-64.h: New, from Matthew Flatt.
+       * lightning/i386/fp.h: Move to fp-32.h
+       * lightning/i386/fp-64.h: New, dummy.
+       * lightning/i386/Makefile.frag: New.
+       * lightning/Makefile.am: Support per-target Makefile fragments.
+       * configure.ac: Support per-target Makefile fragments and CPU suffixes.
+
+2006-10-16  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/i386.h (jit_flush_code): Fix syntax error. :-(
+
+2006-07-06  Paolo Bonzini  <bonzini@gnu.org>
+           Ludovic Courtes  <ludovic.courtes@laas.fr>
+
+       * doc/using.texi: Clarify "Using autoconf" section
+       and rename it to "Bundling lightning".
+       * lightning.m4: Work also if lightning is not bundled.
+
+2006-07-06  Paolo Bonzini  <bonzini@gnu.org>
+           Ludovic Courtes  <ludovic.courtes@laas.fr>
+
+       * lightning/ppc/core.h (_jit_mod): Replace with...
+       (_jit_mod_big, _jit_mod_small): ... these.
+       (jit_modi_i, jit_modi_ui): Rewrite.
+       * tests/modi.c, tests/modi.ok: New tests.
+
+2006-05-18  Matthew Flatt  <mflatt@cs.utah.edu>
+
+       * lightning/i386/asm.h: Fix test for extending the mprotect area
+       towards lower addresses.
+
+2006-05-16  Bruno Haible  <bruno@clisp.org>
+
+       * lightning/asm-common.h: Don't use __func__ nor __FUNCTION__ if
+       not compiling with GNU C.
+
+2006-02-16  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/core.h: Fix jit_ldxi_* with big displacement.
+
+2006-01-23  Paolo Bonzini  <bonzini@gnu.org>
+
+       * configure.ac: Fix comments in config.h.in.
+       
+2005-11-25  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/sparc/fp.h: Fix header comment.
+       * lightning/ppc/fp.h: Fix header comment.
+
+2005-04-27  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/asm.h (JCm, JCSm, JNCm, JNCSm): New.
+
+2004-11-26  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h (_jit_epilog): Remove unused variable.
+
+2004-11-13  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/funcs.h [__linux__]: Include sys/mman.h.
+
+2004-11-09  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/sparc/fp.h: Fix fp-to-integer conversions.
+       * lightning/ppc/testfp.c: Test fp-to-integer conversions
+       of integer numbers.
+       * lightning/ppc/testfp.ok: Adjust for the above.
+
+2004-11-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/testfp.c: Always flush code before
+       testing it.
+
+2004-11-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/fp.h: Do not clobber f31.
+
+2004-11-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning.h: New name of...
+       * lightning-inst.h: ... this file.
+       * lightning.h.in: Removed.
+
+       * opcodes/disass.c: Include config.h.
+       * tests/add.c: Include config.h.
+       * tests/bp.c: Include config.h.
+       * tests/fib.c: Include config.h.
+       * tests/fibdelay.c: Include config.h.
+       * tests/fibit.c: Include config.h.
+       * tests/funcfp.c: Include config.h.
+       * tests/incr.c: Include config.h.
+       * tests/printf.c: Include config.h.
+       * tests/printf2.c: Include config.h.
+       * tests/rpn.c: Include config.h.
+       * tests/rpnfp.c: Include config.h.
+       * tests/testfp.c: Include config.h.
+
+2004-10-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp.h: Fix bugs in conditional branches.
+
+2004-10-10  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/funcs.h: Fix pasto in jit_flush_code.
+
+2004-10-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/fp.h: Optimized conditional branches.
+
+2004-09-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/asm.h: Fix more typos.
+
+2004-09-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/asm.h: Fix typos, replace `26' with JIT_AUX.
+
+2004-09-20  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/fp.h: Added conditional branches.
+
+2004-09-18  Laurent Michel  <ldm@thorgal.homelinux.org>
+
+       * lightning/ppc/fp.h (jit_unler_d, jit_unltr_d, jit_unger_d,
+       jit_ungtr_d, jit_ltgt_d, jit_uneq_d): Implemented missing tests
+       to fully support testfp.
+       (jit_floorr_d_i, jit_ceilr_d_i, jit_roundr_d_i, jit_truncr_d_i):
+       New macros.
+       * lightning/ppc/asm.h: Added missing opcodes FCTIWZ and MTFSFI.
+       * lightning/ppc/funcs.h (_jit_prolog): Fixed minor mistake in
+       the initialization of _jitl.nextarg_geti, relying on the
+       JIT_AUX macro as well to get the register offset.
+
+2004-09-07  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h: Fix typo.
+
+2004-09-06  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/funcfp.c: Use %g.  Remove C99 variable declarations.
+       * tests/testfp.c: Don't use __builtin_nan.
+
+       * lightning/ppc/core.h: Add three V registers.
+       * lightning/ppc/funcs.h: Adjust.
+
+       * lightning/sparc/core.h: Some fixes related to FP argument passing.
+       Move R0 to %g2, use %o7 for JIT_BIG2.
+       * lightning/sparc/fp.h: Some fixes related to FP argument passing.
+
+2004-09-02  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/sparc/core.h: Add another V register,
+       move R0 to %o7.
+
+2004-07-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/funcs.h: Implement jit_flush_code,
+       in order to support Fedora's exec-shield.
+
+2004-07-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/core-common.h: Add more jit_extr_*_* macros.
+       * lightning/doc/using.texi: Be clearer about the order
+       of arguments in jit_extr_*_*.
+       * lightning/doc/porting.texi: Add more jit_extr_*_* macros.
+       * lightning/i386/fp.h: Fix typo in jit_extr_i_d.
+
+2004-07-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h: Adjust offset of LR into
+       stack frame if running under the Darwin ABI.
+
+2004-07-13  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp.h: Rename jit_exti_d to jit_extr_i_d.
+
+2004-07-13  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/core.h: Fix thinko.
+
+       * lightning/i386/core.h: Fix jit_lti_ui.
+       * lightning/core-common.h: Add missing macros.
+
+       * lightning/ppc/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/i386/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/sparc/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/fp-common.h: Rename jit_neg_* to jit_negr_*.
+       * doc/porting.texi: Add undocumented macros.
+
+2004-07-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * doc/porting.texi: Add missing macros.
+
+2004-07-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h: Don't generate trampolines.
+       Separate prolog and epilog generation.
+       * lightning/ppc/core.h: Generate epilog explicitly.
+       Don't reserve r31 anymore.
+       * lightning/core-common.h: Remove call to jit_setup_code.
+
+2004-07-09  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/lightning.h.in: Avoid preprocessor warnings.
+       * lightning/lightning-inst.h: Likewise.
+
+       * lightning/i386/core.h: Define JIT_R, JIT_R_NUM, JIT_V,
+       JIT_V_NUM.
+       * lightning/ppc/core.h: Likewise.
+       * lightning/sparc/core.h: Likewise.
+       * lightning/i386/fp.h: Define JIT_FPR, JIT_FPR_NUM.
+       * lightning/ppc/fp.h: Likewise.
+       * lightning/sparc/fp.h: Likewise.
+       * lightning/core-common.h: Define fixed register names.
+       * lightning/fp-common.h: Likewise for FP regs.
+
+2004-07-09  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/ppc/funcs.h: Fix location where return address
+       is stored.
+       * lightning/i386/asm.h: Add a trailing _ to opcodes without
+       any parameter.
+       * lightning/i386/core.h: Adjust for the above.
+
+2004-04-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/fp.h: Change "and" to "_and"
+       to satisfy C++ compilers.
+
+2004-04-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/sparc/fp.h: Use memcpy to implement jit_movi.
+       * lightning/ppc/fp.h: Use memcpy to implement jit_movi.
+       Move floating-point opcodes...
+       * lightning/ppc/asm.h: ... here.
+
+2004-04-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/core-common.h: Add jit_finishr.
+       * lightning/ppc/core.h: Add jit_callr and jit_finishr.
+       * lightning/i386/core.h: Add jit_callr.
+       * lightning/sparc/core.h: Add jit_callr.  Fix typo.
+
+2004-04-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: Fix pasto in jit_b*_ui.
+
+2004-03-30  Laurent Michel
+
+       * lightning/ppc: Implement PowerPC floating point
+       (ChangeLog entry missing).
+
+2004-03-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/fp-common.h: Load/store macros are not the
+       same for floats and doubles anywhere, but jit_retval may be.
+       * lightning/i386/asm.h: Fix = mistaken for == in ESCrri.
+       * lightning/i386/core.h: Fix typo in jit_prepare_[fd].
+       * lightning/i386/fp.h: Rewritten.
+       * tests/testfp.c: Add tests for unordered comparisons.
+       * tests/testfp.ok: Add results.
+
+2004-03-15  Paolo Bonzini  <bonzini@gnu.org>
+
+       Merge changes from Laurent Michel.
+
+       * lightning/asm-common.h: Add _jit_I_noinc.
+       * lightning/core-common.h: Support jit_init,
+       jit_setup_code, jit_patch_at.  Return patchable IP from
+       jit_movi_p.
+       * lightning/funcs-common.h: Provide defaults
+       for jit_setup_code, jit_start_pfx, jit_end_pfx.
+       * lightning/i386/core.h: Add jit_patch_at, jit_patch_movi.
+       * lightning/ppc/core.h: Likewise.
+       * lightning/sparc/core.h: Likewise.
+       * lightning/ppc/asm.h: Fix generation of branch destination
+       displacements in _FB and _BB.
+       * lightning/ppc/core.h: Generate trampolines in the user
+       area.
+       * lightning/ppc/funcs.h: Add a few casts.
+       * tests/bc.c: New testcase.
+
+       * lightning/i386/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+       * lightning/ppc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+       * lightning/sparc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+
+
+2004-03-09  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/sparc/fp.h: Rewrite.  Move macros for
+       FP code generation...
+       * lightning/sparc/asm.h: ... here.
+       * lightning/sparc/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/ppc/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/i386/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/core-common.h: Provide backwards
+       compatible synonyms for the above.
+       * lightning/fp-common.h: Rewrite.
+       * lightning-inst.h: Include fp unconditionally.
+       * lightning.h.in: Include fp unconditionally.
+       * tests/Makefile.am: Enable fp tests.
+       * tests/fib.c: Use jit_retval_i.
+       * tests/fibit.c: Cast codeBuffer to char *.
+       * tests/funcfp.c: Use new fp macros.
+       * tests/printf.c: Use jit_retval_i.
+       * tests/rpnfp.c: Use new fp macros.
+       * tests/testfp.c: Use new fp macros.
+
+2004-03-02  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: generate correct code when
+       doing lt/le/ge/etc. on ESI and EDI.  Use MOVZX/MOVSX
+       where possible.
+       * lightning/i386/asm.h: Add macros for MOVZX/MOVSX.
+       Move macros for x87 here, and add many of them.
+       * lightning/i386/fp.h: Use new macros for x87.
+
+2004-02-06  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: avoid generating MOV reg, reg.
+       * lightning/sparc/core.h: fix several bugs.
+       * lightning/ppc/core.h: fix several bugs.
+       * tests/rpn.c: rewritten.
+
+2004-01-08  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/rpnfp.c: new example, suggested by Basile
+       Starynkevitch.
+       * tests/rpnfp.ok: new example.
+
+2003-12-12  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/add.c: new test, suggested by Steve Dekorte.
+       * tests/add.ok: new test.
+
+2003-11-14  Paolo Bonzini  <bonzini@gnu.org>
+           John Redford <eirenik@hotmail.com>
+
+       * lightning/asm-common.h: change the 'pc' field of _jit to
+       be a union of various data types, because ISO C99 doesn't
+       permit using ++ on a cast.  Change the incremented casts of
+       _jit.pc to be _jit.x.uc_pc, _jit.x.us_pc, etc.
+       * all files: change all non-cast instances of _jit.pc to be
+       _jit.x.pc.
+       * lightning/i386/core.h: remove casts from jit_might.
+
+2003-05-25  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: use JITSORRY in jit_replace
+       * lightning/asm-common.h: define JITSORRY
+
+2003-05-14  Paolo Bonzini  <bonzini@gnu.org>
+
+       * lightning/i386/core.h: fix missing comma in several
+       load/store macros.
+       * lightning/core-common.h: fix long/unsigned long/pointer
+       jit_pushr/jit_popr.
+       * lightning/ppc/funcs.h: correctly align stack pointer
+
+No changelogs for the assemblers (lightning directory) until 1.0
+       
+2003-03-27  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/printf2.c: new test
+
+2001-05-03  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tests/printf.c: made the message platform independent
+
+2001-01-19  Paolo Bonzini  <bonzini@gnu.org>
+
+       * configure.in: support cross-assembling
+       
+       * disass/bfd.h, disass/dis-asm.h, disass/dis-buf.c,
+       disass/i386-dis.c, disass/i386.h, disass/ppc-dis.c,
+       disass/ppc.h, disass/ppc-opc.c, disass/sparc-dis.c,
+       disass/sparc.h, disass/sparc-opc.c: new files, from GDB
+
+       * disass/disass.c, disass/Makefile.am: new files
+
+       * tests/fib.c, tests/fibit.c, tests/incr.c, tests/printf.c,
+       tests/rpn.c, tests/testfp.c, tests/Makefile.am: support
+       disassembling
diff --git a/deps/lightning/Makefile.am b/deps/lightning/Makefile.am
new file mode 100644 (file)
index 0000000..c921901
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+ACLOCAL_AMFLAGS = -I m4
+
+# Subdirectories that are built recursively.
+SUBDIRS =              \
+       check           \
+       doc             \
+       include         \
+       lib
+
+# Install lightning.pc under $(libdir)/pkgconfig.
+pkgconfiglibdir = $(libdir)/pkgconfig
+pkgconfiglib_DATA = lightning.pc
+
+# Everything below is only active when the get_jit_size automake
+# conditional is true.  It builds the uninstalled helper program `size'
+# and provides a get_jit_size target that produces $(JIT_SIZE_PATH).
+if get_jit_size
+JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
+AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
+
+noinst_PROGRAMS = size
+size_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+size_SOURCES = size.c
+
+get_jit_size:: $(JIT_SIZE_PATH)
+
+# Rebuild from scratch, run the test suite, then run the size program
+# (presumably the step that writes $(JIT_SIZE_PATH); confirm in size.c).
+# Use $(MAKE) rather than a literal `make' so the recursive invocations
+# use the same make program and inherit its flags and jobserver.
+$(JIT_SIZE_PATH):
+       $(MAKE) clean
+       $(MAKE) check
+       $(top_builddir)/size
+
+CLEANFILES = $(JIT_SIZE_PATH)
+endif
diff --git a/deps/lightning/NEWS b/deps/lightning/NEWS
new file mode 100644 (file)
index 0000000..f56dd79
--- /dev/null
@@ -0,0 +1,199 @@
+NEWS FROM 1.99 TO 1.99a
+
+o   Lightning now builds and passes all test cases on AIX 7.1 powerpc,
+    HP-UX 11iv2 hppa, HP-UX 11iv3 ia64, Solaris 10 Sparc, Solaris 11
+    x86_64, and Irix 6.5.30 mips (using n32 abi).
+
+NEWS FROM VERSION 1.3 TO 1.99
+
+o   The 1.99 version is a major lightning redesign and an
+    alpha version.
+
+o   Except for some special power-user usage, the major
+    difference in the rework is that function calls now push
+    arguments from left to right, which is both more natural for
+    programmers and more natural to implement for architectures
+    that pass arguments in registers and have alignment constraints,
+    usually for 64 bit double arguments.
+
+o   Add mips backend, implementing the o32 abi.
+
+o   Added arm backend implementing all combinations of software float,
+    vfp, neon, arm and thumb instruction sets, softfp and hardfp abis,
+    armv5, armv6, and armv7.
+
+o   Added sse2+ code generation for the 32 bit x86 backend.
+
+o   Added sse3 and sse4.x optional code generation for the 64 bit
+    x86 backend, code generation based on detected cpu.
+
+o   Reworked and added full lightning instruction set to ppc 32;
+    tested on ppc64 hardware and Darwin 32 operating system.
+
+o   Added ppc64 backend, built and tested on Fedora ppc.
+
+o   Reworked the sparc backend, built and tested on Debian sparc.
+
+o   Added an ia64 backend, built and tested on Debian ia64.
+
+o   Added an hppa backend, built and tested on Debian hppa.
+
+---
+
+NEWS FROM VERSION 1.2 TO 1.3
+
+o   Initial support for x86-64 back-end (mostly untested).
+
+o   lightning is more strict on casts from integer to pointer.
+    Be sure to use the _p variants when your immediates are
+    of pointer type.  This was done to ease 64-bit cleanliness
+    tests.
+
+o   Many bug fixes.
+
+o   JIT_FPRET is used as JIT_RET to move return values.
+    jit_retval_[fd] is used to retrieve return values.
+
+o   jit_pushr/jit_popr are deprecated, you need to #define
+    JIT_NEED_PUSH_POP prior to including lightning.h if you
+    want to use them.
+
+o   Support for stack-allocated variables.  Because of this,
+    backends defining JIT_FP should now rename it to JIT_AP.
+    JIT_FP is now a user-visible register used in ldxi/ldxr
+    to access stack-allocated variables.
+
+
+---
+
+NEWS FROM VERSION 1.1.2 TO 1.2
+
+o   Floating-point interface rewritten, uses a register file
+    architecture rather than a stack.
+
+o   Many bug fixes.
+
+o   jit_prepare and jit_retval are now jit_prepare_i and
+    jit_retval_i.
+
+o   Support for Fedora Core 1's exec-shield feature.
+
+o   PPC supports both SysV and Darwin ABIs.
+
+o   More (and more complete) examples provided.
+
+---
+
+NEWS FROM VERSION 1.1.1 TO 1.1.2
+
+o   This release fixes the bugs in PowerPC cache flushing and in
+    SPARC testing.
+
+---
+
+NEWS FROM VERSION 1.1 TO 1.1.1
+
+o   Merge changes from Debian
+
+This version was released to have a distributable version of lightning
+after the recent crack of the GNU FTP machines.  It does not fix
+outstanding bugs; I apologize for the inconvenience.
+
+---
+
+NEWS FROM VERSION 1.0 TO 1.1
+
+o   Several bug fixes
+
+o   improved infrastructure for embedding GNU lightning (lightningize
+    script)
+    
+---
+
+NEWS FROM VERSION 0.99 TO 1.0
+
+o   SPARC backend tested on GNU Smalltalk
+
+
+---
+
+NEWS FROM VERSION 0.98 TO 0.99
+
+o   Added floating point function support (thanks to Laurent Michel);
+    unfortunately this broke even more the PPC and SPARC floating point
+    stuff :-(
+
+---
+
+NEWS FROM VERSION 0.97 to 0.98
+
+o   PPC backend tested on GNU Smalltalk
+
+o   switched to autoconf 2.50
+
+o   new (much faster) PPC cache flushing code by John McIntosh
+
+---
+
+NEWS FROM VERSION 0.96 to 0.97
+
+o   support for cross-assembling and for disassembling the code that the tests
+    generate
+
+o   PPC microtests pass (tested directly by me), SPARC was said to work
+
+---
+
+NEWS FROM VERSION 0.95 to 0.96
+
+o   fixed implementation of delay slots to be coherent with the manual
+
+---
+
+NEWS FROM VERSION 0.94 to 0.95
+
+o   adc/sbc replaced with addc/addx/subc/subx to allow for more optimization
+    (inspired by the PPC instruction set).
+
+o   A few fixes and far fewer warnings from the compiler
+
+o   Automake-ized everything
+
+o   i386 backend generates smaller code for bms/bmc/or/xor by using byte
+    or word versions if possible
+
+o   Moved backends to separate directories
+
+---
+
+NEWS FROM VERSION 0.93 to 0.94
+
+o   Manual builds as DVI file.
+
+---
+
+NEWS FROM VERSION 0.92 to 0.93
+
+o   Floating-point front-end (began supporting PPC & SPARC).
+
+---
+
+NEWS FROM VERSION 0.91 to 0.92
+
+o   Floating-point front-end (only x86 supported).
+
+---
+
+NEWS FROM VERSION 0.9 to 0.91
+
+o   Carrying supported in addition/subtraction.
+
+o   insn type changed to jit_insn.
+
+o   Misc bug fixes.
+
+o   Reentrancy supported.
+
+o   SPARC run-time assembler rewritten.
+
+o   The run-time assembler can be disabled for debugging purposes.
diff --git a/deps/lightning/README b/deps/lightning/README
new file mode 100644 (file)
index 0000000..ae36ea5
--- /dev/null
@@ -0,0 +1,3 @@
+GNU lightning is a library to aid in making portable programs
+that compile assembly code at run time.  For more information,
+look at the info documentation.
diff --git a/deps/lightning/THANKS b/deps/lightning/THANKS
new file mode 100644 (file)
index 0000000..0e0f1a9
--- /dev/null
@@ -0,0 +1,21 @@
+Thanks to all the following people for their help in
+improving GNU lightning:
+
+Paolo Bonzini                   <bonzini@gnu.org>
+Eli Barzilay                    <eli@barzilay.org>
+Ludovic Courtes                 <ludo@chbouib.org>
+Matthew Flatt                  <mflatt@cs.utah.edu>
+Laurent Michel                  <ldm@thorgal.homelinux.org>
+Paulo Cesar Pereira de Andrade  <pcpa@gnu.org>
+Mike Spivey                     <mike@comlab.ox.ac.uk>
+Basile Starynkevitch            <basile@starynkevitch.net>
+Sam Steingold                  <sds@gnu.org>
+Jens Troeger                    <savage@light-speed.de>
+Tom Tromey                      <tromey@redhat.com>
+Trent Nelson                    <trent@snakebite.org>
+Vitaly Magerya                  <vmagerya@gmail.com>
+Brandon Invergo                 <brandon@gnu.org>
+Holger Hans Peter Freyther      <holger@moiji-mobile.com>
+Jon Arintok                     <jon.arintok@gmail.com>
+Bruno Haible                    <bruno@clisp.org>
+Marc Nieper-Wißkirchen                <marc@nieper-wisskirchen.de>
diff --git a/deps/lightning/TODO b/deps/lightning/TODO
new file mode 100644 (file)
index 0000000..676af02
--- /dev/null
@@ -0,0 +1,28 @@
+       * Validate that divrem in jit_x86-cpu.c is not modifying
+       the non result arguments. This is not verified by clobber.tst,
+       as it only checks registers not involved in the operation
+       (because it does not know about values being set as input
+       for the operation).
+
+       * Write a simple higher level language implementation generating
+       jit with lightning, that could be some lisp or C like language.
+
+       * rerun ./configure --enable-devel-get-jit-size and regenerate
+       the related jit_$arch-sz.c for the ports where nodata is
+       meaningful:
+       hppa            (done)
+       i586            (done)
+       ia64
+       mips o32        (done)
+       mips n32
+       mips n64
+       powerpc 32      (done)
+       powerpc 64      (done)
+       ppc
+       s390x           (done)
+       sparc           (done)
+       x86_64          (done)
+       Missing ones are due to no longer having (remote) access to such
+       hosts and may be broken with jit_set_data(..., JIT_DISABLE_DATA).
+       (ia64 hp-ux or linux), (irix mips for 32 or 64 abi), and
+       (darwin ppc).
diff --git a/deps/lightning/check/3to2.ok b/deps/lightning/check/3to2.ok
new file mode 100644 (file)
index 0000000..de2c040
--- /dev/null
@@ -0,0 +1,22 @@
+0
+1
+1
+1
+0
+1
+1
+1
+0
+1
+1
+0
+1
+1
+1
+0
+1
+1
+1
+0
+1
+1
diff --git a/deps/lightning/check/3to2.tst b/deps/lightning/check/3to2.tst
new file mode 100644 (file)
index 0000000..563cf85
--- /dev/null
@@ -0,0 +1,118 @@
+.data  32
+dfmt:
+.c     "%1.0f\n"
+ifmt:
+.c     "%d\n"
+
+.code
+       jmpi main
+
+#define def_test_double(a, b, c)               \
+       name test_double_##a##_##b##_##c        \
+test_double_##a##_##b##_##c:                   \
+       prolog                                  \
+       arg_d $d0                               \
+       arg_d $d1                               \
+       getarg_d %b $d0                         \
+       getarg_d %c $d1                         \
+       subr_d %a %b %c                         \
+       retr_d %a                               \
+       epilog
+#define test_double(a, b, c, x, y)             \
+       prepare                                 \
+               pushargi_d x                    \
+               pushargi_d y                    \
+       finishi test_double_##a##_##b##_##c     \
+       retval_d %f0                            \
+       prepare                                 \
+               pushargi dfmt                   \
+               ellipsis                        \
+               pushargr_d %f0                  \
+       finishi @printf
+
+#define def_test_int(a, b, c)                  \
+       name test_int_##a##_##b##_##c           \
+test_int_##a##_##b##_##c:                      \
+       prolog                                  \
+       arg $i0                                 \
+       arg $i1                                 \
+       getarg %b $i0                           \
+       getarg %c $i1                           \
+       subr %a %b %c                           \
+       retr %a                                 \
+       epilog
+#define test_int(a, b, c, x, y)                        \
+       prepare                                 \
+               pushargi x                      \
+               pushargi y                      \
+       finishi test_int_##a##_##b##_##c        \
+       retval %r0                              \
+       prepare                                 \
+               pushargi ifmt                   \
+               ellipsis                        \
+               pushargr %r0                    \
+       finishi @printf
+
+def_test_double(f0, f0, f0)
+def_test_double(f0, f0, f1)
+def_test_double(f0, f1, f0)
+def_test_double(f0, f1, f2)
+
+def_test_double(f3, f3, f3)
+def_test_double(f3, f3, f1)
+def_test_double(f3, f1, f3)
+def_test_double(f3, f1, f2)
+
+def_test_double(f3, f0, f0)
+def_test_double(f3, f0, f3)
+def_test_double(f3, f3, f0)
+
+def_test_int(r0, r0, r0)
+def_test_int(r0, r0, r1)
+def_test_int(r0, r1, r0)
+def_test_int(r0, r1, r2)
+
+def_test_int(v0, v0, v0)
+def_test_int(v0, v0, r1)
+def_test_int(v0, r1, v0)
+def_test_int(v0, r1, r2)
+
+def_test_int(v0, r0, r0)
+def_test_int(v0, r0, v0)
+def_test_int(v0, v0, r0)
+
+
+       name main
+main:
+       prolog
+
+       test_double(f0, f0, f0, 3.0, 2.0)
+       test_double(f0, f0, f1, 3.0, 2.0)
+       test_double(f0, f1, f0, 3.0, 2.0)
+       test_double(f0, f1, f2, 3.0, 2.0)
+
+       test_double(f3, f3, f3, 3.0, 2.0)
+       test_double(f3, f3, f1, 3.0, 2.0)
+       test_double(f3, f1, f3, 3.0, 2.0)
+       test_double(f3, f1, f2, 3.0, 2.0)
+
+       test_double(f3, f0, f0, 3.0, 2.0)
+       test_double(f3, f0, f3, 3.0, 2.0)
+       test_double(f3, f3, f0, 3.0, 2.0)
+
+       test_int(r0, r0, r0, 3, 2)
+       test_int(r0, r0, r1, 3, 2)
+       test_int(r0, r1, r0, 3, 2)
+       test_int(r0, r1, r2, 3, 2)
+
+       test_int(v0, v0, v0, 3, 2)
+       test_int(v0, v0, r1, 3, 2)
+       test_int(v0, r1, v0, 3, 2)
+       test_int(v0, r1, r2, 3, 2)
+
+       test_int(v0, r0, r0, 3, 2)
+       test_int(v0, r0, v0, 3, 2)
+       test_int(v0, v0, r0, 3, 2)
+
+       ret
+       epilog
diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am
new file mode 100644 (file)
index 0000000..e04f7ac
--- /dev/null
@@ -0,0 +1,318 @@
+#
+# Copyright 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+
+check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list
+
+lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+lightning_SOURCES = lightning.c
+
+ccall_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+ccall_SOURCES = ccall.c
+
+self_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+self_SOURCES = self.c
+
+setcode_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+setcode_SOURCES = setcode.c
+
+nodata_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+nodata_SOURCES = nodata.c
+
+ctramp_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+ctramp_SOURCES = ctramp.c
+
+carg_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+carg_SOURCES = carg.c
+
+cva_list_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+cva_list_SOURCES = cva_list.c
+
+$(top_builddir)/lib/liblightning.la:
+       cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
+
+EXTRA_DIST =                           \
+       3to2.tst        3to2.ok         \
+       add.tst         add.ok          \
+       align.tst       align.ok        \
+       allocai.tst     allocai.ok      \
+       allocar.tst     allocar.ok      \
+       bp.tst          bp.ok           \
+       divi.tst        divi.ok         \
+       fib.tst         fib.ok          \
+       rpn.tst         rpn.ok          \
+       ldst.inc                        \
+       ldstr.tst       ldstr.ok        \
+       ldsti.tst       ldsti.ok        \
+       ldstxr.tst      ldstxr.ok       \
+       ldstxi.tst      ldstxi.ok       \
+       ldstr-c.tst     ldstr-c.ok      \
+       ldstxr-c.tst    ldstxr-c.ok     \
+       ldstxi-c.tst    ldstxi-c.ok     \
+       cvt.tst         cvt.ok          \
+       hton.tst        hton.ok         \
+       branch.tst      branch.ok       \
+       alu.inc                         \
+       alu_add.tst     alu_add.ok      \
+       alux_add.tst    alux_add.ok     \
+       alu_sub.tst     alu_sub.ok      \
+       alux_sub.tst    alux_sub.ok     \
+       alu_rsb.tst     alu_rsb.ok      \
+       alu_mul.tst     alu_mul.ok      \
+       alu_div.tst     alu_div.ok      \
+       alu_rem.tst     alu_rem.ok      \
+       alu_and.tst     alu_and.ok      \
+       alu_or.tst      alu_or.ok       \
+       alu_xor.tst     alu_xor.ok      \
+       alu_lsh.tst     alu_lsh.ok      \
+       alu_rsh.tst     alu_rsh.ok      \
+       alu_com.tst     alu_com.ok      \
+       alu_neg.tst     alu_neg.ok      \
+       fop_abs.tst     fop_abs.ok      \
+       fop_sqrt.tst    fop_sqrt.ok     \
+       varargs.tst     varargs.ok      \
+       stack.tst       stack.ok        \
+       clobber.tst     clobber.ok      \
+       carry.tst       carry.ok        \
+       call.tst        call.ok         \
+       float.tst       float.ok        \
+       jmpr.tst        jmpr.ok         \
+       put.tst         put.ok          \
+       qalu.inc                        \
+       qalu_mul.tst    qalu_mul.ok     \
+       qalu_div.tst    qalu_div.ok     \
+       range.tst       range.ok        \
+       ranger.tst      ranger.ok       \
+       ret.tst         ret.ok          \
+       tramp.tst       tramp.ok        \
+       va_list.tst     va_list.ok      \
+       check.sh                        \
+       check.x87.sh                    \
+       check.arm.sh    check.swf.sh    \
+       check.arm.swf.sh                \
+       check.arm4.swf.sh               \
+       check.nodata.sh                 \
+       check.x87.nodata.sh             \
+       run-test        all.tst
+
+base_TESTS =                           \
+       3to2 add align allocai          \
+       allocar bp divi fib rpn         \
+       ldstr ldsti                     \
+       ldstxr ldstxi                   \
+       ldstr-c ldstxr-c ldstxi-c       \
+       cvt hton branch                 \
+       alu_add alux_add                \
+       alu_sub alux_sub alu_rsb        \
+       alu_mul alu_div alu_rem         \
+       alu_and alu_or alu_xor          \
+       alu_lsh alu_rsh                 \
+       alu_com alu_neg                 \
+       fop_abs fop_sqrt                \
+       varargs stack                   \
+       clobber carry call              \
+       float jmpr put                  \
+       qalu_mul qalu_div               \
+       range ranger ret tramp          \
+       va_list
+
+$(base_TESTS): check.sh
+       $(LN_S) $(srcdir)/check.sh $@
+
+TESTS = $(base_TESTS)
+
+if test_x86_x87
+#x87_TESTS = $(addsuffix .x87, $(base_TESTS))
+x87_TESTS =                                    \
+       3to2.x87 add.x87 allocai.x87            \
+       allocar.x87 bp.x87 divi.x87 fib.x87     \
+       rpn.x87 ldstr.x87 ldsti.x87             \
+       ldstxr.x87 ldstxi.x87                   \
+       ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87   \
+       cvt.x87 branch.x87                      \
+       alu_add.x87 alux_add.x87                \
+       alu_sub.x87 alux_sub.x87 alu_rsb.x87    \
+       alu_mul.x87 alu_div.x87 alu_rem.x87     \
+       alu_and.x87 alu_or.x87 alu_xor.x87      \
+       alu_lsh.x87 alu_rsh.x87                 \
+       alu_com.x87 alu_neg.x87                 \
+       fop_abs.x87 fop_sqrt.x87                \
+       varargs.x87 stack.x87                   \
+       clobber.x87 carry.x87 call.x87          \
+       float.x87 jmpr.x87 put.x87              \
+       va_list.x87
+$(x87_TESTS):  check.x87.sh
+       $(LN_S) $(srcdir)/check.x87.sh $@
+TESTS += $(x87_TESTS)
+
+#x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS))
+x87_nodata_TESTS =                                                     \
+       3to2.x87.nodata add.x87.nodata allocai.x87.nodata               \
+       allocar.x87.nodata bp.x87.nodata divi.x87.nodata fib.x87.nodata \
+       rpn.x87.nodata ldstr.x87.nodata ldsti.x87.nodata                \
+       ldstxr.x87.nodata ldstxi.x87.nodata                             \
+       ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata      \
+       cvt.x87.nodata branch.x87.nodata                                \
+       alu_add.x87.nodata alux_add.x87.nodata                          \
+       alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata       \
+       alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata        \
+       alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata         \
+       alu_lsh.x87.nodata alu_rsh.x87.nodata                           \
+       alu_com.x87.nodata alu_neg.x87.nodata                           \
+       fop_abs.x87.nodata fop_sqrt.x87.nodata                          \
+       varargs.x87.nodata stack.x87.nodata                             \
+       clobber.x87.nodata carry.x87.nodata call.x87.nodata             \
+       float.x87.nodata jmpr.x87.nodata put.x87.nodata                 \
+       va_list.x87.nodata
+$(x87_nodata_TESTS):   check.x87.nodata.sh
+       $(LN_S) $(srcdir)/check.x87.nodata.sh $@
+TESTS += $(x87_nodata_TESTS)
+endif
+
+if test_arm_arm
+#arm_TESTS = $(addsuffix .arm, $(base_TESTS))
+arm_TESTS =                                    \
+       3to2.arm add.arm align.arm allocai.arm  \
+       allocar.arm bp.arm divi.arm fib.arm     \
+       rpn.arm ldstr.arm ldsti.arm             \
+       ldstxr.arm ldstxi.arm                   \
+       ldstr-c.arm ldstxr-c.arm ldstxi-c.arm   \
+       cvt.arm hton.arm branch.arm             \
+       alu_add.arm alux_add.arm                \
+       alu_sub.arm alux_sub.arm alu_rsb.arm    \
+       alu_mul.arm alu_div.arm alu_rem.arm     \
+       alu_and.arm alu_or.arm alu_xor.arm      \
+       alu_lsh.arm alu_rsh.arm                 \
+       alu_com.arm alu_neg.arm                 \
+       fop_abs.arm fop_sqrt.arm                \
+       varargs.arm stack.arm                   \
+       clobber.arm carry.arm call.arm          \
+       float.arm jmpr.arm tramp.arm range.arm  \
+       ranger.arm put.arm va_list.arm
+$(arm_TESTS):  check.arm.sh
+       $(LN_S) $(srcdir)/check.arm.sh $@
+TESTS += $(arm_TESTS)
+endif
+
+if test_arm_swf
+#swf_TESTS = $(addsuffix .swf, $(base_TESTS))
+swf_TESTS =                                    \
+       3to2.swf add.swf allocai.swf            \
+       allocar.swf bp.swf divi.swf fib.swf     \
+       rpn.swf ldstr.swf ldsti.swf             \
+       ldstxr.swf ldstxi.swf                   \
+       ldstr-c.swf ldstxr-c.swf ldstxi-c.swf   \
+       cvt.swf hton.swf branch.swf             \
+       alu_add.swf alux_add.swf                \
+       alu_sub.swf alux_sub.swf alu_rsb.swf    \
+       alu_mul.swf alu_div.swf alu_rem.swf     \
+       alu_and.swf alu_or.swf alu_xor.swf      \
+       alu_lsh.swf alu_rsh.swf                 \
+       alu_com.swf alu_neg.swf                 \
+       fop_abs.swf fop_sqrt.swf                \
+       varargs.swf stack.swf                   \
+       clobber.swf carry.swf call.swf          \
+       float.swf jmpr.swf tramp.swf range.swf  \
+       ranger.swf put.swf va_list.swf
+$(swf_TESTS):  check.swf.sh
+       $(LN_S) $(srcdir)/check.swf.sh $@
+TESTS += $(swf_TESTS)
+if test_arm_arm
+#arm_swf_TESTS = $(addsuffix .arm.swf, $(base_TESTS))
+arm_swf_TESTS =                                                        \
+       3to2.arm.swf add.arm.swf allocai.arm.swf                \
+       allocar.arm.swf bp.arm.swf divi.arm.swf fib.arm.swf     \
+       rpn.arm.swf ldstr.arm.swf ldsti.arm.swf                 \
+       ldstxr.arm.swf ldstxi.arm.swf                           \
+       ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf       \
+       cvt.arm.swf hton.arm.swf branch.arm.swf                 \
+       alu_add.arm.swf alux_add.arm.swf                        \
+       alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf        \
+       alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf         \
+       alu_and.arm.swf alu_or.arm.swf alu_xor.arm.swf          \
+       alu_lsh.arm.swf alu_rsh.arm.swf                         \
+       alu_com.arm.swf alu_neg.arm.swf                         \
+       fop_abs.arm.swf fop_sqrt.arm.swf                        \
+       varargs.arm.swf stack.arm.swf                           \
+       clobber.arm.swf carry.arm.swf call.arm.swf              \
+       float.arm.swf jmpr.arm.swf tramp.arm.swf range.arm.swf  \
+       ranger.arm.swf put.arm.swf va_list.arm.swf
+$(arm_swf_TESTS):      check.arm.swf.sh
+       $(LN_S) $(srcdir)/check.arm.swf.sh $@
+TESTS += $(arm_swf_TESTS)
+endif
+if test_arm_arm
+#arm4_swf_TESTS = $(addsuffix .arm4.swf, $(base_TESTS))
+arm4_swf_TESTS =                                               \
+       3to2.arm4.swf add.arm4.swf allocai.arm4.swf             \
+       allocar.arm4.swf bp.arm4.swf divi.arm4.swf fib.arm4.swf \
+       rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf              \
+       ldstxr.arm4.swf ldstxi.arm4.swf                         \
+       ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf    \
+       cvt.arm4.swf hton.arm4.swf branch.arm4.swf              \
+       alu_add.arm4.swf alux_add.arm4.swf                      \
+       alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf     \
+       alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf      \
+       alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf       \
+       alu_lsh.arm4.swf alu_rsh.arm4.swf                       \
+       alu_com.arm4.swf alu_neg.arm4.swf                       \
+       fop_abs.arm4.swf fop_sqrt.arm4.swf                      \
+       varargs.arm4.swf stack.arm4.swf                         \
+       clobber.arm4.swf carry.arm4.swf call.arm4.swf           \
+       float.arm4.swf jmpr.arm4.swf tramp.arm4.swf             \
+       range.arm4.swf ranger.arm4.swf put.arm4.swf             \
+       va_list.arm4.swf
+$(arm4_swf_TESTS):     check.arm4.swf.sh
+       $(LN_S) $(srcdir)/check.arm4.swf.sh $@
+TESTS += $(arm4_swf_TESTS)
+endif
+endif
+
+if test_nodata
+#nodata_TESTS = $(addsuffix .nodata, $(base_TESTS))
+nodata_TESTS =                                         \
+       3to2.nodata add.nodata allocai.nodata           \
+       allocar.nodata bp.nodata divi.nodata fib.nodata \
+       rpn.nodata ldstr.nodata ldsti.nodata            \
+       ldstxr.nodata ldstxi.nodata                     \
+       ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata  \
+       cvt.nodata branch.nodata                        \
+       alu_add.nodata alux_add.nodata                  \
+       alu_sub.nodata alux_sub.nodata alu_rsb.nodata   \
+       alu_mul.nodata alu_div.nodata alu_rem.nodata    \
+       alu_and.nodata alu_or.nodata alu_xor.nodata     \
+       alu_lsh.nodata alu_rsh.nodata                   \
+       alu_com.nodata alu_neg.nodata                   \
+       fop_abs.nodata fop_sqrt.nodata                  \
+       varargs.nodata stack.nodata                     \
+       clobber.nodata carry.nodata call.nodata         \
+       float.nodata jmpr.nodata tramp.nodata           \
+       range.nodata ranger.nodata put.nodata           \
+       va_list.nodata
+$(nodata_TESTS):       check.nodata.sh
+       $(LN_S) $(srcdir)/check.nodata.sh $@
+TESTS += $(nodata_TESTS)
+endif
+
+TESTS += ccall self setcode nodata ctramp carg cva_list
+CLEANFILES = $(TESTS)
+
+#TESTS_ENVIRONMENT=$(srcdir)/run-test;
+
+debug:         lightning
+       $(LIBTOOL) --mode=execute gdb lightning
+
diff --git a/deps/lightning/check/add.ok b/deps/lightning/check/add.ok
new file mode 100644 (file)
index 0000000..f5f322c
--- /dev/null
@@ -0,0 +1 @@
+5 + 4 = 9
diff --git a/deps/lightning/check/add.tst b/deps/lightning/check/add.tst
new file mode 100644 (file)
index 0000000..9fc054d
--- /dev/null
@@ -0,0 +1,35 @@
+.data  32
+fmt:
+.c     "%d + %d = %d\n"
+
+.code
+       jmpi main
+
+       name test
+test:
+       prolog
+       arg $i0
+       arg $i1
+       getarg %r0 $i0
+       getarg %r1 $i1
+       addr %r0 %r0 %r1
+       retr %r0
+       epilog
+
+       name main
+main:
+       prolog
+       prepare
+               pushargi 5
+               pushargi 4
+       finishi test
+       retval %r0
+       prepare
+               pushargi fmt
+               ellipsis
+               pushargi 5
+               pushargi 4
+               pushargr %r0
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/align.ok b/deps/lightning/check/align.ok
new file mode 100644 (file)
index 0000000..f599e28
--- /dev/null
@@ -0,0 +1 @@
+10
diff --git a/deps/lightning/check/align.tst b/deps/lightning/check/align.tst
new file mode 100644 (file)
index 0000000..5d5348c
--- /dev/null
@@ -0,0 +1,28 @@
+.data  32
+fmt:
+.c     "%d\n"
+.code
+       prolog
+       movi %r0 1
+       jmpi L1                         /* should not generate this */
+       align $(__WORDSIZE / 8)         /* possible nops */
+L1:
+       bgei L4 %r0 10
+       addi %r0 %r0 1
+       jmpi L2
+       movr %r1 %r0                    /* to force jump generation */
+       align $(__WORDSIZE / 8)         /* possible nops */
+L2:
+       bgti L4 %r0 10                  /* never executed */
+       align $(__WORDSIZE / 8)         /* possible nops */
+L3:
+       jmpi L1
+       align $(__WORDSIZE / 8)         /* possible nops */
+L4:
+       prepare
+               pushargi fmt
+               ellipsis
+               pushargr %r0
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/all.tst b/deps/lightning/check/all.tst
new file mode 100644 (file)
index 0000000..ac4fc97
--- /dev/null
@@ -0,0 +1,415 @@
+.disasm                // only disassemble
+.code
+       prolog
+       allocai 32 $buf
+       arg $c
+       arg $uc
+       arg $s
+       arg $us
+       arg $i
+#if __WORDSIZE == 64
+       arg $ui
+       arg $l
+#endif
+       getarg_c %r0 $c
+       getarg_uc %r0 $uc
+       getarg_s %r0 $s
+       getarg_us %r0 $us
+       getarg_i %r0 $i
+#if __WORDSIZE == 64
+       getarg_ui %r0 $ui
+       getarg_l %r0 $l
+#endif
+       addr %r0 %r1 %r2
+       addi %r0 %r1 2
+       addcr %r0 %r1 %r2
+       addci %r0 %r1 2
+       addxr %r0 %r1 %r2
+       addxi %r0 %r1 2
+       subr %r0 %r1 %r2
+       subi %r0 %r1 2
+       subcr %r0 %r1 %r2
+       subci %r0 %r1 2
+       subxr %r0 %r1 %r2
+       subxi %r0 %r1 2
+       mulr %r0 %r1 %r2
+       muli %r0 %r1 2
+       qmulr %r0 %r1 %r2 %v0
+       qmuli %r0 %r1 %r2 3
+       qmulr_u %r0 %r1 %r2 %v0
+       qmuli_u %r0 %r1 %r2 3
+       divr %r0 %r1 %r2
+       divi %r0 %r1 2
+       divr_u %r0 %r1 %r2
+       divi_u %r0 %r1 2
+       qdivr %r0 %r1 %r2 %v0
+       qdivi %r0 %r1 %r2 3
+       qdivr_u %r0 %r1 %r2 %v0
+       qdivi_u %r0 %r1 %r2 3
+       remr %r0 %r1 %r2
+       remi %r0 %r1 2
+       remr_u %r0 %r1 %r2
+       remi_u %r0 %r1 2
+       andr %r0 %r1 %r2
+       andi %r0 %r1 2
+       orr %r0 %r1 %r2
+       ori %r0 %r1 2
+       xorr %r0 %r1 %r2
+       xori %r0 %r1 2
+       lshr %r0 %r1 %r2
+       lshi %r0 %r1 2
+       rshr %r0 %r1 %r2
+       rshi %r0 %r1 2
+       rshr_u %r0 %r1 %r2
+       rshi_u %r0 %r1 2
+       negr %r0 %r1
+       comr %r0 %r1
+       ltr %r0 %r1 %r2
+       lti %r0 %r1 2
+       ltr_u %r0 %r1 %r2
+       lti_u %r0 %r1 2
+       ler %r0 %r1 %r2
+       lei %r0 %r1 2
+       ler_u %r0 %r1 %r2
+       lei_u %r0 %r1 2
+       eqr %r0 %r1 %r2
+       eqi %r0 %r1 2
+       ger %r0 %r1 %r2
+       gei %r0 %r1 2
+       ger_u %r0 %r1 %r2
+       gei_u %r0 %r1 2
+       gtr %r0 %r1 %r2
+       gti %r0 %r1 2
+       gtr_u %r0 %r1 %r2
+       gti_u %r0 %r1 2
+       ner %r0 %r1 %r2
+       nei %r0 %r1 2
+       movr %r0 %r1
+       movi %r0 1
+       extr_c %r0 %r1
+       extr_uc %r0 %r1
+       extr_s %r0 %r1
+       extr_us %r0 %r1
+#if __WORDSIZE == 64
+       extr_i %r0 %r1
+       extr_ui %r0 %r1
+#endif
+       htonr %r0 %r1
+       ntohr %r0 %r1
+       ldr_c %r0 %r1
+       ldi_c %r0 0x80000000
+       ldr_uc %r0 %r1
+       ldi_uc %r0 0x80000000
+       ldr_s %r0 %r1
+       ldi_s %r0 0x80000000
+       ldr_us %r0 %r1
+       ldi_us %r0 0x80000000
+       ldr_i %r0 %r1
+       ldi_i %r0 0x80000000
+#if __WORDSIZE == 64
+       ldr_ui %r0 %r1
+       ldi_ui %r0 0x80000000
+       ldr_l %r0 %r1
+       ldi_l %r0 0x80000000
+#endif
+       ldxr_c %r0 %r1 %r2
+       ldxi_c %r0 %r1 1
+       ldxr_uc %r0 %r1 %r2
+       ldxi_uc %r0 %r1 1
+       ldxr_s %r0 %r1 %r2
+       ldxi_s %r0 %r1 2
+       ldxr_us %r0 %r1 %r2
+       ldxi_us %r0 %r1 2
+       ldxr_i %r0 %r1 %r2
+       ldxi_i %r0 %r1 4
+#if __WORDSIZE == 64
+       ldxr_ui %r0 %r1 %r2
+       ldxi_ui %r0 %r1 4
+       ldxr_l %r0 %r1 %r2
+       ldxi_l %r0 %r1 8
+#endif
+       str_c %r1 %r0
+       sti_c 0x80000000 %r1
+       str_s %r1 %r0
+       sti_s 0x80000000 %r1
+       str_i %r1 %r0
+       sti_i 0x80000000 %r1
+#if __WORDSIZE == 64
+       str_l %r1 %r0
+       sti_l 0x80000000 %r1
+#endif
+       stxr_c %r2 %r1 %r0
+       stxi_c 1 %r1 %r0
+       stxr_s %r2 %r1 %r0
+       stxi_s 2 %r1 %r0
+       stxr_i %r2 %r1 %r0
+       stxi_i 4 %r1 %r0
+#if __WORDSIZE == 64
+       stxr_l %r2 %r1 %r0
+       stxi_l 8 %r1 %r0
+#endif
+cond:
+       bltr cond %r0 %r1
+condi:
+       blti condi %r0 1
+condu:
+       bltr_u condu %r0 %r1
+condiu:
+       blti_u condiu %r0 1
+       bler cond %r0 %r1
+       blei condi %r0 1
+       bler_u condu %r0 %r1
+       blei_u condiu %r0 1
+bool:
+       beqr bool %r0 %r1
+booli:
+       beqi booli %r0 1
+       bger cond %r0 %r1
+       bgei condi %r0 1
+       bger_u condu %r0 %r1
+       bgei_u condiu %r0 1
+       bgtr cond %r0 %r1
+       bgti condi %r0 1
+       bgtr_u condu %r0 %r1
+       bgti_u condiu %r0 1
+       bner bool %r0 %r1
+       bnei booli %r0 1
+mask:
+       bmsr mask %r0 %r1
+maski:
+       bmsi maski %r0 1
+       bmcr mask %r0 %r1
+       bmci maski %r0 1
+as:
+       boaddr as %r0 %r1
+asi:
+       boaddi asi %r0 1
+asu:
+       boaddr_u as %r0 %r1
+       boaddi_u asi %r0 1
+       bxaddr as %r0 %r1
+       bxaddi asi %r0 1
+       bxaddr_u as %r0 %r1
+       bxaddi_u asi %r0 1
+       bosubr as %r0 %r1
+       bosubi asi %r0 1
+       bosubr_u as %r0 %r1
+       bosubi_u asi %r0 1
+       bxsubr as %r0 %r1
+       bxsubi asi %r0 1
+       bxsubr_u as %r0 %r1
+       bxsubi_u asi %r0 1
+label:
+       jmpr %r0
+       jmpi label
+       callr %r0
+       calli label
+       prepare
+       pushargr %r0
+       finishr %r0
+       prepare
+       pushargi 1
+       ellipsis
+       finishi 0x80000000
+       ret
+       retr %r1
+       reti 2
+       retval_c %r1
+       retval_uc %r1
+       retval_s %r1
+       retval_us %r1
+       retval_i %r1
+#if __WORDSIZE == 64
+       retval_ui %r1
+       retval_l %r1
+#endif
+       arg_f $f
+       getarg_f %f1 $f
+       addr_f %f0 %f1 %f2
+       addi_f %f0 %f1 0.5
+       subr_f %f0 %f1 %f2
+       subi_f %f0 %f1 0.5
+       mulr_f %f0 %f1 %f2
+       muli_f %f0 %f1 0.5
+       divr_f %f0 %f1 %f2
+       divi_f %f0 %f1 0.5
+       negr_f %f0 %f1
+       absr_f %f0 %f1
+       sqrtr_f %f0 %f1
+       ltr_f %r0 %f0 %f1
+       lti_f %r0 %f0 0.5
+       ler_f %r0 %f0 %f1
+       lei_f %r0 %f0 0.5
+       eqr_f %r0 %f0 %f1
+       eqi_f %r0 %f0 0.5
+       ger_f %r0 %f0 %f1
+       gei_f %r0 %f0 0.5
+       gtr_f %r0 %f0 %f1
+       gti_f %r0 %f0 0.5
+       ner_f %r0 %f0 %f1
+       nei_f %r0 %f0 0.5
+       unltr_f %r0 %f0 %f1
+       unlti_f %r0 %f0 0.5
+       unler_f %r0 %f0 %f1
+       unlei_f %r0 %f0 0.5
+       uneqr_f %r0 %f0 %f1
+       uneqi_f %r0 %f0 0.5
+       unger_f %r0 %f0 %f1
+       ungei_f %r0 %f0 0.5
+       ungtr_f %r0 %f0 %f1
+       ungti_f %r0 %f0 0.5
+       ltgtr_f %r0 %f0 %f1
+       ltgti_f %r0 %f0 0.5
+       ordr_f %r0 %f0 %f1
+       ordi_f %r0 %f0 0.5
+       unordr_f %r0 %f0 %f1
+       unordi_f %r0 %f0 0.5
+       truncr_f_i %r0 %f0
+#if __WORDSIZE == 64
+       truncr_f_l %r0 %f0
+#endif
+       extr_f %f0 %r0
+       extr_d_f %f0 %f1
+       movr_f %f0 %f1
+       movi_f %f0 1.5
+       ldr_f %f0 %r0
+       ldi_f %f0 0x80000000
+       ldxr_f %f0 %r0 %r1
+       ldxi_f %f0 %r0 4
+       str_f %r0 %f0
+       sti_f 0x80000000 %f0
+       stxr_f %r1 %r0 %f0
+       stxi_f 4 %r0 %f0
+/* FIXME: the bordr_d near the end triggers an assertion on riscv because
+ * its jump is too distant (the offset does not fit in a 12 bit signed int) */
+ord:
+       bltr_f ord %f0 %f1
+ordi:
+       blti_f ordi %f0 0.5
+       bler_f ord %f0 %f1
+       blei_f ordi %f0 0.5
+       beqr_f ord %f0 %f1
+       beqi_f ordi %f0 0.5
+       bger_f ord %f0 %f1
+       bgei_f ordi %f0 0.5
+       bgtr_f ord %f0 %f1
+       bgti_f ordi %f0 0.5
+       bner_f ord %f0 %f1
+       bnei_f ordi %f0 0.5
+unord:
+       bunltr_f unord %f0 %f1
+unordi:
+       bunlti_f unordi %f0 0.5
+       bunler_f unord %f0 %f1
+       bunlei_f unordi %f0 0.5
+       buneqr_f unord %f0 %f1
+       buneqi_f unordi %f0 0.5
+       bunger_f unord %f0 %f1
+       bungei_f unordi %f0 0.5
+       bungtr_f unord %f0 %f1
+       bungti_f unordi %f0 0.5
+       bltgtr_f unord %f0 %f1
+       bltgti_f unordi %f0 0.5
+       bordr_f unord %f0 %f1
+       bordi_f unordi %f0 0.5
+       bunordr_f unord %f0 %f1
+       bunordi_f unordi %f0 0.5
+       prepare
+       pushargr_f %f1
+       pushargi_f 0.5
+       finishi 0x80000000
+       retr_f %f1
+       reti_f 0.5
+       retval_f %f1
+       arg_d $f
+       getarg_d %f1 $f
+       addr_d %f0 %f1 %f2
+       addi_d %f0 %f1 0.5
+       subr_d %f0 %f1 %f2
+       subi_d %f0 %f1 0.5
+       mulr_d %f0 %f1 %f2
+       muli_d %f0 %f1 0.5
+       divr_d %f0 %f1 %f2
+       divi_d %f0 %f1 0.5
+       negr_d %f0 %f1
+       absr_d %f0 %f1
+       sqrtr_d %f0 %f1
+       ltr_d %r0 %f0 %f1
+       lti_d %r0 %f0 0.5
+       ler_d %r0 %f0 %f1
+       lei_d %r0 %f0 0.5
+       eqr_d %r0 %f0 %f1
+       eqi_d %r0 %f0 0.5
+       ger_d %r0 %f0 %f1
+       gei_d %r0 %f0 0.5
+       gtr_d %r0 %f0 %f1
+       gti_d %r0 %f0 0.5
+       ner_d %r0 %f0 %f1
+       nei_d %r0 %f0 0.5
+       unltr_d %r0 %f0 %f1
+       unlti_d %r0 %f0 0.5
+       unler_d %r0 %f0 %f1
+       unlei_d %r0 %f0 0.5
+       uneqr_d %r0 %f0 %f1
+       uneqi_d %r0 %f0 0.5
+       unger_d %r0 %f0 %f1
+       ungei_d %r0 %f0 0.5
+       ungtr_d %r0 %f0 %f1
+       ungti_d %r0 %f0 0.5
+       ltgtr_d %r0 %f0 %f1
+       ltgti_d %r0 %f0 0.5
+       ordr_d %r0 %f0 %f1
+       ordi_d %r0 %f0 0.5
+       unordr_d %r0 %f0 %f1
+       unordi_d %r0 %f0 0.5
+       truncr_d_i %r0 %f0
+#if __WORDSIZE == 64
+       truncr_d_l %r0 %f0
+#endif
+       extr_d %f0 %r0
+       extr_f_d %f0 %f1
+       movr_d %f0 %f1
+       movi_d %f0 1.5
+       ldr_d %f0 %r0
+       ldi_d %f0 0x80000000
+       ldxr_d %f0 %r0 %r1
+       ldxi_d %f0 %r0 8
+       str_d %r0 %f0
+       sti_d 0x80000000 %f0
+       stxr_d %r1 %r0 %f0
+       stxi_d 8 %r0 %f0
+       bltr_d ord %f0 %f1
+       blti_d ordi %f0 0.5
+       bler_d ord %f0 %f1
+       blei_d ordi %f0 0.5
+       beqr_d ord %f0 %f1
+       beqi_d ordi %f0 0.5
+       bger_d ord %f0 %f1
+       bgei_d ordi %f0 0.5
+       bgtr_d ord %f0 %f1
+       bgti_d ordi %f0 0.5
+       bner_d ord %f0 %f1
+       bnei_d ordi %f0 0.5
+       bunltr_d unord %f0 %f1
+       bunlti_d unordi %f0 0.5
+       bunler_d unord %f0 %f1
+       bunlei_d unordi %f0 0.5
+       buneqr_d unord %f0 %f1
+       buneqi_d unordi %f0 0.5
+       bunger_d unord %f0 %f1
+       bungei_d unordi %f0 0.5
+       bungtr_d unord %f0 %f1
+       bungti_d unordi %f0 0.5
+       bltgtr_d unord %f0 %f1
+       bltgti_d unordi %f0 0.5
+       bordr_d unord %f0 %f1
+       bordi_d unordi %f0 0.5
+       bunordr_d unord %f0 %f1
+       bunordi_d unordi %f0 0.5
+       prepare
+       pushargr_d %f1
+       pushargi_d 0.5
+       finishi 0x80000000
+       retr_d %f1
+       reti_d 0.5
+       retval_d %f1
diff --git a/deps/lightning/check/allocai.ok b/deps/lightning/check/allocai.ok
new file mode 100644 (file)
index 0000000..2962f7a
--- /dev/null
@@ -0,0 +1,2 @@
+received 7777
+succeeded
diff --git a/deps/lightning/check/allocai.tst b/deps/lightning/check/allocai.tst
new file mode 100644 (file)
index 0000000..c20cad0
--- /dev/null
@@ -0,0 +1,100 @@
+.data  128
+idfmt:
+.c     "received %d\n"
+failure_message:
+.c     "numbers don't add up to zero\n"
+report_message:
+.c     "failed: got %i instead of %i\n"
+succeeded_message:
+.c     "succeeded\n"
+
+.code
+       jmpi main
+
+/* C equivalent of identify below:
+static int
+identify (int arg)
+{
+  printf ("received %d\n", arg);
+  return arg;
+}
+ */
+       name identify
+identify:
+       prolog
+       arg $i
+       getarg %v0 $i                   /* keep the argument in %v0 for the return below */
+       prepare
+               pushargi idfmt
+               ellipsis
+               pushargr %v0
+       finishi @printf                 /* printf(idfmt, arg) */
+       retr %v0                        /* return the argument unchanged */
+       epilog
+
+       name identity_func
+identity_func:
+       prolog
+       arg $i
+       getarg %r1 $i
+
+       /* Store the argument in a word-sized stack slot ($off is its offset from %fp).  */
+       allocai $(__WORDSIZE >> 3) $off
+       stxi $off %fp %r1
+
+       /* Store the negative of the argument in a second word-sized stack slot.  */
+       allocai $(__WORDSIZE >> 3) $neg
+       negr %r2 %r1
+       stxi $neg %fp %r2
+
+       /* Invoke identify with the argument.  */
+       prepare
+               pushargr %r1
+       finishi identify
+
+       /* Ignore the result (no retval is fetched).  */
+
+       /* Reload the negative and the argument from their stack slots.  */
+       ldxi %r2 %fp $neg
+       ldxi %v1 %fp $off
+
+       /* Make sure they still add to zero; otherwise report a failure.  */
+       addr %r0 %v1 %r2
+       bnei branch %r0 0
+
+       /* Return the reloaded argument.  */
+       retr %v1
+
+       /* Display a failure message.  */
+branch:
+       prepare
+               pushargi failure_message
+               ellipsis
+       finishi @printf
+
+       /* Leave, still returning the reloaded argument.  */
+       retr %v1
+       epilog
+
+       name main                       /* checks that identity_func(7777) returns 7777 */
+main:
+       prolog
+       prepare
+               pushargi 7777
+       finishi identity_func
+       retval %r0                      /* %r0 = value returned by identity_func */
+       beqi succeeded %r0 7777         /* expected value came back: report success */
+       prepare
+               pushargi report_message
+               ellipsis
+               pushargr %r0            /* value actually received */
+               pushargi 7777           /* value expected */
+       finishi @printf
+       reti 1                          /* failure exit status */
+succeeded:
+       prepare
+               pushargi succeeded_message
+               ellipsis
+       finishi @printf
+       reti 0                          /* success exit status */
+       epilog
diff --git a/deps/lightning/check/allocar.ok b/deps/lightning/check/allocar.ok
new file mode 100644 (file)
index 0000000..516b1e7
--- /dev/null
@@ -0,0 +1,4 @@
+1 2 3
+3 4 5
+5 6 7
+7 8 9
diff --git a/deps/lightning/check/allocar.tst b/deps/lightning/check/allocar.tst
new file mode 100644 (file)
index 0000000..e3ee010
--- /dev/null
@@ -0,0 +1,403 @@
+#define szof_c                 1
+#define szof_uc                        szof_c
+#define szof_s                 2
+#define szof_us                        szof_s
+#define szof_i                 4
+#if __WORDSIZE == 64
+#  define szof_ui              szof_i
+#  define szof_l               8
+#endif
+#define szof_f                 4
+#define szof_d                 8
+
+#define FILL(T)                                                        \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                                        \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T                                    \
+       addr %v1 %v0 %r0                                        \
+       movi %r0 0                                              \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1                              \
+       str##T %v0 %r0                                          \
+       addi %r0 %r0 1                                          \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+#define FILLF(T)                                               \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                                        \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T                                    \
+       addr %v1 %v0 %r0                                        \
+       movi##T %f0 0.0                                         \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1                              \
+       str##T %v0 %f0                                          \
+       addi##T %f0 %f0 1.0                                     \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+
+#define fill_uc                fill_c
+#define fill_us                fill_s
+#define fill_ui                fill_i
+
+#define ARG(  T, N)                    arg    $arg##T##N
+#define ARGF( T, N)                    arg##T $arg##T##N
+#define ARG1( K, T)                    ARG##K(T, 0)
+#define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
+#define ARG3( K, T)    ARG2( K, T)     ARG##K(T, 2)
+#define ARG4( K, T)    ARG3( K, T)     ARG##K(T, 3)
+#define ARG5( K, T)    ARG4( K, T)     ARG##K(T, 4)
+#define ARG6( K, T)    ARG5( K, T)     ARG##K(T, 5)
+#define ARG7( K, T)    ARG6( K, T)     ARG##K(T, 6)
+#define ARG8( K, T)    ARG7( K, T)     ARG##K(T, 7)
+#define ARG9( K, T)    ARG8( K, T)     ARG##K(T, 8)
+#define ARG10(K, T)    ARG9( K, T)     ARG##K(T, 9)
+#define ARG11(K, T)    ARG10(K, T)     ARG##K(T, 10)
+#define ARG12(K, T)    ARG11(K, T)     ARG##K(T, 11)
+#define ARG13(K, T)    ARG12(K, T)     ARG##K(T, 12)
+#define ARG14(K, T)    ARG13(K, T)     ARG##K(T, 13)
+#define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
+#define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
+#define ARG_c(N)                       ARG##N( , _c)
+#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_s(N)                       ARG##N( , _s)
+#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_i(N)                       ARG##N( , _i)
+#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_l(N)                       ARG##N( , _l)
+#define ARG_f(N)                       ARG##N(F, _f)
+#define ARG_d(N)                       ARG##N(F, _d)
+
+#define CHK(N, T, V)                                           \
+       getarg %r0 $arg##T##V                                   \
+       ldxi##T %r1 %v0 $(V * szof##T)                          \
+       beqr N##T##V %r0 %r1                                    \
+       calli @abort                                            \
+N##T##V:
+#define CHKF(N, T, V)                                          \
+       getarg##T %f0 $arg##T##V                                \
+       ldxi##T %f1 %v0 $(V * szof##T)                          \
+       beqr##T N##T##V %f0 %f1                                 \
+       calli @abort                                            \
+N##T##V:
+
+#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
+#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
+#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
+#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
+#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
+#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
+#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
+#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
+#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
+#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
+#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
+#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
+#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
+#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
+#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
+#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+
+#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
+#define GET_us(N, M)           GET##N( , us##N, _us, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+
+#define PUSH(  T, V)           pushargi    V
+#define PUSHF( T, V)           pushargi##T V
+#define PUSH0( K, T)           /**/
+#define PUSH1( K, T)                                   PUSH##K(T, 0)
+#define PUSH2( K, T)           PUSH1( K, T)            PUSH##K(T, 1)
+#define PUSH3( K, T)           PUSH2( K, T)            PUSH##K(T, 2)
+#define PUSH4( K, T)           PUSH3( K, T)            PUSH##K(T, 3)
+#define PUSH5( K, T)           PUSH4( K, T)            PUSH##K(T, 4)
+#define PUSH6( K, T)           PUSH5( K, T)            PUSH##K(T, 5)
+#define PUSH7( K, T)           PUSH6( K, T)            PUSH##K(T, 6)
+#define PUSH8( K, T)           PUSH7( K, T)            PUSH##K(T, 7)
+#define PUSH9( K, T)           PUSH8( K, T)            PUSH##K(T, 8)
+#define PUSH10(K, T)           PUSH9( K, T)            PUSH##K(T, 9)
+#define PUSH11(K, T)           PUSH10(K, T)            PUSH##K(T, 10)
+#define PUSH12(K, T)           PUSH11(K, T)            PUSH##K(T, 11)
+#define PUSH13(K, T)           PUSH12(K, T)            PUSH##K(T, 12)
+#define PUSH14(K, T)           PUSH13(K, T)            PUSH##K(T, 13)
+#define PUSH15(K, T)           PUSH14(K, T)            PUSH##K(T, 14)
+#define PUSH16(K, T)           PUSH15(K, T)            PUSH##K(T, 15)
+
+#define PUSH_c( N)             PUSH##N( , _c)
+#define PUSH_uc(N)             PUSH##N( , _uc)
+#define PUSH_s( N)             PUSH##N( , _s)
+#define PUSH_us(N)             PUSH##N( , _us)
+#define PUSH_i( N)             PUSH##N( , _i)
+#define PUSH_ui(N)             PUSH##N( , _ui)
+#define PUSH_l( N)             PUSH##N( , _l)
+#define PUSH_f( N)             PUSH##N(F, _f)
+#define PUSH_d( N)             PUSH##N(F, _d)
+
+/* bottom function: end of the call chain; declares no arguments and just returns */
+#define DEF0(T)                                                        \
+       name test##T##_0                                        \
+test##T##_0:                                                   \
+       prolog                                                  \
+       ret                                                     \
+       epilog
+
+#define DEFN(N, M, T)                                          \
+       name test##T##_##N                                      \
+test##T##_##N:                                                 \
+       prolog                                                  \
+       arg $argp                                               \
+       /* caller's stack buffer (first argument) in %v0 */                               \
+       getarg %v0 $argp                                        \
+       ARG##T(N)                                               \
+       /* validate arguments against the buffer contents (abort on mismatch) */                                \
+       GET##T(N, M)                                            \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(N * szof##T)                         \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* copy stack buffer to heap buffer */                   \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi MEMCPY                                          \
+       /* stack buffer for next function in %v2 (allocar result plus %fp) */             \
+       movi %r0 $(M * szof##T)                                 \
+       allocar %v2 %r0                                         \
+       addr %v2 %v2 %fp                                        \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi M                                      \
+       finishi fill##T                                         \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(M)                                      \
+       finishi test##T##_##M                                   \
+       /* validate stack buffer: it must still match the heap copy */                             \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_##N##_done %r0 0                         \
+       calli @abort                                            \
+test##T##_##N##_done:                                          \
+       /* release heap buffer */                                \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+/* top function: entry of the call chain; builds the 16-element buffer and calls test##T##_16 */
+#define DEFX(T)                                                        \
+       name test##T##_17                                       \
+test##T##_17:                                                  \
+       prolog                                                  \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(16 * szof##T)                        \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* stack buffer for next function in %v2 (allocar result plus %fp) */             \
+       movi %r0 $(16 * szof##T)                                \
+       allocar %v2 %r0                                         \
+       addr %v2 %v2 %fp                                        \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi 16                                     \
+       finishi fill##T                                         \
+       /* copy stack buffer to heap buffer */                  \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi MEMCPY                                          \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(16)                                     \
+       finishi test##T##_16                                    \
+       /* validate stack buffer: it must still match the heap copy */                             \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_17_done %r0 0                            \
+       calli @abort                                            \
+test##T##_17_done:                                             \
+       /* release heap buffer */                                \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+#define DEF(  T)                                               \
+       DEF0( T)                                                \
+       DEFN( 1,  0, T)                                         \
+       DEFN( 2,  1, T)                                         \
+       DEFN( 3,  2, T)                                         \
+       DEFN( 4,  3, T)                                         \
+       DEFN( 5,  4, T)                                         \
+       DEFN( 6,  5, T)                                         \
+       DEFN( 7,  6, T)                                         \
+       DEFN( 8,  7, T)                                         \
+       DEFN( 9,  8, T)                                         \
+       DEFN(10,  9, T)                                         \
+       DEFN(11, 10, T)                                         \
+       DEFN(12, 11, T)                                         \
+       DEFN(13, 12, T)                                         \
+       DEFN(14, 13, T)                                         \
+       DEFN(15, 14, T)                                         \
+       DEFN(16, 15, T)                                         \
+       DEFX(T)
+
+#define CALL(T)                        calli test##T##_17
+
+.data  16
+fmt:
+.c     "%d %d %d\n"
+.code
+       jmpi main
+
+#if _AIX
+#  define MEMCPY               memcpy
+/* JIT-coded byte copy used instead of @memcpy on AIX.
+   NOTE(review): presumably needed because @memcpy fails there with
+   "error: Function not implemented (memcpy)" -- confirm. */
+       name memcpy
+memcpy:
+       prolog
+       arg $dst
+       arg $src
+       arg $len
+       getarg %r0 $dst
+       getarg %r1 $src
+       getarg %r2 $len
+       movr %v1 %r0                    /* remember dst for the return value */
+       blti memcpy_done %r2 1          /* nothing to copy when len < 1 */
+memcpy_loop:
+       subi %r2 %r2 1                  /* copy backwards: index len-1 down to 0 */
+       ldxr_c %v0 %r1 %r2              /* %v0 = src[%r2] */
+       stxr_c %r2 %r0 %v0              /* dst[%r2] = %v0 */
+       bgti memcpy_loop %r2 0
+memcpy_done:
+       retr %v1                        /* return dst */
+       epilog
+#else
+#  define MEMCPY               @memcpy
+#endif
+
+       FILL(_c)
+       FILL(_s)
+       FILL(_i)
+#if __WORDSIZE == 64
+       FILL(_l)
+#endif
+       FILLF(_f)
+       FILLF(_d)
+
+       DEF(_c)
+       DEF(_uc)
+       DEF(_s)
+       DEF(_us)
+       DEF(_i)
+#if __WORDSIZE == 64
+       DEF(_ui)
+       DEF(_l)
+#endif
+       DEF(_f)
+       DEF(_d)
+
+       name main
+main:
+       prolog
+
+       CALL(_c)                        // run the test_17 .. test_0 call chain for each type
+       CALL(_uc)
+       CALL(_s)
+       CALL(_us)
+       CALL(_i)
+#if __WORDSIZE == 64
+       CALL(_ui)
+       CALL(_l)
+#endif
+       CALL(_f)
+       CALL(_d)
+
+       // loop control: %v2 is also the next value to store (starts at 1)
+       movi %v2 1
+
+       // loop calling allocar until %v2 reaches 9 (four iterations, see allocar.ok)
+loop:
+       // allocate 12 bytes (room for three 4-byte ints); result goes to %v0
+       movi %r0 12
+       allocar %v0 %r0
+
+       // %v1 = running offset, used as an index relative to %fp
+       movr %v1 %v0
+
+       // store 1st value at the start of the area
+       stxr_i %v1 %fp %v2
+
+       // store 2nd value (previous + 1) 4 bytes further
+       addi %v2 %v2 1
+       addi %v1 %v1 4
+       stxr_i %v1 %fp %v2
+
+       // store 3rd value (previous + 1) 4 bytes further
+       addi %v2 %v2 1
+       addi %v1 %v1 4
+       stxr_i %v1 %fp %v2
+
+       // reload: rewind the offset to the start of the area
+       movr %v1 %v0
+
+       // load 1st value into %r0
+       ldxr_i %r0 %fp %v1
+
+       // load 2nd value into %r1
+       addi %v1 %v1 4
+       ldxr_i %r1 %fp %v1
+
+       // load 3rd value into %r2
+       addi %v1 %v1 4
+       ldxr_i %r2 %fp %v1
+
+       prepare
+               pushargi fmt
+               ellipsis
+               pushargr %r0
+               pushargr %r1
+               pushargr %r2
+       finishi @printf
+       blti loop %v2 9
+
+       ret
+       epilog
diff --git a/deps/lightning/check/alu.inc b/deps/lightning/check/alu.inc
new file mode 100644 (file)
index 0000000..0c259ea
--- /dev/null
@@ -0,0 +1,360 @@
+.data  8
+ok:                                    // success message: test files that include this file pass it to printf after their last check
+.c     "ok\n"
+
+/* ia64 code generation is not optimized for size.  Some operations
+ * expand to quite long sequences: the need for stops can leave no
+ * matching code template, so nops must be added; division/remainder
+ * need function calls; and float division requires a quite long
+ * sequence.
+ * (The brute force tests of all register combinations can easily
+ *  generate several GB of jit.)
+ */
+
+/* 3 operand */
+
+/* reg0 = reg1 op reg2
+ * ALUR(N, T, OP, I0, I1, V, R0, R1, R2): loads I0 into R1 and I1 into R2,
+ * applies the register form OP##r##T with R0 as destination, and compares
+ * R0 with the immediate V.  On a match it branches to a label built from
+ * OP, T, N and the three register names; otherwise it falls through to
+ * the abort() call.
+ */
+#define ALUR(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       movi %R1 I0                                     \
+       movi %R2 I1                                     \
+       OP##r##T %R0 %R1 %R2                            \
+       beqi OP##T##N##r_##R0##R1##R2 %R0 V             \
+       calli @abort                                    \
+OP##T##N##r_##R0##R1##R2:
+
+/* reg0 = reg1 op im
+ * ALUI: I1 is passed as the immediate operand of OP##i##T.  R2 is not
+ * needed as an operand, so it holds the expected value V and the result
+ * is compared register to register (beqr).  abort() is called on a
+ * mismatch.
+ */
+#define ALUI(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       movi %R1 I0                                     \
+       movi %R2 V                                      \
+       OP##i##T %R0 %R1 I1                             \
+       beqr OP##T##N##i_##R0##R1##R2 %R0 %R2           \
+       calli @abort                                    \
+OP##T##N##i_##R0##R1##R2:
+
+/* reg0 = reg0 op reg1
+ * ALUR0: the destination is also the first source operand.  R0 = I0,
+ * R1 = I1, and R2 holds the expected value V for the beqr comparison.
+ * abort() is called on a mismatch.
+ */
+#define ALUR0(N, T, OP, I0, I1, V, R0, R1, R2)         \
+       movi %R0 I0                                     \
+       movi %R1 I1                                     \
+       movi %R2 V                                      \
+       OP##r##T %R0 %R0 %R1                            \
+       beqr OP##T##N##r_0##R0##R1##R2 %R0 %R2          \
+       calli @abort                                    \
+OP##T##N##r_0##R0##R1##R2:
+
+/* reg0 = reg1 op reg0
+ * ALUR1: the destination is also the second source operand.  R0 is
+ * loaded with I1 and R1 with I0, so the operation is still I0 op I1.
+ * R2 holds the expected value V; abort() is called on a mismatch.
+ */
+#define ALUR1(N, T, OP, I0, I1, V, R0, R1, R2)         \
+       movi %R0 I1                                     \
+       movi %R1 I0                                     \
+       movi %R2 V                                      \
+       OP##r##T %R0 %R1 %R0                            \
+       beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2          \
+       calli @abort                                    \
+OP##T##N##r_1##R0##R1##R2:
+
+/* reg0 = reg0 op im
+ * ALUI0: immediate form with the destination also used as the source.
+ * R1 holds the expected value V; R2 is not loaded and only contributes
+ * to the label name.  abort() is called on a mismatch.
+ */
+#define ALUI0(N, T, OP, I0, I1, V, R0, R1, R2)         \
+       movi %R0 I0                                     \
+       movi %R1 V                                      \
+       OP##i##T %R0 %R0 I1                             \
+       beqr OP##T##N##i_0##R0##R1##R2 %R0 %R1          \
+       calli @abort                                    \
+OP##T##N##i_0##R0##R1##R2:
+/* ALU3: run the five 3-operand checks on one register assignment. */
+#define ALU3(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       ALUR(N, T, OP, I0, I1, V, R0, R1, R2)           \
+       ALUI(N, T, OP, I0, I1, V, R0, R1, R2)           \
+       ALUR0(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       ALUR1(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       ALUI0(N, T, OP, I0, I1, V, R0, R1, R2)
+/* ALU2: ALU3 as given, then again with R1 and R2 swapped. */
+#define ALU2(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       ALU3(N, T, OP, I0, I1, V, R0, R1, R2)           \
+       ALU3(N, T, OP, I0, I1, V, R0, R2, R1)
+/* ALU1: ALU2 as given, with R0/R1 exchanged, and with R0/R2 exchanged;
+ * together with ALU2 this visits all six orderings of the registers. */
+#define ALU1(N, T, OP, I0, I1, V, R0, R1, R2)          \
+       ALU2(N, T, OP, I0, I1, V, R0, R1, R2)           \
+       ALU2(N, T, OP, I0, I1, V, R1, R0, R2)           \
+       ALU2(N, T, OP, I0, I1, V, R2, R1, R0)
+/* ALU(N, T, OP, I0, I1, V): top-level 3-operand integer check.
+ * N is a test number used only in label names, T an optional type suffix
+ * pasted after the opcode, and V the expected result of I0 OP I1.
+ * On ia64 only two fixed triples are checked with ALU3; elsewhere ALU1
+ * is run over the triples listed below, drawn from v0-v2 and r0-r2.
+ * NOTE(review): unlike ALUX, the (v1, v2, r0) triple is not listed
+ * here -- confirm whether that is intentional.
+ */
+#if __ia64__
+#  define ALU(N, T, OP, I0, I1, V)                     \
+       ALU3(N, T, OP, I0, I1, V, r0, r1, r2)           \
+       ALU3(N, T, OP, I0, I1, V, v0, v1, v2)
+#else
+#  define ALU(N, T, OP, I0, I1, V)                     \
+       ALU1(N, T, OP, I0, I1, V, v0, v1, v2)           \
+       ALU1(N, T, OP, I0, I1, V, v0, v1, r0)           \
+       ALU1(N, T, OP, I0, I1, V, v0, v1, r1)           \
+       ALU1(N, T, OP, I0, I1, V, v0, v1, r2)           \
+       ALU1(N, T, OP, I0, I1, V, v1, v2, r1)           \
+       ALU1(N, T, OP, I0, I1, V, v1, v2, r2)           \
+       ALU1(N, T, OP, I0, I1, V, v2, r0, r1)           \
+       ALU1(N, T, OP, I0, I1, V, v2, r0, r2)           \
+       ALU1(N, T, OP, I0, I1, V, r0, r1, r2)
+#endif
+
+/* 3 carry set/propagate */
+
+/*
+ * General shape of the four ALUX?? checks:
+ * r0 = i0
+ * r1 = i1
+ * r2 = 0
+ * r0 = r0 opc r1      <only want carry>
+ * r2 = r2 opx r2      <r2 must match v>
+ *
+ * ALUXII: both steps use the immediate forms (OP##ci with I1, OP##xi
+ * with 0), so R1 is never loaded and only contributes to the label name.
+ * abort() is called when R2 differs from V.
+ */
+#define ALUXII(N, OP, I0, I1, V, R0, R1, R2)           \
+       movi %R0 I0                                     \
+       movi %R2 0                                      \
+       OP##ci %R0 %R0 I1                               \
+       OP##xi %R2 %R2 0                                \
+       beqi OP##N##ii##R0##R1##R2 %R2 V                \
+       calli @abort                                    \
+OP##N##ii##R0##R1##R2:
+/* ALUXIR: first step with the immediate form (OP##ci), second step with
+ * the register form (OP##xr on R2, which holds 0); R1 is label-only. */
+#define ALUXIR(N, OP, I0, I1, V, R0, R1, R2)           \
+       movi %R0 I0                                     \
+       movi %R2 0                                      \
+       OP##ci %R0 %R0 I1                               \
+       OP##xr %R2 %R2 %R2                              \
+       beqi OP##N##ir##R0##R1##R2 %R2 V                \
+       calli @abort                                    \
+OP##N##ir##R0##R1##R2:
+/* ALUXRI: first step with the register form (OP##cr, I1 loaded into R1),
+ * second step with the immediate form (OP##xi with 0). */
+#define ALUXRI(N, OP, I0, I1, V, R0, R1, R2)           \
+       movi %R0 I0                                     \
+       movi %R1 I1                                     \
+       movi %R2 0                                      \
+       OP##cr %R0 %R0 %R1                              \
+       OP##xi %R2 %R2 0                                \
+       beqi OP##N##ri##R0##R1##R2 %R2 V                \
+       calli @abort                                    \
+OP##N##ri##R0##R1##R2:
+/* ALUXRR: both steps use the register forms (OP##cr with I1 in R1, then
+ * OP##xr on R2, which holds 0). */
+#define ALUXRR(N, OP, I0, I1, V, R0, R1, R2)           \
+       movi %R0 I0                                     \
+       movi %R1 I1                                     \
+       movi %R2 0                                      \
+       OP##cr %R0 %R0 %R1                              \
+       OP##xr %R2 %R2 %R2                              \
+       beqi OP##N##rr##R0##R1##R2 %R2 V                \
+       calli @abort                                    \
+OP##N##rr##R0##R1##R2:
+/* ALUX2: run the four carry checks on one register assignment. */
+#define ALUX2(N, OP, I0, I1, V, R0, R1, R2)            \
+        ALUXII(N, OP, I0, I1, V, R0, R1, R2)           \
+        ALUXIR(N, OP, I0, I1, V, R0, R1, R2)           \
+        ALUXRI(N, OP, I0, I1, V, R0, R1, R2)           \
+        ALUXRR(N, OP, I0, I1, V, R0, R1, R2)
+/* ALUX1: ALUX2 as given, then again with R1 and R2 swapped. */
+#define ALUX1(N, OP, I0, I1, V, R0, R1, R2)            \
+       ALUX2(N, OP, I0, I1, V, R0, R1, R2)             \
+       ALUX2(N, OP, I0, I1, V, R0, R2, R1)
+/* ALUX0: ALUX1 as given, with R0/R1 exchanged, and with R0/R2 exchanged. */
+#define ALUX0(N, OP, I0, I1, V, R0, R1, R2)            \
+       ALUX1(N, OP, I0, I1, V, R0, R1, R2)             \
+       ALUX1(N, OP, I0, I1, V, R1, R0, R2)             \
+       ALUX1(N, OP, I0, I1, V, R2, R1, R0)
+/* ALUX(N, OP, I0, I1, V): top-level carry check; N only appears in label
+ * names and V is the value expected in the second-step register.
+ * On ia64 only two fixed triples are checked with ALUX2; elsewhere
+ * ALUX0 is run over the triples listed below.
+ */
+#if __ia64__
+#  define ALUX(N, OP, I0, I1, V)                       \
+       ALUX2(N, OP, I0, I1, V, r0, r1, r2)             \
+       ALUX2(N, OP, I0, I1, V, v0, v1, v2)
+#else
+#  define ALUX(N, OP, I0, I1, V)                       \
+       ALUX0(N, OP, I0, I1, V, v0, v1, v2)             \
+       ALUX0(N, OP, I0, I1, V, v0, v1, r0)             \
+       ALUX0(N, OP, I0, I1, V, v0, v1, r1)             \
+       ALUX0(N, OP, I0, I1, V, v0, v1, r2)             \
+       ALUX0(N, OP, I0, I1, V, v1, v2, r0)             \
+       ALUX0(N, OP, I0, I1, V, v1, v2, r1)             \
+       ALUX0(N, OP, I0, I1, V, v1, v2, r2)             \
+       ALUX0(N, OP, I0, I1, V, v2, r0, r1)             \
+       ALUX0(N, OP, I0, I1, V, v2, r0, r2)             \
+       ALUX0(N, OP, I0, I1, V, r0, r1, r2)
+#endif
+
+/* unary int
+ * UNR(N, OP, I, V, R0, R1): loads I into R1, applies OP##r with R0 as
+ * destination and compares R0 with the immediate V; abort() is called
+ * on a mismatch.
+ */
+
+#define UNR(N, OP, I, V, R0, R1)                       \
+       movi %R1 I                                      \
+       OP##r %R0 %R1                                   \
+       beqi OP##N##R0##R1 %R0 V                        \
+       calli @abort                                    \
+OP##N##R0##R1:
+/* UNRC: like UNR, but source and destination are the same register (R0);
+ * R1 only contributes to the label name. */
+#define UNRC(N, OP, I, V, R0, R1)                      \
+       movi %R0 I                                      \
+       OP##r %R0 %R0                                   \
+       beqi OP##N##c##R0##R1 %R0 V                     \
+       calli @abort                                    \
+OP##N##c##R0##R1:
+/* UN2: both unary checks (UNR and UNRC) on one register pair. */
+#define UN2(N, OP, I, V, R0, R1)                       \
+       UNR(N, OP, I, V, R0, R1)                        \
+       UNRC(N, OP, I, V, R0, R1)
+/* UN1: UN2 as given, then again with the two registers swapped. */
+#define UN1(N, OP, I, V, R0, R1)                       \
+       UN2(N, OP, I, V, R0, R1)                        \
+       UN2(N, OP, I, V, R1, R0)
+/* UN(N, OP, I, V): top-level unary integer check; V is the expected
+ * value of OP applied to I and N only appears in label names.
+ * On ia64 two fixed pairs are checked with UN2; elsewhere UN1 is run on
+ * every unordered pair of the six registers v0-v2 and r0-r2.
+ */
+#if __ia64__
+#  define UN(N, OP, I, V)                              \
+       UN2(N, OP, I, V, r0, r1)                        \
+       UN2(N, OP, I, V, v0, v1)
+#else
+#  define UN(N, OP, I, V)                              \
+       UN1(N, OP, I, V, v0, v1)                        \
+       UN1(N, OP, I, V, v0, v2)                        \
+       UN1(N, OP, I, V, v0, r0)                        \
+       UN1(N, OP, I, V, v0, r1)                        \
+       UN1(N, OP, I, V, v0, r2)                        \
+       UN1(N, OP, I, V, v1, v2)                        \
+       UN1(N, OP, I, V, v1, r0)                        \
+       UN1(N, OP, I, V, v1, r1)                        \
+       UN1(N, OP, I, V, v1, r2)                        \
+       UN1(N, OP, I, V, v2, r0)                        \
+       UN1(N, OP, I, V, v2, r1)                        \
+       UN1(N, OP, I, V, v2, r2)                        \
+       UN1(N, OP, I, V, r0, r1)                        \
+       UN1(N, OP, I, V, r0, r2)                        \
+       UN1(N, OP, I, V, r1, r2)
+#endif
+
+/* reg0 = reg1 op reg2
+ * FOPR: float counterpart of ALUR.  T (the type suffix, e.g. _f or _d as
+ * used by the test files) is pasted onto movi, the opcode and the branch.
+ * F0 is compared with the immediate V; abort() is called on a mismatch.
+ */
+#define FOPR(N, T, OP, I0, I1, V, F0, F1, F2)          \
+       movi##T %F1 I0                                  \
+       movi##T %F2 I1                                  \
+       OP##r##T %F0 %F1 %F2                            \
+       beqi##T OP##T##N##F0##F1##F2 %F0 V              \
+       calli @abort                                    \
+OP##T##N##F0##F1##F2:
+
+/* reg0 = reg0 op reg1
+ * FOPR0: the destination is also the first source operand; F2 is not
+ * loaded and only contributes to the label name.
+ */
+#define FOPR0(N, T, OP, I0, I1, V, F0, F1, F2)         \
+       movi##T %F0 I0                                  \
+       movi##T %F1 I1                                  \
+       OP##r##T %F0 %F0 %F1                            \
+       beqi##T OP##T##N##0##F0##F1##F2 %F0 V           \
+       calli @abort                                    \
+OP##T##N##0##F0##F1##F2:
+
+/* reg1 = reg0 op reg1
+ * FOPR1: the destination is the second source operand (F1), and it is
+ * F1 that is compared with V; F2 only contributes to the label name.
+ */
+#define FOPR1(N, T, OP, I0, I1, V, F0, F1, F2)         \
+       movi##T %F0 I0                                  \
+       movi##T %F1 I1                                  \
+       OP##r##T %F1 %F0 %F1                            \
+       beqi##T OP##T##N##1##F0##F1##F2 %F1 V           \
+       calli @abort                                    \
+OP##T##N##1##F0##F1##F2:
+
+/* reg0 = reg1 op im
+ * FOPI: I1 is the immediate operand of OP##i##T; F2 holds the expected
+ * value V and the result is compared register to register (beqr##T).
+ */
+#define FOPI(N, T, OP, I0, I1, V, F0, F1, F2)          \
+       movi##T %F1 I0                                  \
+       movi##T %F2 V                                   \
+       OP##i##T %F0 %F1 I1                             \
+       beqr##T OP##T##N##i##F0##F1##F2 %F0 %F2         \
+       calli @abort                                    \
+OP##T##N##i##F0##F1##F2:
+
+/* reg0 = reg0 op im
+ * FOPI0: immediate form with the destination also used as the source;
+ * F2 holds the expected value V and F1 only contributes to the label.
+ */
+#define FOPI0(N, T, OP, I0, I1, V, F0, F1, F2)         \
+       movi##T %F0 I0                                  \
+       movi##T %F2 V                                   \
+       OP##i##T %F0 %F0 I1                             \
+       beqr##T OP##T##N##i0##F0##F1##F2 %F0 %F2        \
+       calli @abort                                    \
+OP##T##N##i0##F0##F1##F2:
+/* FOP1: run the five float 3-operand checks on one register assignment. */
+#define FOP1(N, T, OP, I0, I1, V, F0, F1, F2)          \
+       FOPR(N, T, OP, I0, I1, V, F0, F1, F2)           \
+       FOPR0(N, T, OP, I0, I1, V, F0, F1, F2)          \
+       FOPR1(N, T, OP, I0, I1, V, F0, F1, F2)          \
+       FOPI(N, T, OP, I0, I1, V, F0, F1, F2)           \
+       FOPI0(N, T, OP, I0, I1, V, F0, F1, F2)
+/* FOP(N, T, OP, I0, I1, V): top-level float 3-operand check.  ia64 uses a
+ * single register triple; elsewhere FOP1 is run on four triples that
+ * between them touch f0 through f5. */
+#if __ia64__
+#  define  FOP(N, T, OP, I0, I1, V)                    \
+       FOP1(N, T, OP, I0, I1, V, f0, f1, f2)
+#else
+#  define  FOP(N, T, OP, I0, I1, V)                    \
+       FOP1(N, T, OP, I0, I1, V, f0, f1, f2)           \
+       FOP1(N, T, OP, I0, I1, V, f0, f2, f3)           \
+       FOP1(N, T, OP, I0, I1, V, f0, f3, f4)           \
+       FOP1(N, T, OP, I0, I1, V, f0, f5, f1)
+#endif
+
+/* unary float
+ * FUNR(N, T, OP, I, V, R0, R1): loads I into R1, applies OP##r##T with
+ * R0 as destination and compares R0 with the immediate V using
+ * beqi##T; abort() is called on a mismatch.
+ */
+
+#define FUNR(N, T, OP, I, V, R0, R1)                   \
+       movi##T %R1 I                                   \
+       OP##r##T %R0 %R1                                \
+       beqi##T OP##N##T##R0##R1 %R0 V                  \
+       calli @abort                                    \
+OP##N##T##R0##R1:
+/* FUNRC: like FUNR, but source and destination are the same register
+ * (R0); R1 only contributes to the label name. */
+#define FUNRC(N, T, OP, I, V, R0, R1)                  \
+       movi##T %R0 I                                   \
+       OP##r##T %R0 %R0                                \
+       beqi##T OP##N##T##c##R0##R1 %R0 V               \
+       calli @abort                                    \
+OP##N##T##c##R0##R1:
+/* FUN2: both unary float checks (FUNR and FUNRC) on one register pair. */
+#define FUN2(N, T, OP, I, V, R0, R1)                   \
+       FUNR(N, T, OP, I, V, R0, R1)                    \
+       FUNRC(N, T, OP, I, V, R0, R1)
+/* FUN1: FUN2 as given, then again with the two registers swapped. */
+#define FUN1(N, T, OP, I, V, R0, R1)                   \
+       FUN2(N, T, OP, I, V, R0, R1)                    \
+       FUN2(N, T, OP, I, V, R1, R0)
+/* FUN(N, T, OP, I, V): top-level unary float check.  ia64 uses the single
+ * pair (f0, f1) with FUN2; elsewhere FUN1 pairs f0 with each of f1-f5. */
+#if __ia64__
+#  define FUN(N, T, OP, I, V)                          \
+       FUN2(N, T, OP, I, V, f0, f1)
+#else
+#  define FUN(N, T, OP, I, V)                          \
+       FUN1(N, T, OP, I, V, f0, f1)                    \
+       FUN1(N, T, OP, I, V, f0, f2)                    \
+       FUN1(N, T, OP, I, V, f0, f3)                    \
+       FUN1(N, T, OP, I, V, f0, f4)                    \
+       FUN1(N, T, OP, I, V, f0, f5)
+#endif
+
+/* unordered comparison unary float
+ * UFUNR: same sequence as FUNR, except that the result is tested with
+ * buneqi##T instead of beqi##T.
+ * NOTE(review): presumably this lets the check pass when the comparison
+ * is unordered (NaN operands) -- confirm against the lightning manual.
+ */
+
+#define UFUNR(N, T, OP, I, V, R0, R1)                  \
+       movi##T %R1 I                                   \
+       OP##r##T %R0 %R1                                \
+       buneqi##T OP##N##T##u##R0##R1 %R0 V             \
+       calli @abort                                    \
+OP##N##T##u##R0##R1:
+/* UFUNRC: like UFUNR, but source and destination are the same register
+ * (R0); R1 only contributes to the label name. */
+#define UFUNRC(N, T, OP, I, V, R0, R1)                 \
+       movi##T %R0 I                                   \
+       OP##r##T %R0 %R0                                \
+       buneqi##T OP##N##T##uc##R0##R1 %R0 V            \
+       calli @abort                                    \
+OP##N##T##uc##R0##R1:
+/* UFUN2: both buneq-based checks (UFUNR and UFUNRC) on one register pair. */
+#define UFUN2(N, T, OP, I, V, R0, R1)                  \
+       UFUNR(N, T, OP, I, V, R0, R1)                   \
+       UFUNRC(N, T, OP, I, V, R0, R1)
+/* UFUN1: UFUN2 as given, then again with the two registers swapped. */
+#define UFUN1(N, T, OP, I, V, R0, R1)                  \
+       UFUN2(N, T, OP, I, V, R0, R1)                   \
+       UFUN2(N, T, OP, I, V, R1, R0)
+/* UFUN(N, T, OP, I, V): top-level buneq-based unary float check.  ia64
+ * uses the single pair (f0, f1); elsewhere UFUN1 pairs f0 with f1-f5. */
+#if __ia64__
+#  define UFUN(N, T, OP, I, V)                         \
+       UFUN2(N, T, OP, I, V, f0, f1)
+#else
+#  define UFUN(N, T, OP, I, V)                         \
+       UFUN1(N, T, OP, I, V, f0, f1)                   \
+       UFUN1(N, T, OP, I, V, f0, f2)                   \
+       UFUN1(N, T, OP, I, V, f0, f3)                   \
+       UFUN1(N, T, OP, I, V, f0, f4)                   \
+       UFUN1(N, T, OP, I, V, f0, f5)
+#endif
+/* Define $NaN, $Inf and $nInf from floating-point divisions by zero.
+ * NOTE(review): presumably for use by test files including this file. */
+.      $( $NaN =  0.0/0.0)
+.      $( $Inf =  1.0/0.0)
+.      $($nInf = -1.0/0.0)
diff --git a/deps/lightning/check/alu_add.ok b/deps/lightning/check/alu_add.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_add.tst b/deps/lightning/check/alu_add.tst
new file mode 100644 (file)
index 0000000..16cdf38
--- /dev/null
@@ -0,0 +1,47 @@
+#include "alu.inc"
+/* Addition checks.  Each ADD() line expands, through ALU() or FOP() from
+ * alu.inc, into code that computes I0 + I1 and calls abort() unless the
+ * result equals V. */
+.code
+       prolog
+
+#define ADD(N, I0, I1, V)      ALU(N, , add, I0, I1, V)
+
+       ADD(0,  0x7fffffff, 1,          0x80000000)
+       ADD(1,  1,          0x7fffffff, 0x80000000)
+       ADD(2,  0x80000000, 1,          0x80000001)
+       ADD(3,  1,          0x80000000, 0x80000001)
+       ADD(4,  0x7fffffff, 0x80000000, 0xffffffff)
+       ADD(5,  0x80000000, 0x7fffffff, 0xffffffff)
+       ADD(6,  0x7fffffff, 0,          0x7fffffff)
+       ADD(7,  0,          0x7fffffff, 0x7fffffff)
+#if __WORDSIZE == 32                   /* these sums wrap around at 32 bits */
+       ADD(8,  0x7fffffff, 0xffffffff, 0x7ffffffe)
+       ADD(9,  0xffffffff, 0x7fffffff, 0x7ffffffe)
+       ADD(10, 0xffffffff, 0xffffffff, 0xfffffffe)
+#else                                  /* same operands without wrap, then the 64-bit boundary cases */
+       ADD(8,  0x7fffffff,         0xffffffff,         0x17ffffffe)
+       ADD(9,  0xffffffff,         0x7fffffff,         0x17ffffffe)
+       ADD(10, 0xffffffff,         0xffffffff,         0x1fffffffe)
+       ADD(11, 0x7fffffffffffffff, 1,                  0x8000000000000000)
+       ADD(12, 1,                  0x7fffffffffffffff, 0x8000000000000000)
+       ADD(13, 0x8000000000000000, 1,                  0x8000000000000001)
+       ADD(14, 1,                  0x8000000000000000, 0x8000000000000001)
+       ADD(15, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+       ADD(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       ADD(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7ffffffffffffffe)
+       ADD(18, 0x7fffffffffffffff, 0x7fffffffffffffff, 0xfffffffffffffffe)
+       ADD(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffffe)
+#endif
+/* Float (_f) and double (_d) forms: ADD is redefined on top of FOP().
+ * The constants are exactly representable, so exact equality is safe. */
+#undef ADD
+#define ADD(N, T, I0, I1, V)   FOP(N, T, add, I0, I1, V)
+       ADD(0, _f,      -0.5,       0.5,        0.0)
+       ADD(1, _f,       0.25,      0.75,       1.0)
+       ADD(0, _d,      -0.5,       0.5,        0.0)
+       ADD(1, _d,       0.25,      0.75,       1.0)
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_and.ok b/deps/lightning/check/alu_and.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_and.tst b/deps/lightning/check/alu_and.tst
new file mode 100644 (file)
index 0000000..7474271
--- /dev/null
@@ -0,0 +1,36 @@
+#include "alu.inc"
+/* Bitwise AND checks.  Each AND() line expands, through ALU() from
+ * alu.inc, into code that computes I0 & I1 and calls abort() unless the
+ * result equals V. */
+.code
+       prolog
+
+#define AND(N, I0, I1, V)      ALU(N, , and, I0, I1, V)
+
+       AND(0,  0x7fffffff, 1,          1)
+       AND(1,  1,          0x7fffffff, 1)
+       AND(2,  0x80000000, 1,          0)
+       AND(3,  1,          0x80000000, 0)
+       AND(4,  0x7fffffff, 0x80000000, 0)
+       AND(5,  0x80000000, 0x7fffffff, 0)
+       AND(6,  0x7fffffff, 0xffffffff, 0x7fffffff)
+       AND(7,  0xffffffff, 0x7fffffff, 0x7fffffff)
+       AND(8,  0xffffffff, 0xffffffff, 0xffffffff)
+       AND(9,  0x7fffffff, 0,          0)
+       AND(10, 0,          0x7fffffff, 0)
+#if __WORDSIZE == 64                   /* same patterns widened to 64 bits */
+       AND(11, 0x7fffffffffffffff, 1,                  1)
+       AND(12, 1,                  0x7fffffffffffffff, 1)
+       AND(13, 0x8000000000000000, 1,                  0)
+       AND(14, 1,                  0x8000000000000000, 0)
+       AND(15, 0x7fffffffffffffff, 0x8000000000000000, 0)
+       AND(16, 0x8000000000000000, 0x7fffffffffffffff, 0)
+       AND(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff)
+       AND(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff)
+       AND(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+#endif
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_com.ok b/deps/lightning/check/alu_com.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_com.tst b/deps/lightning/check/alu_com.tst
new file mode 100644 (file)
index 0000000..581c940
--- /dev/null
@@ -0,0 +1,33 @@
+#include "alu.inc"
+/* Bitwise complement checks.  Each COM() line expands, through UN() from
+ * alu.inc, into code that complements I0 and calls abort() unless the
+ * result equals V. */
+.code
+       prolog
+
+#define COM(N, I0, V)          UN(N, com, I0, V)
+
+#if __WORDSIZE == 32
+       COM(0,  0,                      0xffffffff)
+       COM(1,  1,                      0xfffffffe)
+       COM(2,  0xffffffff,             0)
+       COM(3,  0x80000000,             0x7fffffff)
+       COM(4,  0x7fffffff,             0x80000000)
+       COM(5,  0x80000001,             0x7ffffffe)
+#else                                  /* 64-bit word: the upper 32 bits of a 32-bit constant flip to ones */
+       COM(0,  0,                      0xffffffffffffffff)
+       COM(1,  1,                      0xfffffffffffffffe)
+       COM(2,  0xffffffff,             0xffffffff00000000)
+       COM(3,  0x80000000,             0xffffffff7fffffff)
+       COM(4,  0x7fffffff,             0xffffffff80000000)
+       COM(5,  0x80000001,             0xffffffff7ffffffe)
+       COM(6,  0xffffffffffffffff,     0)
+       COM(7,  0x8000000000000000,     0x7fffffffffffffff)
+       COM(8,  0x7fffffffffffffff,     0x8000000000000000)
+       COM(9,  0x8000000000000001,     0x7ffffffffffffffe)
+#endif
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_div.ok b/deps/lightning/check/alu_div.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_div.tst b/deps/lightning/check/alu_div.tst
new file mode 100644 (file)
index 0000000..97e024d
--- /dev/null
@@ -0,0 +1,83 @@
+#include "alu.inc"
+/* Division checks.  DIV() uses the plain opcode and UDIV() the _u
+ * suffixed one; both expand through ALU() from alu.inc into code that
+ * calls abort() unless I0 / I1 equals V. */
+.code
+       prolog
+
+#define DIV(N, I0, I1, V)      ALU(N, , div, I0, I1, V)
+#define UDIV(N, I0, I1, V)     ALU(N, _u, div, I0, I1, V)
+
+       DIV(0,  0x7fffffff, 1,          0x7fffffff)
+       DIV(1,  1,          0x7fffffff, 0)
+       DIV(2,  0x80000000, 1,          0x80000000)
+       DIV(3,  1,          0x80000000, 0)
+       DIV(4,  0x7fffffff, 2,          0x3fffffff)
+       DIV(5,  2,          0x7fffffff, 0)
+       DIV(6,  2,          0x80000000, 0)
+       DIV(7,  0x7fffffff, 0x80000000, 0)
+       DIV(8,  0,          0x7fffffff, 0)
+       DIV(9,  0xffffffff, 0xffffffff, 1)
+       UDIV(0, 0x7fffffff, 1,          0x7fffffff)
+       UDIV(1, 1,          0x7fffffff, 0)
+       UDIV(2, 0x80000000, 1,          0x80000000)
+       UDIV(3, 1,          0x80000000, 0)
+       UDIV(4, 0x7fffffff, 2,          0x3fffffff)
+       UDIV(5, 2,          0x7fffffff, 0)
+       UDIV(6, 0x80000000, 2,          0x40000000)
+       UDIV(7, 2,          0x80000000, 0)
+       UDIV(8, 0x7fffffff, 0x80000000, 0)
+       UDIV(9, 0x80000000, 0x7fffffff, 1)
+       UDIV(10,0,          0x7fffffff, 0)
+       UDIV(11,0x7fffffff, 0xffffffff, 0)
+       UDIV(12,0xffffffff, 0x7fffffff, 2)
+       UDIV(13,0xffffffff, 0xffffffff, 1)
+#if __WORDSIZE == 32                   /* bit 31 is the sign bit here, so these dividends/divisors are negative */
+       DIV(10, 0x80000000, 2,          0xc0000000)
+       DIV(11, 0x80000000, 0x7fffffff, 0xffffffff)
+       DIV(12, 0x7fffffff, 0xffffffff, 0x80000001)
+       DIV(13, 0xffffffff, 0x7fffffff, 0)
+#else                                  /* 64-bit word: the same 32-bit patterns are positive; 64-bit cases follow */
+       DIV(10, 0x80000000,         2,                  0x40000000)
+       DIV(11, 0x80000000,         0x7fffffff,         1)
+       DIV(12, 0x7fffffff,         0xffffffff,         0)
+       DIV(13, 0xffffffff,         0x7fffffff,         2)
+       DIV(14, 0x7fffffffffffffff, 1,                  0x7fffffffffffffff)
+       DIV(15, 1,                  0x7fffffffffffffff, 0)
+       DIV(16, 0x8000000000000000, 1,                  0x8000000000000000)
+       DIV(17, 1,                  0x8000000000000000, 0)
+       DIV(18, 0x7fffffffffffffff, 2,                  0x3fffffffffffffff)
+       DIV(19, 2,                  0x7fffffffffffffff, 0)
+       DIV(20, 0x8000000000000000, 2,                  0xc000000000000000)
+       DIV(21, 2,                  0x8000000000000000, 0)
+       DIV(22, 0x7fffffffffffffff, 0x8000000000000000, 0)
+       DIV(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       DIV(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001)
+       DIV(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0)
+       DIV(26, 0xffffffffffffffff, 0xffffffffffffffff, 1)
+       UDIV(14,0x7fffffffffffffff, 1,                  0x7fffffffffffffff)
+       UDIV(15,1,                  0x7fffffffffffffff, 0)
+       UDIV(16,0x8000000000000000, 1,                  0x8000000000000000)
+       UDIV(17,1,                  0x8000000000000000, 0)
+       UDIV(18,0x7fffffffffffffff, 2,                  0x3fffffffffffffff)
+       UDIV(19,2,                  0x7fffffffffffffff, 0)
+       UDIV(20,0x8000000000000000, 2,                  0x4000000000000000)
+       UDIV(21,2,                  0x8000000000000000, 0)
+       UDIV(22,0x7fffffffffffffff, 0x8000000000000000, 0)
+       UDIV(23,0x8000000000000000, 0x7fffffffffffffff, 1)
+       UDIV(24,0x7fffffffffffffff, 0xffffffffffffffff, 0)
+       UDIV(25,0xffffffffffffffff, 0x7fffffffffffffff, 2)
+       UDIV(26,0xffffffffffffffff, 0xffffffffffffffff, 1)
+#endif
+/* Float (_f) and double (_d) forms: DIV is redefined on top of FOP().
+ * The constants are exactly representable, so exact equality is safe. */
+#undef DIV
+#define DIV(N, T, I0, I1, V)   FOP(N, T, div, I0, I1, V)
+       DIV(0, _f,      -0.5,       0.5,        -1.0)
+       DIV(1, _f,       1.25,      0.5,         2.5)
+       DIV(0, _d,      -0.5,       0.5,        -1.0)
+       DIV(1, _d,       1.25,      0.5,         2.5)
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_lsh.ok b/deps/lightning/check/alu_lsh.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_lsh.tst b/deps/lightning/check/alu_lsh.tst
new file mode 100644 (file)
index 0000000..c05fda0
--- /dev/null
@@ -0,0 +1,57 @@
+#include "alu.inc"
+/* Left shift checks.  Each LSH() line expands, through ALU() from
+ * alu.inc, into code that computes I0 << I1 and calls abort() unless the
+ * result equals V. */
+.code
+       prolog
+
+#define LSH(N, I0, I1, V)      ALU(N, , lsh, I0, I1, V)
+
+       LSH(0,   0x7f,           1,     0xfe)
+       LSH(1,   0x7fff,         2,     0x1fffc)
+       LSH(2,   0x81,          16,     0x810000)
+       LSH(3,   0xff,          15,     0x7f8000)
+       LSH(4,   0x7fffffff,     0,     0x7fffffff)
+#if __WORDSIZE == 32                   /* bits shifted past bit 31 are lost */
+       LSH(5,   0xffffffff,    8,      0xffffff00)
+       LSH(6,   0x7fffffff,    3,      0xfffffff8)
+       LSH(7,  -0x7f,          31,     0x80000000)
+       LSH(8,  -0x7fff,        30,     0x40000000)
+       LSH(9,  -0x7fffffff,    29,     0x20000000)
+       LSH(10,  0x80000001,    28,     0x10000000)
+       LSH(11,  0x8001,        17,     0x20000)
+       LSH(12,  0x80000001,    18,     0x40000)
+       LSH(13, -0xffff,        24,     0x1000000)
+#else                                  /* 64-bit word: same operands keep their high bits; shifts above 31 added */
+       LSH(5,   0xffffffff,     8,     0xffffffff00)
+       LSH(6,   0x7fffffff,     3,     0x3fffffff8)
+       LSH(7,  -0x7f,          31,     0xffffffc080000000)
+       LSH(8,  -0x7fff,        30,     0xffffe00040000000)
+       LSH(9,  -0x7fffffff,    29,     0xf000000020000000)
+       LSH(10,  0x80000001,    28,     0x800000010000000)
+       LSH(11,  0x8001,        17,     0x100020000)
+       LSH(12,  0x80000001,    18,     0x2000000040000)
+       LSH(13, -0xffff,        24,     0xffffff0001000000)
+       LSH(14,  0x7f,          33,     0xfe00000000)
+       LSH(15,  0x7ffff,       34,     0x1ffffc00000000)
+       LSH(16,  0x7fffffff,    35,     0xfffffff800000000)
+       LSH(17, -0x7f,          63,     0x8000000000000000)
+       LSH(18, -0x7fff,        62,     0x4000000000000000)
+       LSH(19, -0x7fffffff,    61,     0x2000000000000000)
+       LSH(20,  0x80000001,    60,     0x1000000000000000)
+       LSH(21,  0x81,          48,     0x81000000000000)
+       LSH(22,  0x8001,        49,     0x2000000000000)
+       LSH(23,  0x80000001,    40,     0x10000000000)
+       LSH(24,  0xff,          47,     0x7f800000000000)
+       LSH(25,  0xffff0001,    56,     0x100000000000000)
+       LSH(26,  0xffffffff,    40,     0xffffff0000000000)
+       LSH(27,  0x7fffffffff,  33,     0xfffffffe00000000)
+       LSH(28, -0x7fffffffff,  63,     0x8000000000000000)
+       LSH(29,  0x8000000001,  48,     0x1000000000000)
+       LSH(30,  0xffffffffff,  47,     0xffff800000000000)
+#endif
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_mul.ok b/deps/lightning/check/alu_mul.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_mul.tst b/deps/lightning/check/alu_mul.tst
new file mode 100644 (file)
index 0000000..748417c
--- /dev/null
@@ -0,0 +1,59 @@
+#include "alu.inc"
+/* Multiplication checks.  Each MUL() line expands, through ALU() or FOP()
+ * from alu.inc, into code that computes I0 * I1 and calls abort() unless
+ * the result equals V. */
+.code
+       prolog
+
+#define MUL(N, I0, I1, V)      ALU(N, , mul, I0, I1, V)
+
+       MUL(0,  0x7fffffff, 1,          0x7fffffff)
+       MUL(1,  1,          0x7fffffff, 0x7fffffff)
+       MUL(2,  0x80000000, 1,          0x80000000)
+       MUL(3,  1,          0x80000000, 0x80000000)
+       MUL(4,  0x7fffffff, 2,          0xfffffffe)
+       MUL(5,  2,          0x7fffffff, 0xfffffffe)
+       MUL(6,  0x7fffffff, 0,          0)
+       MUL(7,  0,          0x7fffffff, 0)
+#if __WORDSIZE == 32                   /* only the low 32 bits of each product are expected */
+       MUL(8,  0x80000000, 2,          0)
+       MUL(9,  2,          0x80000000, 0)
+       MUL(10, 0x7fffffff, 0x80000000, 0x80000000)
+       MUL(11, 0x80000000, 0x7fffffff, 0x80000000)
+       MUL(12, 0x7fffffff, 0xffffffff, 0x80000001)
+       MUL(13, 0xffffffff, 0x7fffffff, 0x80000001)
+       MUL(14, 0xffffffff, 0xffffffff, 1)
+#else                                  /* 64-bit word: same operands without truncation, then 64-bit overflow cases */
+       MUL(8,  0x80000000,         2,                  0x100000000)
+       MUL(9,  2,                  0x80000000,         0x100000000)
+       MUL(10, 0x7fffffff,         0x80000000,         0x3fffffff80000000)
+       MUL(11, 0x80000000,         0x7fffffff,         0x3fffffff80000000)
+       MUL(12, 0x7fffffff,         0xffffffff,         0x7ffffffe80000001)
+       MUL(13, 0xffffffff,         0x7fffffff,         0x7ffffffe80000001)
+       MUL(14, 0xffffffff,         0xffffffff,         0xfffffffe00000001)
+       MUL(15, 0x7fffffffffffffff, 1,                  0x7fffffffffffffff)
+       MUL(16, 1,                  0x7fffffffffffffff, 0x7fffffffffffffff)
+       MUL(17, 0x8000000000000000, 1,                  0x8000000000000000)
+       MUL(18, 1,                  0x8000000000000000, 0x8000000000000000)
+       MUL(19, 0x7fffffffffffffff, 2,                  0xfffffffffffffffe)
+       MUL(20, 2,                  0x7fffffffffffffff, 0xfffffffffffffffe)
+       MUL(21, 0x8000000000000000, 2,                  0)
+       MUL(22, 2,                  0x8000000000000000, 0)
+       MUL(23, 0x7fffffffffffffff, 0x8000000000000000, 0x8000000000000000)
+       MUL(24, 0x8000000000000000, 0x7fffffffffffffff, 0x8000000000000000)
+       MUL(25, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001)
+       MUL(26, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000001)
+       MUL(27, 0xffffffffffffffff, 0xffffffffffffffff, 1)
+#endif
+/* Float (_f) and double (_d) forms: MUL is redefined on top of FOP().
+ * The constants are exactly representable, so exact equality is safe. */
+#undef MUL
+#define MUL(N, T, I0, I1, V)   FOP(N, T, mul, I0, I1, V)
+       MUL(0, _f,      -0.5,       0.5,        -0.25)
+       MUL(1, _f,       0.25,      0.75,        0.1875)
+       MUL(0, _d,      -0.5,       0.5,        -0.25)
+       MUL(1, _d,       0.25,      0.75,        0.1875)
+       // reached only if no check called abort(): print the "ok" string from alu.inc
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_neg.ok b/deps/lightning/check/alu_neg.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_neg.tst b/deps/lightning/check/alu_neg.tst
new file mode 100644 (file)
index 0000000..3264d13
--- /dev/null
@@ -0,0 +1,42 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define NEG(N, I, V)                   UN(N, neg, I, V)
+
+       NEG(0,  0,                      0)
+#if __WORDSIZE == 32
+       NEG(1,  1,                      0xffffffff)
+       NEG(2,  0xffffffff,             1)
+       NEG(3,  0x80000000,             0x80000000)
+       NEG(4,  0x7fffffff,             0x80000001)
+       NEG(5,  0x80000001,             0x7fffffff)
+#else
+       NEG(1,  1,                      0xffffffffffffffff)
+       NEG(2,  0xffffffff,             0xffffffff00000001)
+       NEG(3,  0x80000000,             0xffffffff80000000)
+       NEG(4,  0x7fffffff,             0xffffffff80000001)
+       NEG(5,  0x80000001,             0xffffffff7fffffff)
+       NEG(6,  0xffffffffffffffff,     1)
+       NEG(7,  0x8000000000000000,     0x8000000000000000)
+       NEG(8,  0x7fffffffffffffff,     0x8000000000000001)
+#endif
+
+#undef NEG
+#define NEG(N, T, I, V)                        FUN(N, T, neg, I, V)
+       NEG(0, _f,      0.0,            -0.0)
+       NEG(1, _f,      0.5,            -0.5)
+       NEG(2, _f,      $(1 / 0.0),     $(-1.0 / 0))
+       NEG(3, _f,      -1.25,           1.25)
+       NEG(0, _d,       0.0,           -0.0)
+       NEG(1, _d,       0.5,           -0.5)
+       NEG(2, _d,       $(1.0 / 0),    $(-1 / 0.0))
+       NEG(3, _d,      -1.25,           1.25)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_or.ok b/deps/lightning/check/alu_or.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_or.tst b/deps/lightning/check/alu_or.tst
new file mode 100644 (file)
index 0000000..1e55a86
--- /dev/null
@@ -0,0 +1,36 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define OR(N, I0, I1, V)       ALU(N, , or, I0, I1, V)
+
+       OR(0,   0x7fffffff, 1,          0x7fffffff)
+       OR(1,   1,          0x7fffffff, 0x7fffffff)
+       OR(2,   0x80000000, 1,          0x80000001)
+       OR(3,   1,          0x80000000, 0x80000001)
+       OR(4,   0x7fffffff, 0x80000000, 0xffffffff)
+       OR(5,   0x80000000, 0x7fffffff, 0xffffffff)
+       OR(6,   0x7fffffff, 0xffffffff, 0xffffffff)
+       OR(7,   0xffffffff, 0x7fffffff, 0xffffffff)
+       OR(8,   0xffffffff, 0xffffffff, 0xffffffff)
+       OR(9,   0x7fffffff, 0,          0x7fffffff)
+       OR(10,  0,          0x7fffffff, 0x7fffffff)
+#if __WORDSIZE == 64
+       OR(11,  0x7fffffffffffffff, 1,                  0x7fffffffffffffff)
+       OR(12,  1,                  0x7fffffffffffffff, 0x7fffffffffffffff)
+       OR(13,  0x8000000000000000, 1,                  0x8000000000000001)
+       OR(14,  1,                  0x8000000000000000, 0x8000000000000001)
+       OR(15,  0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+       OR(16,  0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       OR(17,  0x7fffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+       OR(18,  0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff)
+       OR(19,  0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_rem.ok b/deps/lightning/check/alu_rem.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_rem.tst b/deps/lightning/check/alu_rem.tst
new file mode 100644 (file)
index 0000000..5aea7cf
--- /dev/null
@@ -0,0 +1,76 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define REM(N, I0, I1, V)      ALU(N, , rem, I0, I1, V)
+#define UREM(N, I0, I1, V)     ALU(N, _u, rem, I0, I1, V)
+
+       REM(0,  0x7fffffff, 1,          0)
+       REM(1,  1,          0x7fffffff, 1)
+       REM(2,  0x80000000, 1,          0)
+       REM(3,  1,          0x80000000, 1)
+       REM(4,  0x7fffffff, 2,          1)
+       REM(5,  2,          0x7fffffff, 2)
+       REM(6,  0x80000000, 2,          0)
+       REM(7,  2,          0x80000000, 2)
+       REM(8,  0x7fffffff, 0x80000000, 0x7fffffff)
+       REM(9,  0,          0x7fffffff, 0)
+       REM(10, 0xffffffff, 0xffffffff, 0)
+       UREM(0, 0x7fffffff, 1,          0)
+       UREM(1, 1,          0x7fffffff, 1)
+       UREM(2, 0x80000000, 1,          0)
+       UREM(3, 1,          0x80000000, 1)
+       UREM(4, 0x7fffffff, 2,          1)
+       UREM(5, 2,          0x7fffffff, 2)
+       UREM(6, 0x80000000, 2,          0)
+       UREM(7, 2,          0x80000000, 2)
+       UREM(8, 0x7fffffff, 0x80000000, 0x7fffffff)
+       UREM(9, 0x80000000, 0x7fffffff, 1)
+       UREM(10,0,          0x7fffffff, 0)
+       UREM(11,0x7fffffff, 0xffffffff, 0x7fffffff)
+       UREM(12,0xffffffff, 0x7fffffff, 1)
+       UREM(13,0xffffffff, 0xffffffff, 0)
+
+#if __WORDSIZE == 32
+       REM(11, 0x80000000, 0x7fffffff, 0xffffffff)
+       REM(12, 0x7fffffff, 0xffffffff, 0)
+       REM(13, 0xffffffff, 0x7fffffff, 0xffffffff)
+#else
+       REM(11, 0x80000000,         0x7fffffff,         1)
+       REM(12, 0x7fffffff,         0xffffffff,         0x7fffffff)
+       REM(13, 0xffffffff,         0x7fffffff,         1)
+       REM(14, 0x7fffffffffffffff, 1,                  0)
+       REM(15, 1,                  0x7fffffffffffffff, 1)
+       REM(16, 0x8000000000000000, 1,                  0)
+       REM(17, 1,                  0x8000000000000000, 1)
+       REM(18, 0x7fffffffffffffff, 2,                  1)
+       REM(19, 2,                  0x7fffffffffffffff, 2)
+       REM(20, 0x8000000000000000, 2,                  0)
+       REM(21, 2,                  0x8000000000000000, 2)
+       REM(22, 0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff)
+       REM(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       REM(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0)
+       REM(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff)
+       REM(26, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+       UREM(14,0x7fffffffffffffff, 1,                  0)
+       UREM(15,1,                  0x7fffffffffffffff, 1)
+       UREM(16,0x8000000000000000, 1,                  0)
+       UREM(17,1,                  0x8000000000000000, 1)
+       UREM(18,0x7fffffffffffffff, 2,                  1)
+       UREM(19,2,                  0x7fffffffffffffff, 2)
+       UREM(20,0x8000000000000000, 2,                  0)
+       UREM(21,2,                  0x8000000000000000, 2)
+       UREM(22,0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff)
+       UREM(23,0x8000000000000000, 0x7fffffffffffffff, 1)
+       UREM(24,0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff)
+       UREM(25,0xffffffffffffffff, 0x7fffffffffffffff, 1)
+       UREM(26,0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_rsb.ok b/deps/lightning/check/alu_rsb.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_rsb.tst b/deps/lightning/check/alu_rsb.tst
new file mode 100644 (file)
index 0000000..00e08c2
--- /dev/null
@@ -0,0 +1,49 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define RSB(N, I0, I1, V)      ALU(N, , rsb, I0, I1, V)
+
+       RSB(0,  1,          0x7fffffff, 0x7ffffffe)
+       RSB(2,  1,          0x80000000, 0x7fffffff)
+       RSB(3,  0x7fffffff, 0x80000000, 1)
+       RSB(4,  0xffffffff, 0xffffffff, 0)
+       RSB(5,  0x7fffffff, 0xffffffff, 0x80000000)
+       RSB(6,  0, 0x7fffffff,          0x7fffffff)
+#if __WORDSIZE == 32
+       RSB(7,  0x7fffffff,          1, 0x80000002)
+       RSB(8,  0x80000000,          1, 0x80000001)
+       RSB(9,  0x80000000, 0x7fffffff, 0xffffffff)
+       RSB(10, 0xffffffff, 0x7fffffff, 0x80000000)
+       RSB(11, 0x7fffffff,          0, 0x80000001)
+#else
+       RSB(7,          0x7fffffff,                 1,  0xffffffff80000002)
+       RSB(8,  0xffffffff80000000,                 1,          0x80000001)
+       RSB(9,  0xffffffff80000000,        0x7fffffff,          0xffffffff)
+       RSB(10, 0xffffffffffffffff, 0xffffffff7fffffff, 0xffffffff80000000)
+       RSB(11,         0x7fffffff,                  0, 0xffffffff80000001)
+       RSB(12, 1,                  0x7fffffffffffffff, 0x7ffffffffffffffe)
+       RSB(13, 0x7fffffffffffffff,                  1, 0x8000000000000002)
+       RSB(14,                  1, 0x8000000000000000, 0x7fffffffffffffff)
+       RSB(15, 0x8000000000000000,                  1, 0x8000000000000001)
+       RSB(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       RSB(17, 0x7fffffffffffffff, 0x8000000000000000, 1)
+       RSB(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000)
+       RSB(19, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000)
+       RSB(20, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+#undef RSB
+#define RSB(N, T, I0, I1, V)   FOP(N, T, rsb, I0, I1, V)
+       RSB(0, _f,          0.5,        -0.5,   -1.0)
+       RSB(1, _f,          0.75,        0.25,  -0.5)
+       RSB(0, _d,          0.5,        -0.5,   -1.0)
+       RSB(1, _d,          0.75,        0.25,  -0.5)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_rsh.ok b/deps/lightning/check/alu_rsh.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_rsh.tst b/deps/lightning/check/alu_rsh.tst
new file mode 100644 (file)
index 0000000..93f8c7b
--- /dev/null
@@ -0,0 +1,85 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define RSH(N, I0, I1, V)      ALU(N, , rsh, I0, I1, V)
+#define URSH(N, I0, I1, V)     ALU(N, _u, rsh, I0, I1, V)
+
+       RSH(0,  0xfe,                1,         0x7f)
+       RSH(1,  0x1fffc,             2,         0x7fff)
+       RSH(2,  0x40000000,         30,         1)
+       RSH(3,  0x20000000,         29,         1)
+       RSH(4,  0x10000000,         28,         1)
+       RSH(5,  0x810000,           16,         0x81)
+       RSH(6,  0x20000,            17,         1)
+       RSH(7,  0x40000,            18,         1)
+       RSH(8,  0x7f8000,           15,         0xff)
+       RSH(9,  0x1000000,          24,         1)
+       RSH(10, 0x7fffffff,          0,         0x7fffffff)
+       URSH(0, 0xfe,                1,         0x7f)
+       URSH(1, 0x1fffc,             2,         0x7fff)
+       URSH(2, 0x80000000,         31,         1)
+       URSH(3, 0x40000000,         30,         1)
+       URSH(4, 0x20000000,         29,         1)
+       URSH(5, 0x10000000,         28,         1)
+       URSH(6, 0x810000,           16,         0x81)
+       URSH(7, 0x20000,            17,         1)
+       URSH(8, 0x40000,            18,         1)
+       URSH(9,0x7f8000,            15,         0xff)
+       URSH(10,0x1000000,          24,         1)
+       URSH(11,0xffffff00,          8,         0xffffff)
+       URSH(12,0x7fffffff,          0,         0x7fffffff)
+#if __WORDSIZE == 32
+       RSH(11, 0xfffffff8,          3,         0xffffffff)
+       RSH(12, 0x80000000,         31,         0xffffffff)
+       RSH(13, 0xffffff00,          8,         0xffffffff)
+       URSH(13,0xfffffff8,          3,         0x1fffffff)
+#else
+       RSH(11, 0x3fffffff8,         3,         0x7fffffff)
+       RSH(12, 0xffffffc080000000, 31,         0xffffffffffffff81)
+       RSH(13, 0xffffff00,          8,         0xffffff)
+       RSH(14, 0xfe00000000,       33,         0x7f)
+       RSH(15, 0x1ffffc00000000,   34,         0x7ffff)
+       RSH(16, 0xfffffff800000000, 29,         0xffffffffffffffc0)
+       RSH(17, 0x8000000000000000, 63,         0xffffffffffffffff)
+       RSH(18, 0x4000000000000000, 62,         1)
+       RSH(19, 0x2000000000000000, 61,         1)
+       RSH(20, 0x1000000000000000, 60,         1)
+       RSH(21, 0x81000000000000,   48,         0x81)
+       RSH(22, 0x2000000000000,    49,         1)
+       RSH(23, 0x10000000000,      40,         1)
+       RSH(24, 0x7f800000000000,   47,         0xff)
+       RSH(25, 0x100000000000000,  56,         1)
+       RSH(26, 0xffffff0000000000, 40,         0xffffffffffffffff)
+       RSH(27, 0xfffffffe00000000, 33,         0xffffffffffffffff)
+       RSH(28, 0x8000000000000001, 63,         0xffffffffffffffff)
+       RSH(29, 0x1000000000000,    48,         1)
+       RSH(30, 0xffff800000000000, 47,         0xffffffffffffffff)
+       URSH(13,0x3fffffff8,        3,          0x7fffffff)
+       URSH(14,0xffffffc080000000, 31,         0x1ffffff81)
+       URSH(15,0xfe00000000,       33,         0x7f)
+       URSH(16,0x1ffffc00000000,   34,         0x7ffff)
+       URSH(17,0xfffffff800000000, 29,         0x7ffffffc0)
+       URSH(18,0x8000000000000000, 63,         1)
+       URSH(19,0x4000000000000000, 62,         1)
+       URSH(20,0x2000000000000000, 61,         1)
+       URSH(21,0x1000000000000000, 60,         1)
+       URSH(22,0x81000000000000,   48,         0x81)
+       URSH(23,0x2000000000000,    49,         1)
+       URSH(24,0x10000000000,      40,         1)
+       URSH(25,0x7f800000000000,   47,         0xff)
+       URSH(26,0x100000000000000,  56,         1)
+       URSH(27,0xffffff0000000000, 40,         0xffffff)
+       URSH(28,0xfffffffe00000000, 33,         0x7fffffff)
+       URSH(29,0x8000000000000001, 63,         1)
+       URSH(30,0x1000000000000,    48,         1)
+       URSH(31,0xffff800000000000, 47,         0x1ffff)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_sub.ok b/deps/lightning/check/alu_sub.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_sub.tst b/deps/lightning/check/alu_sub.tst
new file mode 100644 (file)
index 0000000..8f07b62
--- /dev/null
@@ -0,0 +1,49 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define SUB(N, I0, I1, V)      ALU(N, , sub, I0, I1, V)
+
+       SUB(0,  0x7fffffff, 1,          0x7ffffffe)
+       SUB(2,  0x80000000, 1,          0x7fffffff)
+       SUB(3,  0x80000000, 0x7fffffff, 1)
+       SUB(4,  0xffffffff, 0xffffffff, 0)
+       SUB(5,  0xffffffff, 0x7fffffff, 0x80000000)
+       SUB(6,  0x7fffffff, 0,          0x7fffffff)
+#if __WORDSIZE == 32
+       SUB(7,  1,          0x7fffffff, 0x80000002)
+       SUB(8,  1,          0x80000000, 0x80000001)
+       SUB(9,  0x7fffffff, 0x80000000, 0xffffffff)
+       SUB(10, 0x7fffffff, 0xffffffff, 0x80000000)
+       SUB(11, 0,          0x7fffffff, 0x80000001)
+#else
+       SUB(7,  1,                  0x7fffffff,         0xffffffff80000002)
+       SUB(8,  1,                  0xffffffff80000000,         0x80000001)
+       SUB(9,  0x7fffffff,         0xffffffff80000000,         0xffffffff)
+       SUB(10, 0xffffffff7fffffff, 0xffffffffffffffff, 0xffffffff80000000)
+       SUB(11, 0,                  0x7fffffff,         0xffffffff80000001)
+       SUB(12, 0x7fffffffffffffff, 1,                  0x7ffffffffffffffe)
+       SUB(13, 1,                  0x7fffffffffffffff, 0x8000000000000002)
+       SUB(14, 0x8000000000000000, 1,                  0x7fffffffffffffff)
+       SUB(15, 1,                  0x8000000000000000, 0x8000000000000001)
+       SUB(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+       SUB(17, 0x8000000000000000, 0x7fffffffffffffff, 1)
+       SUB(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000)
+       SUB(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000)
+       SUB(20, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+#undef SUB
+#define SUB(N, T, I0, I1, V)   FOP(N, T, sub, I0, I1, V)
+       SUB(0, _f,      -0.5,       0.5,        -1.0)
+       SUB(1, _f,       0.25,      0.75,       -0.5)
+       SUB(0, _d,      -0.5,       0.5,        -1.0)
+       SUB(1, _d,       0.25,      0.75,       -0.5)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alu_xor.ok b/deps/lightning/check/alu_xor.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alu_xor.tst b/deps/lightning/check/alu_xor.tst
new file mode 100644 (file)
index 0000000..d1976ab
--- /dev/null
@@ -0,0 +1,36 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define XOR(N, I0, I1, V)      ALU(N, , xor, I0, I1, V)
+
+       XOR(0,  0x7fffffff,         1,                  0x7ffffffe)
+       XOR(1,  1,                  0x7fffffff,         0x7ffffffe)
+       XOR(2,  0x80000000,         1,                  0x80000001)
+       XOR(3,  1,                  0x80000000,         0x80000001)
+       XOR(4,  0x7fffffff,         0x80000000,         0xffffffff)
+       XOR(5,  0x80000000,         0x7fffffff,         0xffffffff)
+       XOR(6,  0x7fffffff,         0xffffffff,         0x80000000)
+       XOR(7,  0xffffffff,         0x7fffffff,         0x80000000)
+       XOR(9,  0xffffffff,         0xffffffff,         0)
+       XOR(10, 0x7fffffff,         0,                  0x7fffffff)
+       XOR(11, 0,                  0x7fffffff,         0x7fffffff)
+#if __WORDSIZE == 64
+       XOR(12, 0x7fffffffffffffff, 1,                  0x7ffffffffffffffe)
+       XOR(13, 1,                  0x7fffffffffffffff, 0x7ffffffffffffffe)
+       XOR(14, 0x8000000000000000, 1,                  0x8000000000000001)
+       XOR(15, 1,                  0x8000000000000000, 0x8000000000000001)
+       XOR(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff)
+       XOR(17, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff)
+       XOR(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000)
+       XOR(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000)
+       XOR(20, 0xffffffffffffffff, 0xffffffffffffffff, 0)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alux_add.ok b/deps/lightning/check/alux_add.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alux_add.tst b/deps/lightning/check/alux_add.tst
new file mode 100644 (file)
index 0000000..ddc4e57
--- /dev/null
@@ -0,0 +1,49 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define ADDX(N, I0, I1, V)     ALUX(N, add, I0, I1, V)
+
+       /* nothing */
+       ADDX(0, 0,              0,              0)
+#if __WORDSIZE == 32
+       /* carry */
+       ADDX(1, 0xffffffff,     0xffffffff,     1)
+       /* overflow */
+       ADDX(2, 0x7fffffff,     1,              0)
+       /* overflow */
+       ADDX(3, 0x7fffffff,     0x7fffffff,     0)
+       /* nothing */
+       ADDX(4, 0x7fffffff,     0x80000000,     0)
+       /* carry+overflow */
+       ADDX(5, 0x80000000,     0x80000000,     1)
+#else
+       /* nothing */
+       ADDX(1, 0xffffffff,             0xffffffff,             0)
+       /* nothing */
+       ADDX(2, 0x7fffffff,             1,                      0)
+       /* nothing */
+       ADDX(3, 0x7fffffff,             0x7fffffff,             0)
+       /* nothing */
+       ADDX(4, 0x7fffffff,             0x80000000,             0)
+       /* nothing */
+       ADDX(5, 0x80000000,             0x80000000,             0)
+       /* carry */
+       ADDX(6, 0xffffffffffffffff,     0xffffffffffffffff,     1)
+       /* overflow */
+       ADDX(7, 0x7fffffffffffffff,     1,                      0)
+       /* overflow */
+       ADDX(8, 0x7fffffffffffffff,     0x7fffffffffffffff,     0)
+       /* nothing */
+       ADDX(9, 0x7fffffffffffffff,     0x8000000000000000,     0)
+       /* carry+overflow */
+       ADDX(10,0x8000000000000000,     0x8000000000000000,     1)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/alux_sub.ok b/deps/lightning/check/alux_sub.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/alux_sub.tst b/deps/lightning/check/alux_sub.tst
new file mode 100644 (file)
index 0000000..8a2838d
--- /dev/null
@@ -0,0 +1,49 @@
+#include "alu.inc"
+
+.code
+       prolog
+
+#define SUBX(N, I0, I1, V)     ALUX(N, sub, I0, I1, V)
+
+       /* nothing */
+       SUBX(0,  0,             0,              0)
+#if __WORDSIZE == 32
+       /* carry */
+       SUBX(1, 0x7fffffff,     0xffffffff,     0xffffffff)
+       /* overflow */
+       SUBX(2, 0x80000000,     1,              0)
+       /* carry */
+       SUBX(3, 0x7fffffff,     0x80000000,     0xffffffff)
+       /* overflow */
+       SUBX(4, 0x80000000,     0x7fffffff,     0)
+       /* carry+overflow */
+       SUBX(5, 1,              0x80000000,     0xffffffff)
+#else
+       /* carry */
+       SUBX(1, 0x7fffffff,             0xffffffff,             -1)
+       /* nothing */
+       SUBX(2, 0x80000000,             1,                      0)
+       /* carry */
+       SUBX(3, 0x7fffffff,             0x80000000,             -1)
+       /* nothing */
+       SUBX(4, 0x80000000,             0x7fffffff,             0)
+       /* carry */
+       SUBX(5, 1,                      0x80000000,             -1)
+       /* carry */
+       SUBX(6, 0x7fffffffffffffff,     0xffffffffffffffff,     -1)
+       /* overflow */
+       SUBX(7, 0x8000000000000000,     1,                      0)
+       /* carry */
+       SUBX(8, 0x7fffffffffffffff,     0x8000000000000000,     -1)
+       /* overflow */
+       SUBX(9, 0x8000000000000000,     0x7fffffffffffffff,     0)
+       /* carry+overflow */
+       SUBX(10,1,                      0x8000000000000000,     -1)
+#endif
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/bp.ok b/deps/lightning/check/bp.ok
new file mode 100644 (file)
index 0000000..7e13ef0
--- /dev/null
@@ -0,0 +1 @@
+nfibs(32) = 2178309
diff --git a/deps/lightning/check/bp.tst b/deps/lightning/check/bp.tst
new file mode 100644 (file)
index 0000000..9e6798d
--- /dev/null
@@ -0,0 +1,46 @@
+.data  32
+fmt:
+.c     "nfibs(%d) = %d\n"
+
+.code
+       jmpi main
+
+       name rfibs
+rfibs:
+       prolog
+       arg $in
+       getarg %r0 $in          /* R0 = N */
+       beqi out %r0 0          /* N == 0: return R0 (0) */
+       movr %v0 %r0            /* V0 = N */
+       movi %r0 1              /* R0 = 1 */
+       blei_u out %v0 2        /* N <= 2 (unsigned): return 1 */
+       subi %v1 %v0 1          /* V1 = N-1 */
+       subi %v2 %v0 2          /* V2 = N-2 */
+       prepare
+               pushargr %v1
+       finishi rfibs
+       retval %v1              /* V1 = rfibs(N-1) */
+       prepare
+               pushargr %v2
+       finishi rfibs
+       retval %r0              /* R0 = rfibs(N-2) */
+       addr %r0 %r0 %v1        /* R0 = rfibs(N-2) + rfibs(N-1) */
+out:
+       retr %r0                /* return R0 */
+       epilog
+
+       name main
+main:
+       prolog
+       prepare
+               pushargi 32
+       finishi rfibs           /* call rfibs(32) */
+       retval %v0              /* V0 = rfibs(32) */
+       prepare
+               pushargi fmt
+               ellipsis
+               pushargi 32
+               pushargr %v0
+       finishi @printf         /* printf(fmt, 32, V0) */
+       ret
+       epilog
diff --git a/deps/lightning/check/branch.ok b/deps/lightning/check/branch.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/branch.tst b/deps/lightning/check/branch.tst
new file mode 100644 (file)
index 0000000..2252dff
--- /dev/null
@@ -0,0 +1,563 @@
+#if __WORDSIZE == 64
+#  define I7f          0x7fffffffffffffff
+#  define I80          0x8000000000000000
+#  define I81          0x8000000000000001
+#  define Iff          0xffffffffffffffff
+#else
+#  define I7f          0x7fffffff
+#  define I80          0x80000000
+#  define I81          0x80000001
+#  define Iff          0xffffffff
+#endif
+
+.data  12
+ok:
+.c     "ok\n"
+.      $($NaN  =  0.0 / 0.0)
+
+#define BOP(N, Ls, Rs, Lu, Ru, R0, R1)         \
+       movi %R0 Ls                             \
+       movi %R1 Rs                             \
+       b##N##r N##r_##R0##_##R1 %R0 %R1        \
+       calli @abort                            \
+N##r_##R0##_##R1:                              \
+       b##N##i N##i_##R0##_##R1 %R0 Rs         \
+       calli @abort                            \
+N##i_##R0##_##R1:                              \
+       movi %R0 Lu                             \
+       movi %R1 Ru                             \
+       b##N##r_u N##r_u_##R0##_##R1 %R0 %R1    \
+       calli @abort                            \
+N##r_u_##R0##_##R1:                            \
+       b##N##i_u N##i_u_##R0##_##R1 %R0 Ru     \
+       calli @abort                            \
+N##i_u_##R0##_##R1:                            \
+       movi %R0 Ls                             \
+       movi %R1 Rs                             \
+       N##r %R0 %R0 %R1                        \
+       beqi _##N##r_##R0##_##R1 %R0 1          \
+       calli @abort                            \
+_##N##r_##R0##_##R1:                           \
+       movi %R0 Ls                             \
+       N##i %R1 %R0 Rs                         \
+       beqi _##N##i_##R0##_##R1 %R1 1          \
+       calli @abort                            \
+_##N##i_##R0##_##R1:                           \
+       movi %R0 Lu                             \
+       movi %R1 Ru                             \
+       N##r_u %R0 %R0 %R1                      \
+       beqi _##N##r_u_##R0##_##R1 %R0 1        \
+       calli @abort                            \
+_##N##r_u_##R0##_##R1:                         \
+       movi %R0 Lu                             \
+       N##i_u %R1 %R0 Ru                       \
+       beqi _##N##i_u_##R0##_##R1 %R1 1        \
+       calli @abort                            \
+_##N##i_u_##R0##_##R1:
+
+#define EB(N, L, R, R0, R1)                    \
+       movi %R0 L                              \
+       movi %R1 R                              \
+       b##N##r N##r_##R0##_##R1 %R0 %R1        \
+       calli @abort                            \
+N##r_##R0##_##R1:                              \
+       b##N##i N##i_##R0##_##R1 %R0 R          \
+       calli @abort                            \
+N##i_##R0##_##R1:                              \
+       movi %R0 L                              \
+       movi %R1 R                              \
+       N##r %R0 %R0 %R1                        \
+       beqi _##N##r_##R0##_##R1 %R0 1          \
+       calli @abort                            \
+_##N##r_##R0##_##R1:                           \
+       movi %R0 L                              \
+       N##i %R1 %R0 R                          \
+       beqi _##N##i_##R0##_##R1 %R1 1          \
+       calli @abort                            \
+_##N##i_##R0##_##R1:
+
+#define XEB(N, L, R, R0, R1)                   \
+       movi %R0 L                              \
+       movi %R1 R                              \
+       b##N##r N##r_##R0##_##R1 %R0 %R1        \
+       calli @abort                            \
+N##r_##R0##_##R1:                              \
+       b##N##i N##i_##R0##_##R1 %R0 R          \
+       calli @abort                            \
+N##i_##R0##_##R1:
+
+#define XBOP(N, Ls, Rs, Lu, Ru, R0, R1)                \
+       movi %R0 Ls                             \
+       movi %R1 Rs                             \
+       b##N##r N##r_##R0##_##R1 %R0 %R1        \
+       calli @abort                            \
+N##r_##R0##_##R1:                              \
+       movi %R0 Ls                             \
+       b##N##i N##i_##R0##_##R1 %R0 Rs         \
+       calli @abort                            \
+N##i_##R0##_##R1:                              \
+       movi %R0 Lu                             \
+       movi %R1 Ru                             \
+       b##N##r_u N##r_u_##R0##_##R1 %R0 %R1    \
+       calli @abort                            \
+N##r_u_##R0##_##R1:                            \
+       movi %R0 Lu                             \
+       b##N##i_u N##i_u_##R0##_##R1 %R0 Ru     \
+       calli @abort                            \
+N##i_u_##R0##_##R1:
+
+#define BOPI(N, Ls, Rs, Lu, Ru)                        \
+       BOP(N, Ls, Rs, Lu, Ru, v0, v1)          \
+       BOP(N, Ls, Rs, Lu, Ru, v0, v2)          \
+       BOP(N, Ls, Rs, Lu, Ru, v0, r0)          \
+       BOP(N, Ls, Rs, Lu, Ru, v0, r1)          \
+       BOP(N, Ls, Rs, Lu, Ru, v0, r2)          \
+       BOP(N, Ls, Rs, Lu, Ru, v1, v0)          \
+       BOP(N, Ls, Rs, Lu, Ru, v1, v2)          \
+       BOP(N, Ls, Rs, Lu, Ru, v1, r0)          \
+       BOP(N, Ls, Rs, Lu, Ru, v1, r1)          \
+       BOP(N, Ls, Rs, Lu, Ru, v1, r2)          \
+       BOP(N, Ls, Rs, Lu, Ru, v2, v0)          \
+       BOP(N, Ls, Rs, Lu, Ru, v2, v1)          \
+       BOP(N, Ls, Rs, Lu, Ru, v2, r0)          \
+       BOP(N, Ls, Rs, Lu, Ru, v2, r1)          \
+       BOP(N, Ls, Rs, Lu, Ru, v2, r2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r0, v0)          \
+       BOP(N, Ls, Rs, Lu, Ru, r0, v1)          \
+       BOP(N, Ls, Rs, Lu, Ru, r0, v2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r0, r1)          \
+       BOP(N, Ls, Rs, Lu, Ru, r0, r2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r1, v0)          \
+       BOP(N, Ls, Rs, Lu, Ru, r1, v1)          \
+       BOP(N, Ls, Rs, Lu, Ru, r1, v2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r1, r0)          \
+       BOP(N, Ls, Rs, Lu, Ru, r1, r2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r2, v0)          \
+       BOP(N, Ls, Rs, Lu, Ru, r2, v1)          \
+       BOP(N, Ls, Rs, Lu, Ru, r2, v2)          \
+       BOP(N, Ls, Rs, Lu, Ru, r2, r0)          \
+       BOP(N, Ls, Rs, Lu, Ru, r2, r1)
+
+#define EBI(N, L, R)                           \
+       EB(N, L, R, v0, v1)                     \
+       EB(N, L, R, v0, v2)                     \
+       EB(N, L, R, v0, r0)                     \
+       EB(N, L, R, v0, r1)                     \
+       EB(N, L, R, v0, r2)                     \
+       EB(N, L, R, v1, v0)                     \
+       EB(N, L, R, v1, v2)                     \
+       EB(N, L, R, v1, r0)                     \
+       EB(N, L, R, v1, r1)                     \
+       EB(N, L, R, v1, r2)                     \
+       EB(N, L, R, v2, v0)                     \
+       EB(N, L, R, v2, v1)                     \
+       EB(N, L, R, v2, r0)                     \
+       EB(N, L, R, v2, r1)                     \
+       EB(N, L, R, v2, r2)                     \
+       EB(N, L, R, r0, v0)                     \
+       EB(N, L, R, r0, v1)                     \
+       EB(N, L, R, r0, v2)                     \
+       EB(N, L, R, r0, r1)                     \
+       EB(N, L, R, r0, r2)                     \
+       EB(N, L, R, r1, v0)                     \
+       EB(N, L, R, r1, v1)                     \
+       EB(N, L, R, r1, v2)                     \
+       EB(N, L, R, r1, r0)                     \
+       EB(N, L, R, r1, r2)                     \
+       EB(N, L, R, r2, v0)                     \
+       EB(N, L, R, r2, v1)                     \
+       EB(N, L, R, r2, v2)                     \
+       EB(N, L, R, r2, r0)                     \
+       EB(N, L, R, r2, r1)
+
+
+// XEBI(N, L, R): expands XEB(N, L, R, a, b) once for every ordered pair
+// (a, b) of distinct registers taken from v0, v1, v2, r0, r1, r2, which
+// gives 30 expansions. N, L and R are forwarded to XEB unchanged; XEB is
+// defined outside this part of the file.
+#define XEBI(N, L, R)                          \
+       XEB(N, L, R, v0, v1)                    \
+       XEB(N, L, R, v0, v2)                    \
+       XEB(N, L, R, v0, r0)                    \
+       XEB(N, L, R, v0, r1)                    \
+       XEB(N, L, R, v0, r2)                    \
+       XEB(N, L, R, v1, v0)                    \
+       XEB(N, L, R, v1, v2)                    \
+       XEB(N, L, R, v1, r0)                    \
+       XEB(N, L, R, v1, r1)                    \
+       XEB(N, L, R, v1, r2)                    \
+       XEB(N, L, R, v2, v0)                    \
+       XEB(N, L, R, v2, v1)                    \
+       XEB(N, L, R, v2, r0)                    \
+       XEB(N, L, R, v2, r1)                    \
+       XEB(N, L, R, v2, r2)                    \
+       XEB(N, L, R, r0, v0)                    \
+       XEB(N, L, R, r0, v1)                    \
+       XEB(N, L, R, r0, v2)                    \
+       XEB(N, L, R, r0, r1)                    \
+       XEB(N, L, R, r0, r2)                    \
+       XEB(N, L, R, r1, v0)                    \
+       XEB(N, L, R, r1, v1)                    \
+       XEB(N, L, R, r1, v2)                    \
+       XEB(N, L, R, r1, r0)                    \
+       XEB(N, L, R, r1, r2)                    \
+       XEB(N, L, R, r2, v0)                    \
+       XEB(N, L, R, r2, v1)                    \
+       XEB(N, L, R, r2, v2)                    \
+       XEB(N, L, R, r2, r0)                    \
+       XEB(N, L, R, r2, r1)
+
+// XBOPI(N, Ls, Rs, Lu, Ru): expands XBOP(N, Ls, Rs, Lu, Ru, a, b) once
+// for every ordered pair (a, b) of distinct registers taken from
+// v0, v1, v2, r0, r1, r2, which gives 30 expansions. The operand values
+// are forwarded to XBOP unchanged; XBOP is defined outside this part of
+// the file.
+// NOTE(review): Ls/Rs and Lu/Ru look like the signed and unsigned
+// operand pairs - confirm against the XBOP definition.
+#define XBOPI(N, Ls, Rs, Lu, Ru)               \
+       XBOP(N, Ls, Rs, Lu, Ru, v0, v1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v0, v2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v0, r0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v0, r1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v0, r2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v1, v0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v1, v2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v1, r0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v1, r1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v1, r2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v2, v0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v2, v1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v2, r0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v2, r1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, v2, r2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r0, v0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r0, v1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r0, v2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r0, r1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r0, r2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r1, v0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r1, v1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r1, v2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r1, r0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r1, r2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r2, v0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r2, v1)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r2, v2)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r2, r0)         \
+       XBOP(N, Ls, Rs, Lu, Ru, r2, r1)
+
+// TBOPF(N, T, L, R): float branch check for condition N with type
+// suffix T.
+//  - Loads L into %f0 and R into %f1, then requires both the register
+//    form b<N>r_<T> and the immediate form b<N>i_<T> to branch; falling
+//    through reaches calli @abort.
+//  - Reloads %f1 with $NaN and requires the same two branches NOT to be
+//    taken: the branch target is calli @abort, and the fall-through jmpi
+//    skips over it.
+// Labels are built from N and T, so each (N, T) pair may be expanded
+// only once.
+#define TBOPF(N, T, L, R)                      \
+       movi_##T %f0 L                          \
+       movi_##T %f1 R                          \
+       b##N##r##_##T N##r_##T %f0 %f1          \
+       calli @abort                            \
+N##r_##T:                                      \
+       b##N##i##_##T N##i_##T %f0 R            \
+       calli @abort                            \
+N##i_##T:                                      \
+       movi_##T %f1 $NaN                       \
+       b##N##r##_##T N##r_##T##_##u %f0 %f1    \
+       jmpi N##r_##T##_##u0                    \
+N##r_##T##_##u:                                        \
+       calli @abort                            \
+N##r##_##T##_##u0:                             \
+       b##N##i##_##T N##i_##T##_##u %f0 $NaN   \
+       jmpi N##i_##T##_##u0                    \
+N##i##_##T##_##u:                              \
+       calli @abort                            \
+N##i##_##T##_##u0:
+// BOPF(N, L, R): runs the TBOPF check for condition N with operands L
+// and R twice, once with the f suffix and once with the d suffix.
+#define BOPF(N, L, R)                          \
+       TBOPF(N, f, L, R)                       \
+       TBOPF(N, d, L, R)
+
+// TUBOPF(N, T, L, R): float branch check for condition N with type
+// suffix T, for conditions that must also hold when an operand is NaN.
+//  - Loads L into %f0 and R into %f1, then requires both the register
+//    form b<N>r_<T> and the immediate form b<N>i_<T> to branch.
+//  - Reloads %f1 with $NaN and again requires both forms to branch.
+// In all four cases falling through reaches calli @abort.
+// Labels are built from N and T, so each (N, T) pair may be expanded
+// only once.
+#define TUBOPF(N, T, L, R)                     \
+       movi_##T %f0 L                          \
+       movi_##T %f1 R                          \
+       b##N##r##_##T N##r_##T %f0 %f1          \
+       calli @abort                            \
+N##r_##T:                                      \
+       b##N##i##_##T N##i_##T %f0 R            \
+       calli @abort                            \
+N##i_##T:                                      \
+       movi_##T %f1 $NaN                       \
+       b##N##r##_##T N##r_##T##_##u %f0 %f1    \
+       calli @abort                            \
+N##r_##T##_##u:                                        \
+       b##N##i##_##T N##i_##T##_##u %f0 $NaN   \
+       calli @abort                            \
+N##i##_##T##_##u:
+
+// UBOPF(N, L, R): runs the TUBOPF check for condition N with operands L
+// and R twice, once with the f suffix and once with the d suffix.
+#define UBOPF(N, L, R)                         \
+       TUBOPF(N, f, L, R)                      \
+       TUBOPF(N, d, L, R)
+
+.code
+       prolog
+
+       // Hand-written checks. Each one has the same shape: a branch that
+       // is expected to be taken jumps over calli @abort to the label
+       // that follows it.
+
+       // lt: -1 against 1, register form then immediate form
+       movi %r0 -1
+       movi %r1 1
+       bltr xltr_r0_r1 %r0 %r1
+       calli @abort
+xltr_r0_r1:
+       blti xlti_r0_r1 %r0 1
+       calli @abort
+xlti_r0_r1:
+       // lt, _u forms: 1 against -1
+       movi %r0 1
+       movi %r1 -1
+       bltr_u xltru_r0_r1 %r0 %r1
+       calli @abort
+xltru_r0_r1:
+       blti_u xltiu_r0_r1 %r0 -1
+       calli @abort
+xltiu_r0_r1:
+       // le: equal operands (-1 against -1) must take the branch in both
+       // the register form and the immediate form.
+       movi %r0 -1
+       movi %r1 -1
+       bler xler_r0_r1 %r0 %r1
+       calli @abort
+xler_r0_r1:
+       blei xlei_r0_r1 %r0 -1
+       calli @abort
+xlei_r0_r1:
+       // le, _u forms: 1 against -1, register form then immediate form
+       movi %r0 1
+       movi %r1 -1
+       bler_u xlteu_r0_r1 %r0 %r1
+       calli @abort
+xlteu_r0_r1:
+       blei_u xleiu_r0_r1 %r0 -1
+       calli @abort
+xleiu_r0_r1:
+       // eq: 32 against 32
+       movi %r0 32
+       movi %r1 32
+       beqr xeqr_r0_r1 %r0 %r1
+       calli @abort
+xeqr_r0_r1:
+       beqi xeqi_r0_r1 %r0 32
+       calli @abort
+xeqi_r0_r1:
+       // ge: -2 against -2
+       movi %r0 -2
+       movi %r1 -2
+       bger xger_r0_r1 %r0 %r1
+       calli @abort
+xger_r0_r1:
+       bgei xgei_r0_r1 %r0 -2
+       calli @abort
+xgei_r0_r1:
+       // ge, _u forms: 2 against 2
+       movi %r0 2
+       movi %r1 2
+       bger_u xgeru_r0_r1 %r0 %r1
+       calli @abort
+xgeru_r0_r1:
+       bgei_u xgeiu_r0_r1 %r0 2
+       calli @abort
+xgeiu_r0_r1:
+       // gt: 2 against -2
+       movi %r0 2
+       movi %r1 -2
+       bgtr xgtr_r0_r1 %r0 %r1
+       calli @abort
+xgtr_r0_r1:
+       bgti xgti_r0_r1 %r0 -2
+       calli @abort
+xgti_r0_r1:
+       // gt, _u forms: -2 against 2
+       movi %r0 -2
+       movi %r1 2
+       bgtr_u xgtru_r0_r1 %r0 %r1
+       calli @abort
+xgtru_r0_r1:
+       bgti_u xgtiu_r0_r1 %r0 2
+       calli @abort
+xgtiu_r0_r1:
+       // ne: -3 against 3
+       movi %r0 -3
+       movi %r1 3
+       bner xner_r0_r1 %r0 %r1
+       calli @abort
+xner_r0_r1:
+       bnei xnei_r0_r1 %r0 3
+       calli @abort
+xnei_r0_r1:
+       // bms: 1 against mask 3 (the two values share bit 0)
+       movi %r0 1
+       movi %r1 3
+       bmsr xmsr_r0_r1 %r0 %r1
+       calli @abort
+xmsr_r0_r1:
+       bmsi xmsi_r0_r1 %r0 3
+       calli @abort
+xmsi_r0_r1:
+       // bmc: 1 against mask 2 (the two values share no bit)
+       movi %r0 1
+       movi %r1 2
+       bmcr xmcr_r0_r1 %r0 %r1
+       calli @abort
+xmcr_r0_r1:
+       bmci xmci_r0_r1 %r0 2
+       calli @abort
+xmci_r0_r1:
+       // Add/subtract branches. I7f, I80, I81 and Iff are defined outside
+       // this part of the file.
+       // NOTE(review): judging by the names, boadd*/bosub* branch when
+       // the operation overflows and bxadd*/bxsub* when it does not -
+       // confirm against the lightning manual.
+
+       // boadd: I7f + 1, then _u form with Iff + 1
+       movi %r0 I7f
+       movi %r1 1
+       boaddr xoaddr_r0_r1 %r0 %r1
+       calli @abort
+xoaddr_r0_r1:
+       movi %r0 Iff
+       movi %r1 1
+       boaddr_u xoaddr_u_r0_r1 %r0 %r1
+       calli @abort
+xoaddr_u_r0_r1:
+       // %r0 is reloaded before each immediate form
+       movi %r0 I7f
+       boaddi xoaddi_r0_r1 %r0 1
+       calli @abort
+xoaddi_r0_r1:
+       movi %r0 Iff
+       boaddi_u xoaddi_u_r0_r1 %r0 1
+       calli @abort
+xoaddi_u_r0_r1:
+       // bxadd: I80 + 1, then _u forms with I7f + 1
+       movi %r0 I80
+       movi %r1 1
+       bxaddr xxaddr_r0_r1 %r0 %r1
+       calli @abort
+xxaddr_r0_r1:
+       movi %r0 I80
+       bxaddi xxaddi_r0_r1 %r0 1
+       calli @abort
+xxaddi_r0_r1:
+       movi %r0 I7f
+       movi %r1 1
+       bxaddr_u xxaddr_u_r0_r1 %r0 %r1
+       calli @abort
+xxaddr_u_r0_r1:
+       movi %r0 I7f
+       bxaddi_u xxaddi_u_r0_r1 %r0 1
+       calli @abort
+xxaddi_u_r0_r1:
+       // bosub: I80 - 1, then _u forms with 0 - 1
+       movi %r0 I80
+       movi %r1 1
+       bosubr xosubr_r0_r1 %r0 %r1
+       calli @abort
+xosubr_r0_r1:
+       movi %r0 0
+       movi %r1 1
+       bosubr_u xosubr_u_r0_r1 %r0 %r1
+       calli @abort
+xosubr_u_r0_r1:
+       movi %r0 I80
+       bosubi xosubi_r0_r1 %r0 1
+       calli @abort
+xosubi_r0_r1:
+       movi %r0 0
+       bosubi_u xosubi_u_r0_r1 %r0 1
+       calli @abort
+xosubi_u_r0_r1:
+       // bxsub: I81 - 1, then _u forms with I80 - 1
+       movi %r0 I81
+       movi %r1 1
+       bxsubr xxsubr_r0_r1 %r0 %r1
+       calli @abort
+xxsubr_r0_r1:
+       movi %r0 I81
+       bxsubi xxsubi_r0_r1 %r0 1
+       calli @abort
+xxsubi_r0_r1:
+       movi %r0 I80
+       movi %r1 1
+       bxsubr_u xxsubr_u_r0_r1 %r0 %r1
+       calli @abort
+xxsubr_u_r0_r1:
+       movi %r0 I80
+       bxsubi_u xxsubi_u_r0_r1 %r0 1
+       calli @abort
+xxsubi_u_r0_r1:
+       // Single-precision (_f) comparisons, register form then immediate
+       // form; every branch below is expected to be taken.
+
+       // lt: 1 against 2
+       movi_f %f0 1
+       movi_f %f1 2
+       bltr_f xltr_f_f0_f1 %f0 %f1
+       calli @abort
+xltr_f_f0_f1:
+       blti_f xlti_f_f0_f1 %f0 2
+       calli @abort
+xlti_f_f0_f1:
+       // le: -1 against -1
+       movi_f %f0 -1
+       movi_f %f1 -1
+       bler_f xler_f_f0_f1 %f0 %f1
+       calli @abort
+xler_f_f0_f1:
+       blei_f xlei_f_f0_f1 %f0 -1
+       calli @abort
+xlei_f_f0_f1:
+       // eq: -2 against -2
+       movi_f %f0 -2
+       movi_f %f1 -2
+       beqr_f xeqr_f_f0_f1 %f0 %f1
+       calli @abort
+xeqr_f_f0_f1:
+       beqi_f xeqi_f_f0_f1 %f0 -2
+       calli @abort
+xeqi_f_f0_f1:
+       // ge: -3 against -3
+       movi_f %f0 -3
+       movi_f %f1 -3
+       bger_f xger_f_f0_f1 %f0 %f1
+       calli @abort
+xger_f_f0_f1:
+       bgei_f xgei_f_f0_f1 %f0 -3
+       calli @abort
+xgei_f_f0_f1:
+       // gt: 2 against 1
+       movi_f %f0 2
+       movi_f %f1 1
+       bgtr_f xgtr_f_f0_f1 %f0 %f1
+       calli @abort
+xgtr_f_f0_f1:
+       bgti_f xgti_f_f0_f1 %f0 1
+       calli @abort
+xgti_f_f0_f1:
+       // ne: 0 against 2
+       movi_f %f0 0
+       movi_f %f1 2
+       bner_f xner_f_f0_f1 %f0 %f1
+       calli @abort
+xner_f_f0_f1:
+       bnei_f xnei_f_f0_f1 %f0 2
+       calli @abort
+xnei_f_f0_f1:
+
+       // The same conditions again, this time through the generator
+       // macros. EBI, XEBI and XBOPI repeat each check over every ordered
+       // pair of distinct registers among v0, v1, v2, r0, r1, r2; BOPI is
+       // defined outside this part of the file.
+       BOPI(lt, -1, 1, 1, -1)
+       BOPI(le, -1, -1, 1, 1)
+       EBI(eq, 32, 32)
+       BOPI(ge, -2, -2, 2, 2)
+       BOPI(gt, 2, -2, -2, 2)
+       EBI(ne, 3, -3)
+       XEBI(ms, 1, 3)
+       XEBI(mc, 1, 2)
+       XBOPI(oadd, I7f, 1, Iff, 1)
+       XBOPI(xadd, I80, 1, I7f, 1)
+       XBOPI(osub, I80, 1, 0, 1)
+       XBOPI(xsub, I81, 1, I80, 1)
+       // Float conditions. BOPF requires the branch to be taken for the
+       // given operands and not taken against NaN; UBOPF requires it to
+       // be taken in both cases.
+       BOPF(lt, 1, 2)
+       BOPF(le, 2, 2)
+       BOPF(eq, 3, 3)
+       BOPF(ge, 3, 3)
+       BOPF(gt, 4, 3)
+       UBOPF(ne, 4, 3)
+       UBOPF(unlt, 1, 2)
+       UBOPF(unle, 2, 2)
+       UBOPF(uneq, 3, 3)
+       UBOPF(unge, 3, 3)
+       UBOPF(ungt, 4, 3)
+       BOPF(ltgt, 4, 3)
+       // bord: must branch for two ordinary values (register and
+       // immediate forms) and must not branch against NaN.
+       movi_f %f0 5
+       movi_f %f1 5
+       bordr_f ordr_f %f0 %f1
+       calli @abort
+ordr_f:
+       bordi_f ordi_f %f0 1
+       calli @abort
+ordi_f:
+       bordi_f ordi_f_u %f0 $NaN
+       jmpi ordi_f_u0
+ordi_f_u:
+       calli @abort
+ordi_f_u0:
+       // bunord: the reverse. It must not branch for two ordinary values
+       // (the taken path lands on abort) and must branch against NaN.
+       movi_f %f0 5
+       movi_f %f1 5
+       bunordr_f unordr_f %f0 %f1
+       jmpi unordr_f_0
+unordr_f:
+       calli @abort
+unordr_f_0:
+       bunordi_f unordi_f %f0 1
+       jmpi unordi_f_0
+unordi_f:
+       calli @abort
+unordi_f_0:
+       bunordi_f unordi_f_1 %f0 $NaN
+       calli @abort
+unordi_f_1:
+
+       // Reaching this point means no check crashed or aborted; print
+       // the string at label ok (defined outside this part of the file).
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/call.ok b/deps/lightning/check/call.ok
new file mode 100644 (file)
index 0000000..cc119df
--- /dev/null
@@ -0,0 +1,4 @@
+forward
+backward
+forward
+backward
diff --git a/deps/lightning/check/call.tst b/deps/lightning/check/call.tst
new file mode 100644 (file)
index 0000000..21068b6
--- /dev/null
@@ -0,0 +1,272 @@
+// def_wi(i): defines function _w<i>. It declares one argument, reads it
+// with getarg<i> into %r0 and returns %r0 with retr.
+#define def_wi(i)                      \
+       name _w##i                      \
+_w##i:                                 \
+       prolog                          \
+       arg $arg##i                     \
+       getarg##i %r0 $arg##i           \
+       retr %r0                        \
+       epilog
+// def_wf(f): defines function _w<f>. It declares one arg<f> argument,
+// reads it with getarg<f> into %f0, converts it to %r0 with truncr<f>
+// and returns %r0 with retr.
+#define def_wf(f)                      \
+       name _w##f                      \
+_w##f:                                 \
+       prolog                          \
+       arg##f $arg##f                  \
+       getarg##f %f0 $arg##f           \
+       truncr##f %r0 %f0               \
+       retr %r0                        \
+       epilog
+// def_fi(f, i): defines function <f><i>. It declares one argument, reads
+// it with getarg<i> into %r0, converts it to %f0 with extr<f> and
+// returns %f0 with retr<f>.
+#define def_fi(f, i)                   \
+       name f##i                       \
+f##i:                                  \
+       prolog                          \
+       arg $arg##i                     \
+       getarg##i %r0 $arg##i           \
+       extr##f %f0 %r0                 \
+       retr##f %f0                     \
+       epilog
+// def_f(f): defines function <f><f>. It declares one arg<f> argument,
+// reads it with getarg<f> into %f0 and returns it unchanged with
+// retr<f>.
+#define def_f(f)                       \
+       name f##f                       \
+f##f:                                  \
+       prolog                          \
+       arg##f $arg##f                  \
+       getarg##f %f0 $arg##f           \
+       retr##f %f0                     \
+       epilog
+// def_ff(f, g): defines function <f><g>. It declares one arg<g>
+// argument, reads it with getarg<g> into %f0, converts it in place with
+// extr<g><f> and returns %f0 with retr<f>.
+#define def_ff(f, g)                   \
+       name f##g                       \
+f##g:                                  \
+       prolog                          \
+       arg##g $arg##g                  \
+       getarg##g %f0 $arg##g           \
+       extr##g##f %f0 %f0              \
+       retr##f %f0                     \
+       epilog
+
+.data  32
+fstr:
+.c     "forward"
+bstr:
+.c     "backward"
+
+.code
+       // Jump over the helper functions instantiated below; main is
+       // defined after them.
+       jmpi main
+
+       // Helpers returning their integer argument in %r0. The _i and _ui
+       // variants are only built when __WORDSIZE is 64.
+       def_wi(_c)
+       def_wi(_uc)
+       def_wi(_s)
+       def_wi(_us)
+#if __WORDSIZE == 64
+       def_wi(_i)
+       def_wi(_ui)
+#endif
+       // Helpers truncating a float or double argument to a word
+       def_wf(_f)
+       def_wf(_d)
+       // Helpers converting an integer argument to float (_f_*)
+       def_fi(_f, _c)
+       def_fi(_f, _uc)
+       def_fi(_f, _s)
+       def_fi(_f, _us)
+       def_fi(_f, _i)
+#if __WORDSIZE == 64
+       def_fi(_f, _ui)
+       def_fi(_f, _l)
+#endif
+       // Helpers converting an integer argument to double (_d_*)
+       def_fi(_d, _c)
+       def_fi(_d, _uc)
+       def_fi(_d, _s)
+       def_fi(_d, _us)
+       def_fi(_d, _i)
+#if __WORDSIZE == 64
+       def_fi(_d, _ui)
+       def_fi(_d, _l)
+#endif
+       // Helpers returning a float or double argument unchanged, then
+       // the two helpers converting between float and double
+       def_f(_f)
+       def_f(_d)
+       def_ff(_f, _d)
+       def_ff(_d, _f)
+
+       name main
+main:
+       prolog
+
+       // _call_w(n, i, a, r): calls _w<i> with immediate a, applies
+       // extr<i> to the returned word and aborts unless the result
+       // equals r. n only makes the pass label unique.
+#define _call_w(n, i, a, r)            \
+       prepare                         \
+               pushargi a              \
+       finishi _w##i                   \
+       retval %r0                      \
+       extr##i %r0 %r0                 \
+       beqi _w##i##_##n %r0 r          \
+       calli @abort                    \
+_w##i##_##n:
+       // The extra macro level lets an argument such as __LINE__ be
+       // expanded before it is pasted into the label name.
+#define call_w(n, i, a, r)             _call_w(n, i, a, r)
+       // _call_wf(n, f, a, r): calls _w<f> with the float immediate a,
+       // converts the returned word back with extr<f> and aborts unless
+       // the result equals r. n only makes the pass label unique.
+#define _call_wf(n, f, a, r)           \
+       prepare                         \
+               pushargi##f a           \
+       finishi _w##f                   \
+       retval %r0                      \
+       extr##f %f0 %r0                 \
+       beqi##f _w##f##_##n %f0 r       \
+       calli @abort                    \
+_w##f##_##n:
+       // The extra macro level lets an argument such as __LINE__ be
+       // expanded before it is pasted into the label name.
+#define call_wf(n, f, a, r)            _call_wf(n, f, a, r)
+       // _call_fi(n, f, i, a, r): calls <f><i> with the integer
+       // immediate a, fetches the result with retval<f> and aborts
+       // unless it equals r. n only makes the pass label unique.
+#define _call_fi(n, f, i, a, r)                \
+       prepare                         \
+               pushargi a              \
+       finishi f##i                    \
+       retval##f %f0                   \
+       beqi##f f##i##n %f0 r           \
+       calli @abort                    \
+f##i##n:
+       // The extra macro level lets an argument such as __LINE__ be
+       // expanded before it is pasted into the label name.
+#define call_fi(n, f, i, a, r)         _call_fi(n, f, i, a, r)
+       // _call_f(n, f, a, r): calls <f><f> with the float immediate a,
+       // fetches the result with retval<f> and aborts unless it equals
+       // r. n only makes the pass label unique.
+#define _call_f(n, f, a, r)            \
+       prepare                         \
+               pushargi##f a           \
+       finishi f##f                    \
+       retval##f %f0                   \
+       beqi##f f##f##n %f0 r           \
+       calli @abort                    \
+f##f##n:
+       // The extra macro level lets an argument such as __LINE__ be
+       // expanded before it is pasted into the label name.
+#define call_f(n, f, a, r)             _call_f(n, f, a, r)
+       // _call_ff(n, f, g, a, r): calls <f><g> with the immediate a
+       // pushed as type g, fetches the result with retval<f> and aborts
+       // unless it equals r. n only makes the pass label unique.
+#define _call_ff(n, f, g, a, r)                \
+       prepare                         \
+               pushargi##g a           \
+       finishi f##g                    \
+       retval##f %f0                   \
+       beqi##f f##g##n %f0 r           \
+       calli @abort                    \
+f##g##n:
+       // The extra macro level lets an argument such as __LINE__ be
+       // expanded before it is pasted into the label name.
+#define call_ff(n, f, g, a, r)         _call_ff(n, f, g, a, r)
+
+       // Boundary bit patterns used as arguments and expected results.
+       // The c, s, i and l prefixes give 8, 16, 32 and 64 bit patterns.
+#define c7f            0x7f
+#define c80            0x80
+#define c81            0x81
+#define cff            0xff
+#define s7f            0x7fff
+#define s80            0x8000
+#define s81            0x8001
+#define i7f            0x7fffffff
+#define i80            0x80000000
+#define i81            0x80000001
+#define iff            0xffffffff
+#define l7f            0x7fffffffffffffff
+#define l80            0x8000000000000000
+#define l81            0x8000000000000001
+       // Float values matching the 8 bit patterns: f80 and f81 are the
+       // negative readings of 0x80 and 0x81, uf80 is the positive
+       // reading of 0x80.
+#define f7f             127.0
+#define f80            -128.0
+#define f81            -127.0
+#define uf80            128.0
+#define uf81            127.0
+       // The w-prefixed values are the narrow patterns above with the
+       // upper bits set to ones, out to the full word width.
+#if __WORDSIZE == 32
+#  define wc80         0xffffff80
+#  define wc81         0xffffff81
+#  define ws80         0xffff8000
+#  define ws81         0xffff8001
+#else
+#  define wc80         0xffffffffffffff80
+#  define wc81         0xffffffffffffff81
+#  define ws80         0xffffffffffff8000
+#  define ws81         0xffffffffffff8001
+#  define wi80         0xffffffff80000000
+#  define wi81         0xffffffff80000001
+#endif
+
+       // Integer round trips through the _w helpers. Each line passes
+       // the third argument and expects the fourth after extr<i>;
+       // __LINE__ keeps the generated labels distinct.
+       call_w(__LINE__, _c,  c7f, c7f)
+       call_w(__LINE__, _c,  c80, wc80)
+       call_w(__LINE__, _c,  c81, wc81)
+       call_w(__LINE__, _uc, c7f, c7f)
+       call_w(__LINE__, _uc, c80, c80)
+       call_w(__LINE__, _uc, c81, c81)
+       call_w(__LINE__, _s,  s7f, s7f)
+       call_w(__LINE__, _s,  s80, ws80)
+       call_w(__LINE__, _s,  s81, ws81)
+       call_w(__LINE__, _us, s7f, s7f)
+       call_w(__LINE__, _us, s80, s80)
+       call_w(__LINE__, _us, s81, s81)
+#if __WORDSIZE == 64
+       call_w(__LINE__, _i,  i7f, i7f)
+       call_w(__LINE__, _i,  i80, wi80)
+       call_w(__LINE__, _i,  i81, wi81)
+       call_w(__LINE__, _ui, i7f, i7f)
+       call_w(__LINE__, _ui, i80, i80)
+       call_w(__LINE__, _ui, i81, i81)
+#endif
+       // Float and double arguments truncated to a word by the callee
+       call_wf(__LINE__, _f, c7f, f7f)
+       call_wf(__LINE__, _f, wc80, f80)
+       call_wf(__LINE__, _f, wc81, f81)
+       call_wf(__LINE__, _d, c7f, f7f)
+       call_wf(__LINE__, _d, wc80, f80)
+       call_wf(__LINE__, _d, wc81, f81)
+       // Integer arguments converted to float by the callee
+       call_fi(__LINE__, _f, _c, c7f, f7f)
+       call_fi(__LINE__, _f, _c, c80, f80)
+       call_fi(__LINE__, _f, _uc, c7f, f7f)
+       call_fi(__LINE__, _f, _uc, c80, uf80)
+       call_fi(__LINE__, _f, _s, c7f, f7f)
+       call_fi(__LINE__, _f, _s, c80, uf80)
+       call_fi(__LINE__, _f, _us, c7f, f7f)
+       call_fi(__LINE__, _f, _us, c80, uf80)
+       call_fi(__LINE__, _f, _i, c7f, f7f)
+       call_fi(__LINE__, _f, _i, c80, uf80)
+#if __WORDSIZE == 64
+       call_fi(__LINE__, _f, _ui, c7f, f7f)
+       call_fi(__LINE__, _f, _ui, c80, uf80)
+       call_fi(__LINE__, _f, _l, c7f, f7f)
+       call_fi(__LINE__, _f, _l, c80, uf80)
+#endif
+       // Integer arguments converted to double by the callee
+       call_fi(__LINE__, _d, _c, c7f, f7f)
+       call_fi(__LINE__, _d, _c, c80, f80)
+       call_fi(__LINE__, _d, _uc, c7f, f7f)
+       call_fi(__LINE__, _d, _uc, c80, uf80)
+       call_fi(__LINE__, _d, _s, c7f, f7f)
+       call_fi(__LINE__, _d, _s, c80, uf80)
+       call_fi(__LINE__, _d, _us, c7f, f7f)
+       call_fi(__LINE__, _d, _us, c80, uf80)
+       call_fi(__LINE__, _d, _i, c7f, f7f)
+       call_fi(__LINE__, _d, _i, c80, uf80)
+#if __WORDSIZE == 64
+       call_fi(__LINE__, _d, _ui, c7f, f7f)
+       call_fi(__LINE__, _d, _ui, c80, uf80)
+       call_fi(__LINE__, _d, _l, c7f, f7f)
+       call_fi(__LINE__, _d, _l, c80, uf80)
+#endif
+       // Float and double passed through unchanged, then converted
+       // between the two types
+       call_f(__LINE__, _f, f7f, f7f)
+       call_f(__LINE__, _d, f7f, f7f)
+       call_ff(__LINE__, _f, _d, f80, f80)
+       call_ff(__LINE__, _d, _f, f81, f81)
+
+       // Call a function defined later in the file through a register,
+       // then its sibling iforward through an immediate call.
+       movi %r0 forward
+       callr %r0
+
+       calli iforward
+
+       ret
+       epilog
+
+       // backward: prints the string at bstr with puts and returns.
+       name backward
+backward:
+       prolog
+       prepare
+               pushargi bstr
+       finishi @puts
+       ret
+       epilog
+
+       // forward: prints the string at fstr with puts, then calls
+       // backward (defined above) through register %r0.
+       name forward
+forward:
+       prolog
+       prepare
+               pushargi fstr
+       finishi @puts
+       movi %r0 backward
+       callr %r0
+       ret
+       epilog
+
+       // iforward: prints the string at fstr with puts, then calls
+       // backward (defined above) with an immediate call.
+       name iforward
+iforward:
+       prolog
+       prepare
+               pushargi fstr
+       finishi @puts
+       calli backward
+       ret
+       epilog
diff --git a/deps/lightning/check/carg.c b/deps/lightning/check/carg.c
new file mode 100644 (file)
index 0000000..35b897e
--- /dev/null
@@ -0,0 +1,538 @@
+#include <lightning.h>
+#include <stdio.h>
+
+/*   Simple test for arguments handling, that also shows how to use
+ * arguments to store values.
+ *   Register arguments, if available, are very fast, but are also
+ * very volatile on some ports, because some ports will do C calls
+ * to implement division, remainder, sometimes multiplication, or
+ * some float operations.
+ *   Arguments in registers should be fetched in the prolog of the
+ * function, and if they must be saved, they should be saved in
+ * the prolog.
+ *   The predicate macro "jit_arg_register_p(arg)" tells whether an
+ * argument lives in a register, which is known to be very fast
+ * read/write temporary storage.
+ */
+
+#define W              jit_word_t
+#define F              jit_float32_t
+#define D              jit_float64_t
+
+jit_state_t             *_jit;
+
+/* Callee for the word-argument test: aborts unless the sixteen
+ * arguments arrive holding 1 through 16, in order. */
+void
+cw(W a1, W  a2, W  a3, W  a4, W  a5, W  a6, W  a7, W  a8,
+   W a9, W a10, W a11, W a12, W a13, W a14, W a15, W a16)
+{
+    /* Collect the arguments so each can be compared with its position. */
+    const W     got[16] = {  a1,  a2,  a3,  a4,  a5,  a6,  a7,  a8,
+                             a9, a10, a11, a12, a13, a14, a15, a16 };
+    int         slot;
+
+    for (slot = 0; slot < 16; slot++) {
+        if (got[slot] != slot + 1)
+            abort();
+    }
+}
+
+/* Callee for the float-argument test: aborts unless the sixteen
+ * arguments arrive holding 1 through 16, in order. */
+void
+cf(F a1, F  a2, F  a3, F  a4, F  a5, F  a6, F  a7, F  a8,
+   F a9, F a10, F a11, F a12, F a13, F a14, F a15, F a16)
+{
+    /* Collect the arguments so each can be compared with its position. */
+    const F     got[16] = {  a1,  a2,  a3,  a4,  a5,  a6,  a7,  a8,
+                             a9, a10, a11, a12, a13, a14, a15, a16 };
+    int         slot;
+
+    for (slot = 0; slot < 16; slot++) {
+        if (got[slot] != slot + 1)
+            abort();
+    }
+}
+
+/* Callee for the double-argument test: aborts unless the sixteen
+ * arguments arrive holding 1 through 16, in order. */
+void
+cd(D a1, D  a2, D  a3, D  a4, D  a5, D  a6, D  a7, D  a8,
+   D a9, D a10, D a11, D a12, D a13, D a14, D a15, D a16)
+{
+    /* Collect the arguments so each can be compared with its position. */
+    const D     got[16] = {  a1,  a2,  a3,  a4,  a5,  a6,  a7,  a8,
+                             a9, a10, a11, a12, a13, a14, a15, a16 };
+    int         slot;
+
+    for (slot = 0; slot < 16; slot++) {
+        if (got[slot] != slot + 1)
+            abort();
+    }
+}
+
+int
+main(int argc, char *argv[])
+{
+    void               (*code)(void);
+    jit_node_t         *jmp, *pass;
+    jit_node_t          *jw,  *jf,  *jd;
+    jit_int32_t                  s1,   s2,   s3,   s4,   s5,   s6,   s7,   s8,
+                         s9,  s10,  s11,  s12,  s13,  s14,  s15,  s16;
+    jit_node_t          *a1,  *a2,  *a3,  *a4,  *a5,  *a6,  *a7,  *a8,
+                        *a9, *a10, *a11, *a12, *a13, *a14, *a15, *a16;
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    /* jump to "main" label */
+    jmp = jit_jmpi();
+
+    /* Create jit function that
+     * o Receives 16 word arguments
+     * o Save in the stack any register argument. Also force register
+     *   arguments to be clobbered to properly make the test
+     * o Calls a C function that receives 16 word arguments, with
+     *   values different from the ones received by this function
+     * o Reload from stack any register argument
+     * o Validated all arguments were not modified in the known
+     *   cases it could have been clobbered
+     */
+    jw = jit_label();
+    jit_name("jw");
+    jit_note(__FILE__, __LINE__);
+    jit_prolog();
+    a1  = jit_arg();
+    a2  = jit_arg();
+    a3  = jit_arg();
+    a4  = jit_arg();
+    a5  = jit_arg();
+    a6  = jit_arg();
+    a7  = jit_arg();
+    a8  = jit_arg();
+    a9  = jit_arg();
+    a10 = jit_arg();
+    a11 = jit_arg();
+    a12 = jit_arg();
+    a13 = jit_arg();
+    a14 = jit_arg();
+    a15 = jit_arg();
+    a16 = jit_arg();
+#define SAVE_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           s##N = jit_allocai(sizeof(W));                              \
+           jit_getarg(JIT_R0, a##N);                                   \
+           jit_stxi(s##N, JIT_FP, JIT_R0);                             \
+           jit_putargi(-1, a##N);                                      \
+       }                                                               \
+    } while (0)
+    SAVE_ARG(1);
+    SAVE_ARG(2);
+    SAVE_ARG(3);
+    SAVE_ARG(4);
+    SAVE_ARG(5);
+    SAVE_ARG(6);
+    SAVE_ARG(7);
+    SAVE_ARG(8);
+    SAVE_ARG(9);
+    SAVE_ARG(10);
+    SAVE_ARG(11);
+    SAVE_ARG(12);
+    SAVE_ARG(13);
+    SAVE_ARG(14);
+    SAVE_ARG(15);
+    SAVE_ARG(16);
+#undef SAVE_ARG
+    jit_prepare();
+    {
+       jit_pushargi(1);
+       jit_pushargi(2);
+       jit_pushargi(3);
+       jit_pushargi(4);
+       jit_pushargi(5);
+       jit_pushargi(6);
+       jit_pushargi(7);
+       jit_pushargi(8);
+       jit_pushargi(9);
+       jit_pushargi(10);
+       jit_pushargi(11);
+       jit_pushargi(12);
+       jit_pushargi(13);
+       jit_pushargi(14);
+       jit_pushargi(15);
+       jit_pushargi(16);
+    }
+    jit_finishi(cw);
+#define LOAD_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           jit_ldxi(JIT_R0, JIT_FP, s##N);                             \
+           jit_putargr(JIT_R0, a##N);                                  \
+       }                                                               \
+    } while (0)
+    LOAD_ARG(1);
+    LOAD_ARG(2);
+    LOAD_ARG(3);
+    LOAD_ARG(4);
+    LOAD_ARG(5);
+    LOAD_ARG(6);
+    LOAD_ARG(7);
+    LOAD_ARG(8);
+    LOAD_ARG(9);
+    LOAD_ARG(10);
+    LOAD_ARG(11);
+    LOAD_ARG(12);
+    LOAD_ARG(13);
+    LOAD_ARG(14);
+    LOAD_ARG(15);
+    LOAD_ARG(16);
+#undef LOAD_ARG
+    pass = jit_forward();
+#define CHECK_ARG(N)                                                   \
+    do {                                                               \
+       jit_getarg(JIT_R0, a##N);                                       \
+       jit_patch_at(jit_beqi(JIT_R0, 17 - N), pass);                   \
+    } while (0)
+    CHECK_ARG(1);
+    CHECK_ARG(2);
+    CHECK_ARG(3);
+    CHECK_ARG(4);
+    CHECK_ARG(5);
+    CHECK_ARG(6);
+    CHECK_ARG(7);
+    CHECK_ARG(8);
+    CHECK_ARG(9);
+    CHECK_ARG(10);
+    CHECK_ARG(11);
+    CHECK_ARG(12);
+    CHECK_ARG(13);
+    CHECK_ARG(14);
+    CHECK_ARG(15);
+    CHECK_ARG(16);
+#undef CHECK_ARG
+    jit_calli(abort);
+    jit_link(pass);
+    jit_ret();
+    jit_epilog();
+
+    /* Create jit function that
+     * o Receives 16 float arguments
+     * o Save in the stack any register argument. Also force register
+     *   arguments to be clobbered to properly make the test
+     * o Calls a C function that receives 16 float arguments, with
+     *   values different from the ones received by this function
+     * o Reload from stack any register argument
+     * o Validated all arguments were not modified in the known
+     *   cases it could have been clobbered
+     */
+    jf = jit_label();
+    jit_name("jf");
+    jit_note(__FILE__, __LINE__);
+    jit_prolog();
+    a1  = jit_arg_f();
+    a2  = jit_arg_f();
+    a3  = jit_arg_f();
+    a4  = jit_arg_f();
+    a5  = jit_arg_f();
+    a6  = jit_arg_f();
+    a7  = jit_arg_f();
+    a8  = jit_arg_f();
+    a9  = jit_arg_f();
+    a10 = jit_arg_f();
+    a11 = jit_arg_f();
+    a12 = jit_arg_f();
+    a13 = jit_arg_f();
+    a14 = jit_arg_f();
+    a15 = jit_arg_f();
+    a16 = jit_arg_f();
+#define SAVE_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           s##N = jit_allocai(sizeof(F));                              \
+           jit_getarg_f(JIT_F0, a##N);                                 \
+           jit_stxi_f(s##N, JIT_FP, JIT_F0);                           \
+           jit_putargi_f(-1, a##N);                                    \
+       }                                                               \
+    } while (0)
+    SAVE_ARG(1);
+    SAVE_ARG(2);
+    SAVE_ARG(3);
+    SAVE_ARG(4);
+    SAVE_ARG(5);
+    SAVE_ARG(6);
+    SAVE_ARG(7);
+    SAVE_ARG(8);
+    SAVE_ARG(9);
+    SAVE_ARG(10);
+    SAVE_ARG(11);
+    SAVE_ARG(12);
+    SAVE_ARG(13);
+    SAVE_ARG(14);
+    SAVE_ARG(15);
+    SAVE_ARG(16);
+#undef SAVE_ARG
+    jit_prepare();
+    {
+       jit_pushargi_f(1);
+       jit_pushargi_f(2);
+       jit_pushargi_f(3);
+       jit_pushargi_f(4);
+       jit_pushargi_f(5);
+       jit_pushargi_f(6);
+       jit_pushargi_f(7);
+       jit_pushargi_f(8);
+       jit_pushargi_f(9);
+       jit_pushargi_f(10);
+       jit_pushargi_f(11);
+       jit_pushargi_f(12);
+       jit_pushargi_f(13);
+       jit_pushargi_f(14);
+       jit_pushargi_f(15);
+       jit_pushargi_f(16);
+    }
+    jit_finishi(cf);
+#define LOAD_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           jit_ldxi_f(JIT_F0, JIT_FP, s##N);                           \
+           jit_putargr_f(JIT_F0, a##N);                                \
+       }                                                               \
+    } while (0)
+    LOAD_ARG(1);
+    LOAD_ARG(2);
+    LOAD_ARG(3);
+    LOAD_ARG(4);
+    LOAD_ARG(5);
+    LOAD_ARG(6);
+    LOAD_ARG(7);
+    LOAD_ARG(8);
+    LOAD_ARG(9);
+    LOAD_ARG(10);
+    LOAD_ARG(11);
+    LOAD_ARG(12);
+    LOAD_ARG(13);
+    LOAD_ARG(14);
+    LOAD_ARG(15);
+    LOAD_ARG(16);
+#undef LOAD_ARG
+    pass = jit_forward();
+#define CHECK_ARG(N)                                                   \
+    do {                                                               \
+       jit_getarg_f(JIT_F0, a##N);                                     \
+       jit_patch_at(jit_beqi_f(JIT_F0, 17 - N), pass);                 \
+    } while (0)
+    CHECK_ARG(1);
+    CHECK_ARG(2);
+    CHECK_ARG(3);
+    CHECK_ARG(4);
+    CHECK_ARG(5);
+    CHECK_ARG(6);
+    CHECK_ARG(7);
+    CHECK_ARG(8);
+    CHECK_ARG(9);
+    CHECK_ARG(10);
+    CHECK_ARG(11);
+    CHECK_ARG(12);
+    CHECK_ARG(13);
+    CHECK_ARG(14);
+    CHECK_ARG(15);
+    CHECK_ARG(16);
+#undef CHECK_ARG
+    jit_calli(abort);
+    jit_link(pass);
+    jit_ret();
+    jit_epilog();
+
+    /* Create jit function that
+     * o Receives 16 double arguments
+     * o Save in the stack any register argument. Also force register
+     *   arguments to be clobbered to properly make the test
+     * o Calls a C function that receives 16 double arguments, with
+     *   values different from the ones received by this function
+     * o Reload from stack any register argument
+     * o Validated all arguments were not modified in the known
+     *   cases it could have been clobbered
+     */
+    jd = jit_label();
+    jit_name("jd");
+    jit_note(__FILE__, __LINE__);
+    jit_prolog();
+    a1  = jit_arg_d();
+    a2  = jit_arg_d();
+    a3  = jit_arg_d();
+    a4  = jit_arg_d();
+    a5  = jit_arg_d();
+    a6  = jit_arg_d();
+    a7  = jit_arg_d();
+    a8  = jit_arg_d();
+    a9  = jit_arg_d();
+    a10 = jit_arg_d();
+    a11 = jit_arg_d();
+    a12 = jit_arg_d();
+    a13 = jit_arg_d();
+    a14 = jit_arg_d();
+    a15 = jit_arg_d();
+    a16 = jit_arg_d();
+#define SAVE_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           s##N = jit_allocai(sizeof(D));                              \
+           jit_getarg_d(JIT_F0, a##N);                                 \
+           jit_stxi_d(s##N, JIT_FP, JIT_F0);                           \
+           jit_putargi_d(-1, a##N);                                    \
+       }                                                               \
+    } while (0)
+    SAVE_ARG(1);
+    SAVE_ARG(2);
+    SAVE_ARG(3);
+    SAVE_ARG(4);
+    SAVE_ARG(5);
+    SAVE_ARG(6);
+    SAVE_ARG(7);
+    SAVE_ARG(8);
+    SAVE_ARG(9);
+    SAVE_ARG(10);
+    SAVE_ARG(11);
+    SAVE_ARG(12);
+    SAVE_ARG(13);
+    SAVE_ARG(14);
+    SAVE_ARG(15);
+    SAVE_ARG(16);
+#undef SAVE_ARG
+    jit_prepare();
+    {
+       jit_pushargi_d(1);
+       jit_pushargi_d(2);
+       jit_pushargi_d(3);
+       jit_pushargi_d(4);
+       jit_pushargi_d(5);
+       jit_pushargi_d(6);
+       jit_pushargi_d(7);
+       jit_pushargi_d(8);
+       jit_pushargi_d(9);
+       jit_pushargi_d(10);
+       jit_pushargi_d(11);
+       jit_pushargi_d(12);
+       jit_pushargi_d(13);
+       jit_pushargi_d(14);
+       jit_pushargi_d(15);
+       jit_pushargi_d(16);
+    }
+    jit_finishi(cd);
+#define LOAD_ARG(N)                                                    \
+    do {                                                               \
+       if (jit_arg_register_p(a##N)) {                                 \
+           jit_ldxi_d(JIT_F0, JIT_FP, s##N);                           \
+           jit_putargr_d(JIT_F0, a##N);                                \
+       }                                                               \
+    } while (0)
+    LOAD_ARG(1);
+    LOAD_ARG(2);
+    LOAD_ARG(3);
+    LOAD_ARG(4);
+    LOAD_ARG(5);
+    LOAD_ARG(6);
+    LOAD_ARG(7);
+    LOAD_ARG(8);
+    LOAD_ARG(9);
+    LOAD_ARG(10);
+    LOAD_ARG(11);
+    LOAD_ARG(12);
+    LOAD_ARG(13);
+    LOAD_ARG(14);
+    LOAD_ARG(15);
+    LOAD_ARG(16);
+#undef LOAD_ARG
+    pass = jit_forward();
+#define CHECK_ARG(N)                                                   \
+    do {                                                               \
+       jit_getarg_d(JIT_F0, a##N);                                     \
+       jit_patch_at(jit_beqi_d(JIT_F0, 17 - N), pass);                 \
+    } while (0)
+    CHECK_ARG(1);
+    CHECK_ARG(2);
+    CHECK_ARG(3);
+    CHECK_ARG(4);
+    CHECK_ARG(5);
+    CHECK_ARG(6);
+    CHECK_ARG(7);
+    CHECK_ARG(8);
+    CHECK_ARG(9);
+    CHECK_ARG(10);
+    CHECK_ARG(11);
+    CHECK_ARG(12);
+    CHECK_ARG(13);
+    CHECK_ARG(14);
+    CHECK_ARG(15);
+    CHECK_ARG(16);
+#undef CHECK_ARG
+    jit_calli(abort);
+    jit_link(pass);
+    jit_ret();
+    jit_epilog();
+
+    /* Create a jit function that calls the 3 previous ones.
+     * o First call the function that receives 16 word arguments
+     * o Then call the function that receives 16 float arguments
+     * o Finally call the function that receives 16 double arguments
+     */
+    jit_patch(jmp);
+    jit_name("main");
+    jit_note(__FILE__, __LINE__);
+    jit_prolog();
+    jit_prepare();
+    {
+       jit_pushargi(16);
+       jit_pushargi(15);
+       jit_pushargi(14);
+       jit_pushargi(13);
+       jit_pushargi(12);
+       jit_pushargi(11);
+       jit_pushargi(10);
+       jit_pushargi(9);
+       jit_pushargi(8);
+       jit_pushargi(7);
+       jit_pushargi(6);
+       jit_pushargi(5);
+       jit_pushargi(4);
+       jit_pushargi(3);
+       jit_pushargi(2);
+       jit_pushargi(1);
+    }
+    jit_patch_at(jit_finishi(NULL), jw);
+    jit_prepare();
+    {
+       jit_pushargi_f(16);
+       jit_pushargi_f(15);
+       jit_pushargi_f(14);
+       jit_pushargi_f(13);
+       jit_pushargi_f(12);
+       jit_pushargi_f(11);
+       jit_pushargi_f(10);
+       jit_pushargi_f(9);
+       jit_pushargi_f(8);
+       jit_pushargi_f(7);
+       jit_pushargi_f(6);
+       jit_pushargi_f(5);
+       jit_pushargi_f(4);
+       jit_pushargi_f(3);
+       jit_pushargi_f(2);
+       jit_pushargi_f(1);
+    }
+    jit_patch_at(jit_finishi(NULL), jf);
+    jit_prepare();
+    {
+       jit_pushargi_d(16);
+       jit_pushargi_d(15);
+       jit_pushargi_d(14);
+       jit_pushargi_d(13);
+       jit_pushargi_d(12);
+       jit_pushargi_d(11);
+       jit_pushargi_d(10);
+       jit_pushargi_d(9);
+       jit_pushargi_d(8);
+       jit_pushargi_d(7);
+       jit_pushargi_d(6);
+       jit_pushargi_d(5);
+       jit_pushargi_d(4);
+       jit_pushargi_d(3);
+       jit_pushargi_d(2);
+       jit_pushargi_d(1);
+    }
+    jit_patch_at(jit_finishi(NULL), jd);
+    jit_ret();
+    jit_epilog();
+
+    code = jit_emit();
+    jit_clear_state();
+
+    (*code)();
+
+    jit_destroy_state();
+    finish_jit();
+    return (0);
+}
diff --git a/deps/lightning/check/carry.ok b/deps/lightning/check/carry.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/carry.tst b/deps/lightning/check/carry.tst
new file mode 100644 (file)
index 0000000..180d896
--- /dev/null
@@ -0,0 +1,186 @@
+
+#define ix0            0
+#define lx0            0
+#define ix1            1
+#define lx1            1
+#define ix2            2
+#define lx2            2
+#define ix4            4
+#define lx4            4
+#if __WORDSIZE == 32
+#  define ix7fe                0x7ffffffe
+#  define ix7f         0x7fffffff
+#  define ix80         0x80000000
+#  define iff          0xffffffff
+#  define ife          0xfffffffe
+#  define ifd          0xfffffffd
+#  define ifc          0xfffffffc
+#else
+#  define ix7fe                0x7ffffffffffffffe
+#  define ix7f         0x7fffffffffffffff
+#  define ix80         0x8000000000000000
+#  define iff          0xffffffffffffffff
+#  define ife          0xfffffffffffffffe
+#  define ifd          0xfffffffffffffffd
+#  define ifc          0xfffffffffffffffc
+#endif
+
+/* check jumps are taken and result value is correct
+ *
+ * bopr_t(l, u, op, r0, r1, il, ir, iv) is the register/register form for
+ * a branch that is expected to be taken:
+ *   - il is loaded into r0 and ir into r1;
+ *   - the branch b##op##r##u is issued on r0 and r1, targeting the first
+ *     label;
+ *   - if the branch falls through, r0 is overwritten with 0x5a5a5a5a, so
+ *     the comparison with iv below is expected to fail;
+ *   - at the first label r0 is compared with iv; when they are equal
+ *     control skips to the "ok" label, otherwise abort is called.
+ * l, u, op, r0 and r1 are pasted into the label names; every expansion
+ * needs a distinct combination so that its labels are unique.
+ */
+#define bopr_t(l, u, op, r0, r1, il, ir, iv)                   \
+       movi %r0 il                                             \
+       movi %r1 ir                                             \
+       b##op##r##u op##u##r##l##r0##r1 %r0 %r1                 \
+       /* validate did jump */                                 \
+       movi %r0 0x5a5a5a5a                                     \
+op##u##r##l##r0##r1:                                           \
+       beqi op##u##r##l##ok##r0##r1 %r0 iv                     \
+       calli @abort                                            \
+op##u##r##l##ok##r0##r1:
+#define bopi_t(l, u, op, r0, il, ir, iv)                       \
+       movi %r0 il                                             \
+       b##op##i##u op##u##i##l##r0##r1 %r0 ir                  \
+       /* validate did jump */                                 \
+       movi %r0 0x5a5a5a5a                                     \
+op##u##i##l##r0##r1:                                           \
+       beqi op##u##i##l##ok##r0##r1 %r0 iv                     \
+       calli @abort                                            \
+op##u##i##l##ok##r0##r1:
+#define bopr_f(l, u, op, r0, r1, il, ir, iv)                   \
+       movi %r0 il                                             \
+       movi %r1 ir                                             \
+       b##op##r##u op##u##r##l##r0##r1 %r0 %r1                 \
+       beqi op##u##r##l##ok##r0##r1 %r0 iv                     \
+op##u##r##l##r0##r1:                                           \
+       calli @abort                                            \
+op##u##r##l##ok##r0##r1:
+#define bopi_f(l, u, op, r0, il, ir, iv)                       \
+       movi %r0 il                                             \
+       b##op##i##u op##u##i##l##r0##r1 %r0 ir                  \
+       beqi op##u##i##l##ok##r0##r1 %r0 iv                     \
+op##u##i##l##r0##r1:                                           \
+       calli @abort                                            \
+op##u##i##l##ok##r0##r1:
+#define ccop(cc, l, u, op, r0, r1, il, ir, iv)                 \
+       bopr##cc(l, u, op, r0, r1, i##il, i##ir, i##iv)         \
+       bopi##cc(l, u, op, r0, i##il, i##ir, i##iv)
+#define tadd(l, u, r0, r1, il, ir, iv)                         \
+       ccop(_t, l, u, oadd, r0, r1, il, ir, iv)                \
+       ccop(_f, l, u, xadd, r0, r1, il, ir, iv)
+#define fadd(l, u, r0, r1, il, ir, iv)                         \
+       ccop(_f, l, u, oadd, r0, r1, il, ir, iv)                \
+       ccop(_t, l, u, xadd, r0, r1, il, ir, iv)
+#define tsub(l, u, r0, r1, il, ir, iv)                         \
+       ccop(_t, l, u, osub, r0, r1, il, ir, iv)                \
+       ccop(_f, l, u, xsub, r0, r1, il, ir, iv)
+#define fsub(l, u, r0, r1, il, ir, iv)                         \
+       ccop(_f, l, u, osub, r0, r1, il, ir, iv)                \
+       ccop(_t, l, u, xsub, r0, r1, il, ir, iv)
+
+#define xopr6(l,op,r0,r1,r2,r3,r4,r5,llo,lhi,rlo,rhi,vlo,vhi)  \
+       movi %r1 llo                                            \
+       movi %r2 lhi                                            \
+       movi %r4 rlo                                            \
+       movi %r5 rhi                                            \
+       op##cr %r0 %r1 %r4                                      \
+       op##xr %r3 %r2 %r5                                      \
+       beqi op##l##L##r0##r1##r2##r3##r4##r5 %r0 vlo           \
+       calli @abort                                            \
+op##l##L##r0##r1##r2##r3##r4##r5:                              \
+       beqi op##l##H##r0##r1##r2##r3##r4##r5 %r3 vhi           \
+       calli @abort                                            \
+op##l##H##r0##r1##r2##r3##r4##r5:
+#define xopr4_(l,op,r0,r1,r2,r3,llo,lhi,rlo,rhi,vlo,vhi)       \
+       movi %r0 llo                                            \
+       movi %r1 lhi                                            \
+       movi %r2 rlo                                            \
+       movi %r3 rhi                                            \
+       op##cr %r0 %r0 %r2                                      \
+       op##xr %r1 %r1 %r3                                      \
+       beqi op##l##L_##r0##r1##r2##r3 %r0 vlo                  \
+       calli @abort                                            \
+op##l##L_##r0##r1##r2##r3:                                     \
+       beqi op##l##H_##r0##r1##r2##r3 %r1 vhi                  \
+       calli @abort                                            \
+op##l##H_##r0##r1##r2##r3:
+#define xopr_4(l,op,r0,r1,r2,r3,llo,lhi,rlo,rhi,vlo,vhi)       \
+       movi %r0 rlo                                            \
+       movi %r1 rhi                                            \
+       movi %r2 llo                                            \
+       movi %r3 lhi                                            \
+       op##cr %r0 %r2 %r0                                      \
+       op##xr %r1 %r3 %r1                                      \
+       beqi op##l##_L##r0##r1##r2##r3 %r0 vlo                  \
+       calli @abort                                            \
+op##l##_L##r0##r1##r2##r3:                                     \
+       beqi op##l##_H##r0##r1##r2##r3 %r1 vhi                  \
+       calli @abort                                            \
+op##l##_H##r0##r1##r2##r3:
+
+#define xaddr(l,llo,lhi,rlo,rhi,vlo,vhi)                                               \
+       xopr6(l,add,r0,r1,r2,v0,v1,v2,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)        \
+       xopr4_(l,add,r0,r1,r2,v0,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)             \
+       xopr_4(l,add,r0,r1,r2,v0,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)
+#define xsubr(l,llo,lhi,rlo,rhi,vlo,vhi)                                               \
+       xopr6(l,sub,r0,r1,r2,v0,v1,v2,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)        \
+       xopr4_(l,sub,r0,r1,r2,v0,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)             \
+       xopr_4(l,sub,r0,r1,r2,v0,i##llo,i##lhi,i##rlo,i##rhi,i##vlo,i##vhi)
+
+.data  16
+ok:
+.c     "ok\n"
+
+.code
+       prolog
+
+       tadd(__LINE__,   , r0, r1, x7f,  x1, x80)
+       fadd(__LINE__,   , r0, r1, x7fe, x1, x7f)
+       tsub(__LINE__,   , r0, r1, x80,  x1, x7f)
+       fsub(__LINE__,   , r0, r1, x7f,  x1, x7fe)
+       tadd(__LINE__, _u, r0, r1, ff,   x1, x0)
+       fadd(__LINE__, _u, r0, r1, x7f,  x1, x80)
+       tsub(__LINE__, _u, r0, r1, x0,   x1, ff)
+       fsub(__LINE__, _u, r0, r1, x80,  x1, x7f)
+
+       /* 0xffffffffffffffff + 1 = 0x10000000000000000 */
+       xaddr(__LINE__, ff, ff, x1, x0, x0, x0)
+
+       /* 1 + 0xffffffffffffffff = 0x10000000000000000 */
+       xaddr(__LINE__, x1, x0, ff, ff, x0, x0)
+
+       /* 0xfffffffeffffffff + 1 = 0xffffffff00000000 */
+       xaddr(__LINE__, ff, fe, x1, x0, x0, ff)
+
+       /* 1 + 0xfffffffeffffffff = 0xffffffff00000000 */
+       xaddr(__LINE__, x1, x0, ff, fe, x0, ff)
+
+       /* 0xfffffffefffffffe + 2 = 0xffffffff00000000 */
+       xaddr(__LINE__, fe, fe, x2, x0, x0, ff)
+
+       /* 2 + 0xfffffffefffffffe = 0xffffffff00000000 */
+       xaddr(__LINE__, x2, x0, fe, fe, x0, ff)
+
+       /* 0xffffffffffffffff - 1 = 0xfffffffffffffffe */
+       xsubr(__LINE__, ff, ff, x1, x0, fe, ff)
+
+       /* 1 - 0xffffffffffffffff = -0xfffffffffffffffe */
+       xsubr(__LINE__, x1, x0, ff, ff, x2, x0)
+
+       /* 0xfffffffeffffffff - 1 = 0xfffffffefffffffe */
+       xsubr(__LINE__, ff, fe, x1, x0, fe, fe)
+
+       /* 1 - 0xfffffffeffffffff = -0xfffffffefffffffe */
+       xsubr(__LINE__, x1, x0, ff, fe, x2, x1)
+
+       /* 0xfffffffefffffffe - 2 = 0xfffffffefffffffc */
+       xsubr(__LINE__, fe, fe, x2, x0, fc, fe)
+
+       /* 2 - 0xfffffffefffffffe = -0xfffffffefffffffc */
+       xsubr(__LINE__, x2, x0, fe, fe, x4, x1)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/ccall.c b/deps/lightning/check/ccall.c
new file mode 100644 (file)
index 0000000..9dae256
--- /dev/null
@@ -0,0 +1,903 @@
+#include <lightning.h>
+#include <stdio.h>
+
+#define _w0                    0
+#define _w1                    1
+#define _w2                    (_w1-2)
+#define _w3                    (_w2-3)
+#define _w4                    (_w3-4)
+#define _w5                    (_w4-5)
+#define _w6                    (_w5-6)
+#define _w7                    (_w6-7)
+#define _w8                    (_w7-8)
+#define _w9                    (_w8-9)
+#define _w10                   (_w9-10)
+#define _w11                   (_w10-11)
+#define _w12                   (_w11-12)
+#define _w13                   (_w12-13)
+#define _w14                   (_w13-14)
+#define _w15                   (_w14-15)
+#define _c0                    _w0
+#define _c1                    _w1
+#define _c2                    _w2
+#define _c3                    _w3
+#define _c4                    _w4
+#define _c5                    _w5
+#define _c6                    _w6
+#define _c7                    _w7
+#define _c8                    _w8
+#define _c9                    _w9
+#define _c10                   _w10
+#define _c11                   _w11
+#define _c12                   _w12
+#define _c13                   _w13
+#define _c14                   _w14
+#define _c15                   _w15
+#define _uc0                   (_w0&0xff)
+#define _uc1                   (_w1&0xff)
+#define _uc2                   (_w2&0xff)
+#define _uc3                   (_w3&0xff)
+#define _uc4                   (_w4&0xff)
+#define _uc5                   (_w5&0xff)
+#define _uc6                   (_w6&0xff)
+#define _uc7                   (_w7&0xff)
+#define _uc8                   (_w8&0xff)
+#define _uc9                   (_w9&0xff)
+#define _uc10                  (_w10&0xff)
+#define _uc11                  (_w11&0xff)
+#define _uc12                  (_w12&0xff)
+#define _uc13                  (_w13&0xff)
+#define _uc14                  (_w14&0xff)
+#define _uc15                  (_w15&0xff)
+#define _s0                    _w0
+#define _s1                    _w1
+#define _s2                    _w2
+#define _s3                    _w3
+#define _s4                    _w4
+#define _s5                    _w5
+#define _s6                    _w6
+#define _s7                    _w7
+#define _s8                    _w8
+#define _s9                    _w9
+#define _s10                   _w10
+#define _s11                   _w11
+#define _s12                   _w12
+#define _s13                   _w13
+#define _s14                   _w14
+#define _s15                   _w15
+#define _us0                   (_w0&0xffff)
+#define _us1                   (_w1&0xffff)
+#define _us2                   (_w2&0xffff)
+#define _us3                   (_w3&0xffff)
+#define _us4                   (_w4&0xffff)
+#define _us5                   (_w5&0xffff)
+#define _us6                   (_w6&0xffff)
+#define _us7                   (_w7&0xffff)
+#define _us8                   (_w8&0xffff)
+#define _us9                   (_w9&0xffff)
+#define _us10                  (_w10&0xffff)
+#define _us11                  (_w11&0xffff)
+#define _us12                  (_w12&0xffff)
+#define _us13                  (_w13&0xffff)
+#define _us14                  (_w14&0xffff)
+#define _us15                  (_w15&0xffff)
+#define _i0                    _w0
+#define _i1                    _w1
+#define _i2                    _w2
+#define _i3                    _w3
+#define _i4                    _w4
+#define _i5                    _w5
+#define _i6                    _w6
+#define _i7                    _w7
+#define _i8                    _w8
+#define _i9                    _w9
+#define _i10                   _w10
+#define _i11                   _w11
+#define _i12                   _w12
+#define _i13                   _w13
+#define _i14                   _w14
+#define _i15                   _w15
+#if __WORDSIZE == 64
+#  define _ui0                 (_w0&0xffffffff)
+#  define _ui1                 (_w1&0xffffffff)
+#  define _ui2                 (_w2&0xffffffff)
+#  define _ui3                 (_w3&0xffffffff)
+#  define _ui4                 (_w4&0xffffffff)
+#  define _ui5                 (_w5&0xffffffff)
+#  define _ui6                 (_w6&0xffffffff)
+#  define _ui7                 (_w7&0xffffffff)
+#  define _ui8                 (_w8&0xffffffff)
+#  define _ui9                 (_w9&0xffffffff)
+#  define _ui10                        (_w10&0xffffffff)
+#  define _ui11                        (_w11&0xffffffff)
+#  define _ui12                        (_w12&0xffffffff)
+#  define _ui13                        (_w13&0xffffffff)
+#  define _ui14                        (_w14&0xffffffff)
+#  define _ui15                        (_w15&0xffffffff)
+#  define _l0                  _w0
+#  define _l1                  _w1
+#  define _l2                  _w2
+#  define _l3                  _w3
+#  define _l4                  _w4
+#  define _l5                  _w5
+#  define _l6                  _w6
+#  define _l7                  _w7
+#  define _l8                  _w8
+#  define _l9                  _w9
+#  define _l10                 _w10
+#  define _l11                 _w11
+#  define _l12                 _w12
+#  define _l13                 _w13
+#  define _l14                 _w14
+#  define _l15                 _w15
+#endif
+
+/*
+ * Types
+ *
+ * Short aliases for the C types exercised by this test.  The alias name
+ * itself is token-pasted into identifiers by the macros that follow
+ * (for example C##T##N and j##T##N), so each alias has to be a single
+ * identifier.  _ui and _l are only defined when __WORDSIZE == 64; _l is
+ * an alias of jit_word_t.
+ */
+typedef signed char            _c;
+typedef unsigned char          _uc;
+typedef signed short           _s;
+typedef unsigned short         _us;
+typedef signed int             _i;
+#if __WORDSIZE == 64
+typedef unsigned int           _ui;
+typedef jit_word_t             _l;
+#endif
+typedef float                  _f;
+typedef double                 _d;
+
+#define prt0(T)                        T C##T##0(void);
+#define prt1(T)                        prt0(T)                                 \
+                               T C##T##1(T);
+#define prt2(T)                        prt1(T)                                 \
+                               T C##T##2(T,T);
+#define prt3(T)                        prt2(T)                                 \
+                               T C##T##3(T,T,T);
+#define prt4(T)                        prt3(T)                                 \
+                               T C##T##4(T,T,T,T);
+#define prt5(T)                        prt4(T)                                 \
+                               T C##T##5(T,T,T,T,T);
+#define prt6(T)                        prt5(T)                                 \
+                               T C##T##6(T,T,T,T,T,T);
+#define prt7(T)                        prt6(T)                                 \
+                               T C##T##7(T,T,T,T,T,T,T);
+#define prt8(T)                        prt7(T)                                 \
+                               T C##T##8(T,T,T,T,T,T,T,T);
+#define prt9(T)                        prt8(T)                                 \
+                               T C##T##9(T,T,T,T,T,T,T,T,T);
+#define prt10(T)               prt9(T)                                 \
+                               T C##T##10(T,T,T,T,T,T,T,T,T,T);
+#define prt11(T)               prt10(T)                                \
+                               T C##T##11(T,T,T,T,T,T,T,T,T,T,T);
+#define prt12(T)               prt11(T)                                \
+                               T C##T##12(T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt13(T)               prt12(T)                                \
+                               T C##T##13(T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt14(T)               prt13(T)                                \
+                               T C##T##14(T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt15(T)               prt14(T)                                \
+                               T C##T##15(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt(T)                 prt15(T)
+prt(_c)
+prt(_uc)
+prt(_s)
+prt(_us)
+prt(_i)
+#if __WORDSIZE == 64
+prt(_ui)
+prt(_l)
+#endif
+prt(_f)
+prt(_d)
+#undef prt
+#undef prt15
+#undef prt14
+#undef prt13
+#undef prt12
+#undef prt11
+#undef prt10
+#undef prt9
+#undef prt8
+#undef prt7
+#undef prt6
+#undef prt5
+#undef prt4
+#undef prt3
+#undef prt2
+#undef prt1
+#undef prt0
+
+#define prtn(N,T)              T J##T##N(void); /* prototype T J##T##N(void); N is pasted into the name so each arity gets its own declaration (was a literal n) */
+#define prt0(T)                        prtn(0,T)
+#define prt1(T)                        prt0(T)                 prtn(1,T)
+#define prt2(T)                        prt1(T)                 prtn(2,T)
+#define prt3(T)                        prt2(T)                 prtn(3,T)
+#define prt4(T)                        prt3(T)                 prtn(4,T)
+#define prt5(T)                        prt4(T)                 prtn(5,T)
+#define prt6(T)                        prt5(T)                 prtn(6,T)
+#define prt7(T)                        prt6(T)                 prtn(7,T)
+#define prt8(T)                        prt7(T)                 prtn(8,T)
+#define prt9(T)                        prt8(T)                 prtn(9,T)
+#define prt10(T)               prt9(T)                 prtn(10,T)
+#define prt11(T)               prt10(T)                prtn(11,T)
+#define prt12(T)               prt11(T)                prtn(12,T)
+#define prt13(T)               prt12(T)                prtn(13,T)
+#define prt14(T)               prt13(T)                prtn(14,T)
+#define prt15(T)               prt14(T)                prtn(15,T)
+#define prt(T)                 prt15(T)
+prt(_c)
+prt(_uc)
+prt(_s)
+prt(_us)
+prt(_i)
+#if __WORDSIZE == 64
+prt(_ui)
+prt(_l)
+#endif
+prt(_f)
+prt(_d)
+#undef prt
+#undef prt15
+#undef prt14
+#undef prt13
+#undef prt12
+#undef prt11
+#undef prt10
+#undef prt9
+#undef prt8
+#undef prt7
+#undef prt6
+#undef prt5
+#undef prt4
+#undef prt3
+#undef prt2
+#undef prt1
+#undef prt0
+#undef prtn
+
+/*
+ * Initialization
+ *
+ * dat(T) declares, for every arity N from 0 to 15, two file-scope objects:
+ *   - j##T##N, a pointer to a function taking N arguments of type T and
+ *     returning T;
+ *   - n##T##N, a jit_node_t pointer.
+ * Each datN macro expands the previous one first, so dat15 emits the
+ * whole set for a given type.
+ */
+
+#define dat0(T)                        T (*j##T##0)(void);                     \
+                               jit_node_t *n##T##0;
+#define dat1(T)                        dat0(T)                                 \
+                               T (*j##T##1)(T);                        \
+                               jit_node_t *n##T##1;
+#define dat2(T)                        dat1(T)                                 \
+                               T (*j##T##2)(T,T);                      \
+                               jit_node_t *n##T##2;
+#define dat3(T)                        dat2(T)                                 \
+                               T (*j##T##3)(T,T,T);                    \
+                               jit_node_t *n##T##3;
+#define dat4(T)                        dat3(T)                                 \
+                               T (*j##T##4)(T,T,T,T);                  \
+                               jit_node_t *n##T##4;
+#define dat5(T)                        dat4(T)                                 \
+                               T (*j##T##5)(T,T,T,T,T);                \
+                               jit_node_t *n##T##5;
+#define dat6(T)                        dat5(T)                                 \
+                               T (*j##T##6)(T,T,T,T,T,T);              \
+                               jit_node_t *n##T##6;
+#define dat7(T)                        dat6(T)                                 \
+                               T (*j##T##7)(T,T,T,T,T,T,T);            \
+                               jit_node_t *n##T##7;
+#define dat8(T)                        dat7(T)                                 \
+                               T (*j##T##8)(T,T,T,T,T,T,T,T);          \
+                               jit_node_t *n##T##8;
+#define dat9(T)                        dat8(T)                                 \
+                               T (*j##T##9)(T,T,T,T,T,T,T,T,T);        \
+                               jit_node_t *n##T##9;
+#define dat10(T)               dat9(T)                                 \
+                               T (*j##T##10)(T,T,T,T,T,T,T,T,T,T);     \
+                               jit_node_t *n##T##10;
+#define dat11(T)               dat10(T)                                \
+                               T (*j##T##11)(T,T,T,T,T,T,T,T,T,T,T);   \
+                               jit_node_t *n##T##11;
+#define dat12(T)               dat11(T)                                \
+                               T (*j##T##12)(T,T,T,T,T,T,T,T,T,T,T,T); \
+                               jit_node_t *n##T##12;
+#define dat13(T)               dat12(T)                                \
+                               T (*j##T##13)(T,T,T,T,T,T,T,T,T,T,T,T,T);\
+                               jit_node_t *n##T##13;
+#define dat14(T)               dat13(T)                                \
+                               T (*j##T##14)(T,T,T,T,T,T,T,T,T,T,T,T,T,T);\
+                               jit_node_t *n##T##14;
+#define dat15(T)               dat14(T)                                \
+                               T (*j##T##15)(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T);\
+                               jit_node_t *n##T##15;
+#define dat(T)                 dat15(T)
+dat(_c)
+dat(_uc)
+dat(_s)
+dat(_us)
+dat(_i)
+#if __WORDSIZE == 64
+dat(_ui)
+dat(_l)
+#endif
+dat(_f)
+dat(_d)
+#undef dat
+#undef dat15
+#undef dat14
+#undef dat13
+#undef dat12
+#undef dat11
+#undef dat10
+#undef dat9
+#undef dat8
+#undef dat7
+#undef dat6
+#undef dat5
+#undef dat4
+#undef dat3
+#undef dat2
+#undef dat1
+#undef dat0
+
+/*
+ * Implementation
+ *
+ * dcl(T) defines the C functions C##T##0 .. C##T##15 for type T.
+ * C##T##0 returns 0, C##T##1 returns its argument, and C##T##N for
+ * N >= 2 returns the first argument minus each of the remaining ones
+ * (A-B-C-...).  Each dclN macro expands the previous one first, so
+ * dcl15 emits the whole set.
+ */
+#define dcl0(T)                                                                \
+T C##T##0(void)                                                                \
+{                                                                      \
+    return (0);                                                                \
+}
+#define dcl1(T)                                                                \
+dcl0(T)                                                                        \
+T C##T##1(T A)                                                         \
+{                                                                      \
+    return (A);                                                                \
+}
+#define dcl2(T)                                                                \
+dcl1(T)                                                                        \
+T C##T##2(T A,T B)                                                     \
+{                                                                      \
+    return (A-B);                                                      \
+}
+#define dcl3(T)                                                                \
+dcl2(T)                                                                        \
+T C##T##3(T A,T B,T C)                                                 \
+{                                                                      \
+    return (A-B-C);                                                    \
+}
+#define dcl4(T)                                                                \
+dcl3(T)                                                                        \
+T C##T##4(T A,T B,T C,T D)                                             \
+{                                                                      \
+    return (A-B-C-D);                                                  \
+}
+#define dcl5(T)                                                                \
+dcl4(T)                                                                        \
+T C##T##5(T A,T B,T C,T D,T E)                                         \
+{                                                                      \
+    return (A-B-C-D-E);                                                        \
+}
+#define dcl6(T)                                                                \
+dcl5(T)                                                                        \
+T C##T##6(T A,T B,T C,T D,T E,T F)                                     \
+{                                                                      \
+    return (A-B-C-D-E-F);                                              \
+}
+#define dcl7(T)                                                                \
+dcl6(T)                                                                        \
+T C##T##7(T A,T B,T C,T D,T E,T F,T G)                                 \
+{                                                                      \
+    return (A-B-C-D-E-F-G);                                            \
+}
+#define dcl8(T)                                                                \
+dcl7(T)                                                                        \
+T C##T##8(T A,T B,T C,T D,T E,T F,T G,T H)                             \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H);                                          \
+}
+#define dcl9(T)                                                                \
+dcl8(T)                                                                        \
+T C##T##9(T A,T B,T C,T D,T E,T F,T G,T H,T I)                         \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I);                                                \
+}
+#define dcl10(T)                                                       \
+dcl9(T)                                                                        \
+T C##T##10(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J)                    \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J);                                      \
+}
+#define dcl11(T)                                                       \
+dcl10(T)                                                               \
+T C##T##11(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K)                        \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J-K);                                    \
+}
+#define dcl12(T)                                                       \
+dcl11(T)                                                               \
+T C##T##12(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L)            \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J-K-L);                                  \
+}
+#define dcl13(T)                                                       \
+dcl12(T)                                                               \
+T C##T##13(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M)                \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J-K-L-M);                                        \
+}
+#define dcl14(T)                                                       \
+dcl13(T)                                                               \
+T C##T##14(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N)    \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J-K-L-M-N);                              \
+}
+#define dcl15(T)                                                       \
+dcl14(T)                                                               \
+T C##T##15(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N,T O)        \
+{                                                                      \
+    return (A-B-C-D-E-F-G-H-I-J-K-L-M-N-O);                            \
+}
+#define dcl(T) dcl15(T)        /* each dclN first expands dcl(N-1), so this defines C<T>N for every N up to 15 */
+dcl(_c)
+dcl(_uc)
+dcl(_s)
+dcl(_us)
+dcl(_i)
+#if __WORDSIZE == 64   /* the _ui and _l families are only generated for a 64-bit word size */
+dcl(_ui)
+dcl(_l)
+#endif
+dcl(_f)
+dcl(_d)
+#undef dcl     /* the dcl* names are dropped here and reused for a second family of functions */
+#undef dcl15
+#undef dcl14
+#undef dcl13
+#undef dcl12
+#undef dcl11
+#undef dcl10
+#undef dcl9
+#undef dcl8
+#undef dcl7
+#undef dcl6
+#undef dcl5
+#undef dcl4
+#undef dcl3
+#undef dcl2
+#undef dcl1
+#undef dcl0
+/* dclN(T), second family: define CJ<T>0..CJ<T>N; CJ<T>N calls the function pointer j<T>N with the arguments 1..N. */
+#define dcl0(T)                                                                \
+T CJ##T##0(void)                                                       \
+{                                                                      \
+    return ((*j##T##0)());                                             \
+}
+#define dcl1(T)                                                                \
+dcl0(T)                                                                        \
+T CJ##T##1(void)                                                       \
+{                                                                      \
+    return ((*j##T##1)(1));                                            \
+}
+#define dcl2(T)                                                                \
+dcl1(T)                                                                        \
+T CJ##T##2(void)                                                       \
+{                                                                      \
+    return ((*j##T##2)(1,2));                                          \
+}
+#define dcl3(T)                                                                \
+dcl2(T)                                                                        \
+T CJ##T##3(void)                                                       \
+{                                                                      \
+    return ((*j##T##3)(1,2,3));                                                \
+}
+#define dcl4(T)                                                                \
+dcl3(T)                                                                        \
+T CJ##T##4(void)                                                       \
+{                                                                      \
+    return ((*j##T##4)(1,2,3,4));                                      \
+}
+#define dcl5(T)                                                                \
+dcl4(T)                                                                        \
+T CJ##T##5(void)                                                       \
+{                                                                      \
+    return ((*j##T##5)(1,2,3,4,5));                                    \
+}
+#define dcl6(T)                                                                \
+dcl5(T)                                                                        \
+T CJ##T##6(void)                                                       \
+{                                                                      \
+    return ((*j##T##6)(1,2,3,4,5,6));                                  \
+}
+#define dcl7(T)                                                                \
+dcl6(T)                                                                        \
+T CJ##T##7(void)                                                       \
+{                                                                      \
+    return ((*j##T##7)(1,2,3,4,5,6,7));                                        \
+}
+#define dcl8(T)                                                                \
+dcl7(T)                                                                        \
+T CJ##T##8(void)                                                       \
+{                                                                      \
+    return ((*j##T##8)(1,2,3,4,5,6,7,8));                              \
+}
+#define dcl9(T)                                                                \
+dcl8(T)                                                                        \
+T CJ##T##9(void)                                                       \
+{                                                                      \
+    return ((*j##T##9)(1,2,3,4,5,6,7,8,9));                            \
+}
+#define dcl10(T)                                                       \
+dcl9(T)                                                                        \
+T CJ##T##10(void)                                                      \
+{                                                                      \
+    return ((*j##T##10)(1,2,3,4,5,6,7,8,9,10));                                \
+}
+#define dcl11(T)                                                       \
+dcl10(T)                                                               \
+T CJ##T##11(void)                                                      \
+{                                                                      \
+    return ((*j##T##11)(1,2,3,4,5,6,7,8,9,10,11));                     \
+}
+#define dcl12(T)                                                       \
+dcl11(T)                                                               \
+T CJ##T##12(void)                                                      \
+{                                                                      \
+    return ((*j##T##12)(1,2,3,4,5,6,7,8,9,10,11,12));                  \
+}
+#define dcl13(T)                                                       \
+dcl12(T)                                                               \
+T CJ##T##13(void)                                                      \
+{                                                                      \
+    return ((*j##T##13)(1,2,3,4,5,6,7,8,9,10,11,12,13));               \
+}
+#define dcl14(T)                                                       \
+dcl13(T)                                                               \
+T CJ##T##14(void)                                                      \
+{                                                                      \
+    return ((*j##T##14)(1,2,3,4,5,6,7,8,9,10,11,12,13,14));            \
+}
+#define dcl15(T)                                                       \
+dcl14(T)                                                               \
+T CJ##T##15(void)                                                      \
+{                                                                      \
+    return ((*j##T##15)(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15));         \
+}
+#define dcl(t) dcl15(t)        /* one full CJ<T>0..CJ<T>15 family per type tag */
+dcl(_c)
+dcl(_uc)
+dcl(_s)
+dcl(_us)
+dcl(_i)
+#if __WORDSIZE == 64   /* the _ui and _l families are only generated for a 64-bit word size */
+dcl(_ui)
+dcl(_l)
+#endif
+dcl(_f)
+dcl(_d)
+#undef dcl
+#undef dcl15
+#undef dcl14
+#undef dcl13
+#undef dcl12
+#undef dcl11
+#undef dcl10
+#undef dcl9
+#undef dcl8
+#undef dcl7
+#undef dcl6
+#undef dcl5
+#undef dcl4
+#undef dcl3
+#undef dcl2
+#undef dcl1
+#undef dcl0
+/* Emit the JIT functions n<T>N and a JIT "main" that checks C<T>N and CJ<T>N, run it, then print "ok". */
+int
+main(int argc, char *argv[])
+{
+    jit_state_t                 *_jit;
+    jit_node_t          *jmpi_main;
+    void               (*function)(void);
+    jit_node_t          *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,*a9;
+    jit_node_t          *a10,*a11,*a12,*a13,*a14,*a15;
+    jit_node_t          *jmp;
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+    /* Jump over the callee definitions emitted next; patched further down, just before the "main" prolog. */
+    jmpi_main = jit_jmpi();
+
+    /* argN(T): declare the incoming arguments a1..aN with jit_arg<T>(). */
+#define arg0(T)                        /**/
+#define arg1(T)                                                a1 = jit_arg##T();
+#define arg2(T)                        arg1(T)                 a2 = jit_arg##T();
+#define arg3(T)                        arg2(T)                 a3 = jit_arg##T();
+#define arg4(T)                        arg3(T)                 a4 = jit_arg##T();
+#define arg5(T)                        arg4(T)                 a5 = jit_arg##T();
+#define arg6(T)                        arg5(T)                 a6 = jit_arg##T();
+#define arg7(T)                        arg6(T)                 a7 = jit_arg##T();
+#define arg8(T)                        arg7(T)                 a8 = jit_arg##T();
+#define arg9(T)                        arg8(T)                 a9 = jit_arg##T();
+#define arg10(T)               arg9(T)                 a10 = jit_arg##T();
+#define arg11(T)               arg10(T)                a11 = jit_arg##T();
+#define arg12(T)               arg11(T)                a12 = jit_arg##T();
+#define arg13(T)               arg12(T)                a13 = jit_arg##T();
+#define arg14(T)               arg13(T)                a14 = jit_arg##T();
+#define arg15(T)               arg14(T)                a15 = jit_arg##T();
+    /* getN(B,T,R): leave a1-a2-...-aN in R##0, using R##1 to hold the running value (get0 loads 0). */
+#define get0(B,T,R)            jit_movi##B(R##0,0);
+#define get1(B,T,R)            jit_getarg##B(R##0,a##1);
+#define get2(B,T,R)                                                    \
+       get1(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##2);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get3(B,T,R)                                                    \
+       get2(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##3);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get4(B,T,R)                                                    \
+       get3(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##4);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get5(B,T,R)                                                    \
+       get4(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##5);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get6(B,T,R)                                                    \
+       get5(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##6);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get7(B,T,R)                                                    \
+       get6(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##7);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get8(B,T,R)                                                    \
+       get7(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##8);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get9(B,T,R)                                                    \
+       get8(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##9);                                      \
+       jit_subr##B(R##0, R##1, R##0);
+#define get10(B,T,R)                                                   \
+       get9(B,T,R);                                                    \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##10);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+#define get11(B,T,R)                                                   \
+       get10(B,T,R);                                                   \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##11);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+#define get12(B,T,R)                                                   \
+       get11(B,T,R);                                                   \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##12);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+#define get13(B,T,R)                                                   \
+       get12(B,T,R);                                                   \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##13);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+#define get14(B,T,R)                                                   \
+       get13(B,T,R);                                                   \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##14);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+#define get15(B,T,R)                                                   \
+       get14(B,T,R);                                                   \
+       jit_movr##B(R##1, R##0);                                        \
+       jit_getarg##T(R##0, a##15);                                     \
+       jit_subr##B(R##0, R##1, R##0);
+    /* The jit_extr variant matching the word size is defined away to nothing, so defi() emits no extension for it. */
+#if __WORDSIZE == 32
+#  define jit_extr_i(u, v)                     /**/
+#else
+#  define jit_extr_l(u, v)                     /**/
+#endif
+    /* defi/deff(T,N): emit the JIT function n<T>N (integer registers / float registers) returning a1-...-aN. */
+#define strfy(n)                               #n
+#define defi(T, N)                                                     \
+    n##T##N = jit_name(strfy(n##T##N));                                        \
+    jit_note("ccall.c", __LINE__);                                     \
+    jit_prolog();                                                      \
+    arg##N();                                                          \
+    get##N(,T,JIT_R)                                                   \
+    jit_extr##T(JIT_R0, JIT_R0);                                       \
+    jit_retr(JIT_R0);                                                  \
+    jit_epilog();
+#define deff(T, N)                                                     \
+    n##T##N = jit_name(strfy(n##T##N));                                        \
+    jit_note("ccall.c", __LINE__);                                     \
+    jit_prolog();                                                      \
+    arg##N(T);                                                         \
+    get##N(T,T,JIT_F);                                                 \
+    jit_retr##T(JIT_F0);                                               \
+    jit_epilog();
+    /* defN(X,T): expand def##X(T,0) .. def##X(T,N), i.e. one JIT function per argument count. */
+#define  def0(X, T)                            def##X(T, 0)
+#define  def1(X, T)    def0(X, T)              def##X(T, 1)
+#define  def2(X, T)    def1(X, T)              def##X(T, 2)
+#define  def3(X, T)    def2(X, T)              def##X(T, 3)
+#define  def4(X, T)    def3(X, T)              def##X(T, 4)
+#define  def5(X, T)    def4(X, T)              def##X(T, 5)
+#define  def6(X, T)    def5(X, T)              def##X(T, 6)
+#define  def7(X, T)    def6(X, T)              def##X(T, 7)
+#define  def8(X, T)    def7(X, T)              def##X(T, 8)
+#define  def9(X, T)    def8(X, T)              def##X(T, 9)
+#define def10(X, T)    def9(X, T)              def##X(T, 10)
+#define def11(X, T)    def10(X, T)             def##X(T, 11)
+#define def12(X, T)    def11(X, T)             def##X(T, 12)
+#define def13(X, T)    def12(X, T)             def##X(T, 13)
+#define def14(X, T)    def13(X, T)             def##X(T, 14)
+#define def15(X, T)    def14(X, T)             def##X(T, 15)
+#define def(T)         def15(i, T)
+       def(_c)
+       def(_uc)
+       def(_s)
+       def(_us)
+       def(_i)
+#if __WORDSIZE == 64
+       def(_ui)
+       def(_l)
+#endif
+#undef def
+#define def(T)         def15(f, T)
+       def(_f)
+       def(_d)
+#undef def
+    /* The generated "main" starts here; the initial jump recorded in jmpi_main is patched now. */
+    jit_patch(jmpi_main);
+    jit_name("main");
+    jit_note("ccall.c", __LINE__);
+    jit_prolog();
+    /* pushN(T): push the immediates 1..N as call arguments with jit_pushargi<T>(). */
+#define  push0(T)      /**/
+#define  push1(T)                              jit_pushargi##T(1);
+#define  push2(T)      push1(T)                jit_pushargi##T(2);
+#define  push3(T)      push2(T)                jit_pushargi##T(3);
+#define  push4(T)      push3(T)                jit_pushargi##T(4);
+#define  push5(T)      push4(T)                jit_pushargi##T(5);
+#define  push6(T)      push5(T)                jit_pushargi##T(6);
+#define  push7(T)      push6(T)                jit_pushargi##T(7);
+#define  push8(T)      push7(T)                jit_pushargi##T(8);
+#define  push9(T)      push8(T)                jit_pushargi##T(9);
+#define  push10(T)     push9(T)                jit_pushargi##T(10);
+#define  push11(T)     push10(T)               jit_pushargi##T(11);
+#define  push12(T)     push11(T)               jit_pushargi##T(12);
+#define  push13(T)     push12(T)               jit_pushargi##T(13);
+#define  push14(T)     push13(T)               jit_pushargi##T(14);
+#define  push15(T)     push14(T)               jit_pushargi##T(15);
+    /* calin/calfn(T,N): call C<T>N with 1..N; the branch skips the abort() call when the result equals T##N / _w##N. */
+#define calin(T,N)                                                     \
+       jit_prepare();                                                  \
+               push##N()                                               \
+       jit_finishi(C##T##N);                                           \
+       jit_retval##T(JIT_R0);                                          \
+       jmp = jit_beqi(JIT_R0, T##N);                                   \
+       jit_calli(abort);                                               \
+       jit_patch(jmp);
+#define calfn(T,N)                                                     \
+       jit_prepare();                                                  \
+               push##N(T)                                              \
+       jit_finishi(C##T##N);                                           \
+       jit_retval##T(JIT_F0);                                          \
+       jmp = jit_beqi##T(JIT_F0, _w##N);                               \
+       jit_calli(abort);                                               \
+       jit_patch(jmp);
+#define  calx0(X,T)                            cal##X##n(T,0)
+#define  calx1(X,T)    calx0(X,T)              cal##X##n(T,1)
+#define  calx2(X,T)    calx1(X,T)              cal##X##n(T,2)
+#define  calx3(X,T)    calx2(X,T)              cal##X##n(T,3)
+#define  calx4(X,T)    calx3(X,T)              cal##X##n(T,4)
+#define  calx5(X,T)    calx4(X,T)              cal##X##n(T,5)
+#define  calx6(X,T)    calx5(X,T)              cal##X##n(T,6)
+#define  calx7(X,T)    calx6(X,T)              cal##X##n(T,7)
+#define  calx8(X,T)    calx7(X,T)              cal##X##n(T,8)
+#define  calx9(X,T)    calx8(X,T)              cal##X##n(T,9)
+#define calx10(X,T)    calx9(X,T)              cal##X##n(T,10)
+#define calx11(X,T)    calx10(X,T)             cal##X##n(T,11)
+#define calx12(X,T)    calx11(X,T)             cal##X##n(T,12)
+#define calx13(X,T)    calx12(X,T)             cal##X##n(T,13)
+#define calx14(X,T)    calx13(X,T)             cal##X##n(T,14)
+#define calx15(X,T)    calx14(X,T)             cal##X##n(T,15)
+#define cali(T)                calx15(i,T)
+#define calf(T)                calx15(f,T)
+    /* First pass: the generated code calls the C functions C<T>N directly. */
+    cali(_c)
+    cali(_uc)
+    cali(_s)
+    cali(_us)
+    cali(_i)
+#if __WORDSIZE == 64
+    cali(_ui)
+    cali(_l)
+#endif
+    calf(_f)
+    calf(_d)
+    /* Second pass: same checks via CJ<T>N. NOTE(review): CJ<T>N takes (void) yet push##N still pushes N values - confirm intended. */
+#undef calin
+#undef calfn
+#define calin(T,N)                                                     \
+       jit_prepare();                                                  \
+               push##N()                                               \
+       jit_finishi(CJ##T##N);                                          \
+       jit_retval##T(JIT_R0);                                          \
+       jmp = jit_beqi(JIT_R0, T##N);                                   \
+       jit_calli(abort);                                               \
+       jit_patch(jmp);
+#define calfn(T,N)                                                     \
+       jit_prepare();                                                  \
+               push##N(T)                                              \
+       jit_finishi(CJ##T##N);                                          \
+       jit_retval##T(JIT_F0);                                          \
+       jmp = jit_beqi##T(JIT_F0, _w##N);                               \
+       jit_calli(abort);                                               \
+       jit_patch(jmp);
+    cali(_c)
+    cali(_uc)
+    cali(_s)
+    cali(_us)
+    cali(_i)
+#if __WORDSIZE == 64
+    cali(_ui)
+    cali(_l)
+#endif
+    calf(_f)
+    calf(_d)
+
+    jit_ret();
+
+    function = jit_emit();
+    /* init(T): store the address of each JIT function n<T>N in the pointer j<T>N that the CJ<T>N wrappers call through. */
+#define initn(T,N)     j##T##N = jit_address(n##T##N);
+#define init0(T)                       initn(T,0)
+#define init1(T)        init0(T)       initn(T,1)
+#define init2(T)        init1(T)       initn(T,2)
+#define init3(T)        init2(T)       initn(T,3)
+#define init4(T)        init3(T)       initn(T,4)
+#define init5(T)        init4(T)       initn(T,5)
+#define init6(T)        init5(T)       initn(T,6)
+#define init7(T)        init6(T)       initn(T,7)
+#define init8(T)        init7(T)       initn(T,8)
+#define init9(T)        init8(T)       initn(T,9)
+#define init10(T)       init9(T)       initn(T,10)
+#define init11(T)      init10(T)       initn(T,11)
+#define init12(T)      init11(T)       initn(T,12)
+#define init13(T)      init12(T)       initn(T,13)
+#define init14(T)      init13(T)       initn(T,14)
+#define init15(T)      init14(T)       initn(T,15)
+#define init(T)                init15(T)
+    init(_c)
+    init(_uc)
+    init(_s)
+    init(_us)
+    init(_i)
+#if __WORDSIZE == 64
+    init(_ui)
+    init(_l)
+#endif
+    init(_f)
+    init(_d)
+
+#if 0
+    jit_print();
+    jit_disassemble();
+#endif
+
+    jit_clear_state();
+    (*function)();
+    jit_destroy_state();
+
+    finish_jit();
+
+    printf("ok\n");
+
+    return (0);
+}
diff --git a/deps/lightning/check/check.arm.sh b/deps/lightning/check/check.arm.sh
new file mode 100755 (executable)
index 0000000..2f576be
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.arm$||'`
+./lightning -mthumb=0 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.arm.swf.sh b/deps/lightning/check/check.arm.swf.sh
new file mode 100755 (executable)
index 0000000..378b6d7
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.arm\.swf$||'`
+./lightning -mthumb=0 -mvfp=0 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.arm4.swf.sh b/deps/lightning/check/check.arm4.swf.sh
new file mode 100755 (executable)
index 0000000..21926b6
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.arm4\.swf$||'`
+./lightning -mcpu=4 -mthumb=0 -mvfp=0 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.nodata.sh b/deps/lightning/check/check.nodata.sh
new file mode 100755 (executable)
index 0000000..0fbc4e9
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.nodata$||'`
+./lightning -d $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.sh b/deps/lightning/check/check.sh
new file mode 100755 (executable)
index 0000000..e0267a2
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0`
+./lightning $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.swf.sh b/deps/lightning/check/check.swf.sh
new file mode 100755 (executable)
index 0000000..9494eef
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.swf$||'`
+./lightning -mvfp=0 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.x87.nodata.sh b/deps/lightning/check/check.x87.nodata.sh
new file mode 100755 (executable)
index 0000000..1582e9f
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.x87.nodata$||'`
+./lightning -data=0 -mx87=1 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/check.x87.sh b/deps/lightning/check/check.x87.sh
new file mode 100755 (executable)
index 0000000..c0245e1
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.x87$||'`
+./lightning -mx87=1 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit $?
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then
+    diff $srcdir/$test.ok $test.out
+    rm $test.out
+    exit 1
+fi
+rm $test.out
diff --git a/deps/lightning/check/clobber.ok b/deps/lightning/check/clobber.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/clobber.tst b/deps/lightning/check/clobber.tst
new file mode 100644 (file)
index 0000000..7530842
--- /dev/null
@@ -0,0 +1,1050 @@
+/* do not bother about result of operations, only ensure valid arguments
+ * and that registers not modified by the operation are not clobbered  */
+
+/* IVn / FVn: the integer and floating-point values that setup(), setup_f()
+ * and setup_d() load into the registers and that check()/checkf() compare
+ * against afterwards.
+ * IRn / FRn: map an index n (0-5) to a register name; the test macros build
+ * these names by token pasting (IR##n, FR##n).  */
+#define IV0            0x10000
+#define IV1            0x10001
+#define IV2            0x10002
+#define IV3            0x10003
+#define IV4            0x10004
+#define IV5            0x10005
+#define FV0            100.0
+#define FV1            101.0
+#define FV2            102.0
+#define FV3            103.0
+#define FV4            104.0
+#define FV5            105.0
+#define IR0            r0
+#define IR1            r1
+#define IR2            r2
+#define IR3            v0
+#define IR4            v1
+#define IR5            v2
+#define FR0            f0
+#define FR1            f1
+#define FR2            f2
+#define FR3            f3
+#define FR4            f4
+#define FR5            f5
+
+/* setup() loads r0, r1, r2, v0, v1, v2 with IV0..IV5 using movi.
+ * setup_f() and setup_d() load f0..f5 with FV0..FV5 using movi_f and
+ * movi_d respectively.  The check macros later compare registers against
+ * these same values.  */
+#define setup()                                                        \
+       movi %r0 IV0                                            \
+       movi %r1 IV1                                            \
+       movi %r2 IV2                                            \
+       movi %v0 IV3                                            \
+       movi %v1 IV4                                            \
+       movi %v2 IV5
+#define setup_f()                                              \
+       movi_f %f0 FV0                                          \
+       movi_f %f1 FV1                                          \
+       movi_f %f2 FV2                                          \
+       movi_f %f3 FV3                                          \
+       movi_f %f4 FV4                                          \
+       movi_f %f5 FV5
+#define setup_d()                                              \
+       movi_d %f0 FV0                                          \
+       movi_d %f1 FV1                                          \
+       movi_d %f2 FV2                                          \
+       movi_d %f3 FV3                                          \
+       movi_d %f4 FV4                                          \
+       movi_d %f5 FV5
+
+/* check(label, rn): emit "beqi label %IR<rn> IV<rn>" followed by a call to
+ * @abort, and define label right after the call, so the abort is skipped
+ * only when the branch is taken.
+ * check1..check4(k, l, i0, ...): one check() per listed register index.
+ * Every label is pasted from k, l, all the indices and a _<position>
+ * suffix.  */
+#define check(label, rn)                                       \
+       beqi label %IR##rn IV##rn                               \
+       calli @abort                                            \
+label:
+#define check1(k, l, i0)                                       \
+       check(k##l##i0##_0, i0)
+#define check2(k, l, i0, i1)                                   \
+       check(k##l##i0##i1##_0, i0)                             \
+       check(k##l##i0##i1##_1, i1)
+#define check3(k, l, i0, i1, i2)                               \
+       check(k##l##i0##i1##i2##_0, i0)                         \
+       check(k##l##i0##i1##i2##_1, i1)                         \
+       check(k##l##i0##i1##i2##_2, i2)
+#define check4(k, l, i0, i1, i2, i3)                           \
+       check(k##l##i0##i1##i2##i3##_0, i0)                     \
+       check(k##l##i0##i1##i2##i3##_1, i1)                     \
+       check(k##l##i0##i1##i2##i3##_2, i2)                     \
+       check(k##l##i0##i1##i2##i3##_3, i3)
+/* check5(k, l, i0..i4): emit one check() for each of the five register
+ * indices.  Every label is pasted from k, l, all five indices and a
+ * _<position> suffix; the _2 label used to paste i3 twice and omit i4, so
+ * it did not depend on i4 like the other four labels do.  */
+#define check5(k, l, i0, i1, i2, i3, i4)                       \
+       check(k##l##i0##i1##i2##i3##i4##_0, i0)                 \
+       check(k##l##i0##i1##i2##i3##i4##_1, i1)                 \
+       check(k##l##i0##i1##i2##i3##i4##_2, i2)                 \
+       check(k##l##i0##i1##i2##i3##i4##_3, i3)                 \
+       check(k##l##i0##i1##i2##i3##i4##_4, i4)
+/* check6(k, l, i0..i5): emit one check() for each of the six register
+ * indices.  Every label is pasted from k, l, all six indices and a
+ * _<position> suffix; the _2 label used to paste i3 twice and omit i4, so
+ * it did not depend on i4 like the other five labels do.  */
+#define check6(k, l, i0, i1, i2, i3, i4, i5)                   \
+       check(k##l##i0##i1##i2##i3##i4##i5##_0, i0)             \
+       check(k##l##i0##i1##i2##i3##i4##i5##_1, i1)             \
+       check(k##l##i0##i1##i2##i3##i4##i5##_2, i2)             \
+       check(k##l##i0##i1##i2##i3##i4##i5##_3, i3)             \
+       check(k##l##i0##i1##i2##i3##i4##i5##_4, i4)             \
+       check(k##l##i0##i1##i2##i3##i4##i5##_5, i5)
+
+/* checkf(f, label, rn): floating-point counterpart of check().  It emits
+ * "beqi<f> label %FR<rn> FV<rn>" followed by a call to @abort, and defines
+ * label right after the call.  f is the instruction suffix (callers pass
+ * _f or _d).
+ * checkf1..checkf4(f, k, l, i0, ...): one checkf() per listed register
+ * index; labels are pasted from f, k, l, the indices and a _<position>
+ * suffix.  */
+#define checkf(f, label, rn)                                   \
+       beqi##f label %FR##rn FV##rn                            \
+       calli @abort                                            \
+label:
+#define checkf1(f, k, l, i0)                                   \
+       checkf(f, f##k##l##i0##_0, i0)
+#define checkf2(f, k, l, i0, i1)                               \
+       checkf(f, f##k##l##i0##i1##_0, i0)                      \
+       checkf(f, f##k##l##i0##i1##_1, i1)
+#define checkf3(f, k, l, i0, i1, i2)                           \
+       checkf(f, f##k##l##i0##i1##i2##_0, i0)                  \
+       checkf(f, f##k##l##i0##i1##i2##_1, i1)                  \
+       checkf(f, f##k##l##i0##i1##i2##_2, i2)
+#define checkf4(f, k, l, i0, i1, i2, i3)                       \
+       checkf(f, f##k##l##i0##i1##i2##i3##_0, i0)              \
+       checkf(f, f##k##l##i0##i1##i2##i3##_1, i1)              \
+       checkf(f, f##k##l##i0##i1##i2##i3##_2, i2)              \
+       checkf(f, f##k##l##i0##i1##i2##i3##_3, i3)
+/* checkf5(f, k, l, i0..i4): emit one checkf() for each of the five register
+ * indices.  Every label is pasted from f, k, l, all five indices and a
+ * _<position> suffix; the _2 label used to paste i3 twice and omit i4, so
+ * it did not depend on i4 like the other four labels do.  */
+#define checkf5(f, k, l, i0, i1, i2, i3, i4)                   \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##_0, i0)          \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##_1, i1)          \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##_2, i2)          \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##_3, i3)          \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##_4, i4)
+/* checkf6(f, k, l, i0..i5): emit one checkf() for each of the six register
+ * indices.  Every label is pasted from f, k, l, all six indices and a
+ * _<position> suffix; the _2 label used to paste i3 twice and omit i4, so
+ * it did not depend on i4 like the other five labels do.  */
+#define checkf6(f, k, l, i0, i1, i2, i3, i4, i5)               \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_0, i0)      \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_1, i1)      \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_2, i2)      \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_3, i3)      \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_4, i4)      \
+       checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_5, i5)
+
+/* Integer ALU clobber tests.  Each macro runs setup(), loads 1 into the
+ * source register(s), emits op in its immediate (op##i) or register (op##r)
+ * form and then checks the registers it did not touch:
+ *   alui    dst=i1  src=i0, 1      checks i2..i5
+ *   aluic   dst=i0  src=i0, 1      checks i1..i5
+ *   alur    dst=i2  src=i0, i1     checks i3..i5
+ *   alurc0  dst=i0  src=i0, i1     checks i2..i5
+ *   alurc1  dst=i1  src=i0, i1     checks i2..i5
+ *   alurc2  dst=i0  src=i0, i0     checks i1..i5
+ * xalu() expands all six for one register assignment.  alu(l, op) repeats
+ * xalu() for the six rotations of (0..5), or for the first one only when
+ * __ia64__ is set.  */
+#define alui(l, op, i0, i1, i2, i3, i4, i5)                    \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##i %IR##i1 %IR##i0 1                                 \
+       check4(i, l, i2, i3, i4, i5)
+#define aluic(l, op, i0, i1, i2, i3, i4, i5)                   \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##i %IR##i0 %IR##i0 1                                 \
+       check5(ic, l, i1, i2, i3, i4, i5)
+#define alur(l, op, i0, i1, i2, i3, i4, i5)                    \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r %IR##i2 %IR##i0 %IR##i1                           \
+       check3(r, l, i3, i4, i5)
+#define alurc0(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r %IR##i0 %IR##i0 %IR##i1                           \
+       check4(r0, l, i2, i3, i4, i5)
+#define alurc1(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r %IR##i1 %IR##i0 %IR##i1                           \
+       check4(r1, l, i2, i3, i4, i5)
+#define alurc2(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##r %IR##i0 %IR##i0 %IR##i0                           \
+       check5(r2, l, i1, i2, i3, i4, i5)
+#define   xalu(l, op, i0, i1, i2, i3, i4, i5)                  \
+         alui(l, op, i0, i1,   i2, i3, i4, i5)                 \
+        aluic(l, op, i0,               i1, i2, i3, i4, i5)     \
+         alur(l, op, i0, i1, i2,       i3, i4, i5)             \
+       alurc0(l, op, i0, i1,   i2, i3, i4, i5)                 \
+       alurc1(l, op, i0, i1,   i2, i3, i4, i5)                 \
+       alurc2(l, op, i0,               i1, i2, i3, i4, i5)
+
+#if __ia64__
+#  define alu(l, op)                                           \
+        xalu(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alu(l, op)                                           \
+        xalu(l, op, 0, 1, 2, 3, 4, 5)                          \
+        xalu(l, op, 1, 2, 3, 4, 5, 0)                          \
+        xalu(l, op, 2, 3, 4, 5, 0, 1)                          \
+        xalu(l, op, 3, 4, 5, 0, 1, 2)                          \
+        xalu(l, op, 4, 5, 0, 1, 2, 3)                          \
+        xalu(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Floating-point ALU clobber tests; f is the instruction suffix (_f or _d).
+ * Each macro runs setup<f>(), loads 1.0 into the source register(s), emits
+ * op in its immediate (op##i##f) or register (op##r##f) form and then
+ * checks the registers it did not touch:
+ *   fopi    dst=f1  src=f0, 1.0    checks f2..f5
+ *   fopic   dst=f0  src=f0, 1.0    checks f1..f5
+ *   fopr    dst=f2  src=f0, f1     checks f3..f5
+ *   foprc0  dst=f0  src=f0, f1     checks f2..f5
+ *   foprc1  dst=f1  src=f0, f1     checks f2..f5
+ *   foprc2  dst=f0  src=f0, f0     checks f1..f5
+ * xfop() expands all six for one suffix and register assignment.  xxfop()
+ * expands xfop() for _f and _d (only _f when __ia64__ is set); its own f
+ * parameter is not used in the expansion.  fop(l, op) repeats xxfop() for
+ * the six rotations of (0..5), or for the first one only when __ia64__ is
+ * set.  */
+#define fopi(f, l, op, f0, f1, f2, f3, f4, f5)                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       op##i##f %FR##f1 %FR##f0 1.0                            \
+       checkf4(f, i, l, f2, f3, f4, f5)
+#define fopic(f, l, op, f0, f1, f2, f3, f4, f5)                        \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       op##i##f %FR##f0 %FR##f0 1.0                            \
+       checkf5(f, ic, l, f1, f2, f3, f4, f5)
+#define fopr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       movi##f %FR##f1 1.0                                     \
+       op##r##f %FR##f2 %FR##f0 %FR##f1                        \
+       checkf3(f, r, l, f3, f4, f5)
+#define foprc0(f, l, op, f0, f1, f2, f3, f4, f5)               \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       movi##f %FR##f1 1.0                                     \
+       op##r##f %FR##f0 %FR##f0 %FR##f1                        \
+       checkf4(f, r0, l, f2, f3, f4, f5)
+#define foprc1(f, l, op, f0, f1, f2, f3, f4, f5)               \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       movi##f %FR##f1 1.0                                     \
+       op##r##f %FR##f1 %FR##f0 %FR##f1                        \
+       checkf4(f, r1, l, f2, f3, f4, f5)
+#define foprc2(f, l, op, f0, f1, f2, f3, f4, f5)               \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       op##r##f %FR##f0 %FR##f0 %FR##f0                        \
+       checkf5(f, r2, l, f1, f2, f3, f4, f5)
+#define   xfop(f, l, op, f0, f1, f2, f3, f4, f5)               \
+         fopi(f, l, op, f0, f1, f2,            f3, f4, f5)     \
+        fopic(f, l, op, f0, f1, f2,            f3, f4, f5)     \
+         fopr(f, l, op, f0, f1, f2,            f3, f4, f5)     \
+       foprc0(f, l, op, f0, f1,                f2, f3, f4, f5) \
+       foprc1(f, l, op, f0, f1,                f2, f3, f4, f5) \
+       foprc2(f, l, op, f0, f1,                f2, f3, f4, f5)
+#if __ia64__
+#  define xxfop(l, op, f, f0, f1, f2, f3, f4, f5)              \
+          xfop(_f, l, op, f0, f1, f2, f3, f4, f5)
+#else
+#  define xxfop(l, op, f, f0, f1, f2, f3, f4, f5)              \
+          xfop(_f, l, op, f0, f1, f2, f3, f4, f5)              \
+          xfop(_d, l, op, f0, f1, f2, f3, f4, f5)
+#endif
+#if __ia64__
+#  define fop(l, op)                                           \
+       xxfop(l, op, f, 0, 1, 2, 3, 4, 5)
+#else
+#  define fop(l, op)                                           \
+       xxfop(l, op, f, 0, 1, 2, 3, 4, 5)                       \
+       xxfop(l, op, f, 1, 2, 3, 4, 5, 0)                       \
+       xxfop(l, op, f, 2, 3, 4, 5, 0, 1)                       \
+       xxfop(l, op, f, 3, 4, 5, 0, 1, 2)                       \
+       xxfop(l, op, f, 4, 5, 0, 1, 2, 3)                       \
+       xxfop(l, op, f, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for instruction pairs: an op##c form followed by an op##x
+ * form.  Each macro runs setup(), loads 1 into the source register(s),
+ * emits the pair and checks i3..i5:
+ *   aluxii  op##ci i1 <- i0, 1     then  op##xi i2 <- i0, 1
+ *   aluxir  op##ci i1 <- i0, 1     then  op##xr i2 <- i0, i1
+ *   aluxri  op##cr i2 <- i0, i1    then  op##xi i0 <- i1, 1
+ *   aluxrr  op##cr i2 <- i0, i1    then  op##xr i2 <- i0, i1
+ * xalux() expands all four for one register assignment.  alux(l, op)
+ * repeats xalux() for the six rotations of (0..5), or for the first one
+ * only when __ia64__ is set.  */
+#define aluxii(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##ci %IR##i1 %IR##i0 1                                \
+       op##xi %IR##i2 %IR##i0 1                                \
+       check3(ii, l, i3, i4, i5)
+#define aluxir(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##ci %IR##i1 %IR##i0 1                                \
+       op##xr %IR##i2 %IR##i0 %IR##i1                          \
+       check3(ir, l, i3, i4, i5)
+#define aluxri(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##cr %IR##i2 %IR##i0 %IR##i1                          \
+       op##xi %IR##i0 %IR##i1 1                                \
+       check3(ri, l, i3, i4, i5)
+#define aluxrr(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##cr %IR##i2 %IR##i0 %IR##i1                          \
+       op##xr %IR##i2 %IR##i0 %IR##i1                          \
+       check3(rr, l, i3, i4, i5)
+#define  xalux(l, op, i0, i1, i2, i3, i4, i5)                  \
+       aluxii(l, op, i0, i1, i2,       i3, i4, i5)             \
+       aluxir(l, op, i0, i1, i2,       i3, i4, i5)             \
+       aluxri(l, op, i0, i1, i2,       i3, i4, i5)             \
+       aluxrr(l, op, i0, i1, i2,       i3, i4, i5)
+#if __ia64__
+#  define alux(l, op)                                          \
+        xalux(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alux(l, op)                                          \
+        xalux(l, op, 0, 1, 2, 3, 4, 5)                         \
+        xalux(l, op, 1, 2, 3, 4, 5, 0)                         \
+        xalux(l, op, 2, 3, 4, 5, 0, 1)                         \
+        xalux(l, op, 3, 4, 5, 0, 1, 2)                         \
+        xalux(l, op, 4, 5, 0, 1, 2, 3)                         \
+        xalux(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for the _u instruction forms (op##i_u / op##r_u).  Same
+ * shape as the alui..alurc2 macros: run setup(), load 1 into the source
+ * register(s), emit the instruction and check the untouched registers:
+ *   alui_u    dst=i1  src=i0, 1    checks i2..i5
+ *   aluic_u   dst=i0  src=i0, 1    checks i1..i5
+ *   alur_u    dst=i2  src=i0, i1   checks i3..i5
+ *   alurc0_u  dst=i0  src=i0, i1   checks i2..i5
+ *   alurc1_u  dst=i1  src=i0, i1   checks i2..i5
+ *   alurc2_u  dst=i0  src=i0, i0   checks i1..i5
+ * xalu_u() expands all six for one register assignment.  alu_u(l, op)
+ * repeats xalu_u() for the six rotations of (0..5), or for the first one
+ * only when __ia64__ is set.  */
+#define alui_u(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##i_u %IR##i1 %IR##i0 1                               \
+       check4(i_u, l, i2, i3, i4, i5)
+#define aluic_u(l, op, i0, i1, i2, i3, i4, i5)                 \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##i_u %IR##i0 %IR##i0 1                               \
+       check5(ic_u, l, i1, i2, i3, i4, i5)
+#define alur_u(l, op, i0, i1, i2, i3, i4, i5)                  \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r_u %IR##i2 %IR##i0 %IR##i1                         \
+       check3(r_u, l, i3, i4, i5)
+#define alurc0_u(l, op, i0, i1, i2, i3, i4, i5)                        \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r_u %IR##i0 %IR##i0 %IR##i1                         \
+       check4(r0_u, l, i2, i3, i4, i5)
+#define alurc1_u(l, op, i0, i1, i2, i3, i4, i5)                        \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       movi %IR##i1 1                                          \
+       op##r_u %IR##i1 %IR##i0 %IR##i1                         \
+       check4(r1_u, l, i2, i3, i4, i5)
+#define alurc2_u(l, op, i0, i1, i2, i3, i4, i5)                        \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op##r_u %IR##i0 %IR##i0 %IR##i0                         \
+       check5(r2_u, l, i1, i2, i3, i4, i5)
+#define   xalu_u(l, op, i0, i1, i2, i3, i4, i5)                        \
+         alui_u(l, op, i0, i1, i2, i3, i4, i5)                 \
+        aluic_u(l, op, i0,             i1, i2, i3, i4, i5)     \
+         alur_u(l, op, i0, i1, i2,     i3, i4, i5)             \
+       alurc0_u(l, op, i0, i1, i2, i3, i4, i5)                 \
+       alurc1_u(l, op, i0, i1, i2, i3, i4, i5)                 \
+       alurc2_u(l, op, i0,             i1, i2, i3, i4, i5)
+#if __ia64__
+#  define alu_u(l, op)                                         \
+        xalu_u(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alu_u(l, op)                                         \
+        xalu_u(l, op, 0, 1, 2, 3, 4, 5)                        \
+        xalu_u(l, op, 1, 2, 3, 4, 5, 0)                        \
+        xalu_u(l, op, 2, 3, 4, 5, 0, 1)                        \
+        xalu_u(l, op, 3, 4, 5, 0, 1, 2)                        \
+        xalu_u(l, op, 4, 5, 0, 1, 2, 3)                        \
+        xalu_u(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for two-operand integer instructions (op dst src).
+ *   unir   runs setup(), loads 1 into i0, emits "op i1 i0", checks i2..i5
+ *   unirc  runs setup(), loads 1 into i0, emits "op i0 i0", checks i1..i5
+ * xuni() expands both for one register assignment.  uni(l, op) repeats
+ * xuni() for the six rotations of (0..5), or for the first one only when
+ * __ia64__ is set.  */
+#define unir(l, op, i0, i1, i2, i3, i4, i5)                    \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op %IR##i1 %IR##i0                                      \
+       check4(rr, l, i2, i3, i4, i5)
+#define unirc(l, op, i0, i1, i2, i3, i4, i5)                   \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       op %IR##i0 %IR##i0                                      \
+       check5(rc, l, i1, i2, i3, i4, i5)
+#define   xuni(l, op, i0, i1, i2, i3, i4, i5)                  \
+         unir(l, op, i0, i1,   i2, i3, i4, i5)                 \
+        unirc(l, op, i0,               i1, i2, i3, i4, i5)
+#if __ia64__
+#  define uni(l, op)                                           \
+        xuni(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define uni(l, op)                                           \
+        xuni(l, op, 0, 1, 2, 3, 4, 5)                          \
+        xuni(l, op, 1, 2, 3, 4, 5, 0)                          \
+        xuni(l, op, 2, 3, 4, 5, 0, 1)                          \
+        xuni(l, op, 3, 4, 5, 0, 1, 2)                          \
+        xuni(l, op, 4, 5, 0, 1, 2, 3)                          \
+        xuni(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for two-operand floating-point instructions (op##f dst
+ * src); f is the instruction suffix.
+ *   unfr   runs setup<f>(), loads 1 into f0, emits "op<f> f1 f0",
+ *          checks f2..f5
+ *   unfrc  runs setup<f>(), loads 1 into f0, emits "op<f> f0 f0",
+ *          checks f1..f5
+ * xunf() expands both for one suffix; xxunf() expands xunf() for _f and
+ * _d.  unf(l, op) repeats xxunf() for the six rotations of (0..5), or for
+ * the first one only when __ia64__ is set.  */
+#define unfr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       op##f %FR##f1 %FR##f0                                   \
+       checkf4(f, rr, l, f2, f3, f4, f5)
+#define unfrc(f, l, op, f0, f1, f2, f3, f4, f5)                        \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       op##f %FR##f0 %FR##f0                                   \
+       checkf5(f, rc, l, f1, f2, f3, f4, f5)
+#define   xunf(f, l, op, f0, f1, f2, f3, f4, f5)               \
+         unfr(f, l, op, f0, f1,        f2, f3, f4, f5)         \
+        unfrc(f, l, op, f0,            f1, f2, f3, f4, f5)
+#define xxunf(l, op, f0, f1, f2, f3, f4, f5)                   \
+        xunf(_f, l, op, f0, f1, f2, f3, f4, f5)                \
+        xunf(_d, l, op, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+#  define unf(l, op)                                           \
+       xxunf(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define unf(l, op)                                           \
+       xxunf(l, op, 0, 1, 2, 3, 4, 5)                          \
+       xxunf(l, op, 1, 2, 3, 4, 5, 0)                          \
+       xxunf(l, op, 2, 3, 4, 5, 0, 1)                          \
+       xxunf(l, op, 3, 4, 5, 0, 1, 2)                          \
+       xxunf(l, op, 4, 5, 0, 1, 2, 3)                          \
+       xxunf(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for instructions that take floating-point sources and
+ * write an integer register.  Each macro runs setup() and setup<f>(), loads
+ * 1.0 into the source register(s), emits the instruction with destination
+ * IR<r0> and checks integer registers r1..r5 plus the float registers it
+ * did not load:
+ *   fcpi   op##i##f r0 <- f0, 1.0   checks f1..f5
+ *   fcpr   op##r##f r0 <- f0, f1    checks f2..f5
+ *   fcprc  op##r##f r0 <- f0, f0    checks f1..f5
+ * ifcp() expands the three macros for the six rotations of the integer
+ * indices; xfcp() expands ifcp() for the six rotations of the float
+ * indices; fcmp(l, op) expands xfcp() for _f and _d.  When __ia64__ is set
+ * each level is reduced to its first entry (fcpi only, no rotations, _f
+ * only).  */
+#define fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
+       setup()                                                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       op##i##f %IR##r0 %FR##f0 1.0                            \
+       check5(i##f##f0, l, r1, r2, r3, r4, r5)                 \
+       checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+#define fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
+       setup()                                                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       movi##f %FR##f1 1.0                                     \
+       op##r##f %IR##r0 %FR##f0 %FR##f1                        \
+       check5(r##f##f0, l, r1, r2, r3, r4, r5)                 \
+       checkf4(f, r##r0, l, f2, f3, f4, f5)
+#define fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+       setup()                                                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1.0                                     \
+       op##r##f %IR##r0 %FR##f0 %FR##f0                        \
+       check5(rc##f##f0, l, r1, r2, r3, r4, r5)                \
+       checkf5(f, rc##r0, l, f1, f2, f3, f4, f5)
+#if __ia64__
+#  define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+#if __ia64__
+#  define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#if __ia64__
+#  define fcmp(l, op)                                          \
+         xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#else
+#  define fcmp(l, op)                                          \
+         xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)   \
+         xfcp(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#endif
+
+/* Clobber tests for integer moves.
+ *   imvi  runs setup(), emits "movi i0 1", checks i1..i5
+ *   imvr  runs setup(), loads 1 into i1, emits "movr i0 i1",
+ *         checks i2..i5
+ * xmvi() expands both for one register assignment.  mvi(l) repeats xmvi()
+ * for the six rotations of (0..5), or for the first one only when __ia64__
+ * is set.  */
+#define imvi(l, i0, i1, i2, i3, i4, i5)                                \
+       setup()                                                 \
+       movi %IR##i0 1                                          \
+       check5(i, l, i1, i2, i3, i4, i5)
+#define imvr(l, i0, i1, i2, i3, i4, i5)                                \
+       setup()                                                 \
+       movi %IR##i1 1                                          \
+       movr %IR##i0 %IR##i1                                    \
+       check4(r, l, i2, i3, i4, i5)
+#define xmvi(l, i0, i1, i2, i3, i4, i5)                                \
+       imvi(l, i0,     i1, i2, i3, i4, i5)                     \
+       imvr(l, i0, i1, i2, i3, i4, i5)
+#if __ia64__
+#  define mvi(l)                                               \
+        xmvi(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define mvi(l)                                               \
+        xmvi(l, 0, 1, 2, 3, 4, 5)                              \
+        xmvi(l, 1, 2, 3, 4, 5, 0)                              \
+        xmvi(l, 2, 3, 4, 5, 0, 1)                              \
+        xmvi(l, 3, 4, 5, 0, 1, 2)                              \
+        xmvi(l, 4, 5, 0, 1, 2, 3)                              \
+        xmvi(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for floating-point moves; f is the instruction suffix.
+ *   fmvi  runs setup<f>(), emits "movi<f> f0 1", checks f1..f5
+ *   fmvr  runs setup<f>(), loads 1 into f1, emits "movr<f> f0 f1",
+ *         checks f2..f5
+ * xmvf() expands both for one register assignment.  xxmvf(f, l) repeats
+ * xmvf() for the six rotations of (0..5), or for the first one only when
+ * __ia64__ is set.  mvf(l) expands xxmvf() for _f and _d.  */
+#define fmvi(f, l, f0, f1, f2, f3, f4, f5)                     \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       checkf5(f, i, l, f1, f2, f3, f4, f5)
+#define fmvr(f, l, f0, f1, f2, f3, f4, f5)                     \
+       setup##f()                                              \
+       movi##f %FR##f1 1                                       \
+       movr##f %FR##f0 %FR##f1                                 \
+       checkf4(f, r, l, f2, f3, f4, f5)
+#define xmvf(f, l, f0, f1, f2, f3, f4, f5)                     \
+       fmvi(f, l, f0, f1, f2, f3, f4, f5)                      \
+       fmvr(f, l, f0, f1,      f2, f3, f4, f5)
+#if __ia64__
+#  define xxmvf(f, l)                                          \
+         xmvf(f, l, 0, 1, 2, 3, 4, 5)
+#else
+#  define xxmvf(f, l)                                          \
+          xmvf(f, l, 0, 1, 2, 3, 4, 5)                         \
+          xmvf(f, l, 1, 2, 3, 4, 5, 0)                         \
+          xmvf(f, l, 2, 3, 4, 5, 0, 1)                         \
+          xmvf(f, l, 3, 4, 5, 0, 1, 2)                         \
+          xmvf(f, l, 4, 5, 0, 1, 2, 3)                         \
+          xmvf(f, l, 5, 0, 1, 2, 3, 4)
+#endif
+#define   mvf(l)                                               \
+       xxmvf(_f, l)                                            \
+       xxmvf(_d, l)
+
+/* Clobber tests for float-to-float instructions.  Unlike unfr/unfrc, op is
+ * emitted exactly as given (no suffix is pasted onto it); f only selects
+ * the setup, movi and checkf variants.
+ *   f2fr   runs setup<f>(), loads 1 into f0, emits "op f1 f0",
+ *          checks f2..f5
+ *   f2frc  runs setup<f>(), loads 1 into f0, emits "op f0 f0",
+ *          checks f1..f5
+ * xf2f() expands both for one register assignment.  f2f(l, f, op) repeats
+ * xf2f() for the six rotations of (0..5), or for the first one only when
+ * __ia64__ is set.  */
+#define f2fr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       op %FR##f1 %FR##f0                                      \
+       checkf4(f, rr, l, f2, f3, f4, f5)
+#define f2frc(f, l, op, f0, f1, f2, f3, f4, f5)                        \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       op %FR##f0 %FR##f0                                      \
+       checkf5(f, rc, l, f1, f2, f3, f4, f5)
+#define  xf2f(f, l, op, f0, f1, f2, f3, f4, f5)                        \
+        f2fr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
+       f2frc(f, l, op, f0,             f1, f2, f3, f4, f5)
+#if __ia64__
+#  define f2f(l, f, op)                                                \
+        xf2f(f, l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define f2f(l, f, op)                                                \
+        xf2f(f, l, op, 0, 1, 2, 3, 4, 5)                       \
+        xf2f(f, l, op, 1, 2, 3, 4, 5, 0)                       \
+        xf2f(f, l, op, 2, 3, 4, 5, 0, 1)                       \
+        xf2f(f, l, op, 3, 4, 5, 0, 1, 2)                       \
+        xf2f(f, l, op, 4, 5, 0, 1, 2, 3)                       \
+        xf2f(f, l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Clobber tests for instructions with a float source and an integer
+ * destination.  f2ir runs setup() and setup<f>(), loads 1 into FR<f0>,
+ * emits "op<f> IR<r0> FR<f0>" and checks integer registers r1..r5 and float
+ * registers f1..f5.
+ * if2i() repeats f2ir() for the six rotations of the integer indices and
+ * xf2i() repeats if2i() for the six rotations of the float indices; when
+ * __ia64__ is set both expand to a single invocation.  f2i(l, op) expands
+ * xf2i() for _f and _d.  */
+#define f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
+       setup()                                                 \
+       setup##f()                                              \
+       movi##f %FR##f0 1                                       \
+       op##f %IR##r0 %FR##f0                                   \
+       check5(r##f##f0, l, r1, r2, r3, r4, r5)                 \
+       checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+#if __ia64__
+#  define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#  define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#  define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#define f2i(l, op)                                             \
+       xf2i(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)     \
+       xf2i(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+
+/* Clobber tests for instructions with an integer source and a float
+ * destination.  i2fr runs setup() and setup<f>(), loads 1 into IR<r0>,
+ * emits "op<f> FR<f0> IR<r0>" and checks integer registers r1..r5 and float
+ * registers f1..f5.
+ * ii2f() repeats i2fr() for the six rotations of the integer indices and
+ * xi2f() repeats ii2f() for the six rotations of the float indices; when
+ * __ia64__ is set both expand to a single invocation.  i2f(l, op) expands
+ * xi2f() for _f and _d.  */
+#define i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 1                                          \
+       op##f %FR##f0 %IR##r0                                   \
+       check5(r##f##f0, l, r1, r2, r3, r4, r5)                 \
+       checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+#if __ia64__
+#  define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#  define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#  define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#define i2f(l, op)                                             \
+       xi2f(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)     \
+       xi2f(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+
+/*
+ * Displacements used by the indexed load/store probes below, selected by
+ * pasting the type suffix onto "off" (off##i / off##f).  The values look
+ * like the byte size of each type; the unsigned suffixes reuse the signed
+ * entry.  With buff being 16 bytes, the largest access (offset 8, presumably
+ * 8 bytes wide) still ends inside the buffer.
+ */
+#define off_c          1
+#define off_uc         off_c
+#define off_s          2
+#define off_us         off_s
+#define off_i          4
+#define off_ui         off_i
+#define off_l          8
+#define off_f          4
+#define off_d          8
+
+/*
+ * Integer load probes.  Each one runs setup(), performs one load variant
+ * with suffix i, and then hands every integer register it did not write to
+ * checkN (defined earlier in the file) together with a tag and the line l.
+ */
+/* ildi: load from the absolute address buff into r0; r1..r5 are checked. */
+#define ildi(i, l, r0, r1, r2, r3, r4, r5)                     \
+       setup()                                                 \
+       ldi##i %IR##r0 buff                                     \
+       check5(ldi##i, l, r1, r2, r3, r4, r5)
+/* ildr: base address in r1, destination r0; r2..r5 are checked. */
+#define ildr(i, l, r0, r1, r2, r3, r4, r5)                     \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       ldr##i %IR##r0 %IR##r1                                  \
+       check4(ldr##i, l, r2, r3, r4, r5)
+/* ildr0: destination and base are the same register r0; r1..r5 checked. */
+#define ildr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r0 buff                                       \
+       ldr##i %IR##r0 %IR##r0                                  \
+       check5(ldr##i, l, r1, r2, r3, r4, r5)
+/* ildxi: base in r1 plus immediate displacement off##i, destination r0. */
+#define ildxi(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       ldxi##i %IR##r0 %IR##r1 off##i                          \
+       check4(ldxi##i, l, r2, r3, r4, r5)
+/* ildxr: base in r1, index in r2, destination r0; r3..r5 are checked. */
+#define ildxr(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       movi %IR##r2 off##i                                     \
+       ldxr##i %IR##r0 %IR##r1 %IR##r2                         \
+       check3(ldxr##i, l, r3, r4, r5)
+/* ildxr0: destination r0 is also the index register; base in r1. */
+#define ildxr0(i, l, r0, r1, r2, r3, r4, r5)                   \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       movi %IR##r0 off##i                                     \
+       ldxr##i %IR##r0 %IR##r1 %IR##r0                         \
+       check4(ldxr0##i, l, r2, r3, r4, r5)
+/* ildxr1: destination r0 is also the base register; index in r1. */
+#define ildxr1(i, l, r0, r1, r2, r3, r4, r5)                   \
+       setup()                                                 \
+       movi %IR##r0 buff                                       \
+       movi %IR##r1 off##i                                     \
+       ldxr##i %IR##r0 %IR##r0 %IR##r1                         \
+       check4(ldxr1##i, l, r2, r3, r4, r5)
+/* xxldi: run all seven load probes for one type suffix. */
+#define  xxldi(i, l, r0, r1, r2, r3, r4, r5)                   \
+         ildi(i, l, r0, r1, r2, r3, r4, r5)                    \
+         ildr(i, l, r0, r1, r2, r3, r4, r5)                    \
+        ildr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+        ildxi(i, l, r0, r1, r2, r3, r4, r5)                    \
+        ildxr(i, l, r0, r1, r2, r3, r4, r5)                    \
+       ildxr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+       ildxr1(i, l, r0, r1, r2, r3, r4, r5)
+/* xxxldi: the _ui and _l load suffixes are only exercised when
+ * __WORDSIZE is not 32; on 32-bit words it expands to nothing. */
+#if __WORDSIZE == 32
+#define xxxldi(l, r0, r1, r2, r3, r4, r5)
+#else
+#define xxxldi(l, r0, r1, r2, r3, r4, r5)                      \
+        xxldi(_ui, l, r0, r1, r2, r3, r4, r5)                  \
+        xxldi( _l, l, r0, r1, r2, r3, r4, r5)
+#endif
+/* xldi: every integer load suffix for one register assignment. */
+#define   xldi(l, r0, r1, r2, r3, r4, r5)                      \
+        xxldi( _c, l, r0, r1, r2, r3, r4, r5)                  \
+        xxldi(_uc, l, r0, r1, r2, r3, r4, r5)                  \
+        xxldi( _s, l, r0, r1, r2, r3, r4, r5)                  \
+        xxldi(_us, l, r0, r1, r2, r3, r4, r5)                  \
+        xxldi( _i, l, r0, r1, r2, r3, r4, r5)                  \
+       xxxldi(l, r0, r1, r2, r3, r4, r5)
+/* ldi(l): ia64 uses only the identity register order; everywhere else the
+ * test is repeated for all six rotations of registers 0..5. */
+#if __ia64__
+#  define ldi(l)                                               \
+        xldi(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define ldi(l)                                               \
+        xldi(l, 0, 1, 2, 3, 4, 5)                              \
+        xldi(l, 1, 2, 3, 4, 5, 0)                              \
+        xldi(l, 2, 3, 4, 5, 0, 1)                              \
+        xldi(l, 3, 4, 5, 0, 1, 2)                              \
+        xldi(l, 4, 5, 0, 1, 2, 3)                              \
+        xldi(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+/*
+ * Floating-point load probes.  Each one runs setup() and setup##f(), loads
+ * into FP register f0 with suffix f, then checks the integer registers it
+ * did not write and, through checkf5, FP registers f1..f5.
+ */
+/* fldi: absolute address buff; no integer register is written, so all six
+ * go through check6. */
+#define fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       setup()                                                 \
+       setup##f()                                              \
+       ldi##f %FR##f0 buff                                     \
+       check6(ldi##f##r0##f0, l, r0, r1, r2, r3, r4, r5)       \
+       checkf5(f, ldi##r0##f0, l, f1, f2, f3, f4, f5)
+/* fldr: base address in integer register r0. */
+#define fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       ldr##f %FR##f0 %IR##r0                                  \
+       check5(ldr##f##r0##f0, l, r1, r2, r3, r4, r5)           \
+       checkf5(f, ldr##r0##f0, l, f1, f2, f3, f4, f5)
+/* fldxi: base in r0 plus immediate displacement off##f. */
+#define fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       ldxi##f %FR##f0 %IR##r0 off##f                          \
+       check5(ldxi##f##r0##f0, l, r1, r2, r3, r4, r5)          \
+       checkf5(f, ldxi##r0##f0, l, f1, f2, f3, f4, f5)
+/* fldxr: base in r0, index in r1; r2..r5 are checked. */
+#define fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       movi %IR##r1 off##f                                     \
+       ldxr##f %FR##f0 %IR##r0 %IR##r1                         \
+       check4(ldxr##f##r0##f0, l, r2, r3, r4, r5)              \
+       checkf5(f, ldxr##r0##f0, l, f1, f2, f3, f4, f5)
+/* xldf: all four FP load probes for one suffix. */
+#define          xldf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+       fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+       fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+/* xxldf: xldf for the _f and then the _d suffix. */
+#define         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
+         xldf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)     \
+         xldf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+/* ixldf: ia64 runs one register assignment; other targets run six FP
+ * rotations (fxldf) for each of six integer rotations. */
+#if __ia64__
+#  define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#  define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+/* ldf(l): entry point, starting from register order 0..5 for both lists. */
+#define   ldf(l)                                               \
+       ixldf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
+
+/*
+ * Integer store probes.  Each one runs setup(), performs one store variant
+ * with suffix i, and then hands the integer registers that were not used as
+ * operands to checkN (defined earlier in the file).
+ */
+/* isti: store r0 to the absolute address buff; r1..r5 are checked. */
+#define isti(i, l, r0, r1, r2, r3, r4, r5)                     \
+       setup()                                                 \
+       sti##i buff %IR##r0                                     \
+       check5(sti##i, l, r1, r2, r3, r4, r5)
+/* istr: store r0 through the address held in r1; r2..r5 are checked. */
+#define istr(i, l, r0, r1, r2, r3, r4, r5)                     \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       str##i %IR##r1 %IR##r0                                  \
+       check4(str##i, l, r2, r3, r4, r5)
+/*
+ * istr0: store with one register acting as both the address and the value
+ * operand, mirroring what ildr0 does for loads.  r0 is loaded with buff and
+ * then stored to the address it holds, so the write lands inside buff and
+ * r0 is the only register written; r1..r5 go through check5.
+ * (A body that keeps the address in r1 is just a second copy of istr and
+ * never exercises the aliased form.)
+ */
+#define istr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r0 buff                                       \
+       str##i %IR##r0 %IR##r0                                  \
+       check5(str0##i, l, r1, r2, r3, r4, r5)
+/* istxi: store r0 at base r1 plus immediate displacement off##i. */
+#define istxi(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       stxi##i off##i %IR##r1 %IR##r0                          \
+       check4(stxi##i, l, r2, r3, r4, r5)
+/* istxr: index in r2, base in r1, value in r0; r3..r5 are checked. */
+#define istxr(i, l, r0, r1, r2, r3, r4, r5)                    \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       movi %IR##r2 off##i                                     \
+       stxr##i %IR##r2 %IR##r1 %IR##r0                         \
+       check3(stxr##i, l, r3, r4, r5)
+/* istxr0: r0 is both the index and the value operand; base in r1. */
+#define istxr0(i, l, r0, r1, r2, r3, r4, r5)                   \
+       setup()                                                 \
+       movi %IR##r1 buff                                       \
+       movi %IR##r0 off##i                                     \
+       stxr##i %IR##r0 %IR##r1 %IR##r0                         \
+       check4(stxr0##i, l, r2, r3, r4, r5)
+/* istxr1: r0 is both the base and the value operand; index in r1. */
+#define istxr1(i, l, r0, r1, r2, r3, r4, r5)                   \
+       setup()                                                 \
+       movi %IR##r0 buff                                       \
+       movi %IR##r1 off##i                                     \
+       stxr##i %IR##r1 %IR##r0 %IR##r0                         \
+       check4(stxr1##i, l, r2, r3, r4, r5)
+/* xxsti: run all seven store probes for one type suffix. */
+#define  xxsti(i, l, r0, r1, r2, r3, r4, r5)                   \
+         isti(i, l, r0, r1, r2, r3, r4, r5)                    \
+         istr(i, l, r0, r1, r2, r3, r4, r5)                    \
+        istr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+        istxi(i, l, r0, r1, r2, r3, r4, r5)                    \
+        istxr(i, l, r0, r1, r2, r3, r4, r5)                    \
+       istxr0(i, l, r0, r1, r2, r3, r4, r5)                    \
+       istxr1(i, l, r0, r1, r2, r3, r4, r5)
+/* xxxsti: the _l store suffix is only exercised when __WORDSIZE is not 32. */
+#if __WORDSIZE == 32
+#define xxxsti(l, r0, r1, r2, r3, r4, r5)
+#else
+#define xxxsti(l, r0, r1, r2, r3, r4, r5)                      \
+        xxsti( _l, l, r0, r1, r2, r3, r4, r5)
+#endif
+/* xsti: every store suffix (_c, _s, _i and, via xxxsti, _l) for one
+ * register assignment. */
+#define   xsti(l, r0, r1, r2, r3, r4, r5)                      \
+        xxsti( _c, l, r0, r1, r2, r3, r4, r5)                  \
+        xxsti( _s, l, r0, r1, r2, r3, r4, r5)                  \
+        xxsti( _i, l, r0, r1, r2, r3, r4, r5)                  \
+       xxxsti(l, r0, r1, r2, r3, r4, r5)
+/* sti(l): ia64 uses only the identity register order; everywhere else the
+ * test is repeated for all six rotations of registers 0..5. */
+#if __ia64__
+#  define sti(l)                                               \
+        xsti(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define sti(l)                                               \
+        xsti(l, 0, 1, 2, 3, 4, 5)                              \
+        xsti(l, 1, 2, 3, 4, 5, 0)                              \
+        xsti(l, 2, 3, 4, 5, 0, 1)                              \
+        xsti(l, 3, 4, 5, 0, 1, 2)                              \
+        xsti(l, 4, 5, 0, 1, 2, 3)                              \
+        xsti(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+/*
+ * Floating-point store probes.  Each one runs setup() and setup##f(), stores
+ * FP register f0 with suffix f, then checks the integer registers that were
+ * not used for addressing and, through checkf5, FP registers f1..f5.
+ */
+/* fsti: absolute address buff; all six integer registers go through check6. */
+#define fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       setup()                                                 \
+       setup##f()                                              \
+       sti##f buff %FR##f0                                     \
+       check6(sti##f##r0##f0, l, r0, r1, r2, r3, r4, r5)       \
+       checkf5(f, sti##r0##f0, l, f1, f2, f3, f4, f5)
+/* fstr: address in integer register r0. */
+#define fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       str##f %IR##r0 %FR##f0                                  \
+       check5(str##f##r0##f0, l, r1, r2, r3, r4, r5)           \
+       checkf5(f, str##r0##f0, l, f1, f2, f3, f4, f5)
+/* fstxi: base in r0 plus immediate displacement off##f. */
+#define fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       stxi##f off##f %IR##r0 %FR##f0                          \
+       check5(stxi##f##r0##f0, l, r1, r2, r3, r4, r5)          \
+       checkf5(f, stxi##r0##f0, l, f1, f2, f3, f4, f5)
+/* fstxr: index in r1, base in r0; r2..r5 are checked. */
+#define fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       setup()                                                 \
+       setup##f()                                              \
+       movi %IR##r0 buff                                       \
+       movi %IR##r1 off##f                                     \
+       stxr##f %IR##r1 %IR##r0 %FR##f0                         \
+       check4(stxr##f##r0##f0, l, r2, r3, r4, r5)              \
+       checkf5(f, stxr##r0##f0, l, f1, f2, f3, f4, f5)
+/* xstf: all four FP store probes for one suffix. */
+#define          xstf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)      \
+       fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+       fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+       fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+       fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+/* xxstf: xstf for the _f and then the _d suffix. */
+#define         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
+         xstf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)     \
+         xstf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+/* ixstf: ia64 runs one register assignment; other targets run six FP
+ * rotations (fxstf) for each of six integer rotations. */
+#if __ia64__
+#  define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+# define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)                \
+         fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+/* stf(l): entry point, starting from register order 0..5 for both lists. */
+#define   stf(l)                                               \
+       ixstf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
+
+/* Need a valid jump or simple optimization will remove it */
+/*
+ * bri: integer branch with an immediate right operand.  r0 is loaded with
+ * il, then b<op>i<u> compares it against ir and targets the local label
+ * i<l><op><r0>.  If the branch is not taken, execution falls into
+ * "calli @abort"; past the label, r1..r5 go through check5.  The label
+ * embeds l, op and r0, so every expansion must use a distinct combination.
+ */
+#define bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)          \
+       setup()                                                 \
+       movi %IR##r0 il                                         \
+       b##op##i##u i##l##op##r0 %IR##r0 ir                     \
+       calli @abort                                            \
+i##l##op##r0:                                                  \
+       check5(i, l, r1, r2, r3, r4, r5)
+/* brr: same as bri but with the right operand in register r1, so only
+ * r2..r5 are checked. */
+#define brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)          \
+       setup()                                                 \
+       movi %IR##r0 il                                         \
+       movi %IR##r1 ir                                         \
+       b##op##r##u r##l##op##r0 %IR##r0 %IR##r1                \
+       calli @abort                                            \
+r##l##op##r0:                                                  \
+       check4(r, l, r2, r3, r4, r5)
+/* xjmpi: immediate and register form for one register assignment. */
+#define  xjmpi(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)       \
+          bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)        \
+          brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)
+/* jmpi(l, op, u, il, ir): u is the optional suffix (empty or _u).  ia64
+ * uses the identity register order only; other targets rotate 0..5. */
+#if __ia64__
+#  define jmpi(l, op, u, il, ir)                               \
+        xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+#  define jmpi(l, op, u, il, ir)                               \
+        xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)              \
+        xjmpi(l, op, u, il, ir, 1, 2, 3, 4, 5, 0)              \
+        xjmpi(l, op, u, il, ir, 2, 3, 4, 5, 0, 1)              \
+        xjmpi(l, op, u, il, ir, 3, 4, 5, 0, 1, 2)              \
+        xjmpi(l, op, u, il, ir, 4, 5, 0, 1, 2, 3)              \
+        xjmpi(l, op, u, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
+
+/*
+ * bfi: floating-point branch with an immediate right operand.  Only
+ * setup##f() is run (the integer setup() is not used here).  f0 is loaded
+ * with il, b<op>i<f> compares it against ir and targets the label
+ * i<l><op><f><f0>; a branch that is not taken falls into "calli @abort".
+ * Past the label, f1..f5 go through checkf5.
+ */
+#define bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)          \
+       setup##f()                                              \
+       movi##f %FR##f0 il                                      \
+       b##op##i##f i##l##op##f##f0 %FR##f0 ir                  \
+       calli @abort                                            \
+i##l##op##f##f0:                                               \
+       checkf5(f, i, l, f1, f2, f3, f4, f5)
+/* bff: same as bfi but with the right operand in FP register f1, so only
+ * f2..f5 are checked. */
+#define bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)          \
+       setup##f()                                              \
+       movi##f %FR##f0 il                                      \
+       movi##f %FR##f1 ir                                      \
+       b##op##r##f r##l##op##f##f0 %FR##f0 %FR##f1             \
+       calli @abort                                            \
+r##l##op##f##f0:                                               \
+       checkf4(f, r, l, f2, f3, f4, f5)
+/* xjmpf: immediate and register form for one suffix. */
+#define  xjmpf(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)       \
+          bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)        \
+          bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)
+/* xxjmpf: xjmpf for the _f and then the _d suffix. */
+#define xxjmpf(l, op, il, ir, f0, f1, f2, f3, f4, f5)          \
+        xjmpf(_f, l, op, il, ir, f0, f1, f2, f3, f4, f5)       \
+        xjmpf(_d, l, op, il, ir, f0, f1, f2, f3, f4, f5)
+/* jmpf(l, op, il, ir): ia64 uses the identity FP register order only;
+ * other targets rotate 0..5. */
+#if __ia64__
+#  define jmpf(l, op, il, ir)                                  \
+       xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+#  define jmpf(l, op, il, ir)                                  \
+       xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5)                 \
+       xxjmpf(l, op, il, ir, 1, 2, 3, 4, 5, 0)                 \
+       xxjmpf(l, op, il, ir, 2, 3, 4, 5, 0, 1)                 \
+       xxjmpf(l, op, il, ir, 3, 4, 5, 0, 1, 2)                 \
+       xxjmpf(l, op, il, ir, 4, 5, 0, 1, 2, 3)                 \
+       xxjmpf(l, op, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Data: buff is the 16-byte scratch area addressed by the load/store
+ * probes; ok is the string printed when the whole run completes. */
+.data  32
+buff:
+.size  16
+ok:
+.c     "ok\n"
+
+/* Code: a single function that expands every probe in sequence.  __LINE__
+ * is passed as the l argument of each macro and ends up in the check tags
+ * and branch labels. */
+.code
+       prolog
+
+         alu(__LINE__, add)
+        alux(__LINE__, add)
+         fop(__LINE__, add)
+         alu(__LINE__, sub)
+        alux(__LINE__, sub)
+         fop(__LINE__, sub)
+         alu(__LINE__, mul)
+         fop(__LINE__, mul)
+         alu(__LINE__, div)
+       alu_u(__LINE__, div)
+         fop(__LINE__, div)
+         alu(__LINE__, rem)
+       alu_u(__LINE__, rem)
+         alu(__LINE__, and)
+         alu(__LINE__, or)
+         alu(__LINE__, xor)
+         alu(__LINE__, lsh)
+         alu(__LINE__, rsh)
+       alu_u(__LINE__, rsh)
+         uni(__LINE__, negr)
+         unf(__LINE__, negr)
+         uni(__LINE__, comr)
+         unf(__LINE__, absr)
+         unf(__LINE__, sqrtr)
+         alu(__LINE__, lt)
+       alu_u(__LINE__, lt)
+        fcmp(__LINE__, lt)
+         alu(__LINE__, le)
+       alu_u(__LINE__, le)
+        fcmp(__LINE__, le)
+         alu(__LINE__, eq)
+        fcmp(__LINE__, eq)
+         alu(__LINE__, ge)
+       alu_u(__LINE__, ge)
+        fcmp(__LINE__, ge)
+         alu(__LINE__, gt)
+       alu_u(__LINE__, gt)
+        fcmp(__LINE__, gt)
+         alu(__LINE__, ne)
+        fcmp(__LINE__, ne)
+        fcmp(__LINE__, unlt)
+        fcmp(__LINE__, unle)
+        fcmp(__LINE__, uneq)
+        fcmp(__LINE__, unge)
+        fcmp(__LINE__, ungt)
+        fcmp(__LINE__, ltgt)
+        fcmp(__LINE__, ord)
+        fcmp(__LINE__, unord)
+         mvi(__LINE__)
+         mvf(__LINE__)
+         uni(__LINE__, extr_c)
+         uni(__LINE__, extr_uc)
+         uni(__LINE__, extr_s)
+         uni(__LINE__, extr_us)
+#if __WORDSIZE == 64
+         uni(__LINE__, extr_ui)
+#endif
+         uni(__LINE__, htonr)
+         f2f(__LINE__, _f, extr_d_f)
+         f2f(__LINE__, _d, extr_f_d)
+         f2i(__LINE__, truncr)
+         i2f(__LINE__, extr)
+         ldi(__LINE__)
+         ldf(__LINE__)
+         sti(__LINE__)
+         stf(__LINE__)
+/* Branch probes: the operand pairs are picked so that every branch is
+ * expected to be taken; a branch that falls through reaches the
+ * "calli @abort" inside the jmpi/jmpf expansion. */
+        jmpi(__LINE__, lt,   ,  0,  1)
+        jmpi(__LINE__, lt, _u,  0,  1)
+        jmpf(__LINE__, lt,      0,  1)
+        jmpi(__LINE__, le,   ,  1,  1)
+        jmpi(__LINE__, le, _u,  1,  1)
+        jmpf(__LINE__, le,      1,  1)
+        jmpi(__LINE__, eq,   , -1, -1)
+        jmpf(__LINE__, eq,     -1, -1)
+        jmpi(__LINE__, ge,   ,  2,  2)
+        jmpi(__LINE__, ge, _u,  2,  2)
+        jmpf(__LINE__, ge,      2,  2)
+        jmpi(__LINE__, gt,   ,  2,  1)
+        jmpi(__LINE__, gt, _u,  2,  1)
+        jmpf(__LINE__, gt,      2,  1)
+        jmpi(__LINE__, ne,   ,  3,  2)
+        jmpf(__LINE__, ne,      3,  2)
+        jmpi(__LINE__, ms, , 1, 1)
+        jmpi(__LINE__, mc, , 1, 2)
+/* Word-sized constants for the overflow branches: ix7f has every bit but
+ * the top one set, ix80 has only the top bit set, ixff has all bits set. */
+#if __WORDSIZE == 32
+#  define ix7f         0x7fffffff
+#  define ix80         0x80000000
+#  define ixff         0xffffffff
+#else
+#  define ix7f         0x7fffffffffffffff
+#  define ix80         0x8000000000000000
+#  define ixff         0xffffffffffffffff
+#endif
+        jmpi(__LINE__, oadd,   , ix7f, 1)
+        jmpi(__LINE__, oadd, _u, ixff, 1)
+        jmpi(__LINE__, xadd,   , ix80, 1)
+        jmpi(__LINE__, xadd, _u, ix7f, 1)
+        jmpi(__LINE__, osub,   , ix80, 1)
+        jmpi(__LINE__, osub, _u,    0, 1)
+        jmpi(__LINE__, xsub,   , ix7f, 1)
+        jmpi(__LINE__, xsub, _u, ix80, 1)
+        jmpf(__LINE__, unlt,        0, 1)
+        jmpf(__LINE__, unle,        1, 1)
+        jmpf(__LINE__, uneq,        2, 2)
+        jmpf(__LINE__, unge,        3, 3)
+        jmpf(__LINE__, ungt,        4, 3)
+        jmpf(__LINE__, ltgt,        5, 4)
+        jmpf(__LINE__, ord,         0, 0)
+        jmpf(__LINE__, unord,       0, $(0.0 / 0.0))
+
+/* Reached only if no probe aborted: print "ok\n" through printf. */
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/ctramp.c b/deps/lightning/check/ctramp.c
new file mode 100644 (file)
index 0000000..941c7c1
--- /dev/null
@@ -0,0 +1,123 @@
+#include <lightning.h>
+#include <stdio.h>
+
+/* JIT state the jit_*() calls in main() operate on; main() switches it
+ * between frame_jit and tramp_jit. */
+jit_state_t            *_jit;
+/*
+ * Top-of-stack pointer of the "language" stack.  The generated code reads
+ * and writes it with word-sized jit_ldi()/jit_sti(), so it has to be exactly
+ * one jit_word_t wide; a plain long is too small on targets where
+ * jit_word_t is wider than long (e.g. LLP64), and the store would then run
+ * past the object.
+ */
+jit_word_t              top;
+/* Backing store of the "language" stack; the generated code addresses it
+ * in slots of sizeof(jit_word_t) bytes. */
+jit_word_t              stk[1024];
+
+/*
+ * Emits two independent pieces of code and runs them:
+ *
+ *  - "main": a regular function with jit_frame(64).  It points `top` at
+ *    `stk`, puts a return address in R0 and the argument 32 in V0, jumps to
+ *    the address it received as its only argument, and after control comes
+ *    back prints the value left in V0.
+ *  - "xfibs": a jit_tramp(64) body entered by jump rather than by call.  It
+ *    computes x(N) = 1 for N < 2, else x(N-1) + 1 + x(N-2), with N and the
+ *    result in V0, the return address in R0, and `stk`/`top` as its own
+ *    call stack (four words per activation).
+ *
+ * Returns 0.
+ */
+int
+main(int argc, char *argv[])
+{
+    void                *address;
+    void               (*call)(void*);
+    jit_state_t                 *frame_jit, *tramp_jit;
+    jit_node_t          *arg, *done, *xfibs, *out, *ret1, *ret2;
+
+    init_jit(argv[0]);
+    _jit = frame_jit = jit_new_state();
+    jit_name("main");
+    jit_prolog();
+    jit_frame(64);
+
+    arg = jit_arg();
+    jit_getarg(JIT_R1, arg);
+
+    /* Initialize language stack */
+    jit_movi(JIT_R0, (jit_word_t)stk);
+    jit_sti(&top, JIT_R0);
+
+    /* return address: the immediate is patched below to the address of the
+     * code that follows the jump */
+    done = jit_movi(JIT_R0, 0);
+    /* argument */
+    jit_movi(JIT_V0, 32);
+    /* jump to code */
+    jit_jmpr(JIT_R1);
+    jit_patch(done);
+
+    jit_prepare();
+    jit_pushargi((jit_word_t)"xfibs(%d) = %d\n");
+    jit_ellipsis();
+    jit_pushargi(32);
+    jit_pushargr(JIT_V0);
+    jit_finishi(printf);
+    jit_ret();
+    jit_epilog();
+    call = jit_emit();
+    jit_clear_state();
+
+/* Size of one language-stack slot.  Kept signed so that the negative
+ * displacements below (-SIZE * n) are ordinary negative words instead of
+ * wrapped-around size_t values. */
+#define SIZE                           ((jit_word_t)sizeof(jit_word_t))
+    _jit = tramp_jit = jit_new_state();
+    jit_name("xfibs");
+    xfibs = jit_label();
+    jit_prolog();
+    jit_tramp(64);
+    out = jit_blti(JIT_V0, 2);
+    jit_subi(JIT_V1, JIT_V0, 1);       /* V1 = N-1 */
+    jit_subi(JIT_V2, JIT_V0, 2);       /* V2 = N-2 */
+
+    /* save return address */
+    jit_ldi(JIT_R1, &top);
+    jit_stxi(SIZE * 0, JIT_R1, JIT_R0);
+    /* save operands */
+    jit_stxi(SIZE * 1, JIT_R1, JIT_V0);
+    jit_stxi(SIZE * 2, JIT_R1, JIT_V1);
+    jit_stxi(SIZE * 3, JIT_R1, JIT_V2);
+    /* adjust "language" stack */
+    jit_addi(JIT_R1, JIT_R1, SIZE * 4);
+    jit_sti(&top, JIT_R1);
+
+    /* return address */
+    ret1 = jit_movi(JIT_R0, 0);
+    /* argument */
+    jit_movr(JIT_V0, JIT_V1);
+    /* jump back to the entry label (recursive "call") */
+    jit_patch_at(jit_jmpi(), xfibs);
+    jit_patch(ret1);
+    jit_movr(JIT_V1, JIT_V0);          /* V1 = xfibs(N-1) */
+    /* save V1: overwrite this activation's V1 slot, which now sits two
+     * words below top */
+    jit_ldi(JIT_R1, &top);
+    jit_stxi(-SIZE * 2, JIT_R1, JIT_V1);
+
+    /* reload V2 (N-2) from the slot one word below top */
+    jit_ldxi(JIT_V2, JIT_R1, -SIZE * 1);
+
+    /* return address */
+    ret2 = jit_movi(JIT_R0, 0);
+    /* argument */
+    jit_movr(JIT_V0, JIT_V2);
+    /* jump back to the entry label (recursive "call") */
+    jit_patch_at(jit_jmpi(), xfibs);
+    jit_patch(ret2);
+    jit_movr(JIT_V2, JIT_V0);          /* V2 = xfibs(N-2) */
+
+    /* reload return address */
+    jit_ldi(JIT_R1, &top);
+    jit_subi(JIT_R1, JIT_R1, SIZE * 4);
+    jit_ldxi(JIT_R0, JIT_R1, SIZE * 0);
+    /* reload operands; the V1 slot now holds xfibs(N-1) */
+    jit_ldxi(JIT_V0, JIT_R1, SIZE * 1);
+    jit_ldxi(JIT_V1, JIT_R1, SIZE * 2);
+    /* V2 already loaded */
+    /* update "language" stack */
+    jit_sti(&top, JIT_R1);
+
+    /* result = xfibs(N-1) + 1 + xfibs(N-2) */
+    jit_addi(JIT_V1, JIT_V1, 1);
+    jit_addr(JIT_V0, JIT_V1, JIT_V2);
+    jit_jmpr(JIT_R0);
+
+    /* N < 2: result is 1 */
+    jit_patch(out);
+    jit_movi(JIT_V0, 1);
+    jit_jmpr(JIT_R0);
+    jit_epilog();
+
+    address = jit_emit();
+    jit_clear_state();
+
+    (*call)(address);
+
+    /* _jit still refers to tramp_jit here */
+    jit_destroy_state();
+
+    _jit = frame_jit;
+    jit_destroy_state();
+    /* release what init_jit() set up */
+    finish_jit();
+    return 0;
+}
diff --git a/deps/lightning/check/cva_list.c b/deps/lightning/check/cva_list.c
new file mode 100644 (file)
index 0000000..b0e668c
--- /dev/null
@@ -0,0 +1,1187 @@
+#include <lightning.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+/* Short names for the two argument types used in the signatures below. */
+#define W              jit_word_t
+#define D              jit_float64_t
+/* VASTART(A): declares the local va_list `ap` and starts it after the last
+ * named parameter A.  It expands to a declaration followed by a call, so it
+ * has to appear where a declaration is allowed. */
+#define VASTART(A)                                                     \
+    va_list            ap;                                             \
+    va_start(ap, A)
+/* VARGn(): fetch a<n> as jit_word_t and chain to VARG<n+1>, so invoking
+ * VARGn reads a<n>..a10 in order; VARG10 finishes with va_end(ap).  The
+ * caller must have declared the a<n> locals and run VASTART. */
+#define VARG2()                a2 = va_arg(ap, jit_word_t);    VARG3()
+#define VARG3()                a3 = va_arg(ap, jit_word_t);    VARG4()
+#define VARG4()                a4 = va_arg(ap, jit_word_t);    VARG5()
+#define VARG5()                a5 = va_arg(ap, jit_word_t);    VARG6()
+#define VARG6()                a6 = va_arg(ap, jit_word_t);    VARG7()
+#define VARG7()                a7 = va_arg(ap, jit_word_t);    VARG8()
+#define VARG8()                a8 = va_arg(ap, jit_word_t);    VARG9()
+#define VARG9()                a9 = va_arg(ap, jit_word_t);    VARG10()
+#define VARG10()       a10 = va_arg(ap, jit_word_t);   va_end(ap)
+
+/* VARGDn(): same chain with every argument fetched as jit_float64_t. */
+#define VARGD2()       a2 = va_arg(ap, jit_float64_t); VARGD3()
+#define VARGD3()       a3 = va_arg(ap, jit_float64_t); VARGD4()
+#define VARGD4()       a4 = va_arg(ap, jit_float64_t); VARGD5()
+#define VARGD5()       a5 = va_arg(ap, jit_float64_t); VARGD6()
+#define VARGD6()       a6 = va_arg(ap, jit_float64_t); VARGD7()
+#define VARGD7()       a7 = va_arg(ap, jit_float64_t); VARGD8()
+#define VARGD8()       a8 = va_arg(ap, jit_float64_t); VARGD9()
+#define VARGD9()       a9 = va_arg(ap, jit_float64_t); VARGD10()
+#define VARGD10()      a10 = va_arg(ap, jit_float64_t);va_end(ap)
+
+/* IDVARGn(): alternating chain for the word-first signatures: even-numbered
+ * arguments are jit_float64_t, odd-numbered ones are jit_word_t. */
+#define IDVARG2()      a2 = va_arg(ap, jit_float64_t); IDVARG3()
+#define IDVARG3()      a3 = va_arg(ap, jit_word_t);    IDVARG4()
+#define IDVARG4()      a4 = va_arg(ap, jit_float64_t); IDVARG5()
+#define IDVARG5()      a5 = va_arg(ap, jit_word_t);    IDVARG6()
+#define IDVARG6()      a6 = va_arg(ap, jit_float64_t); IDVARG7()
+#define IDVARG7()      a7 = va_arg(ap, jit_word_t);    IDVARG8()
+#define IDVARG8()      a8 = va_arg(ap, jit_float64_t); IDVARG9()
+#define IDVARG9()      a9 = va_arg(ap, jit_word_t);    IDVARG10()
+#define IDVARG10()     a10 = va_arg(ap, jit_float64_t);va_end(ap)
+
+/* DIVARGn(): alternating chain for the double-first signatures:
+ * even-numbered arguments are jit_word_t, odd-numbered ones are
+ * jit_float64_t. */
+#define DIVARG2()      a2 = va_arg(ap, jit_word_t);    DIVARG3()
+#define DIVARG3()      a3 = va_arg(ap, jit_float64_t); DIVARG4()
+#define DIVARG4()      a4 = va_arg(ap, jit_word_t);    DIVARG5()
+#define DIVARG5()      a5 = va_arg(ap, jit_float64_t); DIVARG6()
+#define DIVARG6()      a6 = va_arg(ap, jit_word_t);    DIVARG7()
+#define DIVARG7()      a7 = va_arg(ap, jit_float64_t); DIVARG8()
+#define DIVARG8()      a8 = va_arg(ap, jit_word_t);    DIVARG9()
+#define DIVARG9()      a9 = va_arg(ap, jit_float64_t); DIVARG10()
+#define DIVARG10()     a10 = va_arg(ap, jit_word_t);   va_end(ap)
+
+/* CHECK(): abort() unless a1..a10 hold the values 1..10 in order. */
+#define CHECK()                                                                \
+    do {                                                               \
+       if (a1 != 1 || a2 != 2 || a3 != 3 || a4 != 4 || a5 != 5 ||      \
+           a6 != 6 || a7 != 7 || a8 != 8 || a9 != 9 || a10 != 10)      \
+           abort();                                                    \
+    } while (0)
+
+
+/* One named word, then nine variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void i_iiiiiiiii(W a1, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1);
+
+    va_start(args, a1);
+    for (pos = 2; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Two named words, then eight variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void ii_iiiiiiii(W a1, W a2, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2);
+
+    va_start(args, a2);
+    for (pos = 3; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Three named words, then seven variadic words; aborts unless the ten
+ * values are 1..10 in order. */
+void iii_iiiiiii(W a1, W a2, W a3, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3);
+
+    va_start(args, a3);
+    for (pos = 4; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Four named words, then six variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void iiii_iiiiii(W a1, W a2, W a3, W a4, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4);
+
+    va_start(args, a4);
+    for (pos = 5; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Five named words, then five variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void iiiii_iiiii(W a1, W a2, W a3, W a4, W a5, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5);
+
+    va_start(args, a5);
+    for (pos = 6; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Six named words, then four variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void iiiiii_iiii(W a1, W a2, W a3, W a4, W a5, W a6, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6);
+
+    va_start(args, a6);
+    for (pos = 7; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Seven named words, then three variadic words; aborts unless the ten
+ * values are 1..10 in order. */
+void iiiiiii_iii(W a1, W a2, W a3, W a4, W a5, W a6, W a7, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6) | (a7 != 7);
+
+    va_start(args, a7);
+    for (pos = 8; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Eight named words, then two variadic words; aborts unless the ten values
+ * are 1..10 in order. */
+void iiiiiiii_ii(W a1, W a2, W a3, W a4, W a5, W a6, W a7, W a8, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6) | (a7 != 7) | (a8 != 8);
+
+    va_start(args, a8);
+    for (pos = 9; pos <= 10; pos++)
+        bad |= (va_arg(args, W) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Nine named words, then one variadic word; aborts unless the ten values
+ * are 1..10 in order. */
+void iiiiiiiii_i(W a1, W a2, W a3, W a4, W a5, W a6, W a7, W a8, W a9, ...)
+{
+    va_list     args;
+    W           last;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6) | (a7 != 7) | (a8 != 8) |
+                      (a9 != 9);
+
+    va_start(args, a9);
+    last = va_arg(args, W);
+    va_end(args);
+    if (bad || last != 10)
+        abort();
+}
+
+/* One named double, then nine variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void d_ddddddddd(D a1, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1);
+
+    va_start(args, a1);
+    for (pos = 2; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Two named doubles, then eight variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void dd_dddddddd(D a1, D a2, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2);
+
+    va_start(args, a2);
+    for (pos = 3; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Three named doubles, then seven variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void ddd_ddddddd(D a1, D a2, D a3, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3);
+
+    va_start(args, a3);
+    for (pos = 4; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Four named doubles, then six variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void dddd_dddddd(D a1, D a2, D a3, D a4, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4);
+
+    va_start(args, a4);
+    for (pos = 5; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Five named doubles, then five variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void ddddd_ddddd(D a1, D a2, D a3, D a4, D a5, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5);
+
+    va_start(args, a5);
+    for (pos = 6; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Six named doubles, then four variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void dddddd_dddd(D a1, D a2, D a3, D a4, D a5, D a6, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6);
+
+    va_start(args, a6);
+    for (pos = 7; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Seven named doubles, then three variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void ddddddd_ddd(D a1, D a2, D a3, D a4, D a5, D a6, D a7, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6) | (a7 != 7);
+
+    va_start(args, a7);
+    for (pos = 8; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+/* Eight named doubles, then two variadic doubles; aborts unless the ten
+ * values are 1..10 in order. */
+void dddddddd_dd(D a1, D a2, D a3, D a4, D a5, D a6, D a7, D a8, ...)
+{
+    va_list     args;
+    int         pos;
+    int         bad = (a1 != 1) | (a2 != 2) | (a3 != 3) | (a4 != 4) |
+                      (a5 != 5) | (a6 != 6) | (a7 != 7) | (a8 != 8);
+
+    va_start(args, a8);
+    for (pos = 9; pos <= 10; pos++)
+        bad |= (va_arg(args, D) != pos);
+    va_end(args);
+    if (bad)
+        abort();
+}
+
+void ddddddddd_d(D a1, D a2, D a3, D a4, D a5, D a6, D a7, D a8, D a9, ...) /* variadic callee: 9 named D arguments, the last one arrives through "..." */
+{
+    D          a10;
+    VASTART(a9); /* a9 is the last named parameter */
+    VARGD10(); /* defined outside this view; presumably loads a10 (D) from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void i_didididid(W a1, ...) /* variadic callee, alternating W/D starting with W: 1 named argument */
+{
+    W          a3, a5, a7, a9; /* odd-numbered arguments are W */
+    D          a2, a4, a6, a8, a10; /* even-numbered arguments are D */
+    VASTART(a1); /* a1 is the last named parameter */
+    IDVARG2(); /* defined outside this view; presumably loads a2..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void id_idididid(W a1, D a2, ...) /* variadic callee, alternating W/D starting with W: 2 named arguments */
+{
+    W          a3, a5, a7, a9; /* odd-numbered arguments are W */
+    D          a4, a6, a8, a10; /* even-numbered arguments are D */
+    VASTART(a2); /* a2 is the last named parameter */
+    IDVARG3(); /* defined outside this view; presumably loads a3..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void idi_dididid(W a1, D a2, W a3, ...) /* variadic callee, alternating W/D starting with W: 3 named arguments */
+{
+    W          a5, a7, a9; /* odd-numbered arguments are W */
+    D          a4, a6, a8, a10; /* even-numbered arguments are D */
+    VASTART(a3); /* a3 is the last named parameter */
+    IDVARG4(); /* defined outside this view; presumably loads a4..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void idid_ididid(W a1, D a2, W a3, D a4, ...) /* variadic callee, alternating W/D starting with W: 4 named arguments */
+{
+    W          a5, a7, a9; /* odd-numbered arguments are W */
+    D          a6, a8, a10; /* even-numbered arguments are D */
+    VASTART(a4); /* a4 is the last named parameter */
+    IDVARG5(); /* defined outside this view; presumably loads a5..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void ididi_didid(W a1, D a2, W a3, D a4, W a5, ...) /* variadic callee, alternating W/D starting with W: 5 named arguments */
+{
+    W          a7, a9; /* odd-numbered arguments are W */
+    D          a6, a8, a10; /* even-numbered arguments are D */
+    VASTART(a5); /* a5 is the last named parameter */
+    IDVARG6(); /* defined outside this view; presumably loads a6..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void ididid_idid(W a1, D a2, W a3, D a4, W a5, D a6, ...) /* variadic callee, alternating W/D starting with W: 6 named arguments */
+{
+    W          a7, a9; /* odd-numbered arguments are W */
+    D          a8, a10; /* even-numbered arguments are D */
+    VASTART(a6); /* a6 is the last named parameter */
+    IDVARG7(); /* defined outside this view; presumably loads a7..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void idididi_did(W a1, D a2, W a3, D a4, W a5, D a6, W a7, ...) /* variadic callee, alternating W/D starting with W: 7 named arguments */
+{
+    W          a9; /* odd-numbered arguments are W */
+    D          a8, a10; /* even-numbered arguments are D */
+    VASTART(a7); /* a7 is the last named parameter */
+    IDVARG8(); /* defined outside this view; presumably loads a8..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void idididid_id(W a1, D a2, W a3, D a4, W a5, D a6, W a7, D a8, ...) /* variadic callee, alternating W/D starting with W: 8 named arguments */
+{
+    W          a9; /* odd-numbered arguments are W */
+    D          a10; /* even-numbered arguments are D */
+    VASTART(a8); /* a8 is the last named parameter */
+    IDVARG9(); /* defined outside this view; presumably loads a9 and a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void ididididi_d(W a1, D a2, W a3, D a4, W a5, D a6, W a7, D a8, W a9, ...) /* variadic callee, alternating W/D starting with W: 9 named arguments */
+{
+    D          a10; /* the only argument left for the variadic list */
+    VASTART(a9); /* a9 is the last named parameter */
+    IDVARG10(); /* defined outside this view; presumably loads a10 (D) from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void d_ididididi(D a1, ...) /* variadic callee, alternating D/W starting with D: 1 named argument */
+{
+    W          a2, a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a3, a5, a7, a9; /* odd-numbered arguments are D */
+    VASTART(a1); /* a1 is the last named parameter */
+    DIVARG2(); /* defined outside this view; presumably loads a2..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void di_didididi(D a1, W a2, ...) /* variadic callee, alternating D/W starting with D: 2 named arguments */
+{
+    W          a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a3, a5, a7, a9; /* odd-numbered arguments are D */
+    VASTART(a2); /* a2 is the last named parameter */
+    DIVARG3(); /* defined outside this view; presumably loads a3..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void did_idididi(D a1, W a2, D a3, ...) /* variadic callee, alternating D/W starting with D: 3 named arguments */
+{
+    W          a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a5, a7, a9; /* odd-numbered arguments are D */
+    VASTART(a3); /* a3 is the last named parameter */
+    DIVARG4(); /* defined outside this view; presumably loads a4..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void didi_dididi(D a1, W a2, D a3, W a4, ...) /* variadic callee, alternating D/W starting with D: 4 named arguments */
+{
+    W          a6, a8, a10; /* even-numbered arguments are W */
+    D          a5, a7, a9; /* odd-numbered arguments are D */
+    VASTART(a4); /* a4 is the last named parameter */
+    DIVARG5(); /* defined outside this view; presumably loads a5..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void didid_ididi(D a1, W a2, D a3, W a4, D a5, ...) /* variadic callee, alternating D/W starting with D: 5 named arguments */
+{
+    W          a6, a8, a10; /* even-numbered arguments are W */
+    D          a7, a9; /* odd-numbered arguments are D */
+    VASTART(a5); /* a5 is the last named parameter */
+    DIVARG6(); /* defined outside this view; presumably loads a6..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void dididi_didi(D a1, W a2, D a3, W a4, D a5, W a6, ...) /* variadic callee, alternating D/W starting with D: 6 named arguments */
+{
+    W          a8, a10; /* even-numbered arguments are W */
+    D          a7, a9; /* odd-numbered arguments are D */
+    VASTART(a6); /* a6 is the last named parameter */
+    DIVARG7(); /* defined outside this view; presumably loads a7..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void dididid_idi(D a1, W a2, D a3, W a4, D a5, W a6, D a7, ...) /* variadic callee, alternating D/W starting with D: 7 named arguments */
+{
+    W          a8, a10; /* even-numbered arguments are W */
+    D          a9; /* odd-numbered arguments are D */
+    VASTART(a7); /* a7 is the last named parameter */
+    DIVARG8(); /* defined outside this view; presumably loads a8..a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void didididi_di(D a1, W a2, D a3, W a4, D a5, W a6, D a7, W a8, ...) /* variadic callee, alternating D/W starting with D: 8 named arguments */
+{
+    W          a10; /* even-numbered arguments are W */
+    D          a9; /* odd-numbered arguments are D */
+    VASTART(a8); /* a8 is the last named parameter */
+    DIVARG9(); /* defined outside this view; presumably loads a9 and a10 from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void didididid_i(D a1, W a2, D a3, W a4, D a5, W a6, D a7, W a8, D a9, ...) /* variadic callee, alternating D/W starting with D: 9 named arguments */
+{
+    W          a10; /* the only argument left for the variadic list */
+    VASTART(a9); /* a9 is the last named parameter */
+    DIVARG10(); /* defined outside this view; presumably loads a10 (W) from the variadic list */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_i_iiiiiiiii(W a1, va_list ap) /* va_list flavour: 1 W argument plus a caller-supplied va_list; called from jitted code in main() */
+{
+    W          a2, a3, a4, a5, a6, a7, a8, a9, a10;
+    VARG2(); /* defined outside this view; presumably loads a2..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_ii_iiiiiiii(W a1, W a2, va_list ap) /* va_list flavour: 2 W arguments plus a caller-supplied va_list; called from jitted code in main() */
+{
+    W          a3, a4, a5, a6, a7, a8, a9, a10;
+    VARG3(); /* defined outside this view; presumably loads a3..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_iii_iiiiiii(W a1, W a2, W a3, va_list ap) /* va_list flavour: 3 W arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a4, a5, a6, a7, a8, a9, a10;
+    VARG4(); /* defined outside this view; presumably loads a4..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_iiii_iiiiii(W a1, W a2, W a3, W a4, va_list ap) /* va_list flavour: 4 W arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a5, a6, a7, a8, a9, a10;
+    VARG5(); /* defined outside this view; presumably loads a5..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+
+void va_d_ddddddddd(D a1, va_list ap) /* va_list flavour: 1 D argument plus a caller-supplied va_list; called from jitted code in main() */
+{
+    D          a2, a3, a4, a5, a6, a7, a8, a9, a10;
+    VARGD2(); /* defined outside this view; presumably loads a2..a10 (all D) from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_dd_dddddddd(D a1, D a2, va_list ap) /* va_list flavour: 2 D arguments plus a caller-supplied va_list; called from jitted code in main() */
+{
+    D          a3, a4, a5, a6, a7, a8, a9, a10;
+    VARGD3(); /* defined outside this view; presumably loads a3..a10 (all D) from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_ddd_ddddddd(D a1, D a2, D a3, va_list ap) /* va_list flavour: 3 D arguments plus a va_list the jitted caller has already read two values from */
+{
+    D          a4, a5, a6, a7, a8, a9, a10;
+    VARGD4(); /* defined outside this view; presumably loads a4..a10 (all D) from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_dddd_dddddd(D a1, D a2, D a3, D a4, va_list ap) /* va_list flavour: 4 D arguments plus a va_list the jitted caller has already read two values from */
+{
+    D          a5, a6, a7, a8, a9, a10;
+    VARGD5(); /* defined outside this view; presumably loads a5..a10 (all D) from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_i_didididid(W a1, va_list ap) /* va_list flavour, alternating W/D starting with W: 1 explicit argument plus a caller-supplied va_list */
+{
+    W          a3, a5, a7, a9; /* odd-numbered arguments are W */
+    D          a2, a4, a6, a8, a10; /* even-numbered arguments are D */
+    IDVARG2(); /* defined outside this view; presumably loads a2..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_id_idididid(W a1, D a2, va_list ap) /* va_list flavour, alternating W/D starting with W: 2 explicit arguments plus a caller-supplied va_list */
+{
+    W          a3, a5, a7, a9; /* odd-numbered arguments are W */
+    D          a4, a6, a8, a10; /* even-numbered arguments are D */
+    IDVARG3(); /* defined outside this view; presumably loads a3..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_idi_dididid(W a1, D a2, W a3, va_list ap) /* va_list flavour, alternating W/D starting with W: 3 explicit arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a5, a7, a9; /* odd-numbered arguments are W */
+    D          a4, a6, a8, a10; /* even-numbered arguments are D */
+    IDVARG4(); /* defined outside this view; presumably loads a4..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_idid_ididid(W a1, D a2, W a3, D a4, va_list ap) /* va_list flavour, alternating W/D starting with W: 4 explicit arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a5, a7, a9; /* odd-numbered arguments are W */
+    D          a6, a8, a10; /* even-numbered arguments are D */
+    IDVARG5(); /* defined outside this view; presumably loads a5..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_d_ididididi(D a1, va_list ap) /* va_list flavour, alternating D/W starting with D: 1 explicit argument plus a caller-supplied va_list */
+{
+    W          a2, a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a3, a5, a7, a9; /* odd-numbered arguments are D */
+    DIVARG2(); /* defined outside this view; presumably loads a2..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_di_didididi(D a1, W a2, va_list ap) /* va_list flavour, alternating D/W starting with D: 2 explicit arguments plus a caller-supplied va_list */
+{
+    W          a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a3, a5, a7, a9; /* odd-numbered arguments are D */
+    DIVARG3(); /* defined outside this view; presumably loads a3..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_did_idididi(D a1, W a2, D a3, va_list ap) /* va_list flavour, alternating D/W starting with D: 3 explicit arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a4, a6, a8, a10; /* even-numbered arguments are W */
+    D          a5, a7, a9; /* odd-numbered arguments are D */
+    DIVARG4(); /* defined outside this view; presumably loads a4..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+void va_didi_dididi(D a1, W a2, D a3, W a4, va_list ap) /* va_list flavour, alternating D/W starting with D: 4 explicit arguments plus a va_list the jitted caller has already read two values from */
+{
+    W          a6, a8, a10; /* even-numbered arguments are W */
+    D          a5, a7, a9; /* odd-numbered arguments are D */
+    DIVARG5(); /* defined outside this view; presumably loads a5..a10 from ap */
+    CHECK(); /* defined outside this view; presumably validates a1..a10 */
+}
+
+#define PUSH1()                                        jit_pushargi(1) /* PUSHn(): push integer immediates 1..n (the named arguments of a call) */
+#define PUSH2()                PUSH1();                jit_pushargi(2)
+#define PUSH3()                PUSH2();                jit_pushargi(3)
+#define PUSH4()                PUSH3();                jit_pushargi(4)
+#define PUSH5()                PUSH4();                jit_pushargi(5)
+#define PUSH6()                PUSH5();                jit_pushargi(6)
+#define PUSH7()                PUSH6();                jit_pushargi(7)
+#define PUSH8()                PUSH7();                jit_pushargi(8)
+#define PUSH9()                PUSH8();                jit_pushargi(9)
+#define VPUSH2()       jit_pushargi(2);        VPUSH3() /* VPUSHn(): push integer immediates n..10 (used after jit_ellipsis() in main) */
+#define VPUSH3()       jit_pushargi(3);        VPUSH4()
+#define VPUSH4()       jit_pushargi(4);        VPUSH5()
+#define VPUSH5()       jit_pushargi(5);        VPUSH6()
+#define VPUSH6()       jit_pushargi(6);        VPUSH7()
+#define VPUSH7()       jit_pushargi(7);        VPUSH8()
+#define VPUSH8()       jit_pushargi(8);        VPUSH9()
+#define VPUSH9()       jit_pushargi(9);        VPUSH10()
+#define VPUSH10()      jit_pushargi(10); /* NOTE(review): the ';' is part of the macro, so "VPUSHn();" expands to ";;" -- harmless as a statement, but unsafe in an unbraced if/else */
+#define PUSHD1()                               jit_pushargi_d(1) /* PUSHDn(): push double immediates 1..n */
+#define PUSHD2()       PUSHD1();               jit_pushargi_d(2)
+#define PUSHD3()       PUSHD2();               jit_pushargi_d(3)
+#define PUSHD4()       PUSHD3();               jit_pushargi_d(4)
+#define PUSHD5()       PUSHD4();               jit_pushargi_d(5)
+#define PUSHD6()       PUSHD5();               jit_pushargi_d(6)
+#define PUSHD7()       PUSHD6();               jit_pushargi_d(7)
+#define PUSHD8()       PUSHD7();               jit_pushargi_d(8)
+#define PUSHD9()       PUSHD8();               jit_pushargi_d(9)
+#define VPUSHD2()      jit_pushargi_d(2);      VPUSHD3() /* VPUSHDn(): push double immediates n..10 */
+#define VPUSHD3()      jit_pushargi_d(3);      VPUSHD4()
+#define VPUSHD4()      jit_pushargi_d(4);      VPUSHD5()
+#define VPUSHD5()      jit_pushargi_d(5);      VPUSHD6()
+#define VPUSHD6()      jit_pushargi_d(6);      VPUSHD7()
+#define VPUSHD7()      jit_pushargi_d(7);      VPUSHD8()
+#define VPUSHD8()      jit_pushargi_d(8);      VPUSHD9()
+#define VPUSHD9()      jit_pushargi_d(9);      VPUSHD10()
+#define VPUSHD10()     jit_pushargi_d(10); /* NOTE(review): trailing ';' inside the macro, same caveat as VPUSH10() */
+#define IDPUSH1()                              jit_pushargi(1) /* IDPUSHn(): push 1..n alternating integer (odd) and double (even) immediates */
+#define IDPUSH2()      IDPUSH1();              jit_pushargi_d(2)
+#define IDPUSH3()      IDPUSH2();              jit_pushargi(3)
+#define IDPUSH4()      IDPUSH3();              jit_pushargi_d(4)
+#define IDPUSH5()      IDPUSH4();              jit_pushargi(5)
+#define IDPUSH6()      IDPUSH5();              jit_pushargi_d(6)
+#define IDPUSH7()      IDPUSH6();              jit_pushargi(7)
+#define IDPUSH8()      IDPUSH7();              jit_pushargi_d(8)
+#define IDPUSH9()      IDPUSH8();              jit_pushargi(9)
+#define IDVPUSH2()     jit_pushargi_d(2);      IDVPUSH3() /* IDVPUSHn(): push n..10 with the same integer/double alternation */
+#define IDVPUSH3()     jit_pushargi(3);        IDVPUSH4()
+#define IDVPUSH4()     jit_pushargi_d(4);      IDVPUSH5()
+#define IDVPUSH5()     jit_pushargi(5);        IDVPUSH6()
+#define IDVPUSH6()     jit_pushargi_d(6);      IDVPUSH7()
+#define IDVPUSH7()     jit_pushargi(7);        IDVPUSH8()
+#define IDVPUSH8()     jit_pushargi_d(8);      IDVPUSH9()
+#define IDVPUSH9()     jit_pushargi(9);        IDVPUSH10()
+#define IDVPUSH10()    jit_pushargi_d(10); /* NOTE(review): trailing ';' inside the macro, same caveat as VPUSH10() */
+#define DIPUSH1()                              jit_pushargi_d(1) /* DIPUSHn(): push 1..n alternating double (odd) and integer (even) immediates */
+#define DIPUSH2()      DIPUSH1();              jit_pushargi(2)
+#define DIPUSH3()      DIPUSH2();              jit_pushargi_d(3)
+#define DIPUSH4()      DIPUSH3();              jit_pushargi(4)
+#define DIPUSH5()      DIPUSH4();              jit_pushargi_d(5)
+#define DIPUSH6()      DIPUSH5();              jit_pushargi(6)
+#define DIPUSH7()      DIPUSH6();              jit_pushargi_d(7)
+#define DIPUSH8()      DIPUSH7();              jit_pushargi(8)
+#define DIPUSH9()      DIPUSH8();              jit_pushargi_d(9)
+#define DIVPUSH2()     jit_pushargi(2);        DIVPUSH3() /* DIVPUSHn(): push n..10 with the same double/integer alternation */
+#define DIVPUSH3()     jit_pushargi_d(3);      DIVPUSH4()
+#define DIVPUSH4()     jit_pushargi(4);        DIVPUSH5()
+#define DIVPUSH5()     jit_pushargi_d(5);      DIVPUSH6()
+#define DIVPUSH6()     jit_pushargi(6);        DIVPUSH7()
+#define DIVPUSH7()     jit_pushargi_d(7);      DIVPUSH8()
+#define DIVPUSH8()     jit_pushargi(8);        DIVPUSH9()
+#define DIVPUSH9()     jit_pushargi_d(9);      DIVPUSH10()
+#define DIVPUSH10()    jit_pushargi(10); /* NOTE(review): trailing ';' inside the macro, same caveat as VPUSH10() */
+
+jit_state_t     *_jit; /* JIT state; assigned from jit_new_state() in main(). Presumably the implicit state the jit_*() macros operate on -- confirm in lightning.h */
+
+int main(int argc, char *argv[]) /* builds one jitted program that exercises vararg calls, runs it, prints "ok". NOTE(review): argc is never used */
+{
+    void               (*function)(void); /* entry point returned by jit_emit() */
+    jit_node_t         *jmpi_main; /* forward jump over the helper functions, patched just before the jitted "main" */
+    jit_node_t         *a1, *a2, *node; /* scratch handles: argument nodes and branch nodes, reused by every helper */
+    jit_node_t         *jva_i_iiiiiiiii, *jva_ii_iiiiiiii; /* the jva_* variables hold the entry labels of the jitted helpers */
+    jit_node_t         *jva_d_ddddddddd, *jva_dd_dddddddd;
+    jit_node_t         *jva_i_didididid, *jva_id_idididid;
+    jit_node_t         *jva_d_ididididi, *jva_di_didididi;
+    jit_node_t         *jva_iii_iiiiiii, *jva_iiii_iiiiii;
+    jit_node_t         *jva_ddd_ddddddd, *jva_dddd_dddddd;
+    jit_node_t         *jva_idi_dididid, *jva_idid_ididid;
+    jit_node_t         *jva_did_idididi, *jva_didi_dididi;
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jmpi_main = jit_jmpi(); /* first emitted instruction: skip the helper bodies that follow */
+
+    /* Define simple functions to validate a jit_va_list_t
+     * is a valid va_list; these do not fetch arguments from
+     * the va_list. */
+    jva_i_iiiiiiiii = jit_label(); /* helper: 1 named int, forwards it plus the untouched va_list to C */
+    jit_name("va_i_iiiiiiiii");
+    jit_prolog();
+    a1 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    jit_ellipsis(); /* everything after the named arguments is variadic */
+    jit_va_start(JIT_V0); /* JIT_V0 holds the va_list for the rest of the helper */
+    jit_prepare();
+    jit_pushargr(JIT_V1);
+    jit_va_push(JIT_V0); /* pass the va_list itself as the last argument */
+    jit_finishi(va_i_iiiiiiiii);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_ii_iiiiiiii = jit_label(); /* helper: 2 named ints, forwards them plus the untouched va_list to C */
+    jit_name("va_ii_iiiiiiii");
+    jit_prolog();
+    a1 = jit_arg();
+    a2 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    jit_getarg(JIT_V2, a2);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr(JIT_V1);
+    jit_pushargr(JIT_V2);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_ii_iiiiiiii);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_d_ddddddddd = jit_label(); /* helper: 1 named double, forwards it plus the untouched va_list to C */
+    jit_name("va_d_ddddddddd");
+    jit_prolog();
+    a1 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr_d(JIT_F3);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_d_ddddddddd);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_dd_dddddddd = jit_label(); /* helper: 2 named doubles, forwards them plus the untouched va_list to C */
+    jit_name("va_dd_dddddddd");
+    jit_prolog();
+    a1 = jit_arg_d();
+    a2 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    jit_getarg_d(JIT_F4, a2);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr_d(JIT_F4);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_dd_dddddddd);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_i_didididid = jit_label(); /* helper: 1 named int, variadic tail starts with a double */
+    jit_name("va_i_didididid");
+    jit_prolog();
+    a1 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr(JIT_V1);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_i_didididid);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_id_idididid = jit_label(); /* helper: named int then named double */
+    jit_name("va_id_idididid");
+    jit_prolog();
+    a1 = jit_arg();
+    a2 = jit_arg_d();
+    jit_getarg(JIT_V1, a1);
+    jit_getarg_d(JIT_F3, a2);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr(JIT_V1);
+    jit_pushargr_d(JIT_F3);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_id_idididid);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_d_ididididi = jit_label(); /* helper: 1 named double, variadic tail starts with an int */
+    jit_name("va_d_ididididi");
+    jit_prolog();
+    a1 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr_d(JIT_F3);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_d_ididididi);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_di_didididi = jit_label(); /* helper: named double then named int */
+    jit_name("va_di_didididi");
+    jit_prolog();
+    a1 = jit_arg_d();
+    a2 = jit_arg();
+    jit_getarg_d(JIT_F3, a1);
+    jit_getarg(JIT_V1, a2);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_prepare();
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr(JIT_V1);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_di_didididi);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+
+    /* Define complex functions to validate a jit_va_list_t
+     * is a valid va_list; these do fetch arguments from
+     * the va_list, to ensure it does the correct operations
+     * fetching arguments, and pass a valid va_list to the
+     * C function. */
+    jva_iii_iiiiiii = jit_label(); /* helper: 1 named int; reads 2 ints from the va_list before forwarding it */
+    jit_name("va_iii_iiiiiii");
+    jit_prolog();
+    a1 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    node = jit_beqi(JIT_V1, 1); /* skip the abort() call when the named argument is the expected 1 */
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0); /* consume two values so the forwarded va_list is no longer at its start */
+    jit_va_arg(JIT_V2, JIT_V0);
+    jit_prepare();
+    jit_pushargi(1); /* the named argument was verified above, so it is re-pushed as an immediate */
+    jit_pushargr(JIT_V1);
+    jit_pushargr(JIT_V2);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_iii_iiiiiii);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_iiii_iiiiii = jit_label(); /* helper: 2 named ints; reads 2 ints from the va_list before forwarding it */
+    jit_name("va_iiii_iiiiii");
+    jit_prolog();
+    a1 = jit_arg();
+    a2 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    node = jit_beqi(JIT_V1, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_getarg(JIT_V1, a2);
+    node = jit_beqi(JIT_V1, 2);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0);
+    jit_va_arg(JIT_V2, JIT_V0);
+    jit_prepare();
+    jit_pushargi(1);
+    jit_pushargi(2);
+    jit_pushargr(JIT_V1);
+    jit_pushargr(JIT_V2);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_iiii_iiiiii);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_ddd_ddddddd = jit_label(); /* helper: 1 named double; reads 2 doubles from the va_list before forwarding it */
+    jit_name("va_ddd_ddddddd");
+    jit_prolog();
+    a1 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    node = jit_beqi_d(JIT_F3, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_va_arg_d(JIT_F4, JIT_V0);
+    jit_prepare();
+    jit_pushargi_d(1);
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr_d(JIT_F4);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_ddd_ddddddd);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_dddd_dddddd = jit_label(); /* helper: 2 named doubles; reads 2 doubles from the va_list before forwarding it */
+    jit_name("va_dddd_dddddd");
+    jit_prolog();
+    a1 = jit_arg_d();
+    a2 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    node = jit_beqi_d(JIT_F3, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_getarg_d(JIT_F3, a2);
+    node = jit_beqi_d(JIT_F3, 2);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_va_arg_d(JIT_F4, JIT_V0);
+    jit_prepare();
+    jit_pushargi_d(1);
+    jit_pushargi_d(2);
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr_d(JIT_F4);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_dddd_dddddd);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_idi_dididid = jit_label(); /* helper: 1 named int; reads a double then an int from the va_list */
+    jit_name("va_idi_dididid");
+    jit_prolog();
+    a1 = jit_arg();
+    jit_getarg(JIT_V1, a1);
+    node = jit_beqi(JIT_V1, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0);
+    jit_prepare();
+    jit_pushargi(1);
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr(JIT_V1);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_idi_dididid);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_idid_ididid = jit_label(); /* helper: named int and double; reads an int then a double from the va_list */
+    jit_name("va_idid_ididid");
+    jit_prolog();
+    a1 = jit_arg();
+    a2 = jit_arg_d();
+    jit_getarg(JIT_V1, a1);
+    node = jit_beqi(JIT_V1, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_getarg_d(JIT_F3, a2);
+    node = jit_beqi_d(JIT_F3, 2);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_prepare();
+    jit_pushargi(1);
+    jit_pushargi_d(2);
+    jit_pushargr(JIT_V1);
+    jit_pushargr_d(JIT_F3);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_idid_ididid);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_did_idididi = jit_label(); /* helper: 1 named double; reads an int then a double from the va_list */
+    jit_name("va_did_idididi");
+    jit_prolog();
+    a1 = jit_arg_d();
+    jit_getarg_d(JIT_F3, a1);
+    node = jit_beqi_d(JIT_F3, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_prepare();
+    jit_pushargi_d(1);
+    jit_pushargr(JIT_V1);
+    jit_pushargr_d(JIT_F3);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_did_idididi);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+    jva_didi_dididi = jit_label(); /* helper: named double and int; reads a double then an int from the va_list */
+    jit_name("va_didi_dididi");
+    jit_prolog();
+    a1 = jit_arg_d();
+    a2 = jit_arg();
+    jit_getarg_d(JIT_F3, a1);
+    node = jit_beqi_d(JIT_F3, 1);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_getarg(JIT_V1, a2);
+    node = jit_beqi(JIT_V1, 2);
+    jit_calli(abort);
+    jit_patch(node);
+    jit_ellipsis();
+    jit_va_start(JIT_V0);
+    jit_va_arg_d(JIT_F3, JIT_V0);
+    jit_va_arg(JIT_V1, JIT_V0);
+    jit_prepare();
+    jit_pushargi_d(1);
+    jit_pushargi(2);
+    jit_pushargr_d(JIT_F3);
+    jit_pushargr(JIT_V1);
+    jit_va_push(JIT_V0);
+    jit_finishi(va_didi_dididi);
+    jit_va_end(JIT_V0);
+    jit_ret();
+    jit_epilog();
+
+    jit_patch(jmpi_main); /* the initial jump lands here, at the jitted "main" */
+    jit_name("main");
+    jit_prolog();
+
+    /* Check that lightning properly calls vararg functions */
+    jit_prepare();
+    PUSH1(); /* named arguments go before jit_ellipsis() ... */
+    jit_ellipsis();
+    VPUSH2(); /* ... and the variadic ones after it; every call passes the values 1..10 in order */
+    jit_finishi(i_iiiiiiiii);
+    jit_prepare();
+    PUSH2();
+    jit_ellipsis();
+    VPUSH3();
+    jit_finishi(ii_iiiiiiii);
+    jit_prepare();
+    PUSH3();
+    jit_ellipsis();
+    VPUSH4();
+    jit_finishi(iii_iiiiiii);
+    jit_prepare();
+    PUSH4();
+    jit_ellipsis();
+    VPUSH5();
+    jit_finishi(iiii_iiiiii);
+    jit_prepare();
+    PUSH5();
+    jit_ellipsis();
+    VPUSH6();
+    jit_finishi(iiiii_iiiii);
+    jit_prepare();
+    PUSH6();
+    jit_ellipsis();
+    VPUSH7();
+    jit_finishi(iiiiii_iiii);
+    jit_prepare();
+    PUSH7();
+    jit_ellipsis();
+    VPUSH8();
+    jit_finishi(iiiiiii_iii);
+    jit_prepare();
+    PUSH8();
+    jit_ellipsis();
+    VPUSH9();
+    jit_finishi(iiiiiiii_ii);
+    jit_prepare();
+    PUSH9();
+    jit_ellipsis();
+    VPUSH10();
+    jit_finishi(iiiiiiiii_i);
+    jit_prepare();
+    PUSHD1();
+    jit_ellipsis();
+    VPUSHD2();
+    jit_finishi(d_ddddddddd);
+    jit_prepare();
+    PUSHD2();
+    jit_ellipsis();
+    VPUSHD3();
+    jit_finishi(dd_dddddddd);
+    jit_prepare();
+    PUSHD3();
+    jit_ellipsis();
+    VPUSHD4();
+    jit_finishi(ddd_ddddddd);
+    jit_prepare();
+    PUSHD4();
+    jit_ellipsis();
+    VPUSHD5();
+    jit_finishi(dddd_dddddd);
+    jit_prepare();
+    PUSHD5();
+    jit_ellipsis();
+    VPUSHD6();
+    jit_finishi(ddddd_ddddd);
+    jit_prepare();
+    PUSHD6();
+    jit_ellipsis();
+    VPUSHD7();
+    jit_finishi(dddddd_dddd);
+    jit_prepare();
+    PUSHD7();
+    jit_ellipsis();
+    VPUSHD8();
+    jit_finishi(ddddddd_ddd);
+    jit_prepare();
+    PUSHD8();
+    jit_ellipsis();
+    VPUSHD9();
+    jit_finishi(dddddddd_dd);
+    jit_prepare();
+    PUSHD9();
+    jit_ellipsis();
+    VPUSHD10();
+    jit_finishi(ddddddddd_d);
+    jit_prepare();
+    IDPUSH1();
+    jit_ellipsis();
+    IDVPUSH2();
+    jit_finishi(i_didididid);
+    jit_prepare();
+    IDPUSH2();
+    jit_ellipsis();
+    IDVPUSH3();
+    jit_finishi(id_idididid);
+    jit_prepare();
+    IDPUSH3();
+    jit_ellipsis();
+    IDVPUSH4();
+    jit_finishi(idi_dididid);
+    jit_prepare();
+    IDPUSH4();
+    jit_ellipsis();
+    IDVPUSH5();
+    jit_finishi(idid_ididid);
+    jit_prepare();
+    IDPUSH5();
+    jit_ellipsis();
+    IDVPUSH6();
+    jit_finishi(ididi_didid);
+    jit_prepare();
+    IDPUSH6();
+    jit_ellipsis();
+    IDVPUSH7();
+    jit_finishi(ididid_idid);
+    jit_prepare();
+    IDPUSH7();
+    jit_ellipsis();
+    IDVPUSH8();
+    jit_finishi(idididi_did);
+    jit_prepare();
+    IDPUSH8();
+    jit_ellipsis();
+    IDVPUSH9();
+    jit_finishi(idididid_id);
+    jit_prepare();
+    IDPUSH9();
+    jit_ellipsis();
+    IDVPUSH10();
+    jit_finishi(ididididi_d);
+    jit_prepare();
+    DIPUSH1();
+    jit_ellipsis();
+    DIVPUSH2();
+    jit_finishi(d_ididididi);
+    jit_prepare();
+    DIPUSH2();
+    jit_ellipsis();
+    DIVPUSH3();
+    jit_finishi(di_didididi);
+    jit_prepare();
+    DIPUSH3();
+    jit_ellipsis();
+    DIVPUSH4();
+    jit_finishi(did_idididi);
+    jit_prepare();
+    DIPUSH4();
+    jit_ellipsis();
+    DIVPUSH5();
+    jit_finishi(didi_dididi);
+    jit_prepare();
+    DIPUSH5();
+    jit_ellipsis();
+    DIVPUSH6();
+    jit_finishi(didid_ididi);
+    jit_prepare();
+    DIPUSH6();
+    jit_ellipsis();
+    DIVPUSH7();
+    jit_finishi(dididi_didi);
+    jit_prepare();
+    DIPUSH7();
+    jit_ellipsis();
+    DIVPUSH8();
+    jit_finishi(dididid_idi);
+    jit_prepare();
+    DIPUSH8();
+    jit_ellipsis();
+    DIVPUSH9();
+    jit_finishi(didididi_di);
+    jit_prepare();
+    DIPUSH9();
+    jit_ellipsis();
+    DIVPUSH10();
+    jit_finishi(didididid_i);
+
+    /* Check that unmodified jit_va_list_t is a valid va_list */
+    jit_prepare();
+    PUSH1();
+    jit_ellipsis();
+    VPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_i_iiiiiiiii); /* call emitted with a NULL target, then patched to the jitted helper's label */
+    jit_prepare();
+    PUSH2();
+    jit_ellipsis();
+    VPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_ii_iiiiiiii);
+    jit_prepare();
+    PUSHD1();
+    jit_ellipsis();
+    VPUSHD2();
+    jit_patch_at(jit_finishi(NULL), jva_d_ddddddddd);
+    jit_prepare();
+    PUSHD2();
+    jit_ellipsis();
+    VPUSHD3();
+    jit_patch_at(jit_finishi(NULL), jva_dd_dddddddd);
+    jit_prepare();
+    IDPUSH1();
+    jit_ellipsis();
+    IDVPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_i_didididid);
+    jit_prepare();
+    IDPUSH2();
+    jit_ellipsis();
+    IDVPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_id_idididid);
+    jit_prepare();
+    DIPUSH1();
+    jit_ellipsis();
+    DIVPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_d_ididididi);
+    jit_prepare();
+    DIPUSH2();
+    jit_ellipsis();
+    DIVPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_di_didididi);
+
+    /* Check that modified jit_va_list_t is a valid va_list */
+    jit_prepare();
+    PUSH1(); /* these helpers declare fewer named arguments than their C targets; the difference is read with jit_va_arg */
+    jit_ellipsis();
+    VPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_iii_iiiiiii);
+    jit_prepare();
+    PUSH2();
+    jit_ellipsis();
+    VPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_iiii_iiiiii);
+    jit_prepare();
+    PUSHD1();
+    jit_ellipsis();
+    VPUSHD2();
+    jit_patch_at(jit_finishi(NULL), jva_ddd_ddddddd);
+    jit_prepare();
+    PUSHD2();
+    jit_ellipsis();
+    VPUSHD3();
+    jit_patch_at(jit_finishi(NULL), jva_dddd_dddddd);
+    jit_prepare();
+    IDPUSH1();
+    jit_ellipsis();
+    IDVPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_idi_dididid);
+    jit_prepare();
+    IDPUSH2();
+    jit_ellipsis();
+    IDVPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_idid_ididid);
+    jit_prepare();
+    DIPUSH1();
+    jit_ellipsis();
+    DIVPUSH2();
+    jit_patch_at(jit_finishi(NULL), jva_did_idididi);
+    jit_prepare();
+    DIPUSH2();
+    jit_ellipsis();
+    DIVPUSH3();
+    jit_patch_at(jit_finishi(NULL), jva_didi_dididi);
+
+    jit_ret();
+    jit_epilog();
+
+
+    function = jit_emit(); /* generate the code; the returned pointer is the start of the buffer, i.e. the initial jump to "main" */
+    jit_clear_state();
+    //jit_disassemble();
+    (*function)(); /* run every check; a failing check presumably ends in abort() via CHECK() or the jitted guards */
+    jit_destroy_state();
+
+    finish_jit();
+
+    printf("ok\n"); /* reached only if the jitted program returned normally */
+    return 0;
+}
diff --git a/deps/lightning/check/cvt.ok b/deps/lightning/check/cvt.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/cvt.tst b/deps/lightning/check/cvt.tst
new file mode 100644 (file)
index 0000000..1828259
--- /dev/null
@@ -0,0 +1,380 @@
+.data  12
+ok:
+.c     "ok\n"
+
+/* Source patterns and expected results for the extr_* tests below.
+ * w<N> is the word-sized pattern loaded into the source register and
+ * <T><N> is the constant the result of extr_<T> on w<N> is compared
+ * against (T = c, uc, s, us, plus i and ui when words are 64 bits).
+ * With 32-bit words LEXTII2/LEXTIC2 expand to nothing, so no extr_i or
+ * extr_ui checks are generated. */
+#if __WORDSIZE == 32
+#  define w7f          0x7fffffff
+#  define w80          0x80000000
+#  define w81          0x80000001
+#  define wff          0xffffffff
+#  define LEXTII2(N, R0, R1)
+#  define LEXTIC2(N, R0)
+#else
+#  define w7f          0x7fffffffffffffff
+#  define w80          0x8000000000000000
+#  define w81          0x8000000000000001
+#  define wff          0xffffffffffffffff
+#  define i7f          wff
+#  define ui7f         0xffffffff
+#  define i80          0
+#  define ui80         0
+#  define i81          1
+#  define ui81         1
+#  define iff          wff
+#  define uiff         0xffffffff
+#  define LEXTII2(N, R0, R1)                                   \
+       EXTII2(N, i, R0, R1)                                    \
+       EXTII2(N, ui, R0, R1)
+#  define LEXTIC2(N, R0)                                       \
+       EXTIC2(N, i, R0)                                        \
+       EXTIC2(N, ui, R0)
+#endif
+#define c7f            wff
+#define uc7f           0xff
+#define s7f            wff
+#define us7f           0xffff
+#define c80            0
+#define uc80           0
+#define s80            0
+#define us80           0
+#define c81            1
+#define uc81           1
+#define s81            1
+#define us81           1
+#define cff            wff
+#define ucff           0xff
+#define sff            wff
+#define usff           0xffff
+
+/* Load pattern w<N> into %R0, apply extr_<T> with %R1 as destination,
+ * and abort unless %R1 equals the expected constant <T><N>.  The label
+ * is built from T, both register names and N. */
+#define EXTII2(N, T, R0, R1)                                   \
+       movi %R0 w##N                                           \
+       extr_##T %R1 %R0                                        \
+       beqi T##_##R0##_##R1##_##N %R1 T##N                     \
+       calli @abort                                            \
+T##_##R0##_##R1##_##N:
+#define EXTII1(N, R0, R1)                                      \
+       EXTII2(N, c, R0, R1)                                    \
+       EXTII2(N, uc, R0, R1)                                   \
+       EXTII2(N, s, R0, R1)                                    \
+       EXTII2(N, us, R0, R1)                                   \
+       LEXTII2(N, R0, R1)
+#define EXTII0(R0, R1)                                         \
+       EXTII1(7f, R0, R1)                                      \
+       EXTII1(80, R0, R1)                                      \
+       EXTII1(81, R0, R1)                                      \
+       EXTII1(ff, R0, R1)
+
+/* Same check as EXTII2 but with extr_<T> using %R0 as both source and
+ * destination: load w<N>, extend in place, abort unless %R0 == <T><N>. */
+#define EXTIC2(N, T, R0)                                       \
+       movi %R0 w##N                                           \
+       extr_##T %R0 %R0                                        \
+       beqi T##_##R0##_##N %R0 T##N                            \
+       calli @abort                                            \
+T##_##R0##_##N:
+#define EXTIC1(N, R0)                                          \
+       EXTIC2(N, c, R0)                                        \
+       EXTIC2(N, uc, R0)                                       \
+       EXTIC2(N, s, R0)                                        \
+       EXTIC2(N, us, R0)                                       \
+       LEXTIC2(N, R0)
+#define EXTIC0(R0)                                             \
+       EXTIC1(7f, R0)                                          \
+       EXTIC1(80, R0)                                          \
+       EXTIC1(81, R0)                                          \
+       EXTIC1(ff, R0)
+
+#define EXTII(V0, V1, V2, R0, R1, R2)                          \
+       EXTII0(V0, V1)                                          \
+       EXTII0(V0, V2)                                          \
+       EXTII0(V0, R0)                                          \
+       EXTII0(V0, R1)                                          \
+       EXTII0(V0, R2)                                          \
+       EXTII0(V1, V0)                                          \
+       EXTII0(V1, V2)                                          \
+       EXTII0(V1, R0)                                          \
+       EXTII0(V1, R1)                                          \
+       EXTII0(V1, R2)                                          \
+       EXTII0(V2, V0)                                          \
+       EXTII0(V2, V1)                                          \
+       EXTII0(V2, R0)                                          \
+       EXTII0(V2, R1)                                          \
+       EXTII0(V2, R2)                                          \
+       EXTII0(R0, V0)                                          \
+       EXTII0(R0, V1)                                          \
+       EXTII0(R0, V2)                                          \
+       EXTII0(R0, R1)                                          \
+       EXTII0(R0, R2)                                          \
+       EXTII0(R1, V0)                                          \
+       EXTII0(R1, V1)                                          \
+       EXTII0(R1, V2)                                          \
+       EXTII0(R1, R0)                                          \
+       EXTII0(R1, R2)                                          \
+       EXTII0(R2, V0)                                          \
+       EXTII0(R2, V1)                                          \
+       EXTII0(R2, V2)                                          \
+       EXTII0(R2, R0)                                          \
+       EXTII0(R2, R1)                                          \
+       EXTIC0(V0)                                              \
+       EXTIC0(V1)                                              \
+       EXTIC0(V2)                                              \
+       EXTIC0(R0)                                              \
+       EXTIC0(R1)                                              \
+       EXTIC0(R2)
+
+/* Load the integer V into %R0, convert it to single precision in %R1
+ * with extr_f, and abort unless %R1 compares equal to V.  Without the
+ * abort the branch target is simply the next instruction and the check
+ * could never fail. */
+#define EXIF1(N, V, R0, R1)                                    \
+       movi %R0 V                                              \
+       extr_f %R1 %R0                                          \
+       beqi_f wf##_##R0##_##R1##_##N %R1 V                     \
+       calli @abort                                            \
+wf##_##R0##_##R1##_##N:
+#define EXIF0(R0, R1)                                          \
+       EXIF1(0, -1, R0, R1)                                    \
+       EXIF1(1, 64, R0, R1)
+#define EXIF(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       EXIF0(V0, F0)                                           \
+       EXIF0(V1, F1)                                           \
+       EXIF0(V2, F2)                                           \
+       EXIF0(R0, F3)                                           \
+       EXIF0(R1, F4)                                           \
+       EXIF0(R2, F5)
+/* Load the integer V into %R0, convert it to double precision in %R1
+ * with extr_d, and abort unless %R1 compares equal to V.  Without the
+ * abort the branch target is simply the next instruction and the check
+ * could never fail. */
+#define EXID1(N, V, R0, R1)                                    \
+       movi %R0 V                                              \
+       extr_d %R1 %R0                                          \
+       beqi_d wd##_##R0##_##R1##_##N %R1 V                     \
+       calli @abort                                            \
+wd##_##R0##_##R1##_##N:
+#define EXID0(R0, R1)                                          \
+       EXID1(0, -1, R0, R1)                                    \
+       EXID1(1, 64, R0, R1)
+#define EXID(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       EXID0(V0, F0)                                           \
+       EXID0(V1, F1)                                           \
+       EXID0(V2, F2)                                           \
+       EXID0(R0, F3)                                           \
+       EXID0(R1, F4)                                           \
+       EXID0(R2, F5)
+
+/* Load the float constant V into %R1, truncate it to an integer in %R0
+ * with truncr_f, and abort unless %R0 equals V. */
+#define EXFI1(N, V, R0, R1)                                    \
+       movi_f %R1 V                                            \
+       truncr_f %R0 %R1                                        \
+       beqi fi##_##R0##_##R1##_##N %R0 V                       \
+       calli @abort                                            \
+fi##_##R0##_##R1##_##N:
+#define EXFI0(R0, R1)                                          \
+       EXFI1(0,   42, R0, R1)                                  \
+       EXFI1(1, -128, R0, R1)
+#define EXFI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       EXFI0(V0, F5)                                           \
+       EXFI0(V1, F4)                                           \
+       EXFI0(V2, F3)                                           \
+       EXFI0(R0, F2)                                           \
+       EXFI0(R1, F1)                                           \
+       EXFI0(R2, F0)
+/* Load the double constant V into %R1, truncate it to an integer in %R0
+ * with truncr_d, and abort unless %R0 equals V. */
+#define EXDI1(N, V, R0, R1)                                    \
+       movi_d %R1 V                                            \
+       truncr_d %R0 %R1                                        \
+       beqi di##_##R0##_##R1##_##N %R0 V                       \
+       calli @abort                                            \
+di##_##R0##_##R1##_##N:
+#define EXDI0(R0, R1)                                          \
+       EXDI1(0,   42, R0, R1)                                  \
+       EXDI1(1, -128, R0, R1)
+#define EXDI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       EXDI0(V0, F5)                                           \
+       EXDI0(V1, F4)                                           \
+       EXDI0(V2, F3)                                           \
+       EXDI0(R0, F2)                                           \
+       EXDI0(R1, F1)                                           \
+       EXDI0(R2, F0)
+
+/* Variant of EXFI1 using truncr_f_i (only instantiated when
+ * __WORDSIZE == 64).  The result is masked to its low 32 bits and
+ * compared against V masked the same way, so only those bits are
+ * checked. */
+#define LEXFI1(N, V, R0, R1)                                   \
+       movi_f %R1 V                                            \
+       truncr_f_i %R0 %R1                                      \
+       andi %R0 %R0 0xffffffff                                 \
+       beqi lfi##_##R0##_##R1##_##N %R0 $(V & 0xffffffff)      \
+       calli @abort                                            \
+lfi##_##R0##_##R1##_##N:
+#define LEXFI0(R0, R1)                                         \
+       LEXFI1(0,   42, R0, R1)                                 \
+       LEXFI1(1, -128, R0, R1)
+#define LEXFI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)  \
+       LEXFI0(V0, F5)                                          \
+       LEXFI0(V1, F4)                                          \
+       LEXFI0(V2, F3)                                          \
+       LEXFI0(R0, F2)                                          \
+       LEXFI0(R1, F1)                                          \
+       LEXFI0(R2, F0)
+/* Variant of EXDI1 using truncr_d_i (only instantiated when
+ * __WORDSIZE == 64).  The result is masked to its low 32 bits and
+ * compared against V masked the same way, so only those bits are
+ * checked. */
+#define LEXDI1(N, V, R0, R1)                                   \
+       movi_d %R1 V                                            \
+       truncr_d_i %R0 %R1                                      \
+       andi %R0 %R0 0xffffffff                                 \
+       beqi ldi##_##R0##_##R1##_##N %R0 $(V & 0xffffffff)      \
+       calli @abort                                            \
+ldi##_##R0##_##R1##_##N:
+#define LEXDI0(R0, R1)                                         \
+       LEXDI1(0,   42, R0, R1)                                 \
+       LEXDI1(1, -128, R0, R1)
+#define LEXDI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)  \
+       LEXDI0(V0, F5)                                          \
+       LEXDI0(V1, F4)                                          \
+       LEXDI0(V2, F3)                                          \
+       LEXDI0(R0, F2)                                          \
+       LEXDI0(R1, F1)                                          \
+       LEXDI0(R2, F0)
+
+/* Load the float constant V into %R0, widen it to double in %R1 with
+ * extr_f_d, and abort unless %R1 compares equal to V as a double. */
+#define EXTFD2(V, R0, R1)                                      \
+       movi_f %R0 V                                            \
+       extr_f_d %R1 %R0                                        \
+       beqi_d fd##_##R0##_##R1 %R1 V                           \
+       calli @abort                                            \
+fd##_##R0##_##R1:
+#define EXTFD1(R0, R1)                                         \
+       EXTFD2(1.25, R0, R1)
+#define EXTFDC2(V, R0)                                         \
+       movi_f %R0 V                                            \
+       extr_f_d %R0 %R0                                        \
+       beqi_d fdc##_##R0 %R0 V                                 \
+       calli @abort                                            \
+fdc##_##R0:
+#define EXTFDC1(R0)                                            \
+       EXTFDC2(-0.75, R0)
+#define EXTFD(R0, R1, R2, R3, R4, R5)                          \
+       EXTFD1(R0, R1)                                          \
+       EXTFD1(R0, R2)                                          \
+       EXTFD1(R0, R3)                                          \
+       EXTFD1(R0, R4)                                          \
+       EXTFD1(R0, R5)                                          \
+       EXTFDC1(R0)                                             \
+       EXTFDC1(R1)                                             \
+       EXTFDC1(R2)                                             \
+       EXTFDC1(R3)                                             \
+       EXTFDC1(R4)                                             \
+       EXTFDC1(R5)
+
+/* Load the double constant V into %R0, narrow it to float in %R1 with
+ * extr_d_f, and abort unless %R1 compares equal to V as a float. */
+#define EXTDF2(V, R0, R1)                                      \
+       movi_d %R0 V                                            \
+       extr_d_f %R1 %R0                                        \
+       beqi_f df##_##R0##_##R1 %R1 V                           \
+       calli @abort                                            \
+df##_##R0##_##R1:
+#define EXTDF1(R0, R1)                                         \
+       EXTDF2(1.25, R0, R1)
+#define EXTDFC2(V, R0)                                         \
+       movi_d %R0 V                                            \
+       extr_d_f %R0 %R0                                        \
+       beqi_f dfc##_##R0 %R0 V                                 \
+       calli @abort                                            \
+dfc##_##R0:
+#define EXTDFC1(R0)                                            \
+       EXTDFC2(-0.75, R0)
+#define EXTDF(R0, R1, R2, R3, R4, R5)                          \
+       EXTDF1(R0, R1)                                          \
+       EXTDF1(R0, R2)                                          \
+       EXTDF1(R0, R3)                                          \
+       EXTDF1(R0, R4)                                          \
+       EXTDF1(R0, R5)                                          \
+       EXTDFC1(R0)                                             \
+       EXTDFC1(R1)                                             \
+       EXTDFC1(R2)                                             \
+       EXTDFC1(R3)                                             \
+       EXTDFC1(R4)                                             \
+       EXTDFC1(R5)
+
+.code
+       prolog
+
+       /* simple sequence for easier disassembly reading and encoding check */
+       movi %r0 w7f
+       extr_c %r1 %r0
+       beqi xc %r1 c7f
+       calli @abort
+xc:
+       movi %r0 w7f
+       extr_uc %r1 %r0
+       beqi xuc %r1 uc7f
+       calli @abort
+xuc:
+       movi %r0 w7f
+       extr_s %r1 %r0
+       beqi xs %r1 s7f
+       calli @abort
+xs:
+       movi %r0 w7f
+       extr_us %r1 %r0
+       beqi xus %r1 us7f
+       calli @abort
+xus:
+#if __WORDSIZE == 64
+       movi %r0 w7f
+       extr_i %r1 %r0
+       beqi xi %r1 i7f
+       calli @abort
+xi:
+       movi %r0 w7f
+       extr_ui %r1 %r0
+       beqi xui %r1 ui7f
+       calli @abort
+xui:
+#endif
+       movi %r0 -2
+       extr_f %f0 %r0
+       beqi_f xif %f0 -2
+       calli @abort
+xif:
+       movi %r0 32
+       extr_d %f0 %r0
+       beqi_d xid %f0 32
+       calli @abort
+xid:
+       movi_f %f0 -128
+       truncr_f %r0 %f0
+       beqi xfi %r0 -128
+       calli @abort
+xfi:
+       movi_d %f0 -128
+       truncr_d %r0 %f0
+       beqi xdi %r0 -128
+       calli @abort
+xdi:
+#if __WORDSIZE == 64
+       movi_f %f0 -128
+       truncr_f_i %r0 %f0
+       andi %r0 %r0 0xffffffff
+       beqi yfi %r0 $(-128 & 0xffffffff)
+       calli @abort
+yfi:
+       movi_d %f0 -128
+       truncr_d_i %r0 %f0
+       andi %r0 %r0 0xffffffff
+       beqi ydi %r0 $(-128 & 0xffffffff)
+       calli @abort
+ydi:
+#endif
+       movi_f %f0 0.5
+       extr_f_d %f1 %f0
+       beqi_d xfd %f1 0.5
+       calli @abort
+xfd:
+       movi_d %f0 0.5
+       extr_d_f %f1 %f0
+       beqi_f xdf %f1 0.5
+       calli @abort
+xdf:
+
+       EXTII(v0, v1, v2, r0, r1, r2)
+       EXIF(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       EXID(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       EXFI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+#if __WORDSIZE == 64
+       LEXFI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       LEXDI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+#endif
+       EXDI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       EXTFD(f0, f1, f2, f3, f4, f5)
+       EXTDF(f0, f1, f2, f3, f4, f5)
+
+       // print "ok" so the caller can tell the test did not abort
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/divi.ok b/deps/lightning/check/divi.ok
new file mode 100644 (file)
index 0000000..c23cbd2
--- /dev/null
@@ -0,0 +1,6 @@
+40/8 = 5 (expected 5)
+64/8 = 8 (expected 8)
+80/8 = 10 (expected 10)
+98304/32768 = 3 (expected 3)
+65536/32768 = 2 (expected 2)
+163840/32768 = 5 (expected 5)
diff --git a/deps/lightning/check/divi.tst b/deps/lightning/check/divi.tst
new file mode 100644 (file)
index 0000000..9c03ed8
--- /dev/null
@@ -0,0 +1,85 @@
+.data  128
+small_ops:
+.i     40 64 80
+large_ops:
+.i     98304 65536 163840
+fmt:
+.c     "%i/%i = %i (expected %i)\n"
+x:
+.c     "%d\n"
+.code
+       jmpi main
+
+/* Emit a function divider_<operand> that takes one argument and
+ * returns it divided by the immediate <operand> (via divi). */
+#define generate_divider(operand)      \
+       name divider_##operand          \
+divider_##operand:                     \
+       prolog                          \
+       arg $i                          \
+       getarg %r1 $i                   \
+       divi %r2 %r1 operand            \
+       retr %r2                        \
+       epilog
+generate_divider(8)
+generate_divider(32768)
+
+/* Emit test_divider_<divisor>(p, c).  It walks c 4-byte integers
+ * starting at p; for each value it calls divider_<divisor>, recomputes
+ * the quotient inline with divi, and prints operand, divisor and both
+ * quotients through printf(fmt, ...).  The inline quotient is spilled
+ * to a 4-byte stack slot around the printf call and reloaded before it
+ * is compared.  Returns 1 at the first mismatch, 0 once every value has
+ * been checked. */
+#define generate_test_divider(divisor) \
+       name test_divider_##divisor     \
+test_divider_##divisor:                        \
+       prolog                          \
+       allocai 4 $loc                  \
+       arg $p                          \
+       arg $c                          \
+       getarg %v0 $p                   \
+       getarg %v1 $c                   \
+       muli %v1 %v1 4                  \
+       addr %v1 %v0 %v1                \
+loop_##divisor:                                \
+       bger done_##divisor %v0 %v1     \
+       ldr_i %v2 %v0                   \
+       prepare                         \
+               pushargr %v2            \
+       finishi divider_##divisor       \
+       retval %v2                      \
+       ldr_i %r2 %v0                   \
+       divi %r0 %r2 divisor            \
+       /* save div result */           \
+       stxi_i $loc %fp %r0             \
+       prepare                         \
+               pushargi fmt            \
+               ellipsis                \
+               pushargr %r2            \
+               pushargi divisor        \
+               pushargr %v2            \
+               pushargr %r0            \
+       finishi @printf                 \
+       addi %v0 %v0 4                  \
+       /* reload div result */         \
+       ldxi_i %r0 %fp $loc             \
+       beqr loop_##divisor %r0 %v2     \
+       /* return if failed */          \
+       reti 1                          \
+done_##divisor:                                \
+       reti 0                          \
+       epilog
+generate_test_divider(8)
+generate_test_divider(32768)
+
+       /* Entry point: run test_divider_8 over the 3 values in small_ops,
+        * then test_divider_32768 over the 3 values in large_ops.  Returns
+        * 1 as soon as either reports a non-zero result, otherwise 0. */
+       name main
+main:
+       prolog
+       prepare
+               pushargi small_ops
+               pushargi 3
+       finishi test_divider_8
+       retval %r0
+       bnei fail %r0 0 
+       prepare
+               pushargi large_ops
+               pushargi 3
+       finishi test_divider_32768
+       retval %r0
+       bnei fail %r0 0 
+       reti 0
+fail:
+       reti 1
+       epilog
diff --git a/deps/lightning/check/fib.ok b/deps/lightning/check/fib.ok
new file mode 100644 (file)
index 0000000..7e13ef0
--- /dev/null
@@ -0,0 +1 @@
+nfibs(32) = 2178309
diff --git a/deps/lightning/check/fib.tst b/deps/lightning/check/fib.tst
new file mode 100644 (file)
index 0000000..0835323
--- /dev/null
@@ -0,0 +1,62 @@
+.data  32
+format:
+.c     "nfibs(%d) = %d\n"
+
+.code
+       jmpi main
+
+       /* nfibs(n): iterative Fibonacci.  Returns 0 for n == 0 and 1 for
+        * n <= 2 (unsigned compare); otherwise runs the loop n - 2 times,
+        * keeping the two most recent terms in R0 and R1, and returns R0. */
+       name nfibs
+nfibs:
+       prolog
+       arg $in
+       getarg %r0 $in          // R0 = n
+       beqi ref %r0 0          // n == 0: return 0
+       movr %r1 %r0
+       movi %r0 1
+       blei_u ref %r1 2        // n <= 2: return 1
+       subi %r2 %r1 2          // R2 = n - 2 (loop counter)
+       movr %r1 %r0
+loop:
+       subi %r2 %r2 1          // decr. counter
+       movr %v0 %r0            // V0 = R0
+       addr %r0 %r0 %r1        // R0 = R0 + R1
+       movr %r1 %v0            // R1 = V0
+       bnei loop %r2 0         // if (R2) goto loop
+ref:
+       retr %r0                // RET = R0
+       epilog
+
+       /* Entry point: when argc > 1 the argument is atoi() of the second
+        * argv entry, otherwise it defaults to 32.  Calls nfibs with it and
+        * prints the argument and the result through printf(format, ...). */
+       name main
+main:
+       prolog
+       arg $argc
+       arg $argv
+
+       getarg_i %r0 $argc
+       blei default %r0 1
+       getarg %r0 $argv
+       addi %r0 %r0 $(__WORDSIZE >> 3)
+       ldr %r0 %r0
+       prepare
+               pushargr %r0
+       finishi @atoi
+       retval %r0
+       jmpi call
+
+default:
+       movi %r0 32
+
+call:
+       movr %v0 %r0
+       prepare
+               pushargr %r0
+       finishi nfibs
+       retval %r0
+       prepare
+               pushargi format
+               ellipsis
+               pushargr %v0
+               pushargr %r0
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/float.ok b/deps/lightning/check/float.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/float.tst b/deps/lightning/check/float.tst
new file mode 100644 (file)
index 0000000..ff5606a
--- /dev/null
@@ -0,0 +1,367 @@
+
+.data  4
+ok:
+.c     "ok"
+
+.      $($NaN  =                0.0 / 0.0)
+.      $($pInf =                1.0 / 0.0)
+.      $($nInf =               -1.0 / 0.0)
+#if __WORDSIZE == 32
+#  define x7f                  0x7fffffff
+#  define x80                  0x80000000
+#else
+#  define x7f                  0x7fffffffffffffff
+#  define x80                  0x8000000000000000
+#endif
+
+#if __mips__ || __sparc__ || __hppa__ || __riscv
+#  define wnan                 x7f
+#elif __arm__ || __aarch64__ || __alpha__
+#  define wnan                 0
+#else
+#  define wnan                 x80
+#endif
+#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv
+#  define wpinf                        x7f
+#elif __alpha__
+/* (at least) bug compatible with gcc 4.2.3 -ieee */
+#  define wpinf                        0
+#else
+#  define wpinf                        x80
+#endif
+#if __alpha__
+/* (at least) bug compatible with gcc 4.2.3 -ieee */
+#  define wninf                        0
+#else
+#  define wninf                        x80
+#endif
+
+/* xtcmp: check a comparison <op> that is expected to hold for (li, ri).
+ * The value-producing register and immediate forms must first yield a
+ * non-zero result and are then re-run and required to yield exactly 1;
+ * the branch forms (b<op>r / b<op>i) must be taken.  Any other outcome
+ * calls abort.  Labels are made unique by pasting op, type suffix,
+ * register names and l together. */
+#define xtcmp(l, t, op, r0, f0, f1, li, ri)            \
+       movi##t %f0 li                                  \
+       movi##t %f1 ri                                  \
+       op##r##t %r0 %f0 %f1                            \
+       bnei T##op##r##t##r0##f0##f1##l %r0 0           \
+       calli @abort                                    \
+T##op##r##t##r0##f0##f1##l:                            \
+       movi##t %f0 li                                  \
+       movi##t %f1 ri                                  \
+       b##op##r##t bT##op##r##t##r0##f0##f1##l %f0 %f1 \
+       calli @abort                                    \
+bT##op##r##t##r0##f0##f1##l:                           \
+       movi##t %f1 li                                  \
+       op##i##t %r0 %f0 ri                             \
+       bnei T##op##i##t##r0##f0##f1##l %r0 0           \
+       calli @abort                                    \
+T##op##i##t##r0##f0##f1##l:                            \
+       movi##t %f1 li                                  \
+       b##op##i##t bT##op##i##t##r0##f0##f1##l %f0 ri  \
+       calli @abort                                    \
+bT##op##i##t##r0##f0##f1##l:                           \
+       movi##t %f0 li                                  \
+       movi##t %f1 ri                                  \
+       op##r##t %r0 %f0 %f1                            \
+       beqi F##op##r##t##r0##f0##f1##l %r0 1           \
+       calli @abort                                    \
+F##op##r##t##r0##f0##f1##l:                            \
+       movi##t %f1 li                                  \
+       op##i##t %r0 %f0 ri                             \
+       beqi F##op##i##t##r0##f0##f1##l %r0 1           \
+       calli @abort                                    \
+F##op##i##t##r0##f0##f1##l:
+#define tcmp1(l, t, op, r0, li, ri)                    \
+       xtcmp(l, t, op, r0, f0, f1, li, ri)             \
+       xtcmp(l, t, op, r0, f1, f2, li, ri)             \
+       xtcmp(l, t, op, r0, f2, f3, li, ri)             \
+       xtcmp(l, t, op, r0, f3, f4, li, ri)             \
+       xtcmp(l, t, op, r0, f4, f5, li, ri)
+#define tcmp0(l, t, op, li, ri)                                \
+       tcmp1(l, t, op, v0, li, ri)                     \
+       tcmp1(l, t, op, v1, li, ri)                     \
+       tcmp1(l, t, op, v2, li, ri)                     \
+       tcmp1(l, t, op, r0, li, ri)                     \
+       tcmp1(l, t, op, r1, li, ri)                     \
+       tcmp1(l, t, op, r2, li, ri)
+#if __ia64__
+#  define tcmp(l, op, li, ri)                          \
+        xtcmp(l, _f, op, r0, f0, f1, li, ri)           \
+        xtcmp(l, _d, op, r0, f0, f1, li, ri)
+#else
+#  define tcmp(l, op, li, ri)                          \
+        tcmp0(l, _f, op, li, ri)                       \
+        tcmp0(l, _d, op, li, ri)
+#endif
+
+/* xfcmp: check a comparison <op> that is expected NOT to hold for
+ * (li, ri).  Only the value-producing register and immediate forms are
+ * exercised: the result must be 0, and a second pass aborts if the
+ * result is 1.  Labels are made unique by pasting op, type suffix,
+ * register names and l together. */
+#define xfcmp(l, t, op, r0, f0, f1, li, ri)            \
+       movi##t %f0 li                                  \
+       movi##t %f1 ri                                  \
+       op##r##t %r0 %f0 %f1                            \
+       beqi T##op##r##t##r0##f0##f1##l %r0 0           \
+       calli @abort                                    \
+T##op##r##t##r0##f0##f1##l:                            \
+       movi##t %f1 li                                  \
+       op##i##t %r0 %f0 ri                             \
+       beqi T##op##i##t##r0##f0##f1##l %r0 0           \
+       calli @abort                                    \
+T##op##i##t##r0##f0##f1##l:                            \
+       movi##t %f0 li                                  \
+       movi##t %f1 ri                                  \
+       op##r##t %r0 %f0 %f1                            \
+       bnei F##op##r##t##r0##f0##f1##l %r0 1           \
+       calli @abort                                    \
+F##op##r##t##r0##f0##f1##l:                            \
+       movi##t %f1 li                                  \
+       op##i##t %r0 %f0 ri                             \
+       bnei F##op##i##t##r0##f0##f1##l %r0 1           \
+       calli @abort                                    \
+F##op##i##t##r0##f0##f1##l:
+#define fcmp1(l, t, op, r0, li, ri)                    \
+       xfcmp(l, t, op, r0, f0, f1, li, ri)             \
+       xfcmp(l, t, op, r0, f1, f2, li, ri)             \
+       xfcmp(l, t, op, r0, f2, f3, li, ri)             \
+       xfcmp(l, t, op, r0, f3, f4, li, ri)             \
+       xfcmp(l, t, op, r0, f4, f5, li, ri)
+#define fcmp0(l, t, op, li, ri)                                \
+       fcmp1(l, t, op, v0, li, ri)                     \
+       fcmp1(l, t, op, v1, li, ri)                     \
+       fcmp1(l, t, op, v2, li, ri)                     \
+       fcmp1(l, t, op, r0, li, ri)                     \
+       fcmp1(l, t, op, r1, li, ri)                     \
+       fcmp1(l, t, op, r2, li, ri)
+#if __ia64__
+#  define fcmp(l, op, li, ri)                          \
+       xfcmp(l, _f, op, r0, f0, f1, li, ri)            \
+       xfcmp(l, _d, op, r0, f0, f1, li, ri)
+#else
+#  define fcmp(l, op, li, ri)                          \
+        fcmp0(l, _f, op, li, ri)                       \
+        fcmp0(l, _d, op, li, ri)
+#endif
+
+/* xf2w: load the floating-point constant fv (type suffix f) into %f0,
+ * truncate it to a word in %r0, and abort unless %r0 equals iv.  The
+ * label is built from the type suffix, both register names and l. */
+#define xf2w(l, f, r0, f0, iv, fv)                     \
+       movi##f %f0 fv                                  \
+       truncr##f %r0 %f0                               \
+       beqi W##f##r0##f0##l %r0 iv                     \
+       calli @abort                                    \
+W##f##r0##f0##l:
+#define f2w1(l, t, r0, iv, fv)                         \
+       xf2w(l, t, r0, f0, iv, fv)                      \
+       xf2w(l, t, r0, f1, iv, fv)                      \
+       xf2w(l, t, r0, f2, iv, fv)                      \
+       xf2w(l, t, r0, f3, iv, fv)                      \
+       xf2w(l, t, r0, f4, iv, fv)                      \
+       xf2w(l, t, r0, f5, iv, fv)
+#define f2w0(l, t, iv, fv)                             \
+       f2w1(l, t, v0, iv, fv)                          \
+       f2w1(l, t, v1, iv, fv)                          \
+       f2w1(l, t, v2, iv, fv)                          \
+       f2w1(l, t, r0, iv, fv)                          \
+       f2w1(l, t, r1, iv, fv)                          \
+       f2w1(l, t, r2, iv, fv)
+#if __ia64__
+#  define f2w(l, iv, fv)                               \
+       xf2w(l, _f, r0, f0, iv, fv)                     \
+       xf2w(l, _d, r0, f0, iv, fv)
+#else
+#  define f2w(l, iv, fv)                               \
+       f2w0(l, _f, iv, fv)                             \
+       f2w0(l, _d, iv, fv)
+#endif
+
+.code
+       prolog
+
+       tcmp(__LINE__, lt, 0, 1)
+       tcmp(__LINE__, lt, $nInf, $pInf)
+       tcmp(__LINE__, lt, $nInf, 0)
+       tcmp(__LINE__, lt, 0, $pInf)
+       fcmp(__LINE__, lt, $NaN, 0)
+       fcmp(__LINE__, lt, $NaN, $NaN)
+       fcmp(__LINE__, lt, $nInf, $NaN)
+       fcmp(__LINE__, lt, 1, 0)
+       fcmp(__LINE__, lt, 0, 0)
+       fcmp(__LINE__, lt, $pInf, $nInf)
+       fcmp(__LINE__, lt, 0, $nInf)
+       fcmp(__LINE__, lt, 0, $NaN)
+
+       tcmp(__LINE__, le, 0, 1)
+       tcmp(__LINE__, le, 0, 0)
+       tcmp(__LINE__, le, 1, 1)
+       tcmp(__LINE__, le, $nInf, $pInf)
+       tcmp(__LINE__, le, $nInf, 0)
+       tcmp(__LINE__, le, 0, $pInf)
+       fcmp(__LINE__, le, $NaN, 0)
+       fcmp(__LINE__, le, $NaN, $NaN)
+       fcmp(__LINE__, le, $nInf, $NaN)
+       fcmp(__LINE__, le, 1, 0)
+       fcmp(__LINE__, le, $pInf, $nInf)
+       fcmp(__LINE__, le, 0, $nInf)
+       fcmp(__LINE__, le, 0, $NaN)
+
+       tcmp(__LINE__, eq, 0, 0)
+       tcmp(__LINE__, eq, 1, 1)
+       fcmp(__LINE__, eq, $NaN, 0)
+       fcmp(__LINE__, eq, $NaN, $NaN)
+       fcmp(__LINE__, eq, $nInf, $NaN)
+       fcmp(__LINE__, eq, 0, 1)
+       fcmp(__LINE__, eq, 1, 0)
+       fcmp(__LINE__, eq, $pInf, $nInf)
+       fcmp(__LINE__, eq, 0, $nInf)
+       fcmp(__LINE__, eq, 0, $NaN)
+
+       tcmp(__LINE__, ge, 1, 0)
+       tcmp(__LINE__, ge, 0, 0)
+       tcmp(__LINE__, ge, 1, 1)
+       tcmp(__LINE__, ge, $pInf, $nInf)
+       tcmp(__LINE__, ge, 0, $nInf)
+       tcmp(__LINE__, ge, $pInf, 0)
+       fcmp(__LINE__, ge, $NaN, 0)
+       fcmp(__LINE__, ge, $NaN, $NaN)
+       fcmp(__LINE__, ge, $nInf, $NaN)
+       fcmp(__LINE__, ge, 0, 1)
+       fcmp(__LINE__, ge, $nInf, $pInf)
+       fcmp(__LINE__, ge, $nInf, 0)
+       fcmp(__LINE__, ge, 0, $NaN)
+
+       tcmp(__LINE__, gt, 1, 0)
+       tcmp(__LINE__, gt, $pInf, $nInf)
+       tcmp(__LINE__, gt, 0, $nInf)
+       tcmp(__LINE__, gt, $pInf, 0)
+       fcmp(__LINE__, gt, $NaN, 0)
+       fcmp(__LINE__, gt, $NaN, $NaN)
+       fcmp(__LINE__, gt, $nInf, $NaN)
+       fcmp(__LINE__, gt, 0, 1)
+       fcmp(__LINE__, gt, 0, 0)
+       fcmp(__LINE__, gt, $nInf, $pInf)
+       fcmp(__LINE__, gt, $nInf, 0)
+       fcmp(__LINE__, gt, 0, $NaN)
+
+       tcmp(__LINE__, ne, 0, 1)
+       tcmp(__LINE__, ne, 1, 0)
+       tcmp(__LINE__, ne, $NaN, $NaN)
+       tcmp(__LINE__, ne, $nInf, $pInf)
+       tcmp(__LINE__, ne, $NaN, 0)
+       tcmp(__LINE__, ne, $nInf, $NaN)
+       tcmp(__LINE__, ne, $pInf, $nInf)
+       tcmp(__LINE__, ne, 0, $nInf)
+       tcmp(__LINE__, ne, 0, $NaN)
+       fcmp(__LINE__, ne, 0, 0)
+       fcmp(__LINE__, ne, 1, 1)
+
+       tcmp(__LINE__, unlt, 0, 1)
+       tcmp(__LINE__, unlt, $nInf, $pInf)
+       tcmp(__LINE__, unlt, $nInf, 0)
+       tcmp(__LINE__, unlt, 0, $pInf)
+       tcmp(__LINE__, unlt, $NaN, 0)
+       tcmp(__LINE__, unlt, $NaN, $NaN)
+       tcmp(__LINE__, unlt, $nInf, $NaN)
+       tcmp(__LINE__, unlt, 0, $NaN)
+       fcmp(__LINE__, unlt, 1, 0)
+       fcmp(__LINE__, unlt, 0, 0)
+       fcmp(__LINE__, unlt, $pInf, $nInf)
+       fcmp(__LINE__, unlt, 0, $nInf)
+
+       tcmp(__LINE__, unle, 0, 1)
+       tcmp(__LINE__, unle, 0, 0)
+       tcmp(__LINE__, unle, 1, 1)
+       tcmp(__LINE__, unle, $nInf, $pInf)
+       tcmp(__LINE__, unle, $nInf, 0)
+       tcmp(__LINE__, unle, 0, $pInf)
+       tcmp(__LINE__, unle, $NaN, 0)
+       tcmp(__LINE__, unle, $NaN, $NaN)
+       tcmp(__LINE__, unle, $nInf, $NaN)
+       tcmp(__LINE__, unle, 0, $NaN)
+       fcmp(__LINE__, unle, 1, 0)
+       fcmp(__LINE__, unle, $pInf, $nInf)
+       fcmp(__LINE__, unle, 0, $nInf)
+
+       tcmp(__LINE__, uneq, 0, 0)
+       tcmp(__LINE__, uneq, 1, 1)
+       tcmp(__LINE__, uneq, $NaN, 0)
+       tcmp(__LINE__, uneq, $NaN, $NaN)
+       tcmp(__LINE__, uneq, $nInf, $NaN)
+       tcmp(__LINE__, uneq, 0, $NaN)
+       fcmp(__LINE__, uneq, 0, 1)
+       fcmp(__LINE__, uneq, 1, 0)
+       fcmp(__LINE__, uneq, $pInf, $nInf)
+       fcmp(__LINE__, uneq, 0, $nInf)
+
+       tcmp(__LINE__, unge, 1, 0)
+       tcmp(__LINE__, unge, 0, 0)
+       tcmp(__LINE__, unge, 1, 1)
+       tcmp(__LINE__, unge, $pInf, $nInf)
+       tcmp(__LINE__, unge, 0, $nInf)
+       tcmp(__LINE__, unge, $pInf, 0)
+       tcmp(__LINE__, unge, $NaN, 0)
+       tcmp(__LINE__, unge, $NaN, $NaN)
+       tcmp(__LINE__, unge, $nInf, $NaN)
+       tcmp(__LINE__, unge, 0, $NaN)
+       fcmp(__LINE__, unge, 0, 1)
+       fcmp(__LINE__, unge, $nInf, $pInf)
+       fcmp(__LINE__, unge, $nInf, 0)
+
+       tcmp(__LINE__, ungt, 1, 0)
+       tcmp(__LINE__, ungt, $pInf, $nInf)
+       tcmp(__LINE__, ungt, 0, $nInf)
+       tcmp(__LINE__, ungt, $pInf, 0)
+       tcmp(__LINE__, ungt, $NaN, 0)
+       tcmp(__LINE__, ungt, $NaN, $NaN)
+       tcmp(__LINE__, ungt, $nInf, $NaN)
+       tcmp(__LINE__, ungt, 0, $NaN)
+       fcmp(__LINE__, ungt, 0, 1)
+       fcmp(__LINE__, ungt, 0, 0)
+       fcmp(__LINE__, ungt, $nInf, $pInf)
+       fcmp(__LINE__, ungt, $nInf, 0)
+
+       tcmp(__LINE__, ltgt, 0, 1)
+       tcmp(__LINE__, ltgt, 1, 0)
+       tcmp(__LINE__, ltgt, $nInf, $pInf)
+       tcmp(__LINE__, ltgt, $pInf, $nInf)
+       tcmp(__LINE__, ltgt, 0, $nInf)
+       fcmp(__LINE__, ltgt, $NaN, $NaN)
+       fcmp(__LINE__, ltgt, $NaN, 0)
+       fcmp(__LINE__, ltgt, $nInf, $NaN)
+       fcmp(__LINE__, ltgt, 0, $NaN)
+       fcmp(__LINE__, ltgt, 0, 0)
+       fcmp(__LINE__, ltgt, 1, 1)
+
+       tcmp(__LINE__, ord, 0, 1)
+       tcmp(__LINE__, ord, 1, 0)
+       tcmp(__LINE__, ord, $nInf, $pInf)
+       tcmp(__LINE__, ord, $pInf, $nInf)
+       tcmp(__LINE__, ord, 0, $nInf)
+       tcmp(__LINE__, ord, 0, 0)
+       tcmp(__LINE__, ord, 1, 1)
+       fcmp(__LINE__, ord, $NaN, $NaN)
+       fcmp(__LINE__, ord, $NaN, 0)
+       fcmp(__LINE__, ord, $nInf, $NaN)
+       fcmp(__LINE__, ord, 0, $NaN)
+
+       tcmp(__LINE__, unord, $NaN, $NaN)
+       tcmp(__LINE__, unord, $NaN, 0)
+       tcmp(__LINE__, unord, $nInf, $NaN)
+       tcmp(__LINE__, unord, 0, $NaN)
+       fcmp(__LINE__, unord, 0, 1)
+       fcmp(__LINE__, unord, 1, 0)
+       fcmp(__LINE__, unord, $nInf, $pInf)
+       fcmp(__LINE__, unord, $pInf, $nInf)
+       fcmp(__LINE__, unord, 0, $nInf)
+       fcmp(__LINE__, unord, 0, 0)
+       fcmp(__LINE__, unord, 1, 1)
+
+       f2w(__LINE__, 0, 0)
+       f2w(__LINE__, 1, 1)
+        /* not all loongson agree on it */
+#if !__mips__
+       f2w(__LINE__, wninf, $nInf)
+#endif
+       f2w(__LINE__, wpinf, $pInf)
+       f2w(__LINE__, wnan, $NaN)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @puts
+
+       ret
+       epilog
diff --git a/deps/lightning/check/fop_abs.ok b/deps/lightning/check/fop_abs.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/fop_abs.tst b/deps/lightning/check/fop_abs.tst
new file mode 100644 (file)
index 0000000..cb7be14
--- /dev/null
@@ -0,0 +1,31 @@
+/* Checks abs with the _f and _d suffixes (presumably float and double) on
+ * signed zero, ordinary values, infinities and NaN.  FUN and UFUN are not
+ * defined in this file; presumably they come from alu.inc. */
+#include "alu.inc"
+
+.code
+       prolog
+/* ABS(N, T, I, V): case number N, type suffix T, input I and, judging by the
+ * values below, expected result V.  UABS is the same wrapper over UFUN and
+ * is used here only for the NaN cases. */
+#define ABS(N, T, I, V)                FUN(N, T, abs, I, V)
+#define UABS(N, T, I, V)       UFUN(N, T, abs, I, V)
+       ABS(0, _f,      -0.0,            0.0)
+       ABS(1, _f,       0.5,            0.5)
+       ABS(2, _f,      -0.5,            0.5)
+       ABS(3, _f,      $Inf,           $Inf)
+       ABS(4, _f,      $nInf,          $Inf)
+       ABS(5, _f,       1.25,          1.25)
+       ABS(6, _f,      -1.25,          1.25)
+       /* NOTE(review): case 7 repeats the input and expected value of case 4 */
+       ABS(7, _f,      $nInf,          $Inf)
+       UABS(0, _f,     $NaN,           $NaN)
+       /* same cases again with the _d suffix */
+       ABS(0, _d,      -0.0,            0.0)
+       ABS(1, _d,       0.5,            0.5)
+       ABS(2, _d,      -0.5,            0.5)
+       ABS(3, _d,      $Inf,           $Inf)
+       ABS(4, _d,      $nInf,          $Inf)
+       ABS(5, _d,       1.25,           1.25)
+       ABS(6, _d,      -1.25,           1.25)
+       ABS(7, _d,      $nInf,          $Inf)
+       UABS(0, _d,     $NaN,           $NaN)
+
+       /* printf(ok); `ok` is not defined in this file, presumably alu.inc
+        * provides it */
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/fop_sqrt.ok b/deps/lightning/check/fop_sqrt.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/fop_sqrt.tst b/deps/lightning/check/fop_sqrt.tst
new file mode 100644 (file)
index 0000000..fa93dbc
--- /dev/null
@@ -0,0 +1,23 @@
+/* Checks sqrt with the _f and _d suffixes (presumably float and double) on
+ * negative zero, two exact squares, +Inf and NaN.  FUN and UFUN are not
+ * defined in this file; presumably they come from alu.inc. */
+#include "alu.inc"
+
+.code
+       prolog
+/* SQRT(N, T, I, V): case number N, type suffix T, input I and, judging by
+ * the values below, expected result V.  USQRT is the same wrapper over UFUN
+ * and is used here only for the NaN cases. */
+#define SQRT(N, T, I, V)       FUN(N, T, sqrt, I, V)
+#define USQRT(N, T, I, V)      UFUN(N, T, sqrt, I, V)
+        SQRT(0, _f,    -0.0,            0.0)
+        SQRT(1, _f,     4.0,            2.0)
+        SQRT(2, _f,     2.25,           1.5)
+        SQRT(3, _f,    $Inf,           $Inf)
+       USQRT(0, _f,    $NaN,           $NaN)
+       /* same cases again with the _d suffix */
+        SQRT(0, _d,    -0.0,            0.0)
+        SQRT(1, _d,     4.0,            2.0)
+        SQRT(2, _d,     2.25,           1.5)
+        SQRT(3, _d,    $Inf,           $Inf)
+       USQRT(0, _d,    $NaN,           $NaN)
+
+       /* printf(ok); `ok` is not defined in this file, presumably alu.inc
+        * provides it */
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/hton.ok b/deps/lightning/check/hton.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/hton.tst b/deps/lightning/check/hton.tst
new file mode 100644 (file)
index 0000000..fcaf147
--- /dev/null
@@ -0,0 +1,169 @@
+/* String printed at the end of the test when no check has aborted. */
+.data  16
+ok:
+.c     "ok\n"
+
+/* Input patterns: 16-bit (us), 32-bit (ui) and 64-bit (ul) values named
+ * 12, 7f, 80 and ff.  The prefixes match the htonr_us/htonr_ui/htonr_ul
+ * instruction suffixes used below. */
+#define us12_i         0x1234
+#define us7f_i         0x7ff7
+#define us80_i         0x8008
+#define usff_i         0xffff
+#define ui12_i         0x01234567
+#define ui7f_i         0x7f7ff7f7
+#define ui80_i         0x80800808
+#define uiff_i         0xffffffff
+#define ul12_i         0x0123456789abcdef
+#define ul7f_i         0x7f7f7f7ff7f7f7f7
+#define ul80_i         0x8080808008080808
+#define ulff_i         0xffffffffffffffff
+
+/* "x" inputs: the same low 16 bits (xus*) or low 32 bits (xui*) as the plain
+ * inputs, with nonzero bits set above them.  HTON3 pairs each x input with
+ * the expected output of the plain input, so the extra upper bits must not
+ * change the result.  xui* is defined only for 64-bit words. */
+#if __WORDSIZE == 32
+#  define xus12_i      0xffff1234
+#  define xus7f_i      0x10107ff7
+#  define xus80_i      0x81188008
+#  define xusff_i      0xeaaeffff
+#else
+#  define xus12_i      0xffffffffffff1234
+#  define xus7f_i      0x1010100101017ff7
+#  define xus80_i      0x8181811818818008
+#  define xusff_i      0xeaeaeaaeaeaeffff
+#  define xui12_i      0xffffffff01234567
+#  define xui7f_i      0x101001017f7ff7f7
+#  define xui80_i      0x8181181880800808
+#  define xuiff_i      0xeaeaaeaeffffffff
+#endif
+
+/* Expected outputs: the input with its bytes reversed on little-endian
+ * hosts, the unchanged input otherwise. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define us12_o       0x3412
+#  define us7f_o       0xf77f
+#  define us80_o       0x0880
+#  define usff_o       0xffff
+#  define ui12_o       0x67452301
+#  define ui7f_o       0xf7f77f7f
+#  define ui80_o       0x08088080
+#  define uiff_o       0xffffffff
+#  define ul12_o       0xefcdab8967452301
+#  define ul7f_o       0xf7f7f7f77f7f7f7f
+#  define ul80_o       0x0808080880808080
+#  define ulff_o       0xffffffffffffffff
+#else
+#  define us12_o       us12_i
+#  define us7f_o       us7f_i
+#  define us80_o       us80_i
+#  define usff_o       usff_i
+#  define ui12_o       ui12_i
+#  define ui7f_o       ui7f_i
+#  define ui80_o       ui80_i
+#  define uiff_o       uiff_i
+#  define ul12_o       ul12_i
+#  define ul7f_o       ul7f_i
+#  define ul80_o       ul80_i
+#  define ulff_o       ulff_i
+#endif
+
+/* HTON4(I, O, T, R0, R1): one check.  Loads I into R0, runs htonr_<T> from
+ * R0 into R1 and calls abort unless R1 equals O.  The label is pasted from
+ * T, R0, R1 and the name (not the value) of I, which gives every expansion
+ * in this file its own label. */
+#define HTON4(I, O, T, R0, R1)                         \
+       movi %R0 I                                      \
+       htonr_##T %R1 %R0                               \
+       beqi T##R0##R1##I %R1 O                         \
+       calli @abort                                    \
+T##R0##R1##I:
+
+/* HTON3(T, R0, R1): the four patterns of type T with source R0 and
+ * destination R1, each run with the plain input and with the "x" input;
+ * both must give the plain expected output. */
+#define HTON3(T, R0, R1)                               \
+       HTON4(T##12_i, T##12_o, T, R0, R1)              \
+       HTON4(x##T##12_i, T##12_o, T, R0, R1)           \
+       HTON4(T##7f_i, T##7f_o, T, R0, R1)              \
+       HTON4(x##T##7f_i, T##7f_o, T, R0, R1)           \
+       HTON4(T##80_i, T##80_o, T, R0, R1)              \
+       HTON4(x##T##80_i, T##80_o, T, R0, R1)           \
+       HTON4(T##ff_i, T##ff_o, T, R0, R1)              \
+       HTON4(x##T##ff_i, T##ff_o, T, R0, R1)
+
+/* HTON3x(T, R0, R1): as HTON3 but with the plain inputs only. */
+#define HTON3x(T, R0, R1)                              \
+       HTON4(T##12_i, T##12_o, T, R0, R1)              \
+       HTON4(T##7f_i, T##7f_o, T, R0, R1)              \
+       HTON4(T##80_i, T##80_o, T, R0, R1)              \
+       HTON4(T##ff_i, T##ff_o, T, R0, R1)
+
+/* HTON2(T, V0, V1, V2, R0, R1, R2): keeps the first register (V0) as the
+ * source and uses each of the six registers in turn, the source included,
+ * as the destination.  The last line ends in a continuation onto the blank
+ * line that follows. */
+#define HTON2(T, V0, V1, V2, R0, R1, R2)               \
+       HTON3(T, V0, V0)                                \
+       HTON3(T, V0, V1)                                \
+       HTON3(T, V0, V2)                                \
+       HTON3(T, V0, R0)                                \
+       HTON3(T, V0, R1)                                \
+       HTON3(T, V0, R2)                                \
+
+/* HTON2x: as HTON2 but built on HTON3x (plain inputs only). */
+#define HTON2x(T, V0, V1, V2, R0, R1, R2)              \
+       HTON3x(T, V0, V0)                               \
+       HTON3x(T, V0, V1)                               \
+       HTON3x(T, V0, V2)                               \
+       HTON3x(T, V0, R0)                               \
+       HTON3x(T, V0, R1)                               \
+       HTON3x(T, V0, R2)                               \
+
+/* HTON1(T, ...): rotates the six-register list one position per line so
+ * that each register is passed to HTON2 in the first (source) position
+ * once. */
+#define HTON1(T, V0, V1, V2, R0, R1, R2)               \
+       HTON2(T, V0, V1, V2, R0, R1, R2)                \
+       HTON2(T, V1, V2, R0, R1, R2, V0)                \
+       HTON2(T, V2, R0, R1, R2, V0, V1)                \
+       HTON2(T, R0, R1, R2, V0, V1, V2)                \
+       HTON2(T, R1, R2, V0, V1, V2, R0)                \
+       HTON2(T, R2, V0, V1, V2, R0, R1)
+
+/* HTON1x: the same rotation built on HTON2x (plain inputs only). */
+#define HTON1x(T, V0, V1, V2, R0, R1, R2)              \
+       HTON2x(T, V0, V1, V2, R0, R1, R2)               \
+       HTON2x(T, V1, V2, R0, R1, R2, V0)               \
+       HTON2x(T, V2, R0, R1, R2, V0, V1)               \
+       HTON2x(T, R0, R1, R2, V0, V1, V2)               \
+       HTON2x(T, R1, R2, V0, V1, V2, R0)               \
+       HTON2x(T, R2, V0, V1, V2, R0, R1)
+
+/* HTON(...): type coverage per word size.  us always runs with the "x"
+ * inputs; ui runs without them on 32-bit words and with them on 64-bit
+ * words; ul runs on 64-bit words only, without "x" inputs (none are defined
+ * for the widest type of either word size). */
+#if __WORDSIZE == 32
+#  define HTON(V0, V1, V2, R0, R1, R2)                 \
+       HTON1(us, V0, V1, V2, R0, R1, R2)               \
+       HTON1x(ui, V0, V1, V2, R0, R1, R2)
+#else
+#  define HTON(V0, V1, V2, R0, R1, R2)                 \
+       HTON1(us, V0, V1, V2, R0, R1, R2)               \
+       HTON1(ui, V0, V1, V2, R0, R1, R2)               \
+       HTON1x(ul, V0, V1, V2, R0, R1, R2)
+#endif
+
+.code
+       prolog
+       /* simple sequence for easier disassembly reading and encoding check */
+       /* 16-bit swap, plain input */
+       movi %r0 us12_i
+       htonr_us %r1 %r0
+       beqi us %r1 us12_o
+       calli @abort
+us:
+
+       /* 16-bit swap, input with extra upper bits set */
+       movi %r0 xus12_i
+       htonr_us %r1 %r0
+       beqi xus %r1 us12_o
+       calli @abort
+xus:
+       /* 32-bit swap, plain input */
+       movi %r0 ui12_i
+       htonr_ui %r1 %r0
+       beqi ui %r1 ui12_o
+       calli @abort
+ui:
+#if __WORDSIZE == 64
+       /* 32-bit swap, input with extra upper bits set */
+       movi %r0 xui12_i
+       htonr_ui %r1 %r0
+       beqi xui %r1 ui12_o
+       calli @abort
+xui:
+       /* 64-bit swap */
+       movi %r0 ul12_i
+       htonr_ul %r1 %r0
+       beqi ul %r1 ul12_o
+       calli @abort
+ul:
+#endif
+
+       /* every pattern over every source/destination register pair */
+       HTON(v0, v1, v2, r0, r1, r2)
+
+       // reaching this point means no check aborted: print "ok"
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/jmpr.ok b/deps/lightning/check/jmpr.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/jmpr.tst b/deps/lightning/check/jmpr.tst
new file mode 100644 (file)
index 0000000..669f54e
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+This is a very simple check to a condition that on lightning 2.0.5
+could cause an assertion on some backends, due to correcting a problem
+with temporaries that could not be saved/reloaded due to being used only
+in the hardware instruction, or being considered live for too long on the
+lightning instruction, and that could not be reloaded after the jump target
+(or after false/true target on conditional branches).
+
+If this code in lib/lightning.c:_jit_update():
+
+                   for (regno = 0; regno < _jitc->reglen; regno++) {
+                       spec = jit_class(_rvs[regno].spec);
+                       if (jit_regset_tstbit(mask, regno) &&
+                           (spec & (jit_class_gpr|jit_class_fpr)) &&
+                           !(spec & jit_class_sav))
+                           jit_regset_clrbit(mask, regno);
+                   }
+
+were removed, this test case, on x86_64 would fail like this:
+
+lt-lightning: lightning.c:305: _jit_get_reg: Assertion `regspec & 0x02000000' failed.
+Aborted (core dumped)
+ */
+
+/* ret: word-sized slot; the code below stores the address of ret_add_v1_v2
+ * in it and add_v1_v2 jumps back through it. */
+.data  32
+ret:
+#if __WORDSIZE == 32
+.i     0
+#else
+.l     0
+#endif
+ok:
+.c     "ok"
+
+.code
+       prolog
+       jmpi start
+
+/* v1 += v2, then jump to the address stored in ret */
+add_v1_v2:
+       addr %v1 %v1 %v2
+       ldi %r0 ret
+       jmpr %r0
+
+start:
+       movi %v1 1
+       movi %v2 2
+       /* store the return address, then jump to add_v1_v2 through a register */
+       movi %r0 ret_add_v1_v2
+       sti ret %r0
+       movi %v0 add_v1_v2
+       jmpr %v0
+       /* No jump in this file targets the instructions from here up to
+        * pass_check_v2, so they are not executed as written.
+        * NOTE(review): presumably they are kept so that registers are live
+        * across the jmpr, per the header comment -- confirm. */
+       movi_d %f0 3
+       beqi_d pass_movi_f0 %f0 3
+       calli @abort
+pass_movi_f0:
+       beqi pass_check_v2 %v2 2
+       calli @abort
+pass_check_v2:
+/* add_v1_v2 returns here: v1 must now be 1 + 2 */
+ret_add_v1_v2:
+       beqi pass_add_v1_v2 %v1 3
+       calli @abort
+pass_add_v1_v2:
+       /* puts(ok) */
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/ldst.inc b/deps/lightning/check/ldst.inc
new file mode 100644 (file)
index 0000000..c2bc59a
--- /dev/null
@@ -0,0 +1,102 @@
+/* Test patterns 0-3 and the values expected when they are read back.
+ *   C<n>, S<n>, I<n>, L<n>  raw 8/16/32/64-bit patterns.
+ *   L<T><n>, I<T><n>        pattern n of type T (C, S, I, L) sign-extended
+ *                           to 64 bits (L prefix) or 32 bits (I prefix);
+ *                           the SI macros of the tests in this directory
+ *                           compare ldi_<t>/ldr_<t> results against them.
+ *   XC, XS, XI              the pattern-0 expected value for the current
+ *                           word size.
+ *   F<n>, D<n>              patterns used with the _f and _d instructions. */
+#if __WORDSIZE == 64
+#  define L0           0x8000000000000001
+#  define LL0          L0
+#  define LC0          0xffffffffffffff81
+#  define LS0          0xffffffffffff8001
+#  define LI0          0xffffffff80000001
+#  define L1           0x8000000000000000
+#  define LL1          L1
+#  define LC1          0xffffffffffffff80
+#  define LS1          0xffffffffffff8000
+#  define LI1          0xffffffff80000000
+#  define L2           0x7fffffffffffffff
+#  define LL2          L2
+#  define LC2          0x000000000000007f
+#  define LS2          0x0000000000007fff
+#  define LI2          0x000000007fffffff
+#  define L3           0xffffffffffffffff
+#  define LL3          L3
+#  define LC3          0xffffffffffffffff
+#  define LS3          0xffffffffffffffff
+#  define LI3          0xffffffffffffffff
+#  define XC           LC0
+#  define XS           LS0
+#  define XI           LI0
+#else
+#  define XC           IC0
+#  define XS           IS0
+#  define XI           II0
+#endif
+#define I0             0x80000001
+#define II0            I0
+#define IC0            0xffffff81
+#define IS0            0xffff8001
+#define I1             0x80000000
+#define II1            I1
+#define IC1            0xffffff80
+#define IS1            0xffff8000
+#define I2             0x7fffffff
+#define II2            I2
+#define IC2            0x0000007f
+#define IS2            0x00007fff
+#define I3             0xffffffff
+#define II3            I3
+#define IC3            0xffffffff
+#define IS3            0xffffffff
+#define S0             0x8001
+#define S1             0x8000
+#define S2             0x7fff
+#define S3             0xffff
+#define C0             0x81
+#define C1             0x80
+#define C2             0x7f
+#define C3             0xff
+#define F0              0.25
+#define F1              0.75
+#define F2             -0.25
+#define F3             -0.75
+#define D0              0.25
+#define D1              0.75
+#define D2             -0.25
+#define D3             -0.75
+
+/* Data section: the "ok" string, then one zero-initialised scratch slot per
+ * access type.  t0 labels the same address as c0 and is the base from which
+ * the $off* offsets are measured.  ui0 and l0 exist only for 64-bit words. */
+.data  512
+ok:
+.c     "ok\n"
+.align 8
+t0:
+c0:
+.c     0
+uc0:
+.c     0
+s0:
+.s     0
+us0:
+.s     0
+.align 4
+i0:
+.i     0
+#if __WORDSIZE == 64
+ui0:
+.i     0
+.align 8
+l0:
+.l     0
+#endif
+f0:
+.f     0
+.align 8
+d0:
+.d     0
+
+/* Each line evaluates an expression that assigns the distance of a slot
+ * from t0 to a $off* variable; the ldstr tests form addresses as
+ * $(t0 + $off<type>). */
+.      $($offc  = c0  - t0)
+.      $($offuc = uc0 - t0)
+.      $($offs  = s0  - t0)
+.      $($offus = us0 - t0)
+.      $($offi  = i0  - t0)
+#if __WORDSIZE == 64
+.      $($offui = ui0 - t0)
+.      $($offl  = l0  - t0)
+#endif
+.      $($offf  = f0  - t0)
+.      $($offd  = d0  - t0)
diff --git a/deps/lightning/check/ldsti.ok b/deps/lightning/check/ldsti.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldsti.tst b/deps/lightning/check/ldsti.tst
new file mode 100644 (file)
index 0000000..362cb84
--- /dev/null
@@ -0,0 +1,146 @@
+#include "ldst.inc"
+
+/* Word-size dependent pieces.
+ *   LDSTL(N, R0)         64-bit: stores R0 to ui0 with sti_i (LDST1 invokes
+ *                        it right after loading I<N> into R0), then stores
+ *                        L<N> to l0.  Expands to nothing on 32-bit.
+ *   SI(C, N, x, X, R0)   ldi_<x> from slot <x>0 into R0, abort unless it
+ *                        equals L<X><N> (64-bit) or I<X><N> (32-bit).
+ *                        C is a suffix that makes the label unique.
+ *   LDRL(C, N, R0)       64-bit: checks the ui0 slot with UI and the l0
+ *                        slot with SI.  Expands to nothing on 32-bit. */
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0)                                         \
+       sti_i ui0 %R0                                           \
+       movi %R0 L##N                                           \
+       sti_l l0 %R0
+
+#  define SI(C, N, x, X, R0)                                   \
+       ldi_##x %R0 x##0                                        \
+       beqi L##x##C %R0 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0)                                       \
+       UI(C, N, i, I, R0)                                      \
+       SI(C, N, l, L, R0)
+#else
+#  define LDSTL(C, R0)
+#  define SI(C, N, x, X, R0)                                   \
+       ldi_##x %R0 x##0                                        \
+       beqi L##x##C %R0 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0)
+
+#endif
+
+/* UI(C, N, x, X, R0): ldi_u<x> from slot u<x>0 into R0, abort unless it
+ * equals the raw pattern X<N>.  C is a suffix that makes the label unique. */
+#define UI(C, N, x, X, R0)                                     \
+       ldi_u##x %R0 u##x##0                                    \
+       beqi Lu##x##C %R0 X##N                                  \
+       calli @abort                                            \
+Lu##x##C:
+
+/* FF(C, N, x, X, F0): ldi_<x> from slot <x>0 (f0 or d0) into F0, abort
+ * unless it equals the pattern X<N>.  C is a suffix that makes the label
+ * unique.  The abort call is required: without it the taken branch and the
+ * fall-through both reach the label and a wrong value goes unnoticed. */
+#define FF(C, N, x, X, F0)                                     \
+       ldi_##x %F0 x##0                                        \
+       beqi_##x L##x##C %F0 X##N                               \
+       calli @abort                                            \
+L##x##C:
+
+/* LDST1(X, N, R0, F0): stores pattern N to every slot through R0 (integer
+ * slots) and F0 (f0 and d0), then reads every slot back with the SI, UI,
+ * LDRL and FF checks.  X is the label suffix passed on to those checks. */
+#define LDST1(X, N, R0, F0)                                    \
+       movi %R0 C##N                                           \
+       sti_c c0 %R0                                            \
+       sti_c uc0 %R0                                           \
+       movi %R0 S##N                                           \
+       sti_s s0 %R0                                            \
+       sti_s us0 %R0                                           \
+       movi %R0 I##N                                           \
+       sti_i i0 %R0                                            \
+       LDSTL(N, R0)                                            \
+       movi_f %F0 F##N                                         \
+       sti_f f0 %F0                                            \
+       movi_d %F0 D##N                                         \
+       sti_d d0 %F0                                            \
+       SI(X, N, c, C, R0)                                      \
+       UI(X, N, c, C, R0)                                      \
+       SI(X, N, s, S, R0)                                      \
+       UI(X, N, s, S, R0)                                      \
+       SI(X, N, i, I, R0)                                      \
+       LDRL(X, N, R0)                                          \
+       FF(X, N, f, F, F0)                                      \
+       FF(X, N, d, D, F0)
+
+/* LDST0(R0, F0): runs LDST1 for patterns 0-3 with one integer/float
+ * register pair; the label suffix is built from the pattern number and both
+ * register names. */
+#define LDST0(R0, F0)                                          \
+       LDST1(0_##R0##_##F0, 0, R0, F0)                         \
+       LDST1(1_##R0##_##F0, 1, R0, F0)                         \
+       LDST1(2_##R0##_##F0, 2, R0, F0)                         \
+       LDST1(3_##R0##_##F0, 3, R0, F0)
+
+/* LDST(...): runs LDST0 once per integer register, each paired with one
+ * float register.
+ * NOTE(review): F2 is accepted but never used and F0 is used twice (with V0
+ * and with R2) -- confirm whether skipping F2 is intended. */
+#define LDST(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       LDST0(V0, F0)                                           \
+       LDST0(V1, F1)                                           \
+       LDST0(V2, F3)                                           \
+       LDST0(R0, F4)                                           \
+       LDST0(R1, F5)                                           \
+       LDST0(R2, F0)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests */
+       /* store phase: one pattern per slot */
+       movi %r1 0x81
+       sti_c c0 %r1
+       sti_c uc0 %r1
+       movi %r1 0x8001
+       sti_s s0 %r1
+       sti_s us0 %r1
+       movi %r1 0x80000001
+       sti_i i0 %r1
+#if __WORDSIZE == 64
+       sti_i ui0 %r1
+       movi %r1 0x8000000000000001
+       sti_l l0 %r1
+#endif
+       movi_f %f0 0.5
+       sti_f f0 %f0
+       movi_d %f0 0.25
+       sti_d d0 %f0
+       /* load phase: ldi_c/ldi_s/ldi_i are compared with the XC/XS/XI
+        * values, the ldi_u* loads with the raw pattern */
+       ldi_c %r1 c0
+       beqi Lc %r1 XC
+       calli @abort
+Lc:
+       ldi_uc %r1 uc0
+       beqi Luc %r1 0x81
+       calli @abort
+Luc:
+       ldi_s %r1 s0
+       beqi Ls %r1 XS
+       calli @abort
+Ls:
+       ldi_us %r1 us0
+       beqi Lus %r1 0x8001
+       calli @abort
+Lus:
+       ldi_i %r1 i0
+       beqi Li %r1 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       ldi_ui %r1 ui0
+       beqi Lui %r1 0x80000001
+       calli @abort
+Lui:
+       ldi_l %r1 l0
+       beqi Ll %r1 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+       ldi_f %f0 f0
+       beqi_f Lf %f0 0.5
+       calli @abort
+Lf:
+       ldi_d %f0 d0
+       beqi_d Ld %f0 0.25
+       calli @abort
+Ld:
+
+       /* brute force: patterns 0-3 through each integer register */
+       LDST(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       // reaching this point means no check aborted: print "ok"
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstr-c.ok b/deps/lightning/check/ldstr-c.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstr-c.tst b/deps/lightning/check/ldstr-c.tst
new file mode 100644 (file)
index 0000000..6ddc86e
--- /dev/null
@@ -0,0 +1,155 @@
+#include "ldst.inc"
+
+/* Word-size dependent pieces.  In this test every load uses the same
+ * register for the address and for the result (ldr_<x> %R0 %R0).
+ *   LDSTL(N, R0, R1)     64-bit: stores R1 to the ui slot with str_i (LDST1
+ *                        invokes it while R1 still holds I<N>), then stores
+ *                        L<N> to the l slot.  Expands to nothing on 32-bit.
+ *   SI(C, N, x, X, R0)   loads the address of slot <x> into R0, ldr_<x>
+ *                        through it, abort unless the result equals
+ *                        L<X><N> (64-bit) or I<X><N> (32-bit).
+ *   LDRL(C, N, R0, R1)   64-bit: checks the ui and l slots; R1 is unused.
+ *                        Expands to nothing on 32-bit. */
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0, R1)                                     \
+       movi %R0 $(t0 + $offui)                                 \
+       str_i %R0 %R1                                           \
+       movi %R0 $(t0 + $offl)                                  \
+       movi %R1 L##N                                           \
+       str_l %R0 %R1
+
+#  define SI(C, N, x, X, R0)                                   \
+       movi %R0 $(t0 + $off##x)                                \
+       ldr_##x %R0 %R0                                         \
+       beqi L##x##C %R0 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)                                   \
+       UI(C, N, i, I, R0)                                      \
+       SI(C, N, l, L, R0)
+#else
+#  define LDSTL(C, R0, R1)
+#  define SI(C, N, x, X, R0)                                   \
+       movi %R0 $(t0 + $off##x)                                \
+       ldr_##x %R0 %R0                                         \
+       beqi L##x##C %R0 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)
+
+#endif
+
+/* UI(C, N, x, X, R0): loads the address of slot u<x> into R0, ldr_u<x>
+ * through it into the same register, abort unless the result equals the raw
+ * pattern X<N>.  C is a suffix that makes the label unique. */
+#define UI(C, N, x, X, R0)                                     \
+       movi %R0 $(t0 + $offu##x)                               \
+       ldr_u##x %R0 %R0                                        \
+       beqi Lu##x##C %R0 X##N                                  \
+       calli @abort                                            \
+Lu##x##C:
+
+/* LDST1(X, N, R0, R1): stores pattern N to every integer slot with R0 as
+ * the address register and R1 as the value register, then reads every slot
+ * back with the SI, UI and LDRL checks.  X is the label suffix passed on to
+ * those checks.
+ * NOTE(review): the movi of $(t0 + $offf) into R0 is overwritten by the
+ * first SI before R0 is used, and this macro has no float checks.
+ * The last line ends in a continuation onto the blank line that follows. */
+#define LDST1(X, N, R0, R1)                                    \
+       movi %R0 $(t0 + $offc)                                  \
+       movi %R1 C##N                                           \
+       str_c %R0 %R1                                           \
+       movi %R0 $(t0 + $offuc)                                 \
+       str_c %R0 %R1                                           \
+       movi %R0 $(t0 + $offs)                                  \
+       movi %R1 S##N                                           \
+       str_s %R0 %R1                                           \
+       movi %R0 $(t0 + $offus)                                 \
+       str_s %R0 %R1                                           \
+       movi %R0 $(t0 + $offi)                                  \
+       movi %R1 I##N                                           \
+       str_i %R0 %R1                                           \
+       LDSTL(N, R0, R1)                                        \
+       movi %R0 $(t0 + $offf)                                  \
+       SI(X, N, c, C, R0)                                      \
+       UI(X, N, c, C, R0)                                      \
+       SI(X, N, s, S, R0)                                      \
+       UI(X, N, s, S, R0)                                      \
+       SI(X, N, i, I, R0)                                      \
+       LDRL(X, N, R0, R1)                                      \
+
+/* LDST0(R0, R1): runs LDST1 for patterns 0-3 with one address/value
+ * register pair; the label suffix is built from the pattern number and both
+ * register names. */
+#define LDST0(R0, R1)                                          \
+       LDST1(0_##R0##_##R1, 0, R0, R1)                         \
+       LDST1(1_##R0##_##R1, 1, R0, R1)                         \
+       LDST1(2_##R0##_##R1, 2, R0, R1)                         \
+       LDST1(3_##R0##_##R1, 3, R0, R1)
+
+/* LDST(...): the register pairs that are exercised.  The first (address)
+ * register is always V0, V1 or V2.
+ * NOTE(review): V2 is not paired with V0 or V1, and R0-R2 never appear as
+ * the address register -- confirm whether that is intended. */
+#define LDST(V0, V1, V2, R0, R1, R2)                           \
+       LDST0(V0, V1)                                           \
+       LDST0(V0, V2)                                           \
+       LDST0(V0, R0)                                           \
+       LDST0(V0, R1)                                           \
+       LDST0(V0, R2)                                           \
+       LDST0(V1, V0)                                           \
+       LDST0(V1, V2)                                           \
+       LDST0(V1, R0)                                           \
+       LDST0(V1, R1)                                           \
+       LDST0(V1, R2)                                           \
+       LDST0(V2, R0)                                           \
+       LDST0(V2, R1)                                           \
+       LDST0(V2, R2)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests */
+       /* store phase: r0 holds the slot address, r1 the pattern */
+       movi %r0 $(t0 + $offc)
+       movi %r1 0x81
+       str_c %r0 %r1
+       movi %r0 $(t0 + $offuc)
+       str_c %r0 %r1
+       movi %r0 $(t0 + $offs)
+       movi %r1 0x8001
+       str_s %r0 %r1
+       movi %r0 $(t0 + $offus)
+       str_s %r0 %r1
+       movi %r0 $(t0 + $offi)
+       movi %r1 0x80000001
+       str_i %r0 %r1
+#if __WORDSIZE == 64
+       movi %r0 $(t0 + $offui)
+       str_i %r0 %r1
+       movi %r0 $(t0 + $offl)
+       movi %r1 0x8000000000000001
+       str_l %r0 %r1
+#endif
+       /* load phase: each load overwrites its own address register */
+       movi %r0 $(t0 + $offc)
+       ldr_c %r0 %r0
+       beqi Lc %r0 XC
+       calli @abort
+Lc:
+       movi %r0 $(t0 + $offuc)
+       ldr_uc %r0 %r0
+       beqi Luc %r0 0x81
+       calli @abort
+Luc:
+       movi %r0 $(t0 + $offs)
+       ldr_s %r0 %r0
+       beqi Ls %r0 XS
+       calli @abort
+Ls:
+       movi %r0 $(t0 + $offus)
+       ldr_us %r0 %r0
+       beqi Lus %r0 0x8001
+       calli @abort
+Lus:
+       movi %r0 $(t0 + $offi)
+       ldr_i %r0 %r0
+       beqi Li %r0 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       movi %r0 $(t0 + $offui)
+       ldr_ui %r0 %r0
+       beqi Lui %r0 0x80000001
+       calli @abort
+Lui:
+       movi %r0 $(t0 + $offl)
+       ldr_l %r0 %r0
+       beqi Ll %r0 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+
+       /* brute force: patterns 0-3 over the register pairs listed in LDST */
+       LDST(v0, v1, v2, r0, r1, r2)
+       // reaching this point means no check aborted: print "ok"
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstr.ok b/deps/lightning/check/ldstr.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstr.tst b/deps/lightning/check/ldstr.tst
new file mode 100644 (file)
index 0000000..1ed26b1
--- /dev/null
@@ -0,0 +1,183 @@
+#include "ldst.inc"
+
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0, R1)                                     \
+       movi %R0 $(t0 + $offui)                                 \
+       str_i %R0 %R1                                           \
+       movi %R0 $(t0 + $offl)                                  \
+       movi %R1 L##N                                           \
+       str_l %R0 %R1
+
+#  define SI(C, N, x, X, R0, R1)                               \
+       movi %R0 $(t0 + $off##x)                                \
+       ldr_##x %R1 %R0                                         \
+       beqi L##x##C %R1 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)                                   \
+       UI(C, N, i, I, R0, R1)                                  \
+       SI(C, N, l, L, R0, R1)
+#else
+#  define LDSTL(C, R0, R1)
+#  define SI(C, N, x, X, R0, R1)                               \
+       movi %R0 $(t0 + $off##x)                                \
+       ldr_##x %R1 %R0                                         \
+       beqi L##x##C %R1 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)
+
+#endif
+
+#define UI(C, N, x, X, R0, R1)                                 \
+       movi %R0 $(t0 + $offu##x)                               \
+       ldr_u##x %R1 %R0                                        \
+       beqi Lu##x##C %R1 X##N                                  \
+       calli @abort                                            \
+Lu##x##C:
+
+/* FF: reload the float or double slot selected by x into F0 and compare
+ * it with the constant X##N.  A mismatch must call abort, exactly like
+ * the SI and UI integer checks; without the calli the branch and the
+ * fall-through path both reach the label and the check cannot fail. */
+#define FF(C, N, x, X, R0, F0)                                 \
+       movi %R0 $(t0 + $off##x)                                \
+       ldr_##x %F0 %R0                                         \
+       beqi_##x L##x##C %F0 X##N                               \
+       calli @abort                                            \
+L##x##C:
+
+#define LDST1(X, N, R0, R1, F0)                                        \
+       movi %R0 $(t0 + $offc)                                  \
+       movi %R1 C##N                                           \
+       str_c %R0 %R1                                           \
+       movi %R0 $(t0 + $offuc)                                 \
+       str_c %R0 %R1                                           \
+       movi %R0 $(t0 + $offs)                                  \
+       movi %R1 S##N                                           \
+       str_s %R0 %R1                                           \
+       movi %R0 $(t0 + $offus)                                 \
+       str_s %R0 %R1                                           \
+       movi %R0 $(t0 + $offi)                                  \
+       movi %R1 I##N                                           \
+       str_i %R0 %R1                                           \
+       LDSTL(N, R0, R1)                                        \
+       movi %R0 $(t0 + $offf)                                  \
+       movi_f %F0 F##N                                         \
+       str_f %R0 %F0                                           \
+       movi %R0 $(t0 + $offd)                                  \
+       movi_d %F0 D##N                                         \
+       str_d %R0 %F0                                           \
+       SI(X, N, c, C, R0, R1)                                  \
+       UI(X, N, c, C, R0, R1)                                  \
+       SI(X, N, s, S, R0, R1)                                  \
+       UI(X, N, s, S, R0, R1)                                  \
+       SI(X, N, i, I, R0, R1)                                  \
+       LDRL(X, N, R0, R1)                                      \
+       FF(X, N, f, F, R0, F0)                                  \
+       FF(X, N, d, D, R0, F0)
+
+#define LDST0(R0, R1, F0)                                      \
+       LDST1(0_##R0##_##R1##_##F0, 0, R0, R1, F0)              \
+       LDST1(1_##R0##_##R1##_##F0, 1, R0, R1, F0)              \
+       LDST1(2_##R0##_##R1##_##F0, 2, R0, R1, F0)              \
+       LDST1(3_##R0##_##R1##_##F0, 3, R0, R1, F0)
+
+#define LDST(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       LDST0(V0, V1, F0)                                       \
+       LDST0(V0, V2, F1)                                       \
+       LDST0(V0, R0, F3)                                       \
+       LDST0(V0, R1, F4)                                       \
+       LDST0(V0, R2, F5)                                       \
+       LDST0(V1, V2, F0)                                       \
+       LDST0(V1, R0, F1)                                       \
+       LDST0(V1, R1, F2)                                       \
+       LDST0(V1, R2, F3)                                       \
+       LDST0(V2, R0, F4)                                       \
+       LDST0(V2, R1, F5)                                       \
+       LDST0(V2, R2, F0)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests.
+        *
+        * Writes one known value per slot of t0 (0x81, 0x8001,
+        * 0x80000001, 0x8000000000000001 when __WORDSIZE == 64,
+        * 0.5 as float and 0.25 as double) with the str_ family,
+        * then reads every slot back through the address held in
+        * %r0 into %r1 or %f0 and calls abort when the value
+        * differs.  XC, XS and XI are not defined in this file;
+        * presumably ldst.inc provides them. */
+       movi %r0 $(t0 + $offc)
+       movi %r1 0x81
+       str_c %r0 %r1
+       movi %r0 $(t0 + $offuc)
+       str_c %r0 %r1
+       movi %r0 $(t0 + $offs)
+       movi %r1 0x8001
+       str_s %r0 %r1
+       movi %r0 $(t0 + $offus)
+       str_s %r0 %r1
+       movi %r0 $(t0 + $offi)
+       movi %r1 0x80000001
+       str_i %r0 %r1
+#if __WORDSIZE == 64
+       movi %r0 $(t0 + $offui)
+       str_i %r0 %r1
+       movi %r0 $(t0 + $offl)
+       movi %r1 0x8000000000000001
+       str_l %r0 %r1
+#endif
+       movi %r0 $(t0 + $offf)
+       movi_f %f0 0.5
+       str_f %r0 %f0
+       movi %r0 $(t0 + $offd)
+       movi_d %f0 0.25
+       str_d %r0 %f0
+       movi %r0 $(t0 + $offc)
+       ldr_c %r1 %r0
+       beqi Lc %r1 XC
+       calli @abort
+Lc:
+       movi %r0 $(t0 + $offuc)
+       ldr_uc %r1 %r0
+       beqi Luc %r1 0x81
+       calli @abort
+Luc:
+       movi %r0 $(t0 + $offs)
+       ldr_s %r1 %r0
+       beqi Ls %r1 XS
+       calli @abort
+Ls:
+       movi %r0 $(t0 + $offus)
+       ldr_us %r1 %r0
+       beqi Lus %r1 0x8001
+       calli @abort
+Lus:
+       movi %r0 $(t0 + $offi)
+       ldr_i %r1 %r0
+       beqi Li %r1 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       movi %r0 $(t0 + $offui)
+       ldr_ui %r1 %r0
+       beqi Lui %r1 0x80000001
+       calli @abort
+Lui:
+       movi %r0 $(t0 + $offl)
+       ldr_l %r1 %r0
+       beqi Ll %r1 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+       movi %r0 $(t0 + $offf)
+       ldr_f %f0 %r0
+       beqi_f Lf %f0 0.5
+       calli @abort
+Lf:
+       movi %r0 $(t0 + $offd)
+       ldr_d %f0 %r0
+       beqi_d Ld %f0 0.25
+       calli @abort
+Ld:
+
+       LDST(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       // reached only when no check aborted; report that through printf
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstxi-c.ok b/deps/lightning/check/ldstxi-c.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxi-c.tst b/deps/lightning/check/ldstxi-c.tst
new file mode 100644 (file)
index 0000000..1ad0168
--- /dev/null
@@ -0,0 +1,158 @@
+#include "ldst.inc"
+
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0, R1)                                     \
+       stxi_i $offui %R0 %R1                                   \
+       movi %R1 L##N                                           \
+       stxi_l $offl %R0 %R1
+
+#  define SI(C, N, x, X, R0)                                   \
+       ldxi_##x %R0 %R0 $off##x                                \
+       beqi L##x##C %R0 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)                                   \
+       UI(C, N, i, I, R0)                                      \
+       movi %R0 t0                                             \
+       SI(C, N, l, L, R0)
+#else
+#  define LDSTL(C, R0, R1)
+#  define SI(C, N, x, X, R0)                                   \
+       ldxi_##x %R0 %R0 $off##x                                \
+       beqi L##x##C %R0 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)
+
+#endif
+
+#define UI(C, N, x, X, R0)                                     \
+       ldxi_u##x %R0 %R0 $offu##x                              \
+       beqi Lu##x##C %R0 X##N                                  \
+       calli @abort                                            \
+Lu##x##C:
+
+/* LDST1: store C##N, S##N and I##N (plus, through LDSTL, the slots that
+ * only exist when __WORDSIZE == 64) at their offsets from t0, then verify
+ * each slot with SI, UI and LDRL.  SI and UI load into their own base
+ * register, so %R0 is reset to t0 before every following check.
+ * The last line carries no line continuation, so the definition ends
+ * here instead of swallowing the blank line that follows it. */
+#define LDST1(X, N, R0, R1)                                    \
+       movi %R0 t0                                             \
+       movi %R1 C##N                                           \
+       stxi_c $offc %R0 %R1                                    \
+       stxi_c $offuc %R0 %R1                                   \
+       movi %R1 S##N                                           \
+       stxi_s $offs %R0 %R1                                    \
+       stxi_s $offus %R0 %R1                                   \
+       movi %R1 I##N                                           \
+       stxi_i $offi %R0 %R1                                    \
+       LDSTL(N, R0, R1)                                        \
+       SI(X, N, c, C, R0)                                      \
+       movi %R0 t0                                             \
+       UI(X, N, c, C, R0)                                      \
+       movi %R0 t0                                             \
+       SI(X, N, s, S, R0)                                      \
+       movi %R0 t0                                             \
+       UI(X, N, s, S, R0)                                      \
+       movi %R0 t0                                             \
+       SI(X, N, i, I, R0)                                      \
+       movi %R0 t0                                             \
+       LDRL(X, N, R0, R1)
+
+#define LDST0(R0, R1)                                          \
+       LDST1(0_##R0##_##R1, 0, R0, R1)                         \
+       LDST1(1_##R0##_##R1, 1, R0, R1)                         \
+       LDST1(2_##R0##_##R1, 2, R0, R1)                         \
+       LDST1(3_##R0##_##R1, 3, R0, R1)
+
+#define LDST(V0, V1, V2, R0, R1, R2)                           \
+       LDST0(V0, V1)                                           \
+       LDST0(V0, V2)                                           \
+       LDST0(V0, R0)                                           \
+       LDST0(V0, R1)                                           \
+       LDST0(V0, R2)                                           \
+       LDST0(V1, V2)                                           \
+       LDST0(V1, R0)                                           \
+       LDST0(V1, R1)                                           \
+       LDST0(V1, R2)                                           \
+       LDST0(V2, R0)                                           \
+       LDST0(V2, R1)                                           \
+       LDST0(V2, R2)                                           \
+       LDST0(R0, V0)                                           \
+       LDST0(R0, V1)                                           \
+       LDST0(R0, V2)                                           \
+       LDST0(R0, R1)                                           \
+       LDST0(R0, R2)                                           \
+       LDST0(R1, V0)                                           \
+       LDST0(R1, V1)                                           \
+       LDST0(R1, V2)                                           \
+       LDST0(R1, R0)                                           \
+       LDST0(R1, R2)                                           \
+       LDST0(R2, V0)                                           \
+       LDST0(R2, V1)                                           \
+       LDST0(R2, V2)                                           \
+       LDST0(R2, R0)                                           \
+       LDST0(R2, R1)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests.
+        *
+        * With t0 in %r0 as base, stores 0x81, 0x8001 and
+        * 0x80000001 (plus 0x8000000000000001 when
+        * __WORDSIZE == 64) at the immediate $off offsets using
+        * the stxi_ family, then reloads every slot with the
+        * matching ldxi_ form and calls abort on a mismatch.
+        * Each load writes its result over the base register
+        * %r0, which is why t0 is moved into %r0 again before
+        * the next load.  XC, XS and XI are not defined in this
+        * file; presumably ldst.inc provides them. */
+       movi %r0 t0
+       movi %r1 0x81
+       stxi_c $offc %r0 %r1
+       stxi_c $offuc %r0 %r1
+       movi %r1 0x8001
+       stxi_s $offs %r0 %r1
+       stxi_s $offus %r0 %r1
+       movi %r1 0x80000001
+       stxi_i $offi %r0 %r1
+#if __WORDSIZE == 64
+       stxi_i $offui %r0 %r1
+       movi %r1 0x8000000000000001
+       stxi_l $offl %r0 %r1
+#endif
+       ldxi_c %r0 %r0 $offc
+       beqi Lc %r0 XC
+       calli @abort
+Lc:
+       movi %r0 t0
+       ldxi_uc %r0 %r0 $offuc
+       beqi Luc %r0 0x81
+       calli @abort
+Luc:
+       movi %r0 t0
+       ldxi_s %r0 %r0 $offs
+       beqi Ls %r0 XS
+       calli @abort
+Ls:
+       movi %r0 t0
+       ldxi_us %r0 %r0 $offus
+       beqi Lus %r0 0x8001
+       calli @abort
+Lus:
+       movi %r0 t0
+       ldxi_i %r0 %r0 $offi
+       beqi Li %r0 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       movi %r0 t0
+       ldxi_ui %r0 %r0 $offui
+       beqi Lui %r0 0x80000001
+       calli @abort
+Lui:
+       movi %r0 t0
+       ldxi_l %r0 %r0 $offl
+       beqi Ll %r0 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+
+       LDST(v0, v1, v2, r0, r1, r2)
+       // reached only when no check aborted; report that through printf
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstxi.ok b/deps/lightning/check/ldstxi.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxi.tst b/deps/lightning/check/ldstxi.tst
new file mode 100644 (file)
index 0000000..574521a
--- /dev/null
@@ -0,0 +1,154 @@
+#include "ldst.inc"
+
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0, R1)                                     \
+       stxi_i $offui %R0 %R1                                   \
+       movi %R1 L##N                                           \
+       stxi_l $offl %R0 %R1
+
+#  define SI(C, N, x, X, R0, R1)                               \
+       ldxi_##x %R1 %R0 $off##x                                \
+       beqi L##x##C %R1 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)                                   \
+       UI(C, N, i, I, R0, R1)                                  \
+       SI(C, N, l, L, R0, R1)
+#else
+#  define LDSTL(C, R0, R1)
+#  define SI(C, N, x, X, R0, R1)                               \
+       ldxi_##x %R1 %R0 $off##x                                \
+       beqi L##x##C %R1 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1)
+
+#endif
+
+#define UI(C, N, x, X, R0, R1)                                 \
+       ldxi_u##x %R1 %R0 $offu##x                              \
+       beqi Lu##x##C %R1 X##N                                  \
+       calli @abort                                            \
+Lu##x##C:
+
+/* FF: load the float or double slot selected by x from base R0 into F0
+ * and compare it with the constant X##N.  A mismatch must call abort,
+ * exactly like the SI and UI integer checks; without the calli the branch
+ * and the fall-through path both reach the label and the check cannot
+ * fail. */
+#define FF(C, N, x, X, R0, F0)                                 \
+       ldxi_##x %F0 %R0 $off##x                                \
+       beqi_##x L##x##C %F0 X##N                               \
+       calli @abort                                            \
+L##x##C:
+
+#define LDST1(X, N, R0, R1, F0)                                        \
+       movi %R0 t0                                             \
+       movi %R1 C##N                                           \
+       stxi_c $offc %R0 %R1                                    \
+       stxi_c $offuc %R0 %R1                                   \
+       movi %R1 S##N                                           \
+       stxi_s $offs %R0 %R1                                    \
+       stxi_s $offus %R0 %R1                                   \
+       movi %R1 I##N                                           \
+       stxi_i $offi %R0 %R1                                    \
+       LDSTL(N, R0, R1)                                        \
+       movi_f %F0 F##N                                         \
+       stxi_f $offf %R0 %F0                                    \
+       movi_d %F0 D##N                                         \
+       stxi_d $offd %R0 %F0                                    \
+       SI(X, N, c, C, R0, R1)                                  \
+       UI(X, N, c, C, R0, R1)                                  \
+       SI(X, N, s, S, R0, R1)                                  \
+       UI(X, N, s, S, R0, R1)                                  \
+       SI(X, N, i, I, R0, R1)                                  \
+       LDRL(X, N, R0, R1)                                      \
+       FF(X, N, f, F, R0, F0)                                  \
+       FF(X, N, d, D, R0, F0)
+
+#define LDST0(R0, R1, F0)                                      \
+       LDST1(0_##R0##_##R1##_##F0, 0, R0, R1, F0)              \
+       LDST1(1_##R0##_##R1##_##F0, 1, R0, R1, F0)              \
+       LDST1(2_##R0##_##R1##_##F0, 2, R0, R1, F0)              \
+       LDST1(3_##R0##_##R1##_##F0, 3, R0, R1, F0)
+
+#define LDST(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5)   \
+       LDST0(V0, V1, F0)                                       \
+       LDST0(V0, V2, F1)                                       \
+       LDST0(V0, R0, F3)                                       \
+       LDST0(V0, R1, F4)                                       \
+       LDST0(V0, R2, F5)                                       \
+       LDST0(V1, V2, F0)                                       \
+       LDST0(V1, R0, F1)                                       \
+       LDST0(V1, R1, F2)                                       \
+       LDST0(V1, R2, F3)                                       \
+       LDST0(V2, R0, F4)                                       \
+       LDST0(V2, R1, F5)                                       \
+       LDST0(V2, R2, F0)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests.
+        *
+        * With t0 in %r0 as base, stores 0x81, 0x8001,
+        * 0x80000001 (plus 0x8000000000000001 when
+        * __WORDSIZE == 64), 0.5 as float and 0.25 as double at
+        * the immediate $off offsets using the stxi_ family.
+        * Every slot is then reloaded into %r1 or %f0 with the
+        * matching ldxi_ form, leaving the base in %r0 intact,
+        * and abort is called on a mismatch.  XC, XS and XI are
+        * not defined in this file; presumably ldst.inc provides
+        * them. */
+       movi %r0 t0
+       movi %r1 0x81
+       stxi_c $offc %r0 %r1
+       stxi_c $offuc %r0 %r1
+       movi %r1 0x8001
+       stxi_s $offs %r0 %r1
+       stxi_s $offus %r0 %r1
+       movi %r1 0x80000001
+       stxi_i $offi %r0 %r1
+#if __WORDSIZE == 64
+       stxi_i $offui %r0 %r1
+       movi %r1 0x8000000000000001
+       stxi_l $offl %r0 %r1
+#endif
+       movi_f %f0 0.5
+       stxi_f $offf %r0 %f0
+       movi_d %f0 0.25
+       stxi_d $offd %r0 %f0
+       ldxi_c %r1 %r0 $offc
+       beqi Lc %r1 XC
+       calli @abort
+Lc:
+       ldxi_uc %r1 %r0 $offuc
+       beqi Luc %r1 0x81
+       calli @abort
+Luc:
+       ldxi_s %r1 %r0 $offs
+       beqi Ls %r1 XS
+       calli @abort
+Ls:
+       ldxi_us %r1 %r0 $offus
+       beqi Lus %r1 0x8001
+       calli @abort
+Lus:
+       ldxi_i %r1 %r0 $offi
+       beqi Li %r1 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       ldxi_ui %r1 %r0 $offui
+       beqi Lui %r1 0x80000001
+       calli @abort
+Lui:
+       ldxi_l %r1 %r0 $offl
+       beqi Ll %r1 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+       ldxi_f %f0 %r0 $offf
+       beqi_f Lf %f0 0.5
+       calli @abort
+Lf:
+       ldxi_d %f0 %r0 $offd
+       beqi_d Ld %f0 0.25
+       calli @abort
+Ld:
+
+       LDST(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       // reached only when no check aborted; report that through printf
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstxr-c.ok b/deps/lightning/check/ldstxr-c.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxr-c.tst b/deps/lightning/check/ldstxr-c.tst
new file mode 100644 (file)
index 0000000..cd770a6
--- /dev/null
@@ -0,0 +1,219 @@
+#include "ldst.inc"
+
+#if __WORDSIZE == 64
+#  define LDSTL(N, R0, R1, R2)                                 \
+       movi %R2 $offui                                         \
+       stxr_i %R2 %R0 %R1                                      \
+       movi %R1 L##N                                           \
+       movi %R2 $offl                                          \
+       stxr_l %R2 %R0 %R1
+
+#  define SI(C, N, x, X, R0, R1)                               \
+       movi %R1 $off##x                                        \
+       ldxr_##x %R1 %R0 %R1                                    \
+       beqi L##x##C##0 %R1 L##X##N                             \
+       calli @abort                                            \
+L##x##C##0:                                                    \
+       movi %R1 $off##x                                        \
+       ldxr_##x %R0 %R0 %R1                                    \
+       beqi L##x##C##1 %R0 L##X##N                             \
+       calli @abort                                            \
+L##x##C##1:
+
+#  define LDRL(C, N, R0, R1, R2)                               \
+       UI(C, N, i, I, R0, R1)                                  \
+       movi %R0 t0                                             \
+       SI(C, N, l, L, R0, R1)
+#else
+#  define LDSTL(C, R0, R1, R2)
+#  define SI(C, N, x, X, R0, R1)                               \
+       movi %R1 $off##x                                        \
+       ldxr_##x %R1 %R0 %R1                                    \
+       beqi L##x##C##0 %R1 I##X##N                             \
+       calli @abort                                            \
+L##x##C##0:                                                    \
+       movi %R1 $off##x                                        \
+       ldxr_##x %R0 %R0 %R1                                    \
+       beqi L##x##C##1 %R0 I##X##N                             \
+       calli @abort                                            \
+L##x##C##1:
+
+#  define LDRL(C, N, R0, R1, R2)
+
+#endif
+
+#define UI(C, N, x, X, R0, R1)                                 \
+       movi %R1 $offu##x                                       \
+       ldxr_u##x %R1 %R0 %R1                                   \
+       beqi Lu##x##C##0 %R1 X##N                               \
+       calli @abort                                            \
+Lu##x##C##0:                                                   \
+       movi %R1 $offu##x                                       \
+       ldxr_u##x %R0 %R0 %R1                                   \
+       beqi Lu##x##C##1 %R0 X##N                               \
+       calli @abort                                            \
+Lu##x##C##1:
+
+/* LDST1: with t0 in %R0 and the slot offset in %R2, store C##N, S##N and
+ * I##N (plus, through LDSTL, the slots that only exist when
+ * __WORDSIZE == 64), then verify each slot with SI, UI and LDRL.  SI and
+ * UI finish by loading into %R0, so %R0 is reset to t0 before every
+ * following check.
+ * The last line carries no line continuation, so the definition ends
+ * here instead of swallowing the blank line that follows it. */
+#define LDST1(X, N, R0, R1, R2)                                        \
+       movi %R0 t0                                             \
+       movi %R1 C##N                                           \
+       movi %R2 $offc                                          \
+       stxr_c %R2 %R0 %R1                                      \
+       movi %R2 $offuc                                         \
+       stxr_c %R2 %R0 %R1                                      \
+       movi %R1 S##N                                           \
+       movi %R2 $offs                                          \
+       stxr_s %R2 %R0 %R1                                      \
+       movi %R2 $offus                                         \
+       stxr_s %R2 %R0 %R1                                      \
+       movi %R1 I##N                                           \
+       movi %R2 $offi                                          \
+       stxr_i %R2 %R0 %R1                                      \
+       LDSTL(N, R0, R1, R2)                                    \
+       SI(X, N, c, C, R0, R1)                                  \
+       movi %R0 t0                                             \
+       UI(X, N, c, C, R0, R1)                                  \
+       movi %R0 t0                                             \
+       SI(X, N, s, S, R0, R1)                                  \
+       movi %R0 t0                                             \
+       UI(X, N, s, S, R0, R1)                                  \
+       movi %R0 t0                                             \
+       SI(X, N, i, I, R0, R1)                                  \
+       movi %R0 t0                                             \
+       LDRL(X, N, R0, R1, R2)
+
+#define LDST0(R0, R1, R2)                                      \
+       LDST1(0_##R0##_##R1##_##R2, 0, R0, R1, R2)              \
+       LDST1(1_##R0##_##R1##_##R2, 1, R0, R1, R2)              \
+       LDST1(2_##R0##_##R1##_##R2, 2, R0, R1, R2)              \
+       LDST1(3_##R0##_##R1##_##R2, 3, R0, R1, R2)
+
+#define LDST(V0, V1, V2, R0, R1, R2)                           \
+       LDST0(V1, V2, V0)                                       \
+       LDST0(V1, R0, V0)                                       \
+       LDST0(V1, R1, V0)                                       \
+       LDST0(V1, R2, V0)                                       \
+       LDST0(V0, R0, V1)                                       \
+       LDST0(V0, R1, V1)                                       \
+       LDST0(V0, R2, V1)                                       \
+       LDST0(V0, V2, V1)                                       \
+       LDST0(V2, V0, V1)                                       \
+       LDST0(V2, R0, V1)                                       \
+       LDST0(V2, R1, V1)                                       \
+       LDST0(V2, R2, V1)                                       \
+       LDST0(R0, R1, V2)                                       \
+       LDST0(R0, R2, V2)
+
+.code
+       prolog
+
+       /* Simple test to simplify validating encodings before
+        * brute force tests.
+        *
+        * With t0 in %r0 and the slot offset in %r2, stores 0x81,
+        * 0x8001 and 0x80000001 (plus 0x8000000000000001 when
+        * __WORDSIZE == 64) using the stxr_ family.  Every slot
+        * is then loaded twice with the matching ldxr_ form:
+        * first into %r1, the register that holds the offset,
+        * and then into %r0, the register that holds the base.
+        * Both results are compared and abort is called on a
+        * mismatch.  Because the second load overwrites %r0, t0
+        * is moved into %r0 again before the next slot.  XC, XS
+        * and XI are not defined in this file; presumably
+        * ldst.inc provides them. */
+       movi %r0 t0
+       movi %r1 0x81
+       movi %r2 $offc
+       stxr_c %r2 %r0 %r1
+       movi %r2 $offuc
+       stxr_c %r2 %r0 %r1
+       movi %r1 0x8001
+       movi %r2 $offs
+       stxr_s %r2 %r0 %r1
+       movi %r2 $offus
+       stxr_s %r2 %r0 %r1
+       movi %r1 0x80000001
+       movi %r2 $offi
+       stxr_i %r2 %r0 %r1
+#if __WORDSIZE == 64
+       movi %r2 $offui
+       stxr_i %r2 %r0 %r1
+       movi %r1 0x8000000000000001
+       movi %r2 $offl
+       stxr_l %r2 %r0 %r1
+#endif
+       movi %r1 $offc
+       ldxr_c %r1 %r0 %r1
+       beqi Lc0 %r1 XC
+       calli @abort
+Lc0:
+       movi %r1 $offc
+       ldxr_c %r0 %r0 %r1
+       beqi Lc1 %r0 XC
+       calli @abort
+Lc1:
+       movi %r0 t0
+       movi %r1 $offuc
+       ldxr_uc %r1 %r0 %r1
+       beqi Luc0 %r1 0x81
+       calli @abort
+Luc0:
+       movi %r1 $offuc
+       ldxr_uc %r0 %r0 %r1
+       beqi Luc1 %r0 0x81
+       calli @abort
+Luc1:
+       movi %r0 t0
+       movi %r1 $offs
+       ldxr_s %r1 %r0 %r1
+       beqi Ls0 %r1 XS
+       calli @abort
+Ls0:
+       movi %r1 $offs
+       ldxr_s %r0 %r0 %r1
+       beqi Ls1 %r0 XS
+       calli @abort
+Ls1:
+       movi %r0 t0
+       movi %r1 $offus
+       ldxr_us %r1 %r0 %r1
+       beqi Lus0 %r1 0x8001
+       calli @abort
+Lus0:
+       movi %r1 $offus
+       ldxr_us %r0 %r0 %r1
+       beqi Lus1 %r0 0x8001
+       calli @abort
+Lus1:
+       movi %r0 t0
+       movi %r1 $offi
+       ldxr_i %r1 %r0 %r1
+       beqi Li0 %r1 XI
+       calli @abort
+Li0:
+       movi %r1 $offi
+       ldxr_i %r0 %r0 %r1
+       beqi Li1 %r0 XI
+       calli @abort
+Li1:
+#if __WORDSIZE == 64
+       movi %r0 t0
+       movi %r1 $offui
+       ldxr_ui %r1 %r0 %r1
+       beqi Lui0 %r1 0x80000001
+       calli @abort
+Lui0:
+       movi %r1 $offui
+       ldxr_ui %r0 %r0 %r1
+       beqi Lui1 %r0 0x80000001
+       calli @abort
+Lui1:
+       movi %r0 t0
+       movi %r1 $offl
+       ldxr_l %r1 %r0 %r1
+       beqi Ll0 %r1 0x8000000000000001
+       calli @abort
+Ll0:
+       movi %r1 $offl
+       ldxr_l %r0 %r0 %r1
+       beqi Ll1 %r0 0x8000000000000001
+       calli @abort
+Ll1:
+#endif
+
+       LDST(v0, v1, v2, r0, r1, r2)
+       // reached only when no check aborted; report that through printf
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstxr.ok b/deps/lightning/check/ldstxr.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxr.tst b/deps/lightning/check/ldstxr.tst
new file mode 100644 (file)
index 0000000..14620dc
--- /dev/null
@@ -0,0 +1,209 @@
+#include "ldst.inc"
+
+#if __WORDSIZE == 64 /* 64-bit words: also cover the $offui and $offl slots */
+#  define LDSTL(N, R0, R1, R2) /* store the value already in R1 at $offui, then L##N at $offl */ \
+       movi %R2 $offui                                         \
+       stxr_i %R2 %R0 %R1                                      \
+       movi %R1 L##N                                           \
+       movi %R2 $offl                                          \
+       stxr_l %R2 %R0 %R1
+
+#  define SI(C, N, x, X, R0, R1, R2) /* ldxr_<x> at $off<x>; abort unless the result equals L<X><N> */ \
+       movi %R2 $off##x                                        \
+       ldxr_##x %R1 %R0 %R2                                    \
+       beqi L##x##C %R1 L##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1, R2) /* reload checks for the two 64-bit-only slots (ui and l) */ \
+       UI(C, N, i, I, R0, R1, R2)                              \
+       SI(C, N, l, L, R0, R1, R2)
+#else /* other word sizes: LDSTL and LDRL expand to nothing */
+#  define LDSTL(C, R0, R1, R2)
+#  define SI(C, N, x, X, R0, R1, R2) /* same check as the 64-bit SI, but compares against I<X><N> */ \
+       movi %R2 $off##x                                        \
+       ldxr_##x %R1 %R0 %R2                                    \
+       beqi L##x##C %R1 I##X##N                                \
+       calli @abort                                            \
+L##x##C:
+
+#  define LDRL(C, N, R0, R1, R2)
+
+#endif
+
+#define UI(C, N, x, X, R0, R1, R2) /* ldxr_u<x> at $offu<x>; abort unless the result equals <X><N> */ \
+       movi %R2 $offu##x                                       \
+       ldxr_u##x %R1 %R0 %R2                                   \
+       beqi Lu##x##C %R1 X##N                                  \
+       calli @abort                                            \
+Lu##x##C: /* label made unique by the C suffix */
+
+#define FF(C, N, x, X, R0, R1, F0)                             \
+       movi %R1 $off##x                                        \
+       ldxr_##x %F0 %R0 %R1                                    \
+       beqi_##x L##x##C %F0 X##N                               \
+L##x##C:
+
+#define LDST1(X, N, R0, R1, R2, F0) /* one round: store the <T>##N test values with stxr_*, then reload and verify each slot; X is the label suffix */ \
+       movi %R0 t0                                             \
+       movi %R1 C##N                                           \
+       movi %R2 $offc                                          \
+       stxr_c %R2 %R0 %R1                                      \
+       movi %R2 $offuc                                         \
+       stxr_c %R2 %R0 %R1                                      \
+       movi %R1 S##N                                           \
+       movi %R2 $offs                                          \
+       stxr_s %R2 %R0 %R1                                      \
+       movi %R2 $offus                                         \
+       stxr_s %R2 %R0 %R1                                      \
+       movi %R1 I##N                                           \
+       movi %R2 $offi                                          \
+       stxr_i %R2 %R0 %R1                                      \
+       LDSTL(N, R0, R1, R2) /* relies on R1 still holding I##N */ \
+       movi_f %F0 F##N                                         \
+       movi %R2 $offf                                          \
+       stxr_f %R2 %R0 %F0                                      \
+       movi_d %F0 D##N                                         \
+       movi %R2 $offd                                          \
+       stxr_d %R2 %R0 %F0                                      \
+       SI(X, N, c, C, R0, R1, R2)                              \
+       UI(X, N, c, C, R0, R1, R2)                              \
+       SI(X, N, s, S, R0, R1, R2)                              \
+       UI(X, N, s, S, R0, R1, R2)                              \
+       SI(X, N, i, I, R0, R1, R2)                              \
+       LDRL(X, N, R0, R1, R2)                                  \
+       FF(X, N, f, F, R0, R1, F0)                              \
+       FF(X, N, d, D, R0, R1, F0)
+
+#define LDST0(R0, R1, R2, F0) /* rounds 0..3 for one register assignment; the register names are pasted into the label suffix */ \
+       LDST1(0_##R0##_##R1##_##R2##_##F0, 0, R0, R1, R2, F0)   \
+       LDST1(1_##R0##_##R1##_##R2##_##F0, 1, R0, R1, R2, F0)   \
+       LDST1(2_##R0##_##R1##_##R2##_##F0, 2, R0, R1, R2, F0)   \
+       LDST1(3_##R0##_##R1##_##R2##_##F0, 3, R0, R1, R2, F0)
+
+#define LDST(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) /* brute force: 36 LDST0 expansions over different register assignments */ \
+       LDST0(V0, V1, R0, F0)                                   \
+       LDST0(V0, V1, R1, F1)                                   \
+       LDST0(V0, V1, R2, F2)                                   \
+       LDST0(V0, V2, R0, F3)                                   \
+       LDST0(V0, V2, R1, F4)                                   \
+       LDST0(V0, V2, R2, F5)                                   \
+       LDST0(V0, R0, V1, F0)                                   \
+       LDST0(V0, R0, V2, F1)                                   \
+       LDST0(V0, R0, R1, F2)                                   \
+       LDST0(V0, R0, R2, F3)                                   \
+       LDST0(V0, R0, V1, F4)                                   \
+       LDST0(V0, R1, V1, F5)                                   \
+       LDST0(V0, R1, V2, F0)                                   \
+       LDST0(V0, R1, R0, F1)                                   \
+       LDST0(V0, R1, R2, F2)                                   \
+       LDST0(V0, V1, V2, F3)                                   \
+       LDST0(V0, R1, R0, F4)                                   \
+       LDST0(V0, R1, R2, F5)                                   \
+       LDST0(R0, V1, V0, F0)                                   \
+       LDST0(R0, V1, R1, F1)                                   \
+       LDST0(R0, V1, R2, F2)                                   \
+       LDST0(R0, V2, V0, F3)                                   \
+       LDST0(R0, V2, R1, F4)                                   \
+       LDST0(R0, V2, R2, F5)                                   \
+       LDST0(R0, V0, V1, F0)                                   \
+       LDST0(R0, V0, V2, F1)                                   \
+       LDST0(R0, V0, R1, F2)                                   \
+       LDST0(R0, V0, R2, F3)                                   \
+       LDST0(R0, V0, V1, F4)                                   \
+       LDST0(R0, R1, V1, F5)                                   \
+       LDST0(R0, R1, V2, F0)                                   \
+       LDST0(R0, R1, V0, F1)                                   \
+       LDST0(R0, R1, R2, F2)                                   \
+       LDST0(R0, V1, V2, F3)                                   \
+       LDST0(R0, R1, V0, F4)                                   \
+       LDST0(R0, R1, R2, F5)
+
+.code
+       prolog
+
+       /* Hand-written single pass over every stxr and ldxr variant, so an
+        * encoding problem is easy to locate before the brute force tests */
+       movi %r0 t0
+       movi %r1 0x81
+       movi %r2 $offc
+       stxr_c %r2 %r0 %r1
+       movi %r2 $offuc
+       stxr_c %r2 %r0 %r1
+       movi %r1 0x8001
+       movi %r2 $offs
+       stxr_s %r2 %r0 %r1
+       movi %r2 $offus
+       stxr_s %r2 %r0 %r1
+       movi %r1 0x80000001
+       movi %r2 $offi
+       stxr_i %r2 %r0 %r1
+#if __WORDSIZE == 64
+       movi %r2 $offui
+       stxr_i %r2 %r0 %r1
+       movi %r1 0x8000000000000001
+       movi %r2 $offl
+       stxr_l %r2 %r0 %r1
+#endif
+       movi_f %f0 0.5
+       movi %r2 $offf
+       stxr_f %r2 %r0 %f0
+       movi_d %f0 0.25
+       movi %r2 $offd
+       stxr_d %r2 %r0 %f0
+       movi %r2 $offc /* stores done; from here each slot is reloaded and compared */
+       ldxr_c %r1 %r0 %r2
+       beqi Lc %r1 XC
+       calli @abort
+Lc:
+       movi %r2 $offuc
+       ldxr_uc %r1 %r0 %r2
+       beqi Luc %r1 0x81
+       calli @abort
+Luc:
+       movi %r2 $offs
+       ldxr_s %r1 %r0 %r2
+       beqi Ls %r1 XS
+       calli @abort
+Ls:
+       movi %r2 $offus
+       ldxr_us %r1 %r0 %r2
+       beqi Lus %r1 0x8001
+       calli @abort
+Lus:
+       movi %r2 $offi
+       ldxr_i %r1 %r0 %r2
+       beqi Li %r1 XI
+       calli @abort
+Li:
+#if __WORDSIZE == 64
+       movi %r2 $offui
+       ldxr_ui %r1 %r0 %r2
+       beqi Lui %r1 0x80000001
+       calli @abort
+Lui:
+       movi %r2 $offl
+       ldxr_l %r1 %r0 %r2
+       beqi Ll %r1 0x8000000000000001
+       calli @abort
+Ll:
+#endif
+       movi %r2 $offf
+       ldxr_f %f0 %r0 %r2
+       beqi_f Lf %f0 0.5
+       calli @abort
+Lf:
+       movi %r2 $offd
+       ldxr_d %f0 %r0 %r2
+       beqi_d Ld %f0 0.25
+       calli @abort
+Ld:
+
+       LDST(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5)
+       // reaching this point means none of the checks above called abort
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c
new file mode 100644 (file)
index 0000000..e60ef05
--- /dev/null
@@ -0,0 +1,4329 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if HAVE_GETOPT_H
+#  include <getopt.h>
+#else
+#  include <unistd.h>
+#endif
+#include <stdio.h>
+#include <stdarg.h>
+#include <lightning.h>
+#include <dlfcn.h>
+
+#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
+#  include <fpu_control.h>
+#endif
+
+/* The label_t identifier clashes with system definitions on these platforms */
+#if defined(_AIX) || defined(__sun__) || defined(__osf__)
+#  define label_t              l_label_t
+#endif
+
+#if defined(__hpux) /* DL_HANDLE: per-platform handle value; presumably passed to dlsym() - confirm at the use site */
+#  define DL_HANDLE            RTLD_NEXT
+#elif defined(__sgi)
+static void                    *DL_HANDLE; /* on __sgi this is a file-scope variable, not a macro */
+#elif defined(__osf__)
+#  define DL_HANDLE            NULL
+#else
+#  define DL_HANDLE            RTLD_DEFAULT
+#endif
+
+#if defined(__GNUC__) /* GCC-style attributes; all three expand to nothing on other compilers */
+#  define noreturn             __attribute__ ((noreturn))
+#  define printf_format(f, v)  __attribute__ ((format (printf, f, v))) /* f: format-string argument index, v: first variadic argument index */
+#  define maybe_unused         __attribute__ ((unused))
+#else
+#  define noreturn             /**/
+#  define printf_format(f, v)  /**/
+#  define maybe_unused         /**/
+#endif
+
+#define check_data(length)                                             \
+    do {                                                               \
+       if (data_offset + length > data_length)                         \
+           error(".data too small (%ld < %ld)",                        \
+                 data_length, data_offset + length);                   \
+    } while (0)
+
+#define get_label_by_name(name)        ((label_t *)get_hash(labels, name)) /* looks `name` up in the labels hash and casts the get_hash() result to label_t * */
+
+#define PARSING_NONE           0 /* PARSING_*: presumably the values stored in parser.parsing - confirm at use sites */
+#define PARSING_DATA           1
+#define PARSING_CODE           2
+#define MAX_IDENTIFIER         256 /* presumably the identifier buffer size - confirm at use sites */
+
+/*
+ * Types
+ */
+typedef struct instr             instr_t; /* forward typedefs: the aggregates below refer to each other through these names */
+typedef union value              value_t;
+typedef struct parser            parser_t;
+typedef struct label             label_t;
+typedef struct patch             patch_t;
+typedef struct symbol            symbol_t;
+typedef struct hash              hash_t;
+typedef struct entry             entry_t;
+typedef int                    (*function_t)(int argc, char *argv[]); /* pointer to a main()-style function taking (argc, argv) and returning int */
+
+typedef enum { /* token kinds; tok_eof is -1, the remaining enumerators count up from 0 */
+    tok_eof = -1,
+    tok_symbol,
+    tok_char,
+    tok_int,
+    tok_float,
+    tok_pointer,
+    tok_string,
+    tok_register,
+    tok_dot,
+    tok_newline,
+    tok_semicollon, /* presumably ';'; the identifier's spelling is kept as-is */
+} token_t;
+
+typedef enum { /* skip mode taken by get_int(), get_uint(), get_float(), get_pointer(), get_label() and primary() */
+    skip_none,
+    skip_ws, /* presumably: skip whitespace first - confirm in the getters */
+    skip_nl, /* presumably: skip newlines as well - confirm in the getters */
+} skip_t;
+
+typedef enum { /* value type tags; the suffixes presumably mirror the _c/_s/_i/_l/_f/_d instruction suffixes, with _p for pointer */
+    type_none,
+    type_c,
+    type_s,
+    type_i,
+    type_l,
+    type_f,
+    type_d,
+    type_p,
+} type_t;
+
+#define compose(a, b)          (((a) << 8) | b) /* packs a into the bits above the low byte and ORs in b */
+typedef enum { /* expression operator codes: a single character is its own value, longer operators are compose()d */
+    expr_inc    = compose('+', '+'),
+    expr_dec    = compose('-', '-'),
+    expr_not    = '!',
+    expr_com    = '~',
+    expr_mul    = '*',
+    expr_div    = '/',
+    expr_rem    = '%',
+    expr_add    = '+',
+    expr_sub    = '-',
+    expr_lsh    = compose('<', '<'),
+    expr_rsh    = compose('>', '>'),
+    expr_and    = '&',
+    expr_or     = '|',
+    expr_xor    = '^',
+    expr_set    = '=',
+    expr_mulset         = compose('*', '='),
+    expr_divset         = compose('/', '='),
+    expr_remset         = compose('%', '='),
+    expr_addset         = compose('+', '='),
+    expr_subset         = compose('-', '='),
+    expr_lshset         = compose(expr_lsh, '='), /* three characters: the two-character code shifted up once more */
+    expr_rshset         = compose(expr_rsh, '='),
+    expr_andset         = compose('&', '='),
+    expr_orset  = compose('|', '='),
+    expr_xorset         = compose('^', '='),
+    expr_lt     = '<',
+    expr_le     = compose('<', '='),
+    expr_eq     = compose('=', '='),
+    expr_ne     = compose('!', '='),
+    expr_gt     = '>',
+    expr_ge     = compose('>', '='),
+    expr_andand         = compose('&', '&'),
+    expr_oror   = compose('|', '|'),
+    expr_lparen         = '(',
+    expr_rparen         = ')',
+    expr_int    = '0', /* the remaining codes are not operators; presumably operand kinds - confirm in the expression parser */
+    expr_float  = '.',
+    expr_pointer = '@',
+    expr_symbol  = '$',
+} expr_t;
+#undef compose /* compose() is only needed for the initializers above */
+
+struct instr { /* one instruction-table record; instr_vector initializes next, name and function through entry()/entry2() */
+    instr_t             *next;
+    const char          *name;
+    void               (*function)(void);
+    int                          flag; /* not set by the entry()/entry2() initializers, so it starts at zero */
+};
+
+union value { /* one storage slot viewed as integer, floating point, pointer, label or patch */
+    jit_word_t          i;
+    jit_uword_t                 ui;
+    float               f;
+    double              d;
+    void               *p;
+    char               *cp;
+    label_t            *label;
+    patch_t            *patch;
+};
+
+struct parser { /* parser state; the file keeps one static instance named `parser` */
+    FILE               *fp;
+    char                name[256];
+    int                         line;
+    int                         regval;
+    type_t              regtype;
+    expr_t              expr;
+    type_t              type;
+    value_t             value;
+
+    /* variable length string buffer */
+    char               *string;
+    int                         length;
+    int                         offset;
+
+    int                         newline;
+    expr_t              putback;
+    int                         short_circuit;
+    int                         parsing; /* presumably one of the PARSING_* constants - confirm at use sites */
+
+    struct { /* fixed 4096-byte buffer with its own offset and length */
+       unsigned char    buffer[4096];
+       int              offset;
+       int              length;
+    } data;
+};
+
+typedef enum { /* label kinds, stored in label.kind and passed to new_label() */
+    label_kind_data,
+    label_kind_code,
+    label_kind_code_forward,
+    label_kind_dynamic,
+} label_kind_t;
+
+struct hash { /* hash table header used by new_hash(), put_hash(), get_hash() and rehash() */
+    entry_t            **entries;
+    int                          size; /* presumably the number of slots in entries - confirm in new_hash()/rehash() */
+    int                          count; /* presumably the number of stored entries - confirm in put_hash() */
+};
+
+struct entry { /* hash element; struct instr and struct label open with the same next/name/value-or-function sequence */
+    entry_t             *next;
+    char                *name;
+    void                *value;
+    int                          flag;
+};
+
+struct label { /* named label; get_label_by_name() casts the get_hash() result to this type */
+    label_t            *next;
+    char               *name;
+    void               *value;
+    label_kind_t        kind;
+};
+
+typedef enum { /* patch kinds; the names line up with jmp_forward(), mov_forward() and call_forward() */
+    patch_kind_jmp,
+    patch_kind_mov,
+    patch_kind_call,
+} patch_kind_t;
+
+struct patch { /* list node created by new_patch(); the file-scope `patches` pointer has this type */
+    patch_t            *next;
+    label_t            *label;
+    void               *value;
+    patch_kind_t        kind;
+};
+
+/* minor support for expressions */
+struct symbol { /* named, typed value used by the expression support */
+    symbol_t           *next;
+    char               *name;
+    value_t             value;
+    type_t              type; /* type tag for `value` */
+};
+
+/*
+ * Prototypes
+ */
+static jit_gpr_t get_ireg(void);
+static jit_fpr_t get_freg(void);
+static symbol_t *get_symbol(void);
+static void jmp_forward(void *value, label_t *label);
+static void mov_forward(void *value, label_t *label);
+static void call_forward(void *value, label_t *label);
+static void make_arg(void *value);
+static jit_pointer_t get_arg(void);
+static jit_word_t get_imm(void);
+static void live(void);
+static void align(void);       static void name(void);
+static void prolog(void);
+static void frame(void);       static void tramp(void);
+static void ellipsis(void);
+static void allocai(void);     static void allocar(void);
+static void arg(void);
+static void getarg_c(void);    static void getarg_uc(void);
+static void getarg_s(void);    static void getarg_us(void);
+static void getarg_i(void);
+#if __WORDSIZE == 64
+static void getarg_ui(void);   static void getarg_l(void);
+#endif
+static void getarg(void);
+static void putargr(void);     static void putargi(void);
+static void addr(void);                static void addi(void);
+static void addxr(void);       static void addxi(void);
+static void addcr(void);       static void addci(void);
+static void subr(void);                static void subi(void);
+static void subxr(void);       static void subxi(void);
+static void subcr(void);       static void subci(void);
+static void rsbr(void);                static void rsbi(void);
+static void mulr(void);                static void muli(void);
+static void qmulr(void);       static void qmuli(void);
+static void qmulr_u(void);     static void qmuli_u(void);
+static void divr(void);                static void divi(void);
+static void divr_u(void);      static void divi_u(void);
+static void qdivr(void);       static void qdivi(void);
+static void qdivr_u(void);     static void qdivi_u(void);
+static void remr(void);                static void remi(void);
+static void remr_u(void);      static void remi_u(void);
+static void andr(void);                static void andi(void);
+static void orr(void);         static void ori(void);
+static void xorr(void);                static void xori(void);
+static void lshr(void);                static void lshi(void);
+static void rshr(void);                static void rshi(void);
+static void rshr_u(void);      static void rshi_u(void);
+static void negr(void);                static void comr(void);
+static void ltr(void);         static void lti(void);
+static void ltr_u(void);       static void lti_u(void);
+static void ler(void);         static void lei(void);
+static void ler_u(void);       static void lei_u(void);
+static void eqr(void);         static void eqi(void);
+static void ger(void);         static void gei(void);
+static void ger_u(void);       static void gei_u(void);
+static void gtr(void);         static void gti(void);
+static void gtr_u(void);       static void gti_u(void);
+static void ner(void);         static void nei(void);
+static void movr(void);                static void movi(void);
+static void extr_c(void);      static void extr_uc(void);
+static void extr_s(void);      static void extr_us(void);
+#if __WORDSIZE == 64
+static void extr_i(void);      static void extr_ui(void);
+#endif
+static void htonr_us(void);    static void ntohr_us(void);
+static void htonr_ui(void);    static void ntohr_ui(void);
+#if __WORDSIZE == 64
+static void htonr_ul(void);    static void ntohr_ul(void);
+#endif
+static void htonr(void);       static void ntohr(void);
+static void ldr_c(void);       static void ldi_c(void);
+static void ldr_uc(void);      static void ldi_uc(void);
+static void ldr_s(void);       static void ldi_s(void);
+static void ldr_us(void);      static void ldi_us(void);
+static void ldr_i(void);       static void ldi_i(void);
+#if __WORDSIZE == 64
+static void ldr_ui(void);      static void ldi_ui(void);
+static void ldr_l(void);       static void ldi_l(void);
+#endif
+static void ldr(void);         static void ldi(void);
+static void ldxr_c(void);      static void ldxi_c(void);
+static void ldxr_uc(void);     static void ldxi_uc(void);
+static void ldxr_s(void);      static void ldxi_s(void);
+static void ldxr_us(void);     static void ldxi_us(void);
+static void ldxr_i(void);      static void ldxi_i(void);
+#if __WORDSIZE == 64
+static void ldxr_ui(void);     static void ldxi_ui(void);
+static void ldxr_l(void);      static void ldxi_l(void);
+#endif
+static void ldxr(void);                static void ldxi(void);
+static void str_c(void);       static void sti_c(void);
+static void str_s(void);       static void sti_s(void);
+static void str_i(void);       static void sti_i(void);
+#if __WORDSIZE == 64
+static void str_l(void);       static void sti_l(void);
+#endif
+static void str(void);         static void sti(void);
+static void stxr_c(void);      static void stxi_c(void);
+static void stxr_s(void);      static void stxi_s(void);
+static void stxr_i(void);      static void stxi_i(void);
+#if __WORDSIZE == 64
+static void stxr_l(void);      static void stxi_l(void);
+#endif
+static void stxr(void);                static void stxi(void);
+static void bltr(void);                static void blti(void);
+static void bltr_u(void);      static void blti_u(void);
+static void bler(void);                static void blei(void);
+static void bler_u(void);      static void blei_u(void);
+static void beqr(void);                static void beqi(void);
+static void bger(void);                static void bgei(void);
+static void bger_u(void);      static void bgei_u(void);
+static void bgtr(void);                static void bgti(void);
+static void bgtr_u(void);      static void bgti_u(void);
+static void bner(void);                static void bnei(void);
+static void bmsr(void);                static void bmsi(void);
+static void bmcr(void);                static void bmci(void);
+static void boaddr(void);      static void boaddi(void);
+static void boaddr_u(void);    static void boaddi_u(void);
+static void bxaddr(void);      static void bxaddi(void);
+static void bxaddr_u(void);    static void bxaddi_u(void);
+static void bosubr(void);      static void bosubi(void);
+static void bosubr_u(void);    static void bosubi_u(void);
+static void bxsubr(void);      static void bxsubi(void);
+static void bxsubr_u(void);    static void bxsubi_u(void);
+static void jmpr(void);                static void jmpi(void);
+static void callr(void);       static void calli(void);
+static void prepare(void);
+static void pushargr(void);    static void pushargi(void);
+static void finishr(void);     static void finishi(void);
+static void ret(void);
+static void retr(void);                static void reti(void);
+static void retval_c(void);    static void retval_uc(void);
+static void retval_s(void);    static void retval_us(void);
+static void retval_i(void);
+#if __WORDSIZE == 64
+static void retval_ui(void);   static void retval_l(void);
+#endif
+static void retval(void);
+static void epilog(void);
+static void arg_f(void);       static void getarg_f(void);
+static void putargr_f(void);   static void putargi_f(void);
+static void addr_f(void);      static void addi_f(void);
+static void subr_f(void);      static void subi_f(void);
+static void rsbr_f(void);      static void rsbi_f(void);
+static void mulr_f(void);      static void muli_f(void);
+static void divr_f(void);      static void divi_f(void);
+static void negr_f(void);      static void absr_f(void);
+static void sqrtr_f(void);
+static void ltr_f(void);       static void lti_f(void);
+static void ler_f(void);       static void lei_f(void);
+static void eqr_f(void);       static void eqi_f(void);
+static void ger_f(void);       static void gei_f(void);
+static void gtr_f(void);       static void gti_f(void);
+static void ner_f(void);       static void nei_f(void);
+static void unltr_f(void);     static void unlti_f(void);
+static void unler_f(void);     static void unlei_f(void);
+static void uneqr_f(void);     static void uneqi_f(void);
+static void unger_f(void);     static void ungei_f(void);
+static void ungtr_f(void);     static void ungti_f(void);
+static void ltgtr_f(void);     static void ltgti_f(void);
+static void ordr_f(void);      static void ordi_f(void);
+static void unordr_f(void);    static void unordi_f(void);
+static void truncr_f_i(void);
+#if __WORDSIZE == 64
+static void truncr_f_l(void);
+#endif
+static void truncr_f(void);
+static void extr_f(void);      static void extr_d_f(void);
+static void movr_f(void);      static void movi_f(void);
+static void ldr_f(void);       static void ldi_f(void);
+static void ldxr_f(void);      static void ldxi_f(void);
+static void str_f(void);       static void sti_f(void);
+static void stxr_f(void);      static void stxi_f(void);
+static void bltr_f(void);      static void blti_f(void);
+static void bler_f(void);      static void blei_f(void);
+static void beqr_f(void);      static void beqi_f(void);
+static void bger_f(void);      static void bgei_f(void);
+static void bgtr_f(void);      static void bgti_f(void);
+static void bner_f(void);      static void bnei_f(void);
+static void bunltr_f(void);    static void bunlti_f(void);
+static void bunler_f(void);    static void bunlei_f(void);
+static void buneqr_f(void);    static void buneqi_f(void);
+static void bunger_f(void);    static void bungei_f(void);
+static void bungtr_f(void);    static void bungti_f(void);
+static void bltgtr_f(void);    static void bltgti_f(void);
+static void bordr_f(void);     static void bordi_f(void);
+static void bunordr_f(void);   static void bunordi_f(void);
+static void pushargr_f(void);  static void pushargi_f(void);
+static void retr_f(void);      static void reti_f(void);
+static void retval_f(void);
+static void arg_d(void);       static void getarg_d(void);
+static void putargr_d(void);   static void putargi_d(void);
+static void addr_d(void);      static void addi_d(void);
+static void subr_d(void);      static void subi_d(void);
+static void rsbr_d(void);      static void rsbi_d(void);
+static void mulr_d(void);      static void muli_d(void);
+static void divr_d(void);      static void divi_d(void);
+static void negr_d(void);      static void absr_d(void);
+static void sqrtr_d(void);
+static void ltr_d(void);       static void lti_d(void);
+static void ler_d(void);       static void lei_d(void);
+static void eqr_d(void);       static void eqi_d(void);
+static void ger_d(void);       static void gei_d(void);
+static void gtr_d(void);       static void gti_d(void);
+static void ner_d(void);       static void nei_d(void);
+static void unltr_d(void);     static void unlti_d(void);
+static void unler_d(void);     static void unlei_d(void);
+static void uneqr_d(void);     static void uneqi_d(void);
+static void unger_d(void);     static void ungei_d(void);
+static void ungtr_d(void);     static void ungti_d(void);
+static void ltgtr_d(void);     static void ltgti_d(void);
+static void ordr_d(void);      static void ordi_d(void);
+static void unordr_d(void);    static void unordi_d(void);
+static void truncr_d_i(void);
+#if __WORDSIZE == 64
+static void truncr_d_l(void);
+#endif
+static void truncr_d(void);
+static void extr_d(void);      static void extr_f_d(void);
+static void movr_d(void);      static void movi_d(void);
+static void ldr_d(void);       static void ldi_d(void);
+static void ldxr_d(void);      static void ldxi_d(void);
+static void str_d(void);       static void sti_d(void);
+static void stxr_d(void);      static void stxi_d(void);
+static void bltr_d(void);      static void blti_d(void);
+static void bler_d(void);      static void blei_d(void);
+static void beqr_d(void);      static void beqi_d(void);
+static void bger_d(void);      static void bgei_d(void);
+static void bgtr_d(void);      static void bgti_d(void);
+static void bner_d(void);      static void bnei_d(void);
+static void bunltr_d(void);    static void bunlti_d(void);
+static void bunler_d(void);    static void bunlei_d(void);
+static void buneqr_d(void);    static void buneqi_d(void);
+static void bunger_d(void);    static void bungei_d(void);
+static void bungtr_d(void);    static void bungti_d(void);
+static void bltgtr_d(void);    static void bltgti_d(void);
+static void bordr_d(void);     static void bordi_d(void);
+static void bunordr_d(void);   static void bunordi_d(void);
+static void pushargr_d(void);  static void pushargi_d(void);
+static void retr_d(void);      static void reti_d(void);
+static void retval_d(void);
+static void vastart(void);     static void vapush(void);
+static void vaarg(void);       static void vaarg_d(void);
+static void vaend(void);
+
+static void error(const char *format, ...) noreturn printf_format(1, 2);
+static void warn(const char *format, ...) printf_format(1, 2) maybe_unused;
+static void message(const char *kind, const char *format, va_list ap);
+
+static int getch(void);
+static int getch_noeof(void);
+static int ungetch(int ch);
+static int skipws(void);
+static int skipnl(void);
+static int skipct(void);
+static int skipcp(void);
+static jit_word_t get_int(skip_t skip);
+static jit_uword_t get_uint(skip_t skip);
+static double get_float(skip_t skip);
+static float make_float(double d);
+static void *get_pointer(skip_t skip);
+static label_t *get_label(skip_t skip);
+static token_t regname(void);
+static token_t identifier(int ch);
+static void get_data(type_t type);
+static void dot(void);
+static token_t number(int ch);
+static int escape(int ch);
+static token_t string(void);
+static token_t dynamic(void);
+static token_t character(void);
+static void expression_prim(void);
+static void expression_inc(int pre);
+static void expression_dec(int pre);
+static void expression_unary(void);
+static void expression_mul(void);
+static void expression_add(void);
+static void expression_shift(void);
+static void expression_bit(void);
+static void expression_rel(void);
+static void expression_cond(void);
+static token_t expression(void);
+static token_t primary(skip_t skip);
+static void parse(void);
+static int execute(int argc, char *argv[]);
+
+static void *xmalloc(size_t size);
+static void *xrealloc(void *pointer, size_t size);
+static void *xcalloc(size_t nmemb, size_t size);
+
+static label_t *new_label(label_kind_t kind, char *name, void *value);
+static patch_t *new_patch(patch_kind_t kind, label_t *label, void *value);
+static int bcmp_symbols(const void *left, const void *right);
+static int qcmp_symbols(const void *left, const void *right);
+static symbol_t *new_symbol(char *name);
+static symbol_t *get_symbol_by_name(char *name);
+
+static hash_t *new_hash(void);
+static int hash_string(char *name);
+static void put_hash(hash_t *hash, entry_t *entry);
+static entry_t *get_hash(hash_t *hash, char *name);
+static void rehash(hash_t *hash);
+
+/*
+ * Initialization
+ */
+static jit_state_t      *_jit;
+static int               flag_verbose;
+static int               flag_data;
+static int               flag_disasm;
+static char             *progname;
+static parser_t                  parser; /* the single parser state instance */
+static hash_t           *labels; /* the hash searched by get_label_by_name() */
+static int               label_offset;
+static patch_t          *patches;
+static symbol_t                **symbols;
+static int               symbol_length;
+static int               symbol_offset;
+static hash_t           *instrs;
+static char             *data;
+static size_t            data_offset, data_length; /* the pair compared by check_data() */
+static instr_t           instr_vector[] = {    /* Table of known instructions.
+     * Every element is initialized as { NULL, "<text>", <identifier> }:
+     *   entry(x)         - the text is the stringified identifier x and the
+     *                      third member is x itself;
+     *   entry2(name, fn) - the text is given explicitly; used for the va_*
+     *                      names, whose identifiers are spelled without '_'.
+     * Elements wrapped in "#if __WORDSIZE == 64" are compiled in only when
+     * __WORDSIZE is 64.
+     * NOTE(review): entry is undefined again after the table, entry2 is not.
+     */
+#define entry(value)   { NULL, #value, value }
+#define entry2(name, function) { NULL, name, function }
+    entry(live),
+    entry(align),      entry(name),
+    entry(prolog),
+    entry(frame),      entry(tramp),
+    entry(ellipsis),
+    entry(allocai),    entry(allocar),
+    entry(arg),
+    entry(getarg_c),   entry(getarg_uc),
+    entry(getarg_s),   entry(getarg_us),
+    entry(getarg_i),
+#if __WORDSIZE == 64
+    entry(getarg_ui),  entry(getarg_l),
+#endif
+    entry(getarg),
+    entry(putargr),    entry(putargi),
+    entry(addr),       entry(addi),
+    entry(addxr),      entry(addxi),
+    entry(addcr),      entry(addci),
+    entry(subr),       entry(subi),
+    entry(subxr),      entry(subxi),
+    entry(subcr),      entry(subci),
+    entry(rsbr),       entry(rsbi),
+    entry(mulr),       entry(muli),
+    entry(qmulr),      entry(qmuli),
+    entry(qmulr_u),    entry(qmuli_u),
+    entry(divr),       entry(divi),
+    entry(divr_u),     entry(divi_u),
+    entry(qdivr),      entry(qdivi),
+    entry(qdivr_u),    entry(qdivi_u),
+    entry(remr),       entry(remi),
+    entry(remr_u),     entry(remi_u),
+    entry(andr),       entry(andi),
+    entry(orr),                entry(ori),
+    entry(xorr),       entry(xori),
+    entry(lshr),       entry(lshi),
+    entry(rshr),       entry(rshi),
+    entry(rshr_u),     entry(rshi_u),
+    entry(negr),       entry(comr),
+    entry(ltr),                entry(lti),
+    entry(ltr_u),      entry(lti_u),
+    entry(ler),                entry(lei),
+    entry(ler_u),      entry(lei_u),
+    entry(eqr),                entry(eqi),
+    entry(ger),                entry(gei),
+    entry(ger_u),      entry(gei_u),
+    entry(gtr),                entry(gti),
+    entry(gtr_u),      entry(gti_u),
+    entry(ner),                entry(nei),
+    entry(movr),       entry(movi),
+    entry(extr_c),     entry(extr_uc),
+    entry(extr_s),     entry(extr_us),
+#if __WORDSIZE == 64
+    entry(extr_i),     entry(extr_ui),
+#endif
+    entry(htonr_us),   entry(ntohr_us),
+    entry(htonr_ui),   entry(ntohr_ui),
+#if __WORDSIZE == 64
+    entry(htonr_ul),   entry(ntohr_ul),
+#endif
+    entry(htonr),      entry(ntohr),
+    entry(ldr_c),      entry(ldi_c),
+    entry(ldr_uc),     entry(ldi_uc),
+    entry(ldr_s),      entry(ldi_s),
+    entry(ldr_us),     entry(ldi_us),
+    entry(ldr_i),      entry(ldi_i),
+#if __WORDSIZE == 64
+    entry(ldr_ui),     entry(ldi_ui),
+    entry(ldr_l),      entry(ldi_l),
+#endif
+    entry(ldr),                entry(ldi),
+    entry(ldxr_c),     entry(ldxi_c),
+    entry(ldxr_uc),    entry(ldxi_uc),
+    entry(ldxr_s),     entry(ldxi_s),
+    entry(ldxr_us),    entry(ldxi_us),
+    entry(ldxr_i),     entry(ldxi_i),
+#if __WORDSIZE == 64
+    entry(ldxr_ui),    entry(ldxi_ui),
+    entry(ldxr_l),     entry(ldxi_l),
+#endif
+    entry(ldxr),       entry(ldxi),
+    entry(str_c),      entry(sti_c),
+    entry(str_s),      entry(sti_s),
+    entry(str_i),      entry(sti_i),
+#if __WORDSIZE == 64
+    entry(str_l),      entry(sti_l),
+#endif
+    entry(str),                entry(sti),
+    entry(stxr_c),     entry(stxi_c),
+    entry(stxr_s),     entry(stxi_s),
+    entry(stxr_i),     entry(stxi_i),
+#if __WORDSIZE == 64
+    entry(stxr_l),     entry(stxi_l),
+#endif
+    entry(stxr),       entry(stxi),
+    entry(bltr),       entry(blti),
+    entry(bltr_u),     entry(blti_u),
+    entry(bler),       entry(blei),
+    entry(bler_u),     entry(blei_u),
+    entry(beqr),       entry(beqi),
+    entry(bger),       entry(bgei),
+    entry(bger_u),     entry(bgei_u),
+    entry(bgtr),       entry(bgti),
+    entry(bgtr_u),     entry(bgti_u),
+    entry(bner),       entry(bnei),
+    entry(bmsr),       entry(bmsi),
+    entry(bmcr),       entry(bmci),
+    entry(boaddr),     entry(boaddi),
+    entry(boaddr_u),   entry(boaddi_u),
+    entry(bxaddr),     entry(bxaddi),
+    entry(bxaddr_u),   entry(bxaddi_u),
+    entry(bosubr),     entry(bosubi),
+    entry(bosubr_u),   entry(bosubi_u),
+    entry(bxsubr),     entry(bxsubi),
+    entry(bxsubr_u),   entry(bxsubi_u),
+    entry(jmpr),       entry(jmpi),
+    entry(callr),      entry(calli),
+    entry(prepare),
+    entry(pushargr),   entry(pushargi),
+    entry(finishr),    entry(finishi),
+    entry(ret),
+    entry(retr),       entry(reti),
+    entry(retval_c),   entry(retval_uc),
+    entry(retval_s),   entry(retval_us),
+    entry(retval_i),
+#if __WORDSIZE == 64
+    entry(retval_ui),  entry(retval_l),
+#endif
+    entry(retval),
+    entry(epilog),
+    entry(arg_f),      entry(getarg_f),
+    entry(putargr_f),  entry(putargi_f),
+    entry(addr_f),     entry(addi_f),
+    entry(subr_f),     entry(subi_f),
+    entry(rsbr_f),     entry(rsbi_f),
+    entry(mulr_f),     entry(muli_f),
+    entry(divr_f),     entry(divi_f),
+    entry(negr_f),     entry(absr_f),
+    entry(sqrtr_f),
+    entry(ltr_f),      entry(lti_f),
+    entry(ler_f),      entry(lei_f),
+    entry(eqr_f),      entry(eqi_f),
+    entry(ger_f),      entry(gei_f),
+    entry(gtr_f),      entry(gti_f),
+    entry(ner_f),      entry(nei_f),
+    entry(unltr_f),    entry(unlti_f),
+    entry(unler_f),    entry(unlei_f),
+    entry(uneqr_f),    entry(uneqi_f),
+    entry(unger_f),    entry(ungei_f),
+    entry(ungtr_f),    entry(ungti_f),
+    entry(ltgtr_f),    entry(ltgti_f),
+    entry(ordr_f),     entry(ordi_f),
+    entry(unordr_f),   entry(unordi_f),
+    entry(truncr_f_i),
+#if __WORDSIZE == 64
+    entry(truncr_f_l),
+#endif
+    entry(truncr_f),
+    entry(extr_f),     entry(extr_d_f),
+    entry(movr_f),     entry(movi_f),
+    entry(ldr_f),      entry(ldi_f),
+    entry(ldxr_f),     entry(ldxi_f),
+    entry(str_f),      entry(sti_f),
+    entry(stxr_f),     entry(stxi_f),
+    entry(bltr_f),     entry(blti_f),
+    entry(bler_f),     entry(blei_f),
+    entry(beqr_f),     entry(beqi_f),
+    entry(bger_f),     entry(bgei_f),
+    entry(bgtr_f),     entry(bgti_f),
+    entry(bner_f),     entry(bnei_f),
+    entry(bunltr_f),   entry(bunlti_f),
+    entry(bunler_f),   entry(bunlei_f),
+    entry(buneqr_f),   entry(buneqi_f),
+    entry(bunger_f),   entry(bungei_f),
+    entry(bungtr_f),   entry(bungti_f),
+    entry(bltgtr_f),   entry(bltgti_f),
+    entry(bordr_f),    entry(bordi_f),
+    entry(bunordr_f),  entry(bunordi_f),
+    entry(pushargr_f), entry(pushargi_f),
+    entry(retr_f),     entry(reti_f),
+    entry(retval_f),
+    entry(arg_d),      entry(getarg_d),
+    entry(putargr_d),  entry(putargi_d),
+    entry(addr_d),     entry(addi_d),
+    entry(subr_d),     entry(subi_d),
+    entry(rsbr_d),     entry(rsbi_d),
+    entry(mulr_d),     entry(muli_d),
+    entry(divr_d),     entry(divi_d),
+    entry(negr_d),     entry(absr_d),
+    entry(sqrtr_d),
+    entry(ltr_d),      entry(lti_d),
+    entry(ler_d),      entry(lei_d),
+    entry(eqr_d),      entry(eqi_d),
+    entry(ger_d),      entry(gei_d),
+    entry(gtr_d),      entry(gti_d),
+    entry(ner_d),      entry(nei_d),
+    entry(unltr_d),    entry(unlti_d),
+    entry(unler_d),    entry(unlei_d),
+    entry(uneqr_d),    entry(uneqi_d),
+    entry(unger_d),    entry(ungei_d),
+    entry(ungtr_d),    entry(ungti_d),
+    entry(ltgtr_d),    entry(ltgti_d),
+    entry(ordr_d),     entry(ordi_d),
+    entry(unordr_d),   entry(unordi_d),
+    entry(truncr_d_i),
+#if __WORDSIZE == 64
+    entry(truncr_d_l),
+#endif
+    entry(truncr_d),
+    entry(extr_d),     entry(extr_f_d),
+    entry(movr_d),     entry(movi_d),
+    entry(ldr_d),      entry(ldi_d),
+    entry(ldxr_d),     entry(ldxi_d),
+    entry(str_d),      entry(sti_d),
+    entry(stxr_d),     entry(stxi_d),
+    entry(bltr_d),     entry(blti_d),
+    entry(bler_d),     entry(blei_d),
+    entry(beqr_d),     entry(beqi_d),
+    entry(bger_d),     entry(bgei_d),
+    entry(bgtr_d),     entry(bgti_d),
+    entry(bner_d),     entry(bnei_d),
+    entry(bunltr_d),   entry(bunlti_d),
+    entry(bunler_d),   entry(bunlei_d),
+    entry(buneqr_d),   entry(buneqi_d),
+    entry(bunger_d),   entry(bungei_d),
+    entry(bungtr_d),   entry(bungti_d),
+    entry(bltgtr_d),   entry(bltgti_d),
+    entry(bordr_d),    entry(bordi_d),
+    entry(bunordr_d),  entry(bunordi_d),
+    entry(pushargr_d), entry(pushargi_d),
+    entry(retr_d),     entry(reti_d),
+    entry(retval_d),
+    entry2("va_start", vastart),
+    entry2("va_push", vapush),
+    entry2("va_arg", vaarg),
+    entry2("va_arg_d", vaarg_d),
+    entry2("va_end", vaend),
+#undef entry
+};
+
+/*
+ * Implementation
+ */
+/*
+ * Read the next operand with primary(skip_ws) and return it as an integer
+ * register.  Calls error("bad register") when the token is not
+ * tok_register, and error("bad int register") when parser.regtype is not
+ * type_l.
+ */
+static jit_gpr_t
+get_ireg(void)
+{
+    if (primary(skip_ws) != tok_register) {
+        error("bad register");
+    }
+    if (parser.regtype != type_l) {
+        error("bad int register");
+    }
+
+    return (jit_gpr_t)parser.regval;
+}
+
+/*
+ * Read the next operand with primary(skip_ws) and return it as a float
+ * register.  Calls error("bad register") when the token is not
+ * tok_register, and error("bad float register") when parser.regtype is not
+ * type_d.
+ */
+static jit_fpr_t
+get_freg(void)
+{
+    if (primary(skip_ws) != tok_register) {
+        error("bad register");
+    }
+    if (parser.regtype != type_d) {
+        error("bad float register");
+    }
+
+    return (jit_fpr_t)parser.regval;
+}
+
+/*
+ * Read a '$'-prefixed variable name and return its symbol, creating the
+ * symbol with new_symbol() when get_symbol_by_name() does not know it yet.
+ * Calls error("expecting variable") when the character returned by skipws()
+ * is not '$' or when nothing follows the '$' in parser.string.
+ */
+static symbol_t *
+get_symbol(void)
+{
+    symbol_t *found;
+
+    if (skipws() != '$') {
+        error("expecting variable");
+    }
+    (void)identifier('$');
+    if (parser.string[1] == '\0') {
+        error("expecting variable");
+    }
+
+    found = get_symbol_by_name(parser.string);
+    if (found == NULL) {
+        found = new_symbol(parser.string);
+    }
+
+    return found;
+}
+
+/* Record a patch of kind patch_kind_jmp for `value` against `label`. */
+static void
+jmp_forward(void *value, label_t *label)
+{
+    /* The patch_t returned by new_patch() is not needed here. */
+    new_patch(patch_kind_jmp, label, value);
+}
+
+/* Record a patch of kind patch_kind_mov for `value` against `label`. */
+static void
+mov_forward(void *value, label_t *label)
+{
+    /* The patch_t returned by new_patch() is not needed here. */
+    new_patch(patch_kind_mov, label, value);
+}
+
+/* Record a patch of kind patch_kind_call for `value` against `label`. */
+static void
+call_forward(void *value, label_t *label)
+{
+    /* The patch_t returned by new_patch() is not needed here. */
+    new_patch(patch_kind_call, label, value);
+}
+
+/*
+ * Read a '$' variable with get_symbol() and bind it to `value`: the symbol
+ * type becomes type_p and its pointer slot receives `value`.
+ */
+static void
+make_arg(void *value)
+{
+    symbol_t *sym;
+
+    sym = get_symbol();
+    sym->type = type_p;
+    sym->value.p = value;
+}
+
+/*
+ * Read a '$' variable with get_symbol() and return the pointer stored in
+ * it.  Calls error() when the symbol type is not type_p.
+ */
+static jit_pointer_t
+get_arg(void)
+{
+    symbol_t *sym;
+
+    sym = get_symbol();
+    if (sym->type != type_p) {
+        error("bad argument %s type", sym->name);
+    }
+
+    return sym->value.p;
+}
+
+/*
+ * Parse an immediate operand and return it as a jit_word_t.  The character
+ * returned by skipws() selects the form:
+ *   '+', '-', '0'..'9' : pushed back, then read with get_int(skip_none)
+ *   '\''               : character(), result taken from parser.value.i
+ *   '$'                : expression(), which must yield tok_int or
+ *                        tok_pointer; result taken from parser.value.i
+ *   '@'                : dynamic(), result taken from parser.value.p
+ *   anything else      : pushed back and read as a label, which must be of
+ *                        kind label_kind_data; result is the label value
+ * Unsupported forms are reported with error("expecting immediate").
+ */
+static jit_word_t
+get_imm(void)
+{
+    jit_word_t   result;
+    label_t     *target;
+    int          first = skipws();
+
+    switch (first) {
+        case '+': case '-': case '0' ... '9':
+            ungetch(first);
+            result = get_int(skip_none);
+            break;
+        case '\'':
+            character();
+            result = parser.value.i;
+            break;
+        case '$':
+            switch (expression()) {
+                case tok_int:
+                case tok_pointer:
+                    result = parser.value.i;
+                    break;
+                default:
+                    error("expecting immediate");
+            }
+            break;
+        case '@':
+            dynamic();
+            result = (jit_word_t)parser.value.p;
+            break;
+        default:
+            ungetch(first);
+            target = get_label(skip_none);
+            if (target->kind != label_kind_data)
+                error("expecting immediate");
+            else
+                result = (jit_word_t)target->value;
+            break;
+    }
+
+    return result;
+}
+
+#define entry(name)                                                    \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_##name();                                                      \
+}
+#define entry_ca(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    make_arg(jit_##name());                                            \
+}
+#define entry_ia(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t          r0 = get_ireg();                                \
+    jit_pointer_t      ac = get_arg();                                 \
+    jit_##name(r0, ac);                                                        \
+}
+#define entry_im(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_word_t  im = get_imm();                                        \
+    jit_##name(im);                                                    \
+}
+#define entry_ir(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t   r0 = get_ireg();                                       \
+    jit_##name(r0);                                                    \
+}
+#define entry_ima(name)                                                        \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_word_t         im = get_imm();                                 \
+    jit_pointer_t      ac = get_arg();                                 \
+    jit_##name(im, ac);                                                        \
+}
+#define entry_ir_ir_ir(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg(), r2 = get_ireg();      \
+    jit_##name(r0, r1, r2);                                            \
+}
+#define entry_ir_ir_im(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg();                       \
+    jit_word_t im = get_imm();                                         \
+    jit_##name(r0, r1, im);                                            \
+}
+#define entry_ir_ir_ir_ir(name)                                                \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg(),                       \
+               r2 = get_ireg(), r3 = get_ireg();                       \
+    jit_##name(r0, r1, r2, r3);                                                \
+}
+#define entry_ir_ir_ir_im(name)                                                \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg(), r2 = get_ireg();      \
+    jit_word_t im = get_imm();                                         \
+    jit_##name(r0, r1, r2, im);                                                \
+}
+#define entry_ir_ir(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg();                       \
+    jit_##name(r0, r1);                                                        \
+}
+#define entry_ir_im(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg();                                        \
+    jit_word_t im = get_imm();                                         \
+    jit_##name(r0, im);                                                        \
+}
+#define entry_ir_pm(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t   r0 = get_ireg();                                       \
+    void       *pm = get_pointer(skip_ws);                             \
+    jit_##name(r0, pm);                                                        \
+}
+#define entry_pm_ir(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    void       *pm = get_pointer(skip_ws);                             \
+    jit_gpr_t   r0 = get_ireg();                                       \
+    jit_##name(pm, r0);                                                        \
+}
+#define entry_im_ir_ir(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_word_t im = get_imm();                                         \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg();                       \
+    (void)jit_##name(im, r0, r1);                                      \
+}
+#define entry_lb_ir_ir(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t *jmp;                                                   \
+    label_t    *label = get_label(skip_ws);                            \
+    jit_gpr_t   r0 = get_ireg(), r1 = get_ireg();                      \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(r0, r1), label);                 \
+    else {                                                             \
+       jmp = jit_##name(r0, r1);                                       \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_lb_ir_im(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t *jmp;                                                   \
+    label_t    *label = get_label(skip_ws);                            \
+    jit_gpr_t   r0 = get_ireg();                                       \
+    jit_word_t  im = get_imm();                                        \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(r0, im), label);                 \
+    else {                                                             \
+       jmp = jit_##name(r0, im);                                       \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_lb(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t *jmp;                                                   \
+    label_t    *label = get_label(skip_ws);                            \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(), label);                       \
+    else {                                                             \
+       jmp = jit_##name();                                             \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_pm(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    void       *pm = get_pointer(skip_ws);                             \
+    jit_##name(pm);                                                    \
+}
+#define entry_fa(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t          r0 = get_freg();                                \
+    jit_pointer_t      ac = get_arg();                                 \
+    jit_##name(r0, ac);                                                        \
+}
+#define entry_fma(name)                                                        \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_pointer_t      ac = get_arg();                                 \
+    jit_##name(im, ac);                                                        \
+}
+#define entry_fr_fr_fr(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg(), r1 = get_freg(), r2 = get_freg();      \
+    jit_##name(r0, r1, r2);                                            \
+}
+#define entry_fr_fr_fm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t          r0 = get_freg(), r1 = get_freg();               \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, r1, make_float(im));                                        \
+}
+#define entry_fr_fr_dm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t          r0 = get_freg(), r1 = get_freg();               \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, r1, im);                                            \
+}
+#define entry_fr_fr(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg(), r1 = get_freg();                       \
+    jit_##name(r0, r1);                                                        \
+}
+#define entry_ir_fr_fr(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg();                                        \
+    jit_fpr_t  r1 = get_freg(), r2 = get_freg();                       \
+    jit_##name(r0, r1, r2);                                            \
+}
+#define entry_ir_fr_fm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t          r0 = get_ireg();                                \
+    jit_fpr_t          r1 = get_freg();                                \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, r1, make_float(im));                                        \
+}
+#define entry_ir_fr_dm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t          r0 = get_ireg();                                \
+    jit_fpr_t          r1 = get_freg();                                \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, r1, im);                                            \
+}
+#define entry_ir_fr(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg();                                        \
+    jit_fpr_t  r1 = get_freg();                                        \
+    jit_##name(r0, r1);                                                        \
+}
+#define entry_fr_ir(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg();                                        \
+    jit_gpr_t  r1 = get_ireg();                                        \
+    jit_##name(r0, r1);                                                        \
+}
+#define entry_fr_fm(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t          r0 = get_freg();                                \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, make_float(im));                                    \
+}
+#define entry_fr_dm(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t          r0 = get_freg();                                \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(r0, im);                                                        \
+}
+#define entry_fr_pm(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t   r0 = get_freg();                                       \
+    void       *pm = get_pointer(skip_ws);                             \
+    jit_##name(r0, pm);                                                        \
+}
+#define entry_fr_ir_ir(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg();                                        \
+    jit_gpr_t  r1 = get_ireg(), r2 = get_ireg();                       \
+    jit_##name(r0, r1, r2);                                            \
+}
+#define entry_fr_ir_im(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg();                                        \
+    jit_gpr_t  r1 = get_ireg();                                        \
+    jit_word_t im = get_imm();                                         \
+    jit_##name(r0, r1, im);                                            \
+}
+#define entry_pm_fr(name)                                              \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    void       *pm = get_pointer(skip_ws);                             \
+    jit_fpr_t   r0 = get_freg();                                       \
+    jit_##name(pm, r0);                                                        \
+}
+#define entry_ir_ir_fr(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_gpr_t  r0 = get_ireg(), r1 = get_ireg();                       \
+    jit_fpr_t  r2 = get_freg();                                        \
+    jit_##name(r0, r1, r2);                                            \
+}
+#define entry_im_ir_fr(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_word_t im = get_imm();                                         \
+    jit_gpr_t  r0 = get_ireg();                                        \
+    jit_fpr_t  r1 = get_freg();                                        \
+    jit_##name(im, r0, r1);                                            \
+}
+#define entry_lb_fr_fr(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t *jmp;                                                   \
+    label_t    *label = get_label(skip_ws);                            \
+    jit_fpr_t   r0 = get_freg(), r1 = get_freg();                      \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(r0, r1), label);                 \
+    else {                                                             \
+       jmp = jit_##name(r0, r1);                                       \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_lb_fr_fm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t         *jmp;                                           \
+    label_t            *label = get_label(skip_ws);                    \
+    jit_fpr_t           r0 = get_freg();                               \
+    jit_float64_t       im = get_float(skip_ws);                       \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(r0, make_float(im)), label);     \
+    else {                                                             \
+       jmp = jit_##name(r0, make_float(im));                           \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_lb_fr_dm(name)                                           \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_node_t         *jmp;                                           \
+    label_t            *label = get_label(skip_ws);                    \
+    jit_fpr_t           r0 = get_freg();                               \
+    jit_float64_t       im = get_float(skip_ws);                       \
+    if (label->kind == label_kind_code_forward)                                \
+       jmp_forward((void *)jit_##name(r0, im), label);                 \
+    else {                                                             \
+       jmp = jit_##name(r0, im);                                       \
+       jit_patch_at(jmp, (jit_node_t *)label->value);                  \
+    }                                                                  \
+}
+#define entry_fr(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_fpr_t  r0 = get_freg();                                        \
+    jit_##name(r0);                                                    \
+}
+#define entry_fm(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(make_float(im));                                                \
+}
+#define entry_dm(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    jit_float64_t      im = get_float(skip_ws);                        \
+    jit_##name(im);                                                    \
+}
+/*
+ * entry_fn(name): define the static handler `name' for an instruction
+ * whose single operand is an address.  The first non-blank character
+ * selects how the operand is read:
+ *   digit   - an unsigned integer (get_uint), used as the address;
+ *   '$'     - the result of expression(), which must be tok_int or
+ *             tok_pointer, otherwise error("expecting pointer");
+ *   '@'     - dynamic() is called and parser.value.p is used;
+ *   other   - a label (get_label): jit_<name>(NULL) is emitted and
+ *             either queued with call_forward() for a forward label or
+ *             patched to the label's value, and the handler returns.
+ * In the first three cases the address is passed to jit_<name>.
+ */
+#define entry_fn(name)                                                 \
+static void                                                            \
+name(void)                                                             \
+{                                                                      \
+    int                 ch;                                                    \
+    label_t    *label;                                                 \
+    void       *value;                                                 \
+    ch = skipws();                                                     \
+    switch (ch) {                                                      \
+       case '0' ... '9':                                               \
+           ungetch(ch);                                                \
+           value = (void *)(jit_word_t)get_uint(skip_none);            \
+           break;                                                      \
+       case '$':                                                       \
+           switch (expression()) {                                     \
+               case tok_int:                                           \
+                   value = (void *)parser.value.i;                     \
+                   break;                                              \
+               case tok_pointer:                                       \
+                   value = parser.value.p;                             \
+                   break;                                              \
+               default:                                                \
+                   error("expecting pointer");                         \
+           }                                                           \
+           break;                                                      \
+       case '@':                                                       \
+           dynamic();                                                  \
+           value = parser.value.p;                                     \
+           break;                                                      \
+       default:                                                        \
+           ungetch(ch);                                                \
+           label = get_label(skip_none);                               \
+           if (label->kind == label_kind_code_forward)                 \
+               call_forward((void *)jit_##name(NULL), label);          \
+           else                                                        \
+               jit_patch_at(jit_##name(NULL), label->value);           \
+           return;                                                     \
+    }                                                                  \
+    jit_##name(value);                                                 \
+}
+/* Handler for `name': skips blanks, reads an identifier into
+ * parser.string and passes that string to jit_name(). */
+static void
+name(void) {
+    (void)identifier(skipws());
+    jit_name(parser.string);
+}
+/* Handler for `live': the operand must parse as a register token;
+ * parser.regval is then passed to jit_live().  Anything else is
+ * reported with error("bad register"). */
+static void
+live(void) {
+    token_t     token = primary(skip_ws);
+
+    if (token != tok_register)
+       error("bad register");
+    jit_live(parser.regval);
+}
+/* Handlers generated from the entry* templates: each use defines a
+ * function named after its argument (presumably a static void handler
+ * calling jit_<name>, like the templates visible in this file; entry
+ * and entry_im themselves are defined earlier). */
+entry_im(align)
+entry(prolog)
+entry_im(frame)                        entry_im(tramp)
+entry(ellipsis)
+/* Handler for `allocai': reads an immediate, passes it to
+ * jit_allocai(), then reads a symbol (get_symbol) and records the value
+ * returned by jit_allocai() in it as a type_l integer. */
+void
+allocai(void) {
+    jit_word_t  amount = get_imm();
+    jit_word_t  result = jit_allocai(amount);
+    symbol_t   *sym = get_symbol();
+
+    sym->type = type_l;
+    sym->value.i = result;
+}
+/* Handlers for argument access, integer arithmetic/logic, shifts and
+ * integer comparisons, generated from the entry* templates.  The
+ * getarg_ui/getarg_l handlers exist only when __WORDSIZE is 64. */
+entry_ir_ir(allocar)
+entry_ca(arg)
+entry_ia(getarg_c)             entry_ia(getarg_uc)
+entry_ia(getarg_s)             entry_ia(getarg_us)
+entry_ia(getarg_i)
+#if __WORDSIZE == 64
+entry_ia(getarg_ui)            entry_ia(getarg_l)
+#endif
+entry_ia(getarg)
+entry_ia(putargr)              entry_ima(putargi)
+entry_ir_ir_ir(addr)           entry_ir_ir_im(addi)
+entry_ir_ir_ir(addxr)          entry_ir_ir_im(addxi)
+entry_ir_ir_ir(addcr)          entry_ir_ir_im(addci)
+entry_ir_ir_ir(subr)           entry_ir_ir_im(subi)
+entry_ir_ir_ir(subxr)          entry_ir_ir_im(subxi)
+entry_ir_ir_ir(subcr)          entry_ir_ir_im(subci)
+entry_ir_ir_ir(rsbr)           entry_ir_ir_im(rsbi)
+entry_ir_ir_ir(mulr)           entry_ir_ir_im(muli)
+entry_ir_ir_ir_ir(qmulr)       entry_ir_ir_ir_im(qmuli)
+entry_ir_ir_ir_ir(qmulr_u)     entry_ir_ir_ir_im(qmuli_u)
+entry_ir_ir_ir(divr)           entry_ir_ir_im(divi)
+entry_ir_ir_ir(divr_u)         entry_ir_ir_im(divi_u)
+entry_ir_ir_ir_ir(qdivr)       entry_ir_ir_ir_im(qdivi)
+entry_ir_ir_ir_ir(qdivr_u)     entry_ir_ir_ir_im(qdivi_u)
+entry_ir_ir_ir(remr)           entry_ir_ir_im(remi)
+entry_ir_ir_ir(remr_u)         entry_ir_ir_im(remi_u)
+entry_ir_ir_ir(andr)           entry_ir_ir_im(andi)
+entry_ir_ir_ir(orr)            entry_ir_ir_im(ori)
+entry_ir_ir_ir(xorr)           entry_ir_ir_im(xori)
+entry_ir_ir_ir(lshr)           entry_ir_ir_im(lshi)
+entry_ir_ir_ir(rshr)           entry_ir_ir_im(rshi)
+entry_ir_ir_ir(rshr_u)         entry_ir_ir_im(rshi_u)
+entry_ir_ir(negr)              entry_ir_ir(comr)
+entry_ir_ir_ir(ltr)            entry_ir_ir_im(lti)
+entry_ir_ir_ir(ltr_u)          entry_ir_ir_im(lti_u)
+entry_ir_ir_ir(ler)            entry_ir_ir_im(lei)
+entry_ir_ir_ir(ler_u)          entry_ir_ir_im(lei_u)
+entry_ir_ir_ir(eqr)            entry_ir_ir_im(eqi)
+entry_ir_ir_ir(ger)            entry_ir_ir_im(gei)
+entry_ir_ir_ir(ger_u)          entry_ir_ir_im(gei_u)
+entry_ir_ir_ir(gtr)            entry_ir_ir_im(gti)
+entry_ir_ir_ir(gtr_u)          entry_ir_ir_im(gti_u)
+entry_ir_ir_ir(ner)            entry_ir_ir_im(nei)
+entry_ir_ir(movr)
+/*
+ * Handler for `movi': reads the destination integer register, then an
+ * operand selected by its first non-blank character:
+ *   '+', '-', digit - an integer read with get_uint();
+ *   '\''            - a character constant (character());
+ *   '$'             - the result of expression(), which must be tok_int
+ *                     or tok_pointer, otherwise error("expecting pointer");
+ *   '@'             - dynamic() is called and parser.value.p is used;
+ *   other           - a label (get_label).
+ * For a code label (already defined or forward) a jit_movi(r0, 0) is
+ * emitted and handed to mov_forward(), and the handler returns.  For
+ * any other label kind the label's value is used as the operand.
+ */
+static void
+movi(void)
+{
+    int                 ch;
+    label_t    *label;
+    void       *value;
+    jit_gpr_t   r0 = get_ireg();
+    ch = skipws();
+    switch (ch) {
+       case '+': case '-':
+       case '0' ... '9':
+           ungetch(ch);
+           value = (void *)(jit_word_t)get_uint(skip_none);
+           break;
+       case '\'':
+           character();
+           value = (void *)parser.value.i;
+           break;
+       case '$':
+           switch (expression()) {
+               case tok_int:
+                   value = (void *)parser.value.i;
+                   break;
+               case tok_pointer:
+                   value = parser.value.p;
+                   break;
+               default:
+                   error("expecting pointer");
+           }
+           break;
+       case '@':
+           dynamic();
+           value = parser.value.p;
+           break;
+       default:
+           ungetch(ch);
+           label = get_label(skip_none);
+           if (label->kind == label_kind_code ||
+               label->kind == label_kind_code_forward) {
+               mov_forward((void *)jit_movi(r0, 0), label);
+               return;
+           }
+           value = label->value;
+           break;
+    }
+    jit_movi(r0, (jit_word_t)value);
+}
+/* Handlers for integer extension, byte swapping, loads, stores,
+ * branches, jumps/calls and return values, generated from the entry*
+ * templates.  The blocks guarded by __WORDSIZE == 64 add the handlers
+ * that are only generated on 64 bit word builds. */
+entry_ir_ir(extr_c)            entry_ir_ir(extr_uc)
+entry_ir_ir(extr_s)            entry_ir_ir(extr_us)
+#if __WORDSIZE == 64
+entry_ir_ir(extr_i)            entry_ir_ir(extr_ui)
+#endif
+entry_ir_ir(htonr_us)          entry_ir_ir(ntohr_us)
+entry_ir_ir(htonr_ui)          entry_ir_ir(ntohr_ui)
+#if __WORDSIZE == 64
+entry_ir_ir(htonr_ul)          entry_ir_ir(ntohr_ul)
+#endif
+entry_ir_ir(htonr)             entry_ir_ir(ntohr)
+entry_ir_ir(ldr_c)             entry_ir_pm(ldi_c)
+entry_ir_ir(ldr_uc)            entry_ir_pm(ldi_uc)
+entry_ir_ir(ldr_s)             entry_ir_pm(ldi_s)
+entry_ir_ir(ldr_us)            entry_ir_pm(ldi_us)
+entry_ir_ir(ldr_i)             entry_ir_pm(ldi_i)
+#if __WORDSIZE == 64
+entry_ir_ir(ldr_ui)            entry_ir_pm(ldi_ui)
+entry_ir_ir(ldr_l)             entry_ir_pm(ldi_l)
+#endif
+entry_ir_ir(ldr)               entry_ir_pm(ldi)
+entry_ir_ir_ir(ldxr_c)         entry_ir_ir_im(ldxi_c)
+entry_ir_ir_ir(ldxr_uc)                entry_ir_ir_im(ldxi_uc)
+entry_ir_ir_ir(ldxr_s)         entry_ir_ir_im(ldxi_s)
+entry_ir_ir_ir(ldxr_us)                entry_ir_ir_im(ldxi_us)
+entry_ir_ir_ir(ldxr_i)         entry_ir_ir_im(ldxi_i)
+#if __WORDSIZE == 64
+entry_ir_ir_ir(ldxr_ui)                entry_ir_ir_im(ldxi_ui)
+entry_ir_ir_ir(ldxr_l)         entry_ir_ir_im(ldxi_l)
+#endif
+entry_ir_ir_ir(ldxr)           entry_ir_ir_im(ldxi)
+entry_ir_ir(str_c)             entry_pm_ir(sti_c)
+entry_ir_ir(str_s)             entry_pm_ir(sti_s)
+entry_ir_ir(str_i)             entry_pm_ir(sti_i)
+#if __WORDSIZE == 64
+entry_ir_ir(str_l)             entry_pm_ir(sti_l)
+#endif
+entry_ir_ir(str)               entry_pm_ir(sti)
+entry_ir_ir_ir(stxr_c)         entry_im_ir_ir(stxi_c)
+entry_ir_ir_ir(stxr_s)         entry_im_ir_ir(stxi_s)
+entry_ir_ir_ir(stxr_i)         entry_im_ir_ir(stxi_i)
+#if __WORDSIZE == 64
+entry_ir_ir_ir(stxr_l)         entry_im_ir_ir(stxi_l)
+#endif
+entry_ir_ir_ir(stxr)           entry_im_ir_ir(stxi)
+entry_lb_ir_ir(bltr)           entry_lb_ir_im(blti)
+entry_lb_ir_ir(bltr_u)         entry_lb_ir_im(blti_u)
+entry_lb_ir_ir(bler)           entry_lb_ir_im(blei)
+entry_lb_ir_ir(bler_u)         entry_lb_ir_im(blei_u)
+entry_lb_ir_ir(beqr)           entry_lb_ir_im(beqi)
+entry_lb_ir_ir(bger)           entry_lb_ir_im(bgei)
+entry_lb_ir_ir(bger_u)         entry_lb_ir_im(bgei_u)
+entry_lb_ir_ir(bgtr)           entry_lb_ir_im(bgti)
+entry_lb_ir_ir(bgtr_u)         entry_lb_ir_im(bgti_u)
+entry_lb_ir_ir(bner)           entry_lb_ir_im(bnei)
+entry_lb_ir_ir(bmsr)           entry_lb_ir_im(bmsi)
+entry_lb_ir_ir(bmcr)           entry_lb_ir_im(bmci)
+entry_lb_ir_ir(boaddr)         entry_lb_ir_im(boaddi)
+entry_lb_ir_ir(boaddr_u)       entry_lb_ir_im(boaddi_u)
+entry_lb_ir_ir(bxaddr)         entry_lb_ir_im(bxaddi)
+entry_lb_ir_ir(bxaddr_u)       entry_lb_ir_im(bxaddi_u)
+entry_lb_ir_ir(bosubr)         entry_lb_ir_im(bosubi)
+entry_lb_ir_ir(bosubr_u)       entry_lb_ir_im(bosubi_u)
+entry_lb_ir_ir(bxsubr)         entry_lb_ir_im(bxsubi)
+entry_lb_ir_ir(bxsubr_u)       entry_lb_ir_im(bxsubi_u)
+entry_ir(jmpr)                 entry_lb(jmpi)
+entry_ir(callr)                        entry_fn(calli)
+entry(prepare)
+entry_ir(pushargr)             entry_im(pushargi)
+entry_ir(finishr)              entry_fn(finishi)
+entry(ret)
+entry_ir(retr)                 entry_im(reti)
+entry_ir(retval_c)             entry_ir(retval_uc)
+entry_ir(retval_s)             entry_ir(retval_us)
+entry_ir(retval_i)
+#if __WORDSIZE == 64
+entry_ir(retval_ui)            entry_ir(retval_l)
+#endif
+entry_ir(retval)
+entry(epilog)
+/* Handlers for the `_f' instructions, generated from the entry*
+ * templates.  Immediate forms use the *_fm templates, which narrow the
+ * parsed double with make_float().  truncr_f_l is generated only when
+ * __WORDSIZE is 64. */
+entry_ca(arg_f)                        entry_fa(getarg_f)
+entry_fa(putargr_f)            entry_fma(putargi_f)
+entry_fr_fr_fr(addr_f)         entry_fr_fr_fm(addi_f)
+entry_fr_fr_fr(subr_f)         entry_fr_fr_fm(subi_f)
+entry_fr_fr_fr(rsbr_f)         entry_fr_fr_fm(rsbi_f)
+entry_fr_fr_fr(mulr_f)         entry_fr_fr_fm(muli_f)
+entry_fr_fr_fr(divr_f)         entry_fr_fr_fm(divi_f)
+entry_fr_fr(negr_f)            entry_fr_fr(absr_f)
+entry_fr_fr(sqrtr_f)
+entry_ir_fr_fr(ltr_f)          entry_ir_fr_fm(lti_f)
+entry_ir_fr_fr(ler_f)          entry_ir_fr_fm(lei_f)
+entry_ir_fr_fr(eqr_f)          entry_ir_fr_fm(eqi_f)
+entry_ir_fr_fr(ger_f)          entry_ir_fr_fm(gei_f)
+entry_ir_fr_fr(gtr_f)          entry_ir_fr_fm(gti_f)
+entry_ir_fr_fr(ner_f)          entry_ir_fr_fm(nei_f)
+entry_ir_fr_fr(unltr_f)                entry_ir_fr_fm(unlti_f)
+entry_ir_fr_fr(unler_f)                entry_ir_fr_fm(unlei_f)
+entry_ir_fr_fr(uneqr_f)                entry_ir_fr_fm(uneqi_f)
+entry_ir_fr_fr(unger_f)                entry_ir_fr_fm(ungei_f)
+entry_ir_fr_fr(ungtr_f)                entry_ir_fr_fm(ungti_f)
+entry_ir_fr_fr(ltgtr_f)                entry_ir_fr_fm(ltgti_f)
+entry_ir_fr_fr(ordr_f)         entry_ir_fr_fm(ordi_f)
+entry_ir_fr_fr(unordr_f)       entry_ir_fr_fm(unordi_f)
+entry_ir_fr(truncr_f_i)
+#if __WORDSIZE == 64
+entry_ir_fr(truncr_f_l)
+#endif
+entry_ir_fr(truncr_f)
+entry_fr_ir(extr_f)            entry_fr_fr(extr_d_f)
+entry_fr_fr(movr_f)            entry_fr_fm(movi_f)
+entry_fr_ir(ldr_f)             entry_fr_pm(ldi_f)
+entry_fr_ir_ir(ldxr_f)         entry_fr_ir_im(ldxi_f)
+entry_ir_fr(str_f)             entry_pm_fr(sti_f)
+entry_ir_ir_fr(stxr_f)         entry_im_ir_fr(stxi_f)
+entry_lb_fr_fr(bltr_f)         entry_lb_fr_fm(blti_f)
+entry_lb_fr_fr(bler_f)         entry_lb_fr_fm(blei_f)
+entry_lb_fr_fr(beqr_f)         entry_lb_fr_fm(beqi_f)
+entry_lb_fr_fr(bger_f)         entry_lb_fr_fm(bgei_f)
+entry_lb_fr_fr(bgtr_f)         entry_lb_fr_fm(bgti_f)
+entry_lb_fr_fr(bner_f)         entry_lb_fr_fm(bnei_f)
+entry_lb_fr_fr(bunltr_f)       entry_lb_fr_fm(bunlti_f)
+entry_lb_fr_fr(bunler_f)       entry_lb_fr_fm(bunlei_f)
+entry_lb_fr_fr(buneqr_f)       entry_lb_fr_fm(buneqi_f)
+entry_lb_fr_fr(bunger_f)       entry_lb_fr_fm(bungei_f)
+entry_lb_fr_fr(bungtr_f)       entry_lb_fr_fm(bungti_f)
+entry_lb_fr_fr(bltgtr_f)       entry_lb_fr_fm(bltgti_f)
+entry_lb_fr_fr(bordr_f)                entry_lb_fr_fm(bordi_f)
+entry_lb_fr_fr(bunordr_f)      entry_lb_fr_fm(bunordi_f)
+entry_fr(pushargr_f)           entry_fm(pushargi_f)
+entry_fr(retr_f)               entry_fm(reti_f)
+entry_fr(retval_f)
+/* Handlers for the `_d' instructions, generated from the entry*
+ * templates.  Immediate forms mostly use the *_dm templates, which pass
+ * the parsed double through unchanged.  truncr_d_l is generated only
+ * when __WORDSIZE is 64. */
+entry_ca(arg_d)                        entry_fa(getarg_d)
+entry_fa(putargr_d)            entry_fma(putargi_d)
+entry_fr_fr_fr(addr_d)         entry_fr_fr_dm(addi_d)
+entry_fr_fr_fr(subr_d)         entry_fr_fr_dm(subi_d)
+entry_fr_fr_fr(rsbr_d)         entry_fr_fr_dm(rsbi_d)
+entry_fr_fr_fr(mulr_d)         entry_fr_fr_dm(muli_d)
+entry_fr_fr_fr(divr_d)         entry_fr_fr_dm(divi_d)
+entry_fr_fr(negr_d)            entry_fr_fr(absr_d)
+entry_fr_fr(sqrtr_d)
+entry_ir_fr_fr(ltr_d)          entry_ir_fr_dm(lti_d)
+entry_ir_fr_fr(ler_d)          entry_ir_fr_dm(lei_d)
+entry_ir_fr_fr(eqr_d)          entry_ir_fr_dm(eqi_d)
+entry_ir_fr_fr(ger_d)          entry_ir_fr_dm(gei_d)
+entry_ir_fr_fr(gtr_d)          entry_ir_fr_dm(gti_d)
+entry_ir_fr_fr(ner_d)          entry_ir_fr_dm(nei_d)
+entry_ir_fr_fr(unltr_d)                entry_ir_fr_dm(unlti_d)
+entry_ir_fr_fr(unler_d)                entry_ir_fr_dm(unlei_d)
+entry_ir_fr_fr(uneqr_d)                entry_ir_fr_dm(uneqi_d)
+entry_ir_fr_fr(unger_d)                entry_ir_fr_dm(ungei_d)
+entry_ir_fr_fr(ungtr_d)                entry_ir_fr_dm(ungti_d)
+entry_ir_fr_fr(ltgtr_d)                entry_ir_fr_dm(ltgti_d)
+entry_ir_fr_fr(ordr_d)         entry_ir_fr_dm(ordi_d)
+entry_ir_fr_fr(unordr_d)       entry_ir_fr_dm(unordi_d)
+entry_ir_fr(truncr_d_i)
+#if __WORDSIZE == 64
+entry_ir_fr(truncr_d_l)
+#endif
+entry_ir_fr(truncr_d)
+entry_fr_ir(extr_d)            entry_fr_fr(extr_f_d)
+entry_fr_fr(movr_d)            entry_fr_dm(movi_d)
+entry_fr_ir(ldr_d)             entry_fr_pm(ldi_d)
+entry_fr_ir_ir(ldxr_d)         entry_fr_ir_im(ldxi_d)
+entry_ir_fr(str_d)             entry_pm_fr(sti_d)
+entry_ir_ir_fr(stxr_d)         entry_im_ir_fr(stxi_d)
+entry_lb_fr_fr(bltr_d)         entry_lb_fr_dm(blti_d)
+entry_lb_fr_fr(bler_d)         entry_lb_fr_dm(blei_d)
+entry_lb_fr_fr(beqr_d)         entry_lb_fr_dm(beqi_d)
+entry_lb_fr_fr(bger_d)         entry_lb_fr_dm(bgei_d)
+entry_lb_fr_fr(bgtr_d)         entry_lb_fr_dm(bgti_d)
+entry_lb_fr_fr(bner_d)         entry_lb_fr_dm(bnei_d)
+entry_lb_fr_fr(bunltr_d)       entry_lb_fr_dm(bunlti_d)
+entry_lb_fr_fr(bunler_d)       entry_lb_fr_dm(bunlei_d)
+entry_lb_fr_fr(buneqr_d)       entry_lb_fr_dm(buneqi_d)
+entry_lb_fr_fr(bunger_d)       entry_lb_fr_dm(bungei_d)
+entry_lb_fr_fr(bungtr_d)       entry_lb_fr_dm(bungti_d)
+entry_lb_fr_fr(bltgtr_d)       entry_lb_fr_dm(bltgti_d)
+entry_lb_fr_fr(bordr_d)                entry_lb_fr_dm(bordi_d)
+entry_lb_fr_fr(bunordr_d)      entry_lb_fr_dm(bunordi_d)
+entry_fr(pushargr_d)           entry_dm(pushargi_d)
+entry_fr(retr_d)               entry_dm(reti_d)
+entry_fr(retval_d)
+/* Handler for `va_start': reads one integer register and passes it to
+ * jit_va_start(). */
+static void
+vastart(void)
+{
+    jit_gpr_t  reg;
+
+    reg = get_ireg();
+    jit_va_start(reg);
+}
+/* Handler for `va_push': reads one integer register and passes it to
+ * jit_va_push(). */
+static void
+vapush(void)
+{
+    jit_gpr_t  reg;
+
+    reg = get_ireg();
+    jit_va_push(reg);
+}
+/* Handler for `va_arg': reads two integer registers, in source order,
+ * and passes them to jit_va_arg() in that order. */
+static void
+vaarg(void)
+{
+    jit_gpr_t  first = get_ireg();
+    jit_gpr_t  second = get_ireg();
+
+    jit_va_arg(first, second);
+}
+/* Handler for `va_arg_d': reads a float register followed by an integer
+ * register and passes them to jit_va_arg_d() in that order. */
+static void
+vaarg_d(void)
+{
+    jit_fpr_t  fpr;
+    jit_gpr_t  gpr;
+
+    fpr = get_freg();
+    gpr = get_ireg();
+    jit_va_arg_d(fpr, gpr);
+}
+/* Handler for `va_end': reads one integer register and passes it to
+ * jit_va_end(). */
+static void
+vaend(void)
+{
+    jit_gpr_t  reg;
+
+    reg = get_ireg();
+    jit_va_end(reg);
+}
+/* The handler templates are no longer needed once every handler has
+ * been generated; drop them.
+ * NOTE(review): entry_ir_ir_ir_ir and entry_ir_ir_ir_im are used above
+ * but are not undefined in this list - confirm whether intentional. */
+#undef entry_fn
+#undef entry_fm
+#undef entry_dm
+#undef entry_lb_fr_fm
+#undef entry_lb_fr_dm
+#undef entry_lb_fr_fr
+#undef entry_im_ir_fr
+#undef entry_ir_ir_fr
+#undef entry_pm_fr
+#undef entry_fr_ir_ir
+#undef entry_fr_ir_im
+#undef entry_fr_pm
+#undef entry_fr_fm
+#undef entry_fr_dm
+#undef entry_fr_ir
+#undef entry_ir_fr
+#undef entry_ir_fr_fm
+#undef entry_ir_fr_dm
+#undef entry_ir_fr_fr
+#undef entry_fr_fr
+#undef entry_fr_fr_fm
+#undef entry_fr_fr_dm
+#undef entry_fr_fr_fr
+#undef entry_fma
+#undef entry_fa
+#undef entry_pm
+#undef entry_lb
+#undef entry_lb_ir_im
+#undef entry_lb_ir_ir
+#undef entry_im_ir_ir
+#undef entry_pm_ir
+#undef entry_ir_pm
+#undef entry_ir_im
+#undef entry_ir_ir
+#undef entry_ir_ir_im
+#undef entry_ir_ir_ir
+#undef entry_ima
+#undef entry_ir
+#undef entry_im
+#undef entry_ia
+#undef entry_ca
+#undef entry
+
+/*
+ * Report a fatal parse error and terminate the program.
+ *
+ * The printf-style message is printed through message() with kind
+ * "error".  It is followed on stderr by a parenthesized excerpt of the
+ * input buffer, starting one byte before the current read offset and
+ * cut to 74 bytes plus "..." when more than 77 bytes remain unread.
+ * The function then calls exit(-1) and does not return.
+ */
+static void
+error(const char *format, ...)
+{
+    va_list     ap;
+    int                 length;
+    char       *string;
+
+    va_start(ap, format);
+    message("error", format, ap);
+    va_end(ap);
+    length = parser.data.length - parser.data.offset;
+    /* Step back one byte so the excerpt includes the character just
+     * consumed, except when the offset is already 0 (nothing read yet,
+     * or pushed back into slot 0): stepping back there would point
+     * before the start of the buffer. */
+    string = (char *)(parser.data.buffer + parser.data.offset);
+    if (parser.data.offset > 0)
+       --string;
+    if (length > 77)
+       strcpy(string + 74, "...");
+    else if (parser.data.length > 0)
+       parser.data.buffer[parser.data.length - 1] = '\0';
+    else
+       /* empty buffer: index length - 1 would be out of bounds */
+       parser.data.buffer[0] = '\0';
+    fprintf(stderr, "(%s)\n", string);
+    exit(-1);
+}
+
+/* Print a printf-style diagnostic through message() with kind
+ * "warning"; unlike error(), execution continues afterwards. */
+static void
+warn(const char *format, ...)
+{
+    va_list    args;
+
+    va_start(args, format);
+    message("warning", format, args);
+    va_end(args);
+}
+
+/*
+ * Write one diagnostic line to stderr in the form
+ * "<parser.name>:<line>: <kind>: <formatted text>".  The line number is
+ * parser.line minus parser.newline, i.e. one less than parser.line when
+ * the last character processed was a newline.
+ */
+static void
+message(const char *kind, const char *format, va_list ap)
+{
+    fprintf(stderr, "%s:%d: %s: ",
+           parser.name, parser.line - parser.newline, kind);
+    vfprintf(stderr, format, ap);
+    putc('\n', stderr);
+}
+
+/*
+ * Fetch the next input character, refilling parser.data.buffer from
+ * parser.fp once the buffered bytes are used up.  Returns EOF when the
+ * refill reads nothing.  parser.newline records whether the returned
+ * character is '\n', and parser.line is incremented when it is.
+ */
+static int
+getch(void)
+{
+    int                ch;
+
+    if (parser.data.offset >= parser.data.length) {
+       /* Refill starting at index 1: index 0 is kept free so that
+        * ungetch() always has a slot to push back into. */
+       parser.data.length = fread(parser.data.buffer + 1, 1,
+                                  sizeof(parser.data.buffer) - 1,
+                                  parser.fp) + 1;
+       if (parser.data.length > 1) {
+           ch = parser.data.buffer[1];
+           parser.data.offset = 2;
+       }
+       else {
+           parser.data.offset = 1;
+           ch = EOF;
+       }
+    }
+    else
+       ch = parser.data.buffer[parser.data.offset++];
+
+    parser.newline = ch == '\n';
+    if (parser.newline)
+       ++parser.line;
+
+    return (ch);
+}
+
+/* Like getch(), but reaching end of input is reported through
+ * error("unexpected end of file"). */
+static int
+getch_noeof(void)
+{
+    int                ch;
+
+    ch = getch();
+    if (EOF == ch)
+       error("unexpected end of file");
+
+    return (ch);
+}
+
+/*
+ * Push `ch' back so that the next getch() returns it.  Pushing back a
+ * newline decrements parser.line and sets parser.newline.  When the
+ * read offset is already 0 the character in slot 0 is overwritten.
+ * Returns `ch'.
+ */
+static int
+ungetch(int ch)
+{
+    parser.newline = ch == '\n';
+    if (parser.newline)
+       --parser.line;
+
+    if (parser.data.offset == 0)
+       /* overwrite */
+       parser.data.buffer[0] = ch;
+    else {
+       --parser.data.offset;
+       parser.data.buffer[parser.data.offset] = ch;
+    }
+
+    return (ch);
+}
+
+/*
+ * Skip blanks and comments on the current line and return the first
+ * character that is neither.  A '/' is handed to skipct() and a '#' to
+ * skipcp(); whatever they return is then examined like any other
+ * character.  Space, form feed, carriage return and tab are skipped;
+ * newline is NOT skipped.
+ */
+static int
+skipws(void)
+{
+    int                ch;
+
+    for (;;) {
+       ch = getch();
+       if (ch == '/')
+           ch = skipct();
+       else if (ch == '#')
+           ch = skipcp();
+
+       if (ch != ' ' && ch != '\f' && ch != '\r' && ch != '\t')
+           return (ch);
+    }
+}
+
+/*
+ * Same as skipws(), but newlines are skipped too, and ';' is treated
+ * like a newline.  Returns the first character that is not a blank, a
+ * newline, a ';' or part of a comment.
+ */
+static int
+skipnl(void)
+{
+    int                ch;
+
+    for (;;) {
+       ch = getch();
+       if (ch == '/')
+           ch = skipct();
+       else if (ch == '#')
+           ch = skipcp();
+
+       if (ch == ' ' || ch == '\f' || ch == '\n' ||
+           ch == '\r' || ch == '\t')
+           continue;
+       /* handle as newline */
+       if (ch == ';')
+           continue;
+       return (ch);
+    }
+}
+
+/*
+ * Called after a '/' has been read.
+ *   "//"  - skips to the end of the line and returns the terminating
+ *           '\n' (or EOF).
+ *   "/*"  - skips to the closing star-slash (end of input inside the
+ *           comment is an error) and returns the character after it.
+ *   other - pushes the character back and returns '/'.
+ */
+static int
+skipct(void)
+{
+    int                ch = getch();
+
+    if (ch == '/') {
+       do
+           ch = getch();
+       while (ch != '\n' && ch != EOF);
+       return (ch);
+    }
+
+    if (ch == '*') {
+       do {
+           /* find the next '*' ... */
+           while (getch_noeof() != '*')
+               ;
+           /* ... then look at what follows the run of '*' */
+           do
+               ch = getch_noeof();
+           while (ch == '*');
+       } while (ch != '/');
+       return (getch());
+    }
+
+    ungetch(ch);
+    return ('/');
+}
+
+/*
+ * Called after a '#' has been read; consumes the rest of the line.
+ * A number found on the line (when number() reports tok_int) sets
+ * parser.line to that value minus one.  A double quoted string is read
+ * with string() and copied into parser.name, truncated if it does not
+ * fit.  Returns the terminating '\n' or EOF.
+ */
+static int
+skipcp(void)
+{
+    int                ch = getch();
+
+    while (ch != '\n' && ch != EOF) {
+       if (ch >= '0' && ch <= '9') {
+           if ((number(ch)) == tok_int)
+               parser.line = parser.value.i - 1;
+       }
+       else if (ch == '"') {
+           string();
+           if (parser.offset < (int)sizeof(parser.name))
+               strcpy(parser.name, parser.string);
+           else {
+               strncpy(parser.name, parser.string, sizeof(parser.name));
+               parser.name[sizeof(parser.name) - 1] = '\0';
+           }
+       }
+       ch = getch();
+    }
+
+    return (ch);
+}
+
+/*
+ * Parse a primary expression and return it as a signed word.  An
+ * integer token is returned as is; a pointer token is converted to an
+ * integer (parser.type becomes type_l).  Any other token is reported
+ * with error("expecting integer").
+ */
+static jit_word_t
+get_int(skip_t skip)
+{
+    token_t     token = primary(skip);
+
+    if (token == tok_pointer) {
+       parser.type = type_l;
+       parser.value.i = (jit_word_t)parser.value.p;
+    }
+    else if (token != tok_int)
+       error("expecting integer");
+
+    return (parser.value.i);
+}
+
+/*
+ * Parse a primary expression and return it as an unsigned word.
+ * Character and integer tokens are returned as is; a pointer token is
+ * converted to an integer (parser.type becomes type_l).  Any other
+ * token is reported with error("expecting integer").
+ */
+static jit_uword_t
+get_uint(skip_t skip)
+{
+    token_t     token = primary(skip);
+
+    if (token == tok_pointer) {
+       parser.type = type_l;
+       parser.value.ui = (jit_uword_t)parser.value.p;
+    }
+    else if (token != tok_char && token != tok_int)
+       error("expecting integer");
+
+    return (parser.value.ui);
+}
+
+/*
+ * Parse a primary expression and return it as a double.  Character and
+ * integer tokens are converted to double (parser.type becomes type_d);
+ * a float token is returned as is.  Any other token is reported with
+ * error("expecting float").
+ */
+static double
+get_float(skip_t skip)
+{
+    token_t     token = primary(skip);
+
+    if (token == tok_char || token == tok_int) {
+       parser.type = type_d;
+       parser.value.d = parser.value.i;
+    }
+    else if (token != tok_float)
+       error("expecting float");
+
+    return (parser.value.d);
+}
+
+/* Narrow a double to float.  NaN and infinities are produced explicitly
+ * instead of relying on the cast, as a workaround for gcc not converting
+ * unordered values from double to float on s390 (as is done on other
+ * architectures). */
+static float
+make_float(double d)
+{
+    /* The explicit path is also a workaround for a bug in the Hercules
+     * s390 emulator; it is compiled only when isnan and isinf are
+     * available (at least HP-UX ia64 does not have them). */
+#if defined(HAVE_ISNAN) && defined(HAVE_ISINF)
+    if (isnan(d))
+       return (0.0f / 0.0f);
+    if (isinf(d))
+       return (d > 0.0 ? 1.0f / 0.0f : -1.0f / 0.0f);
+#endif
+    return ((float)d);
+}
+
+/*
+ * Parse a primary expression and return it as a pointer, also leaving
+ * it in parser.value.p.
+ *   pointer token - returned as is;
+ *   integer token - converted to a pointer (parser.type = type_p);
+ *   symbol token  - looked up as a label; an unknown name is reported
+ *                   with error("bad identifier ..."), a forward code
+ *                   label with error("forward references not
+ *                   implemented"); otherwise the label's value is
+ *                   returned (parser.type = type_p).
+ * Any other token is reported with error("bad pointer").
+ */
+static void *
+get_pointer(skip_t skip)
+{
+    label_t    *label;
+    token_t     token = primary(skip);
+
+    if (token == tok_pointer)
+       return (parser.value.p);
+
+    if (token == tok_int) {
+       parser.type = type_p;
+       return (parser.value.p = (void *)parser.value.ui);
+    }
+
+    if (token == tok_symbol) {
+       label = get_label_by_name(parser.string);
+       if (label == NULL)
+           error("bad identifier %s", parser.string);
+       /* as expression arguments */
+       if (label->kind == label_kind_code_forward)
+           error("forward references not implemented");
+       parser.type = type_p;
+       return (parser.value.p = label->value);
+    }
+
+    error("bad pointer");
+}
+
+/*
+ * Read a label name and return its label_t.  After skipping blanks
+ * with skipws(), the name is read either with dynamic() (leading '@')
+ * or with identifier() (leading letter or '_'); anything else is
+ * reported with error("expecting label/immediate").  A name not yet
+ * known is registered as a forward code label bound to a fresh
+ * jit_forward() node.
+ *
+ * The `skip' argument is not used: blanks are always skipped.
+ */
+static label_t *
+get_label(skip_t skip)
+{
+    label_t    *label;
+    int                 ch = skipws();
+
+    if (ch == '@')
+       (void)dynamic();
+    else if ((ch >= 'a' && ch <= 'z') ||
+            (ch >= 'A' && ch <= 'Z') ||
+            ch == '_')
+       (void)identifier(ch);
+    else
+       error("expecting label/immediate");
+
+    label = get_label_by_name(parser.string);
+    if (label == NULL)
+       label = new_label(label_kind_code_forward,
+                         parser.string, jit_forward());
+
+    return (label);
+}
+
+/*
+ * Parse a register name and store it in parser.regval, with
+ * parser.regtype set to type_l for integer registers and type_d for
+ * float registers.  Accepted spellings:
+ *   r0..r2, r(N)   with 0 <= N < JIT_R_NUM
+ *   v0..v2, v(N)   with 0 <= N < JIT_V_NUM
+ *   f0..f5, f(N)   with 0 <= N < JIT_F_NUM
+ *   fp             (JIT_FP, an integer register)
+ * For the non parenthesized spellings the following character must not
+ * be a letter, digit or '_'; it is pushed back.  Returns tok_register;
+ * anything else is reported with error("bad register").
+ */
+static token_t
+regname(void)
+{
+    jit_word_t num;
+    int                check = 1, ch = getch();
+
+    switch (ch) {
+       case 'r':
+           parser.regtype = type_l;
+           switch (ch = getch()) {
+               case '0':       parser.regval = JIT_R0;         break;
+               case '1':       parser.regval = JIT_R1;         break;
+               case '2':       parser.regval = JIT_R2;         break;
+               case '(':
+                   num = get_int(skip_none);
+                   if (num < 0 || num >= JIT_R_NUM)            goto fail;
+                   parser.regval = JIT_R(num);
+                   if (getch() != ')')                         goto fail;
+                   check = 0;
+                   break;
+               default:                                        goto fail;
+           }
+           break;
+       case 'v':
+           parser.regtype = type_l;
+           switch (ch = getch()) {
+               case '0':       parser.regval = JIT_V0;         break;
+               case '1':       parser.regval = JIT_V1;         break;
+               case '2':       parser.regval = JIT_V2;         break;
+               default:                                        goto fail;
+               case '(':
+                   num = get_int(skip_none);
+                   if (num < 0 || num >= JIT_V_NUM)            goto fail;
+                   parser.regval = JIT_V(num);
+                   if (getch() != ')')                         goto fail;
+                   check = 0;
+                   break;
+           }
+           break;
+       case 'f':
+           parser.regtype = type_d;
+           switch (ch = getch()) {
+               case '0':       parser.regval = JIT_F0;         break;
+               case '1':       parser.regval = JIT_F1;         break;
+               case '2':       parser.regval = JIT_F2;         break;
+               case '3':       parser.regval = JIT_F3;         break;
+               case '4':       parser.regval = JIT_F4;         break;
+               case '5':       parser.regval = JIT_F5;         break;
+               case 'p':
+                   parser.regtype = type_l;    /* oops */
+                   parser.regval = JIT_FP;                     break;
+               case '(':
+                   num = get_int(skip_none);
+                   if (num < 0 || num >= JIT_F_NUM)            goto fail;
+                   parser.regval = JIT_F(num);
+                   if (getch() != ')')                         goto fail;
+                   check = 0;
+                   break;
+               default:                                        goto fail;
+           }
+           break;
+       default:
+       fail:
+           error("bad register");
+    }
+    /* a short name must not run into further identifier characters */
+    if (check) {
+       ch = getch();
+       if ((ch >= 'a' && ch <= 'z') ||
+           (ch >= 'A' && ch <= 'Z') ||
+           (ch >= '0' && ch <= '9') ||
+           ch == '_')
+           goto fail;
+       ungetch(ch);
+    }
+
+    return (tok_register);
+}
+
+/*
+ * Read an identifier whose first character `ch' has already been
+ * consumed.  Letters, digits and '_' are appended to parser.string
+ * until another character is seen; that character is pushed back and
+ * the string is NUL terminated, with its length left in parser.offset.
+ * An identifier that would not fit in MAX_IDENTIFIER bytes is reported
+ * with error("bad identifier ...").  Always returns tok_symbol.
+ */
+static token_t
+identifier(int ch)
+{
+    int                is_ident;
+
+    parser.string[0] = ch;
+    parser.offset = 1;
+    for (;;) {
+       ch = getch();
+       is_ident = (ch >= 'a' && ch <= 'z') ||
+                  (ch >= 'A' && ch <= 'Z') ||
+                  (ch >= '0' && ch <= '9') ||
+                  ch == '_';
+       if (!is_ident)
+           break;
+       if (parser.offset + 1 >= MAX_IDENTIFIER) {
+           parser.string[parser.offset] = '\0';
+           error("bad identifier %s", parser.string);
+       }
+       parser.string[parser.offset++] = ch;
+    }
+
+    parser.string[parser.offset] = '\0';
+    ungetch(ch);
+    return (tok_symbol);
+}
+
+/*
+ * Parse the list of initializers that follows a data type directive and
+ * store every value at data + data_offset, advancing data_offset by the
+ * size of what was written.  check_data() is called with the number of
+ * bytes about to be written before each store.
+ * The list ends when the character following an item (after skipws())
+ * is a newline, ';' or EOF.
+ */
+static void
+get_data(type_t type)
+{
+    int                 ch;
+    token_t     token;
+    /* value of `data' when the directive starts */
+    char       *test = data;
+
+    for (;;) {
+       switch (type) {
+           case type_c:
+               /* bytes accept character/integer constants and strings */
+               switch (token = primary(skip_ws)) {
+                   case tok_char: case tok_int:
+                       check_data(sizeof(signed char));
+                       *(signed char *)(data + data_offset) = parser.value.i;
+                       data_offset += sizeof(char);
+                       break;
+                   case tok_string:
+                       /* copies parser.offset bytes; string() counts the
+                        * terminating '\0' in parser.offset */
+                       check_data(parser.offset);
+                       memcpy(data + data_offset, parser.string,
+                              parser.offset);
+                       data_offset += parser.offset;
+                       break;
+                   case tok_newline:
+                   case tok_semicollon:
+                       /* NOTE(review): nothing visible in this function
+                        * changes `data', so this comparison looks like it
+                        * is always true -- confirm the intent */
+                       if (test == data)       error("syntax error");
+                       return;
+                   default:                    error("bad initializer");
+               }
+               break;
+           case type_s:
+               check_data(sizeof(signed short));
+               *(signed short *)(data + data_offset) = get_int(skip_ws);
+               data_offset += sizeof(short);
+               break;
+           case type_i:
+               check_data(sizeof(signed int));
+               *(signed int *)(data + data_offset) = get_int(skip_ws);
+               data_offset += sizeof(int);
+               break;
+           case type_l:
+               check_data(sizeof(jit_word_t));
+               *(jit_word_t *)(data + data_offset) = get_int(skip_ws);
+               data_offset += sizeof(jit_word_t);
+               break;
+           case type_f:
+               check_data(sizeof(float));
+               *(float *)(data + data_offset) = get_float(skip_ws);
+               data_offset += sizeof(float);
+               break;
+           case type_d:
+               check_data(sizeof(double));
+               *(double *)(data + data_offset) = get_float(skip_ws);
+               data_offset += sizeof(double);
+               break;
+           case type_p:
+               /* FIXME **patch if realloc** */
+               check_data(sizeof(void*));
+               *(void **)(data + data_offset) = get_pointer(skip_ws);
+               data_offset += sizeof(void*);
+               break;
+           default:
+               abort();
+       }
+       /* an item was stored: either the list ends here or the character
+        * is given back so that the next item can be parsed */
+       ch = skipws();
+       if (ch == '\n' || ch == ';' || ch == EOF)
+           break;
+       ungetch(ch);
+    }
+}
+
+/*
+ * Handle what follows a '.' at statement level:
+ *   .$(expression)      evaluate an expression and discard the result
+ *   .c .s .i .l .f .d .p  data initializers, forwarded to get_data()
+ *   .data <size>        allocate the zero filled data buffer
+ *   .code               switch to code parsing
+ *   .align <n>          advance data_offset to a multiple of <n>
+ *   .size <n>           reserve <n> bytes of data
+ *   .disasm             set flag_disasm
+ * Anything else is reported through error().
+ */
+static void
+dot(void)
+{
+    int                ch;
+    size_t     offset, length;
+
+    switch (ch = getch_noeof()) {
+       case '$':
+           /* use .$(expression) for non side effects expression */
+           (void)expression();
+           return;
+       case 'a' ... 'z': case 'A' ... 'Z': case '_':
+           (void)identifier(ch);
+           break;
+       default:
+           ungetch(ch);
+           if (skipws() != '$')
+               error("expecting symbol");
+           /* allow spaces before an expression */
+           (void)expression();
+           return;
+    }
+    /* a single letter directive selects the type of the data that follows */
+    if (parser.string[1] == '\0') {
+       switch (parser.string[0]) {
+           case 'c':   get_data(type_c);       break;
+           case 's':   get_data(type_s);       break;
+           case 'i':   get_data(type_i);       break;
+           case 'l':   get_data(type_l);       break;
+           case 'f':   get_data(type_f);       break;
+           case 'd':   get_data(type_d);       break;
+           case 'p':   get_data(type_p);       break;
+           default:    error("bad type .%c", parser.string[0]);
+       }
+    }
+    else if (strcmp(parser.string, "data") == 0) {
+       if (parser.parsing != PARSING_NONE)
+           error(".data must be specified once and be the first section");
+       parser.parsing = PARSING_DATA;
+       data_length = get_int(skip_ws);
+       data = (char *)xcalloc(1, data_length);
+    }
+    else if (strcmp(parser.string, "code") == 0) {
+       if (parser.parsing != PARSING_NONE &&
+           parser.parsing != PARSING_DATA)
+           error(".code must be specified once only");
+       parser.parsing = PARSING_CODE;
+    }
+    else if (strcmp(parser.string, "align") == 0) {
+       length = get_int(skip_ws);
+       if (parser.parsing != PARSING_DATA)
+           error(".align must be in .data");
+       /* length & (length - 1) is zero only for powers of two */
+       if (length > 1 && length <= 4096 && !(length & (length - 1))) {
+           offset = data_offset;
+           /* NOTE(review): when data_offset is already a multiple of
+            * length the remainder is 0 and a full `length' bytes are
+            * skipped -- confirm this padding is intended */
+           offset += length - ((offset + length) % length);
+           check_data(offset - data_offset);
+           data_offset = offset;
+       }
+       else
+           error("bad .align %ld (must be a power of 2, >= 2 && <= 4096)",
+                 (jit_word_t)length);
+    }
+    else if (strcmp(parser.string, "size") == 0) {
+       length = get_int(skip_ws);
+       if (parser.parsing != PARSING_DATA)
+           error(".size must be in .data");
+       check_data(length);
+       data_offset += length;
+    }
+    else if (strcmp(parser.string, "disasm") == 0)
+       flag_disasm = 1;
+    else
+       error("unknown command .%s", parser.string);
+}
+
+/*
+ * Scan a numeric constant whose first character is `ch'.
+ * The digits are collected in a local buffer; a leading '0' selects
+ * base 8 and, right after it, 'b' selects base 2 and 'x' base 16 (the
+ * prefix itself is not stored).  A '.' or, outside base 16, an 'e'
+ * makes the constant a floating point one.  Only lowercase letters are
+ * accepted as hexadecimal digits.
+ * Integers are converted with STRTOUL into parser.value.ui (type_l),
+ * floats with strtod() into parser.value.d (type_d); a leading '-'
+ * negates the result.  Returns tok_int or tok_float, malformed input
+ * ends in error("bad constant ...").
+ */
+static token_t
+number(int ch)
+{
+    char       buffer[1024], *endptr;
+    int                integer = 1, offset = 0, neg = 0, e = 0, d = 0, base = 10;
+
+    /* `continue' consumes a character without storing it, `break' leaves
+     * the switch and stores it in buffer */
+    for (;; ch = getch()) {
+       switch (ch) {
+           case '-':
+               if (offset == 0) {
+                   neg = 1;
+                   continue;
+               }
+               /* past the start a sign is only part of the constant
+                * right after an exponent marker */
+               if (offset && buffer[offset - 1] != 'e') {
+                   ungetch(ch);
+                   goto done;
+               }
+               break;
+           case '+':
+               if (offset == 0)
+                   continue;
+               if (offset && buffer[offset - 1] != 'e') {
+                   ungetch(ch);
+                   goto done;
+               }
+               break;
+           case '.':
+               /* at most one decimal point */
+               if (d)
+                   goto fail;
+               d = 1;
+               base = 10;
+               integer = 0;
+               break;
+           case '0':
+               /* leading zero: octal prefix, or start of 0b / 0x */
+               if (offset == 0 && base == 10) {
+                   base = 8;
+                   continue;
+               }
+               break;
+           case 'b':
+               if (offset == 0 && base == 8) {
+                   base = 2;
+                   continue;
+               }
+               /* otherwise 'b' is only valid as an hexadecimal digit */
+               if (base != 16)
+                   goto fail;
+               break;
+           case '1':
+               break;
+           case '2' ... '7':
+               if (base < 8)
+                   goto fail;
+               break;
+           case '8': case '9':
+               if (base < 10)
+                   goto fail;
+               break;
+           case 'x':
+               if (offset == 0 && base == 8) {
+                   base = 16;
+                   continue;
+               }
+               goto fail;
+           case 'a': case 'c': case 'd': case 'f':
+               if (base < 16)
+                   goto fail;
+               break;
+           case 'e':
+               /* hexadecimal digit in base 16, exponent marker otherwise */
+               if (e)
+                   goto fail;
+               if (base != 16) {
+                   e = 1;
+                   base = 10;
+                   integer = 0;
+               }
+               break;
+           case '_': case 'g' ... 'w': case 'y': case 'z': case 'A' ... 'Z':
+           fail:
+               /* also reached with goto from the conversion code after
+                * the loop */
+               buffer[offset++] = '\0';
+               error("bad constant %s", buffer);
+           default:
+               /* any other character ends the constant */
+               ungetch(ch);
+               goto done;
+       }
+       if (offset + 1 >= (int)sizeof(buffer))
+           goto fail;
+       buffer[offset++] = ch;
+    }
+done:
+    /* check for literal 0 */
+    if (offset == 0 && base == 8)      buffer[offset++] = '0';
+    buffer[offset] = '\0';
+    if (integer) {
+#if _WIN32
+#  define STRTOUL      strtoull
+#else
+#  define STRTOUL      strtoul
+#endif
+       parser.value.ui = STRTOUL(buffer, &endptr, base);
+       parser.type = type_l;
+       if (neg)
+           parser.value.i = -parser.value.i;
+    }
+    else {
+       parser.type = type_d;
+       parser.value.d = strtod(buffer, &endptr);
+       if (neg)
+           parser.value.d = -parser.value.d;
+    }
+    /* the whole buffer must have been consumed by the conversion */
+    if (*endptr)
+       goto fail;
+
+    return (integer ? tok_int : tok_float);
+}
+
+/*
+ * Map the character that follows a backslash to the control character
+ * it stands for (\a \b \f \n \r \t \v).  Any other character is
+ * returned unchanged.
+ */
+static int
+escape(int ch)
+{
+    if (ch == 'a')
+        return ('\a');
+    if (ch == 'b')
+        return ('\b');
+    if (ch == 'f')
+        return ('\f');
+    if (ch == 'n')
+        return ('\n');
+    if (ch == 'r')
+        return ('\r');
+    if (ch == 't')
+        return ('\t');
+    if (ch == 'v')
+        return ('\v');
+
+    return (ch);
+}
+
+/*
+ * Read a string literal; the opening '"' was already consumed.
+ * Characters are stored in parser.string, which grows in steps of 4096
+ * bytes when needed.  A backslash makes the next character go through
+ * escape(), so that an escaped '"' or '\' is stored literally.
+ * At the closing '"' the string is terminated (the '\0' is counted in
+ * parser.offset), parser.value.p is pointed at it and tok_string is
+ * returned.
+ */
+static token_t
+string(void)
+{
+    int         ch;
+    int         escaped = 0;
+
+    parser.offset = 0;
+    for (;;) {
+        ch = getch_noeof();
+        if (escaped) {
+            /* whatever follows a backslash is data */
+            ch = escape(ch);
+            escaped = 0;
+        }
+        else if (ch == '\\') {
+            escaped = 1;
+            continue;
+        }
+        else if (ch == '"') {
+            parser.string[parser.offset++] = '\0';
+            parser.value.p = parser.string;
+            parser.type = type_p;
+            return (tok_string);
+        }
+        /* room for this character and the final '\0' */
+        if (parser.offset + 1 >= parser.length) {
+            parser.length += 4096;
+            parser.string = (char *)xrealloc(parser.string, parser.length);
+        }
+        parser.string[parser.offset++] = ch;
+    }
+}
+
+/*
+ * Read a character constant; the opening quote was already consumed.
+ * Accepts a single character or a backslash escape, followed by the
+ * closing quote.  The result is stored, masked to 8 bits, in
+ * parser.value.i with type_l, and tok_char is returned.
+ */
+static token_t
+character(void)
+{
+    int         ch;
+    int         escaped;
+    int         quote;
+
+    ch = getch_noeof();
+    escaped = ch == '\\';
+    if (escaped)
+        ch = getch();
+    quote = getch_noeof();
+    if (quote != '\'')
+        error("bad single byte char");
+    parser.type = type_l;
+    parser.value.i = (escaped ? escape(ch) : ch) & 0xff;
+
+    return (tok_char);
+}
+
+/*
+ * Resolve an `@name' reference to the address of a symbol of the
+ * running program.  The '@' was already consumed; the name, including
+ * the '@', is read into parser.string.
+ * The first time a name is seen it is looked up with dlsym() and cached
+ * as a label of kind label_kind_dynamic; later references reuse the
+ * label.  The address is left in parser.value.p (type_p) and
+ * tok_pointer is returned.  A failed lookup is reported with error().
+ */
+static token_t
+dynamic(void)
+{
+    label_t    *label;
+    void       *value;
+    char       *string;
+
+    (void)identifier('@');
+    if ((label = get_label_by_name(parser.string)) == NULL) {
+#if __CYGWIN__ ||_WIN32
+        /* FIXME kludge to pass varargs test case, otherwise,
+         * will not print/scan float values */
+        if (strcmp(parser.string + 1, "sprintf") == 0)
+            value = sprintf;
+        else if (strcmp(parser.string + 1, "sscanf") == 0)
+            value = sscanf;
+        else
+#endif
+        {
+            /* dlerror() reports the last error since it was last called;
+             * discard any stale one so that only a failure of this
+             * dlsym() is reported */
+            (void)dlerror();
+            value = dlsym(DL_HANDLE, parser.string + 1);
+            if ((string = dlerror()))
+                error("%s", string);
+        }
+        label = new_label(label_kind_dynamic, parser.string, value);
+    }
+    parser.type = type_p;
+    parser.value.p = label->value;
+
+    return (tok_pointer);
+}
+
+/*
+ * Read the next token of an expression and store its kind in
+ * parser.expr.  For operands parser.type and parser.value are set too
+ * (by number(), dynamic(), character(), or from the symbol/label that
+ * was found).
+ * If a token was saved in parser.putback it is returned instead of
+ * reading input, and parser.putback is cleared.
+ * Operators made of several characters are recognized by reading ahead
+ * and giving back the character that does not belong to them.
+ */
+static void
+expression_prim(void)
+{
+    int                 ch;
+    token_t     token;
+    label_t    *label;
+    symbol_t   *symbol;
+
+    if (parser.putback) {
+       parser.expr = parser.putback;
+       parser.putback = (expr_t)0;
+       return;
+    }
+    switch (ch = skipws()) {
+       case '!':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_ne;
+           else {
+               ungetch(ch);                    parser.expr = expr_not;
+           }
+           break;
+       case '~':                               parser.expr = expr_com;
+           break;
+       case '*':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_mulset;
+           else {
+               ungetch(ch);                    parser.expr = expr_mul;
+           }
+           break;
+       case '/':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_divset;
+           else {
+               ungetch(ch);                    parser.expr = expr_div;
+           }
+           break;
+       case '%':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_remset;
+           else {
+               ungetch(ch);                    parser.expr = expr_rem;
+           }
+           break;
+       case '+':
+           switch (ch = getch_noeof()) {
+               case '+':                       parser.expr = expr_inc;
+                   break;
+               case '=':                       parser.expr = expr_addset;
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_add;
+                   break;
+           }
+           break;
+       case '-':
+           switch (ch = getch_noeof()) {
+               case '-':                       parser.expr = expr_dec;
+                   break;
+               case '=':                       parser.expr = expr_subset;
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_sub;
+                   break;
+           }
+           break;
+       case '<':
+           /* one of <  <=  <<  <<= */
+           switch (ch = getch_noeof()) {
+               case '=':                       parser.expr = expr_le;
+                   break;
+               case '<':                       ch = getch_noeof();
+                   if (ch == '=')              parser.expr = expr_lshset;
+                   else {
+                       ungetch(ch);            parser.expr = expr_lsh;
+                   }
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_lt;
+                   break;
+           }
+           break;
+       case '>':
+           /* one of >  >=  >>  >>= */
+           switch (ch = getch_noeof()) {
+               case '=':                       parser.expr = expr_ge;
+                   break;
+               case '>':                       ch = getch_noeof();
+                   if (ch == '=')              parser.expr = expr_rshset;
+                   else {
+                       ungetch(ch);            parser.expr = expr_rsh;
+                   }
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_gt;
+                   break;
+           }
+           break;
+       case '&':
+           switch (ch = getch_noeof()) {
+               case '=':                       parser.expr = expr_andset;
+                   break;
+               case '&':                       parser.expr = expr_andand;
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_and;
+                   break;
+           }
+           break;
+       case '|':
+           switch (ch = getch_noeof()) {
+               case '=':                       parser.expr = expr_orset;
+                   break;
+               case '|':                       parser.expr = expr_oror;
+                   break;
+               default:        ungetch(ch);    parser.expr = expr_or;
+                   break;
+           }
+           break;
+       case '^':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_xorset;
+           else {
+               ungetch(ch);                    parser.expr = expr_xor;
+           }
+           break;
+       case '=':
+           if ((ch = getch_noeof()) == '=')    parser.expr = expr_eq;
+           else {
+               ungetch(ch);                    parser.expr = expr_set;
+           }
+           break;
+       case '(':                               parser.expr = expr_lparen;
+           break;
+       case ')':                               parser.expr = expr_rparen;
+           break;
+       case '0' ... '9':
+           token = number(ch);
+           parser.expr = token == tok_int ? expr_int : expr_float;
+           break;
+       case '@':
+           /* address of a symbol of the running program */
+           (void)dynamic();
+           parser.expr = expr_pointer;
+           break;
+       case '$':
+           /* expression variable; the name is stored with the '$' */
+           identifier('$');
+           /* no support for nested expressions */
+           if (parser.string[0] == '\0')
+               error("syntax error");
+           parser.expr = expr_symbol;
+           if ((symbol = get_symbol_by_name(parser.string)) != NULL) {
+               parser.type = symbol->type;
+               parser.value = symbol->value;
+           }
+           else
+               /* only create symbol on assignment */
+               parser.type = type_none;
+           break;
+       case 'a' ... 'z': case 'A' ... 'Z': case '_':
+           /* a plain identifier must name an already known label */
+           identifier(ch);
+           if ((label = get_label_by_name(parser.string))) {
+               if (label->kind == label_kind_code_forward)
+                   error("forward value for %s not supported",
+                         parser.string);
+               parser.expr = expr_pointer;
+               parser.type = type_p;
+               parser.value.p = label->value;
+           }
+           else
+               error("invalid identifier %s", parser.string);
+           break;
+       case '\'':
+           character();
+           parser.expr = expr_int;
+           break;
+       case '"':
+           /* not smart enough to put it in data and/or relocate it, etc */
+           error("must declare strings as data");
+       default:
+           error("syntax error");
+    }
+}
+
+/*
+ * Apply `++' to an expression variable.
+ * pre != 0: prefix form, the variable name is read here and the result
+ *           is the value after the increment.
+ * pre == 0: postfix form, the name is already in parser.string and the
+ *           result is the value before the increment.
+ * Nothing is evaluated while parser.short_circuit is set.  The token
+ * that follows is made available in parser.expr before returning.
+ */
+static void
+expression_inc(int pre)
+{
+    symbol_t   *symbol;
+
+    if (pre) {
+        expression_prim();
+        if (parser.expr != expr_symbol)
+            error("syntax error");
+    }
+    symbol = get_symbol_by_name(parser.string);
+    if (!parser.short_circuit) {
+        /* there is no symbol to take the name from when the lookup
+         * fails: report the name that was searched */
+        if (symbol == NULL)
+            error("undefined symbol %s", parser.string);
+        parser.type = symbol->type;
+        if (!pre)
+            parser.value = symbol->value;
+        switch (symbol->type) {
+            case type_l:
+                ++symbol->value.i;
+                break;
+            case type_d:
+                /* should really be an error */
+                symbol->value.d += 1.0;
+                break;
+            default:
+                /* pointers: it is the variable that must be updated,
+                 * not the value of the expression */
+                ++symbol->value.cp;
+                break;
+        }
+        if (pre)
+            parser.value = symbol->value;
+    }
+    expression_prim();
+}
+
+/*
+ * Apply `--' to an expression variable.
+ * pre != 0: prefix form, the variable name is read here and the result
+ *           is the value after the decrement.
+ * pre == 0: postfix form, the name is already in parser.string and the
+ *           result is the value before the decrement.
+ * Nothing is evaluated while parser.short_circuit is set.  The token
+ * that follows is made available in parser.expr before returning.
+ */
+static void
+expression_dec(int pre)
+{
+    symbol_t   *symbol;
+
+    if (pre) {
+        expression_prim();
+        if (parser.expr != expr_symbol)
+            error("syntax error");
+    }
+    symbol = get_symbol_by_name(parser.string);
+    if (!parser.short_circuit) {
+        /* there is no symbol to take the name from when the lookup
+         * fails: report the name that was searched */
+        if (symbol == NULL)
+            error("undefined symbol %s", parser.string);
+        parser.type = symbol->type;
+        if (!pre)
+            parser.value = symbol->value;
+        switch (symbol->type) {
+            case type_l:
+                --symbol->value.i;
+                break;
+            case type_d:
+                /* should really be an error */
+                symbol->value.d -= 1.0;
+                break;
+            default:
+                /* pointers: it is the variable that must be updated,
+                 * not the value of the expression */
+                --symbol->value.cp;
+                break;
+        }
+        if (pre)
+            parser.value = symbol->value;
+    }
+    expression_prim();
+}
+
+/*
+ * Parse a unary expression: prefix operators (+ - ++ -- ! ~), a
+ * parenthesized expression, a constant, or an expression variable
+ * optionally followed by an assignment operator or a postfix ++/--.
+ * The result is left in parser.type/parser.value and the token that
+ * follows the operand is left in parser.expr.
+ * Variables are only created or modified when parser.short_circuit is
+ * not set.
+ */
+static void
+expression_unary(void)
+{
+    symbol_t   *symbol;
+    char        buffer[256];
+
+    expression_prim();
+    switch (parser.expr) {
+        case expr_add:
+            expression_unary();
+            switch (parser.type) {
+                case type_l:
+                case type_d:
+                    break;
+                default:
+                    error("syntax error");
+            }
+            break;
+        case expr_sub:
+            expression_unary();
+            switch (parser.type) {
+                case type_l:
+                    parser.value.i = -parser.value.i;
+                    break;
+                case type_d:
+                    parser.value.d = -parser.value.d;
+                    break;
+                default:
+                    error("syntax error");
+            }
+            break;
+        case expr_inc:
+            expression_inc(1);
+            break;
+        case expr_dec:
+            expression_dec(1);
+            break;
+        case expr_not:
+            /* logical not: 1 when the operand is zero/null, else 0 */
+            expression_unary();
+            switch (parser.type) {
+                case type_l:
+                    parser.value.i = !parser.value.i;
+                    break;
+                case type_d:
+                    parser.value.i = parser.value.d == 0;
+                    break;
+                case type_p:
+                    parser.value.i = parser.value.p == NULL;
+                    break;
+                default:
+                    error("syntax error");
+            }
+            parser.type = type_l;
+            break;
+        case expr_com:
+            expression_unary();
+            if (parser.type != type_l)
+                error("syntax error");
+            parser.value.i = ~parser.value.i;
+            break;
+        case expr_lparen:
+            expression_cond();
+            if (parser.expr != expr_rparen)
+                error("syntax error");
+            expression_prim();
+            break;
+        case expr_symbol:
+            /* save the name, parser.string may be reused while the
+             * right hand side is parsed */
+            if (strlen(parser.string) >= sizeof(buffer))
+                error("bad identifier %s", parser.string);
+            strcpy(buffer, parser.string);
+            expression_prim();
+            switch (parser.expr) {
+                case expr_set:
+                    if ((symbol = get_symbol_by_name(buffer)) == NULL) {
+                        if (!parser.short_circuit)
+                            symbol = new_symbol(buffer);
+                    }
+                    expression_cond();
+                set:
+                    if (!parser.short_circuit) {
+                        if (symbol == NULL)
+                            error("syntax error");
+                        symbol->type = parser.type;
+                        symbol->value = parser.value;
+                    }
+                    break;
+                /* `$a op= b' is evaluated as `$a op b': the operator is
+                 * put back so that the binary operator parser finds it
+                 * with the value of the variable as left operand */
+                case expr_mulset:
+                    parser.putback = expr_mul;
+                    goto check;
+                case expr_divset:
+                    parser.putback = expr_div;
+                    goto check;
+                case expr_remset:
+                    parser.putback = expr_rem;
+                    goto check;
+                case expr_addset:
+                    parser.putback = expr_add;
+                    goto check;
+                case expr_subset:
+                    parser.putback = expr_sub;
+                    goto check;
+                case expr_lshset:
+                    parser.putback = expr_lsh;
+                    goto check;
+                case expr_rshset:
+                    parser.putback = expr_rsh;
+                    goto check;
+                case expr_andset:
+                    parser.putback = expr_and;
+                    goto check;
+                case expr_orset:
+                    parser.putback = expr_or;
+                    goto check;
+                case expr_xorset:
+                    parser.putback = expr_xor;
+                check:
+                    if ((symbol = get_symbol_by_name(buffer)) == NULL) {
+                        if (!parser.short_circuit)
+                            error("undefined symbol %s", buffer);
+                        /* placeholder operand, the result is discarded
+                         * when short circuiting */
+                        parser.type = type_l;
+                        parser.value.i = 1;
+                    }
+                    switch (parser.putback) {
+                        case expr_mul:  case expr_div:  case expr_rem:
+                            expression_mul();
+                            break;
+                        case expr_add:  case expr_sub:
+                            expression_add();
+                            break;
+                        case expr_lsh:  case expr_rsh:
+                            expression_shift();
+                            break;
+                        case expr_and:  case expr_or:  case expr_xor:
+                            expression_bit();
+                            break;
+                        default:
+                            abort();
+                    }
+                    goto set;
+                case expr_inc:
+                    expression_inc(0);
+                    break;
+                case expr_dec:
+                    expression_dec(0);
+                    break;
+                default:
+                    break;
+            }
+            break;
+        case expr_int:
+        case expr_float:
+        case expr_pointer:
+            /* make next token available */
+            expression_prim();
+            /* FALLTHROUGH */
+        default:
+            break;
+    }
+}
+
+/*
+ * Parse a sequence of unary expressions joined by `*', `/' and `%',
+ * evaluated left to right.
+ * Integer operands give an integer result, mixing an integer and a
+ * double promotes to double; `%' only accepts integers.  Pointers are
+ * not valid operands.  Integer division or remainder by zero is
+ * reported with error("divide by zero").
+ * The result is left in parser.type/parser.value.
+ */
+static void
+expression_mul(void)
+{
+    type_t      type;
+    value_t     value;
+
+    expression_unary();
+    switch (parser.type) {
+        case type_l:    case type_d:    case type_p:    break;
+        default:                                        return;
+    }
+    for (;;) {
+        switch (parser.expr) {
+            case expr_mul:
+                type = parser.type, value = parser.value;
+                expression_unary();
+                /* a pointer would otherwise be handled as a double */
+                if (type == type_p)
+                    error("invalid operand");
+                switch (parser.type) {
+                    case type_l:
+                        if (type == type_l)
+                            value.i *= parser.value.i;
+                        else
+                            value.d *= parser.value.i;
+                        break;
+                    case type_d:
+                        if (type == type_l) {
+                            type = type_d;
+                            value.d = value.i;
+                        }
+                        value.d *= parser.value.d;
+                        break;
+                    default:
+                        error("invalid operand");
+                }
+                parser.type = type, parser.value = value;
+                break;
+            case expr_div:
+                type = parser.type, value = parser.value;
+                expression_unary();
+                /* a pointer would otherwise be handled as a double */
+                if (type == type_p)
+                    error("invalid operand");
+                switch (parser.type) {
+                    case type_l:
+                        if (type == type_l) {
+                            if (parser.value.i == 0)
+                                error("divide by zero");
+                            value.i /= parser.value.i;
+                        }
+                        else
+                            value.d /= parser.value.i;
+                        break;
+                    case type_d:
+                        if (type == type_l) {
+                            type = type_d;
+                            value.d = value.i;
+                        }
+                        value.d /= parser.value.d;
+                        break;
+                    default:
+                        error("invalid operand");
+                }
+                parser.type = type, parser.value = value;
+                break;
+            case expr_rem:
+                type = parser.type, value = parser.value;
+                expression_unary();
+                switch (parser.type) {
+                    case type_l:
+                        if (type == type_l) {
+                            if (parser.value.i == 0)
+                                error("divide by zero");
+                            value.i %= parser.value.i;
+                        }
+                        else
+                            error("invalid operand");
+                        break;
+                    default:
+                        error("invalid operand");
+                }
+                parser.type = type, parser.value = value;
+                break;
+            default:
+                return;
+        }
+    }
+}
+
+/*
+ * Parse a sequence of multiplicative expressions joined by `+' and
+ * `-', evaluated left to right.
+ * Accepted operand combinations:
+ *   integer/double with integer/double  (promoted to double if mixed)
+ *   pointer +/- integer                 -> pointer
+ *   integer + pointer                   -> pointer
+ *   pointer - pointer                   -> integer
+ * Anything else is reported with error("invalid operand").
+ * The result is left in parser.type/parser.value.
+ */
+static void
+expression_add(void)
+{
+    type_t      ltype;
+    value_t     lvalue;
+    int         subtract;
+
+    expression_mul();
+    if (parser.type != type_l && parser.type != type_d &&
+        parser.type != type_p)
+        return;
+    while (parser.expr == expr_add || parser.expr == expr_sub) {
+        subtract = parser.expr == expr_sub;
+        /* keep the left operand while the right one is parsed */
+        ltype = parser.type;
+        lvalue = parser.value;
+        expression_mul();
+        if (parser.type == type_l) {
+            if (ltype == type_l) {
+                if (subtract)
+                    lvalue.i -= parser.value.i;
+                else
+                    lvalue.i += parser.value.i;
+            }
+            else if (ltype == type_d) {
+                if (subtract)
+                    lvalue.d -= parser.value.i;
+                else
+                    lvalue.d += parser.value.i;
+            }
+            else {
+                if (subtract)
+                    lvalue.cp -= parser.value.i;
+                else
+                    lvalue.cp += parser.value.i;
+            }
+        }
+        else if (parser.type == type_d) {
+            if (ltype == type_l) {
+                ltype = type_d;
+                lvalue.d = lvalue.i;
+            }
+            else if (ltype != type_d)
+                error("invalid operand");
+            if (subtract)
+                lvalue.d -= parser.value.d;
+            else
+                lvalue.d += parser.value.d;
+        }
+        else if (parser.type == type_p) {
+            if (subtract) {
+                /* only the distance between two pointers is valid */
+                if (ltype == type_p) {
+                    ltype = type_l;
+                    lvalue.i = lvalue.cp - parser.value.cp;
+                }
+                else
+                    error("invalid operand");
+            }
+            else {
+                if (ltype == type_l) {
+                    ltype = type_p;
+                    lvalue.cp = lvalue.i + parser.value.cp;
+                }
+                else
+                    error("invalid operand");
+            }
+        }
+        else
+            error("invalid operand");
+        parser.type = ltype;
+        parser.value = lvalue;
+    }
+}
+
+/*
+ * Parse a sequence of additive expressions joined by `<<' and `>>',
+ * evaluated left to right.  Both operands of a shift must be integers,
+ * otherwise error("invalid operand") is called.
+ * The result is left in parser.value.i.
+ */
+static void
+expression_shift(void)
+{
+    jit_word_t  lhs;
+    int         left;
+
+    expression_add();
+    if (parser.type != type_l && parser.type != type_d &&
+        parser.type != type_p)
+        return;
+    while (parser.expr == expr_lsh || parser.expr == expr_rsh) {
+        left = parser.expr == expr_lsh;
+        /* keep the left operand while the shift count is parsed */
+        lhs = parser.value.i;
+        if (parser.type != type_l)
+            error("invalid operand");
+        expression_add();
+        if (parser.type != type_l)
+            error("invalid operand");
+        if (left)
+            lhs <<= parser.value.i;
+        else
+            lhs >>= parser.value.i;
+        parser.value.i = lhs;
+    }
+}
+
+/*
+ * Parse a chain of bitwise operators (expr_and, expr_or, expr_xor).
+ * Operands come from expression_shift(); both sides of every operator
+ * must be type_l or error("invalid operand") is raised.  The folded
+ * result is stored back in parser.value.i.
+ */
+static void
+expression_bit(void)
+{
+    int         op;
+    jit_word_t  lhs;
+
+    expression_shift();
+    if (parser.type != type_l && parser.type != type_d &&
+        parser.type != type_p)
+        return;
+
+    for (;;) {
+        /* Latch the operator before the right operand is parsed. */
+        op = parser.expr;
+        if (op != expr_and && op != expr_or && op != expr_xor)
+            return;
+
+        if (parser.type != type_l)
+            error("invalid operand");
+        lhs = parser.value.i;
+        expression_shift();
+        if (parser.type != type_l)
+            error("invalid operand");
+
+        if (op == expr_and)
+            lhs &= parser.value.i;
+        else if (op == expr_or)
+            lhs |= parser.value.i;
+        else
+            lhs ^= parser.value.i;
+        parser.value.i = lhs;
+    }
+}
+
+/* Apply relational operator `op' (one of expr_lt, expr_le, expr_eq,
+ * expr_ge, expr_gt, expr_ne) to two word sized integers. */
+static jit_word_t
+relation_word(int op, jit_word_t lhs, jit_word_t rhs)
+{
+    switch (op) {
+        case expr_lt:   return (lhs <  rhs);
+        case expr_le:   return (lhs <= rhs);
+        case expr_eq:   return (lhs == rhs);
+        case expr_ge:   return (lhs >= rhs);
+        case expr_gt:   return (lhs >  rhs);
+        case expr_ne:   return (lhs != rhs);
+        default:        abort();
+    }
+}
+
+/* Apply relational operator `op' (same set as relation_word()) to two
+ * doubles. */
+static jit_word_t
+relation_double(int op, double lhs, double rhs)
+{
+    switch (op) {
+        case expr_lt:   return (lhs <  rhs);
+        case expr_le:   return (lhs <= rhs);
+        case expr_eq:   return (lhs == rhs);
+        case expr_ge:   return (lhs >= rhs);
+        case expr_gt:   return (lhs >  rhs);
+        case expr_ne:   return (lhs != rhs);
+        default:        abort();
+    }
+}
+
+/*
+ * Parse a chain of relational operators between operands produced by
+ * expression_bit().  After each operator parser.type is type_l and
+ * parser.value.i holds 0 or 1.
+ *
+ * Operand rules, identical for every operator:
+ *   - integers and pointers are compared as jit_word_t (pointers are
+ *     cast to jit_word_t first);
+ *   - if either side is a double the other side must be an integer or
+ *     a double and the comparison is done in double;
+ *   - pairing a pointer with a double, or using any other operand
+ *     type on the right hand side, raises error("invalid operand").
+ */
+static void
+expression_rel(void)
+{
+    int         op;
+    type_t      type;
+    value_t     value;
+
+    expression_bit();
+    switch (parser.type) {
+        case type_l:    case type_d:    case type_p:    break;
+        default:                                        return;
+    }
+    for (;;) {
+        /* Latch the operator: expression_bit() below advances
+         * parser.expr past the right operand. */
+        switch (op = parser.expr) {
+            case expr_lt:       case expr_le:   case expr_eq:
+            case expr_ge:       case expr_gt:   case expr_ne:
+                break;
+            default:
+                return;
+        }
+        type = parser.type, value = parser.value;
+        expression_bit();
+        switch (parser.type) {
+            case type_l:    case type_d:    case type_p:
+                break;
+            default:
+                error("invalid operand");
+        }
+        if (type == type_d || parser.type == type_d) {
+            /* A pointer has no meaningful ordering against a double. */
+            if (type == type_p || parser.type == type_p)
+                error("invalid operand");
+            value.i = relation_double(op,
+                                      type == type_d ?
+                                      value.d : (double)value.i,
+                                      parser.type == type_d ?
+                                      parser.value.d :
+                                      (double)parser.value.i);
+        }
+        else
+            value.i = relation_word(op,
+                                    type == type_p ?
+                                    (jit_word_t)value.p : value.i,
+                                    parser.type == type_p ?
+                                    (jit_word_t)parser.value.p :
+                                    parser.value.i);
+        parser.type = type_l, parser.value = value;
+    }
+}
+
+/*
+ * Parse a chain of logical "&&" (expr_andand) and "||" (expr_oror)
+ * operators between operands produced by expression_rel().
+ *
+ * An operand counts as true when it is a non-zero integer, a double
+ * different from 0.0 or a non-NULL pointer.  When the left operand
+ * already decides the outcome (false for "&&", true for "||"),
+ * parser.short_circuit is incremented while the right operand is
+ * parsed and decremented afterwards.  The right operand must be
+ * type_l, type_d or type_p, otherwise error("invalid operand") is
+ * raised.  The result is type_l with parser.value.i set to 0 or 1.
+ */
+static void
+expression_cond(void)
+{
+    int         is_and;
+    int         lhs_true;
+    int         rhs_true = 0;
+    int         decided;
+    type_t      lhs_type;
+    value_t     lhs;
+
+    expression_rel();
+    if (parser.type != type_l && parser.type != type_d &&
+        parser.type != type_p)
+        return;
+
+    for (;;) {
+        if (parser.expr == expr_andand)
+            is_and = 1;
+        else if (parser.expr == expr_oror)
+            is_and = 0;
+        else
+            return;
+
+        /* Truth value of the left operand. */
+        lhs_type = parser.type, lhs = parser.value;
+        if (lhs_type == type_l)
+            lhs_true = lhs.i != 0;
+        else if (lhs_type == type_d)
+            lhs_true = lhs.d != 0.0;
+        else
+            lhs_true = lhs.p != NULL;
+
+        /* "&&" is decided by a false left side, "||" by a true one. */
+        decided = is_and ? !lhs_true : lhs_true;
+        parser.short_circuit += decided;
+        expression_rel();
+        parser.short_circuit -= decided;
+
+        /* Truth value of the right operand. */
+        switch (parser.type) {
+            case type_l:
+                rhs_true = parser.value.i != 0;
+                break;
+            case type_d:
+                rhs_true = parser.value.d != 0.0;
+                break;
+            case type_p:
+                rhs_true = parser.value.p != NULL;
+                break;
+            default:
+                error("invalid operand");
+        }
+
+        parser.type = type_l;
+        parser.value.i = is_and ? (lhs_true && rhs_true)
+                                : (lhs_true || rhs_true);
+    }
+}
+
+/*
+ * Handle a token introduced by '$'.
+ *
+ * identifier('$') fills parser.string.  If nothing follows the '$'
+ * there, the input must continue with '(' and a full expression is
+ * parsed with expression_cond(), which has to stop at expr_rparen.
+ * Otherwise parser.string is looked up as a symbol and its type and
+ * value are copied into the parser state.
+ *
+ * Returns tok_int, tok_float or tok_pointer according to the resulting
+ * type; every failure path goes through error().
+ */
+static token_t
+expression(void)
+{
+    symbol_t    *sym;
+
+    (void)identifier('$');
+
+    if (parser.string[1] != '\0') {
+        /* "$name": substitute the value of a defined symbol. */
+        sym = get_symbol_by_name(parser.string);
+        if (sym == NULL)
+            error("undefined symbol %s", parser.string);
+        parser.type = sym->type;
+        if (sym->type == type_l) {
+            parser.value.i = sym->value.i;
+            return (tok_int);
+        }
+        if (sym->type == type_d) {
+            parser.value.d = sym->value.d;
+            return (tok_float);
+        }
+        parser.value.p = sym->value.p;
+        return (tok_pointer);
+    }
+
+    /* "$(...)": evaluate the parenthesised expression. */
+    if (getch_noeof() != '(')
+        error("bad symbol or expression");
+    parser.type = type_none;
+    expression_cond();
+    if (parser.expr != expr_rparen)
+        error("bad expression");
+    switch (parser.type) {
+        case type_l:
+            return (tok_int);
+        case type_d:
+            return (tok_float);
+        case type_p:
+            return (tok_pointer);
+        default:
+            error("bad expression");
+    }
+}
+
+/*
+ * Read the next token.  `skip' selects how the first character is
+ * fetched: getch() for skip_none, skipws() for skip_ws, skipnl() for
+ * skip_nl; any other value aborts.  The first character then selects
+ * the specialised reader, or a fixed token for EOF, newline, ';' and
+ * '.'.  An unexpected character raises error("syntax error").
+ */
+static token_t
+primary(skip_t skip)
+{
+    int         ch;
+
+    if (skip == skip_none)
+        ch = getch();
+    else if (skip == skip_ws)
+        ch = skipws();
+    else if (skip == skip_nl)
+        ch = skipnl();
+    else
+        abort();
+
+    switch (ch) {
+        /* Tokens that are complete after a single character. */
+        case EOF:
+            return (tok_eof);
+        case '\n':
+            return (tok_newline);
+        case ';':
+            return (tok_semicollon);
+        case '.':
+            return (tok_dot);
+        /* Tokens handed over to a dedicated reader. */
+        case '%':
+            return (regname());
+        case '$':
+            return (expression());
+        case '@':
+            return (dynamic());
+        case '"':
+            return (string());
+        case '\'':
+            return (character());
+        case '0' ... '9': case '+': case '-':
+            return (number(ch));
+        case 'a' ... 'z': case 'A' ... 'Z': case '_':
+            return (identifier(ch));
+        default:
+            error("syntax error");
+    }
+}
+
+/*
+ * Main parsing loop: consume tokens until tok_eof.
+ *
+ *   tok_dot     hands over to dot().
+ *   tok_symbol  followed by ':' defines a label; otherwise the symbol
+ *               is looked up in `instrs' and its handler is called,
+ *               which is only allowed while parsing .code.
+ *   anything else raises error("syntax error").
+ *
+ * Label definition: an existing label may only be completed if it is
+ * still label_kind_code_forward; a new label is created as a data
+ * label (address data + data_offset) or as a code label (a fresh
+ * jit_label()) depending on parser.parsing.
+ */
+static void
+parse(void)
+{
+    int          ch;
+    token_t      token;
+    instr_t     *instr;
+    label_t     *label;
+    void        *value;
+
+    for (;;) {
+        token = primary(skip_nl);
+        if (token == tok_eof)
+            return;
+        if (token == tok_dot) {
+            dot();
+            continue;
+        }
+        if (token != tok_symbol)
+            error("syntax error");
+
+        ch = getch_noeof();
+        if (ch != ':') {
+            /* Not a label: must be an instruction mnemonic. */
+            ungetch(ch);
+            instr = (instr_t *)get_hash(instrs, parser.string);
+            if (instr == NULL)
+                error("unhandled symbol %s", parser.string);
+            if (parser.parsing != PARSING_CODE)
+                error(".code must be specified before instructions");
+            (*instr->function)();
+            continue;
+        }
+
+        label = get_label_by_name(parser.string);
+        if (label != NULL) {
+            /* Only a forward referenced code label may be defined
+             * after it was first seen. */
+            if (label->kind != label_kind_code_forward)
+                error("label %s: redefined", parser.string);
+            label->kind = label_kind_code;
+            jit_link(label->value);
+            jit_note(parser.name, parser.line);
+        }
+        else if (parser.parsing == PARSING_DATA) {
+            value = data + data_offset;
+            new_label(label_kind_data, parser.string, value);
+        }
+        else if (parser.parsing == PARSING_CODE) {
+            value = jit_label();
+            jit_note(parser.name, parser.line);
+            new_label(label_kind_code, parser.string, value);
+        }
+        else
+            error("label not in .code or .data");
+    }
+}
+
+/*
+ * Resolve pending patches, emit the code and run it.
+ *
+ * Every entry of the `patches' list is applied with jit_patch_at() and
+ * freed; a patch whose label is still label_kind_code_forward raises
+ * error("undefined label ...").  The list head is reset afterwards so
+ * the global does not keep pointing at freed nodes.
+ *
+ * With flag_data == 0 the jit is realized and told not to use a data
+ * buffer or notes before emitting.  jit_print() / jit_disassemble()
+ * output is produced according to flag_verbose and flag_disasm.
+ *
+ * Returns the value of the generated function called with
+ * (argc, argv), or 0 without calling it when flag_disasm is set.
+ */
+static int
+execute(int argc, char *argv[])
+{
+    int          result;
+    label_t     *label;
+    function_t   function;
+    patch_t     *patch, *next;
+
+    for (patch = patches; patch; patch = next) {
+        /* Save the link first: the node is freed below. */
+        next = patch->next;
+        label = patch->label;
+        if (label->kind == label_kind_code_forward)
+            error("undefined label %s", label->name);
+        switch (patch->kind) {
+            case patch_kind_jmp:
+            case patch_kind_mov:
+            case patch_kind_call:
+                jit_patch_at(patch->value, label->value);
+                break;
+            default:
+                abort();
+        }
+        free(patch);
+    }
+    patches = NULL;
+
+    if (flag_data == 0) {
+        jit_realize();
+        jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
+    }
+
+    function = jit_emit();
+    if (flag_verbose > 1 || flag_disasm) {
+        jit_print();
+        fprintf(stdout, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
+    }
+    if (flag_verbose > 0 || flag_disasm) {
+        jit_disassemble();
+        fprintf(stdout, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
+    }
+
+    jit_clear_state();
+    if (flag_disasm)
+        result = 0;
+    else
+        result = (*function)(argc, argv);
+    jit_destroy_state();
+
+    return (result);
+}
+
+/*
+ * malloc() wrapper: returns `size' bytes of storage or reports
+ * "out of memory" through error() when malloc() yields NULL.
+ */
+static void *
+xmalloc(size_t size)
+{
+    void        *block;
+
+    block = malloc(size);
+    if (!block)
+        error("out of memory");
+
+    return (block);
+}
+
+/*
+ * realloc() wrapper: resizes `pointer' to `size' bytes and returns the
+ * new address, or reports "out of memory" through error() when
+ * realloc() yields NULL.
+ */
+static void *
+xrealloc(void *pointer, size_t size)
+{
+    void        *resized = realloc(pointer, size);
+
+    if (!resized)
+        error("out of memory");
+
+    return (resized);
+}
+
+/*
+ * calloc() wrapper: returns zeroed storage for `nmemb' objects of
+ * `size' bytes, or reports "out of memory" through error() when
+ * calloc() yields NULL.
+ */
+static void *
+xcalloc(size_t nmemb, size_t size)
+{
+    void        *block;
+
+    block = calloc(nmemb, size);
+    if (!block)
+        error("out of memory");
+
+    return (block);
+}
+
+/*
+ * Create a label of the given kind, with a private copy of `name' and
+ * the given value, register it in the `labels' hash table and bump
+ * label_offset.  put_hash() rejects a name that is already present.
+ * Running out of memory, including for the name copy, is reported
+ * through error().
+ *
+ * Returns the new label.
+ */
+static label_t *
+new_label(label_kind_t kind, char *name, void *value)
+{
+    label_t     *label;
+
+    label = (label_t *)xmalloc(sizeof(label_t));
+    label->kind = kind;
+    /* strdup() can fail; a NULL name would crash later in strcmp(). */
+    if ((label->name = strdup(name)) == NULL)
+        error("out of memory");
+    label->value = value;
+    put_hash(labels, (entry_t *)label);
+    label_offset++;
+    return (label);
+}
+
+/*
+ * Allocate a patch record for `label' with the given kind and value
+ * and push it on the front of the global `patches' list.
+ *
+ * Returns the new record.
+ */
+static patch_t *
+new_patch(patch_kind_t kind, label_t *label, void *value)
+{
+    patch_t     *node;
+
+    node = (patch_t *)xmalloc(sizeof(patch_t));
+    node->kind = kind;
+    node->label = label;
+    node->value = value;
+
+    /* Link in as the new list head. */
+    node->next = patches;
+    patches = node;
+
+    return (node);
+}
+
+/*
+ * bsearch() comparator: `left' is the searched name (a C string),
+ * `right' points at an element of the symbol_t pointer array.
+ */
+static int
+bcmp_symbols(const void *left, const void *right)
+{
+    const char          *key = (const char *)left;
+    symbol_t *const     *slot = (symbol_t *const *)right;
+
+    return (strcmp(key, (*slot)->name));
+}
+
+/*
+ * qsort() comparator: both arguments point at elements of the
+ * symbol_t pointer array; symbols are ordered by name with strcmp().
+ */
+static int
+qcmp_symbols(const void *left, const void *right)
+{
+    symbol_t *const     *lhs = (symbol_t *const *)left;
+    symbol_t *const     *rhs = (symbol_t *const *)right;
+
+    return (strcmp((*lhs)->name, (*rhs)->name));
+}
+
+/*
+ * Create a symbol with a private copy of `name' and add it to the
+ * global `symbols' array.
+ *
+ * The array grows by 16 slots whenever symbol_offset is a multiple of
+ * 16, and is re-sorted by name after each insertion so that
+ * get_symbol_by_name() can use bsearch().  Only the name is
+ * initialised here.  Running out of memory, including for the name
+ * copy, is reported through error().
+ *
+ * Returns the new symbol.
+ */
+static symbol_t *
+new_symbol(char *name)
+{
+    symbol_t    *symbol;
+
+    if ((symbol_offset & 15) == 0) {
+        if ((symbol_length += 16) == 16)
+            symbols = (symbol_t **)xmalloc(sizeof(symbol_t *) *
+                                           symbol_length);
+        else
+            symbols = (symbol_t **)xrealloc(symbols, sizeof(symbol_t *) *
+                                            symbol_length);
+    }
+    symbol = (symbol_t *)xmalloc(sizeof(symbol_t));
+    /* strdup() can fail; a NULL name would crash in the comparator. */
+    if ((symbol->name = strdup(name)) == NULL)
+        error("out of memory");
+    symbols[symbol_offset++] = symbol;
+    qsort(symbols, symbol_offset, sizeof(symbol_t *), qcmp_symbols);
+
+    return (symbol);
+}
+
+/*
+ * Look `name' up in the sorted `symbols' array with bsearch().
+ *
+ * Returns the matching symbol, or NULL when no symbol was created yet
+ * or none has that name.
+ */
+static symbol_t *
+get_symbol_by_name(char *name)
+{
+    symbol_t    **found;
+
+    if (symbols != NULL) {
+        found = (symbol_t **)bsearch(name, symbols, symbol_offset,
+                                     sizeof(symbol_t *), bcmp_symbols);
+        if (found != NULL)
+            return (*found);
+    }
+
+    return (NULL);
+}
+
+/*
+ * Allocate an empty hash table with 32 zeroed buckets.
+ *
+ * Returns the new table.
+ */
+static hash_t *
+new_hash(void)
+{
+    hash_t      *table = (hash_t *)xmalloc(sizeof(hash_t));
+
+    table->size = 32;
+    table->count = 0;
+    table->entries = (entry_t **)xcalloc(table->size, sizeof(void *));
+
+    return (table);
+}
+
+/*
+ * Hash a NUL terminated string.
+ *
+ * For every character the running key is shifted left by one bit when
+ * it is odd and then xor-ed with the character.  The mixing is done in
+ * unsigned arithmetic: with a signed key the shift would be undefined
+ * behaviour once the key overflows or goes negative (long names, or
+ * characters with the high bit set where char is signed).  The bit
+ * pattern of the result is the one the signed computation yields on
+ * two's complement targets.
+ *
+ * Returns the key converted back to int; callers mask it to the
+ * table size.
+ */
+static int
+hash_string(char *name)
+{
+    char                *ptr;
+    unsigned int         key;
+
+    for (key = 0, ptr = name; *ptr; ptr++)
+        key = (key << (key & 1)) ^ (unsigned int)*ptr;
+
+    return ((int)key);
+}
+
+/*
+ * Insert `entry' at the end of its bucket chain in `hash'.
+ *
+ * An entry whose name is already present raises
+ * error("duplicated entry ...").  After the insertion the table is
+ * rehashed when it holds more than 75% of its bucket count.
+ */
+static void
+put_hash(hash_t *hash, entry_t *entry)
+{
+    entry_t     **link;
+    int           key = hash_string(entry->name) & (hash->size - 1);
+
+    /* Walk to the terminating NULL link, checking for duplicates. */
+    link = &hash->entries[key];
+    while (*link != NULL) {
+        if (strcmp(entry->name, (*link)->name) == 0)
+            error("duplicated entry %s", entry->name);
+        link = &(*link)->next;
+    }
+    *link = entry;
+    entry->next = NULL;
+
+    ++hash->count;
+    if (hash->count > hash->size * 0.75)
+        rehash(hash);
+}
+
+/*
+ * Find the entry called `name' in `hash'.
+ *
+ * Returns the entry, or NULL when the bucket chain holds no entry
+ * with that name.
+ */
+static entry_t *
+get_hash(hash_t *hash, char *name)
+{
+    int          key = hash_string(name) & (hash->size - 1);
+    entry_t     *cursor = hash->entries[key];
+
+    while (cursor != NULL && strcmp(cursor->name, name) != 0)
+        cursor = cursor->next;
+
+    return (cursor);
+}
+
+/*
+ * Double the bucket count of `hash' and redistribute every entry.
+ * Each entry is pushed on the front of its new bucket chain; the old
+ * bucket array is freed.
+ */
+static void
+rehash(hash_t *hash)
+{
+    int           index, slot;
+    int           doubled = hash->size * 2;
+    entry_t      *cursor, *following;
+    entry_t     **table;
+
+    table = (entry_t **)xcalloc(doubled, sizeof(void *));
+    for (index = 0; index < hash->size; index++) {
+        cursor = hash->entries[index];
+        while (cursor != NULL) {
+            /* Save the old link before it is overwritten. */
+            following = cursor->next;
+            slot = hash_string(cursor->name) & (doubled - 1);
+            cursor->next = table[slot];
+            table[slot] = cursor;
+            cursor = following;
+        }
+    }
+    free(hash->entries);
+    hash->entries = table;
+    hash->size = doubled;
+}
+
+/*
+ * Print the command line help on stderr, call finish_jit() and exit
+ * with status 1.  The text depends on HAVE_GETOPT_LONG_ONLY and, in
+ * the long option variant, on the target architecture macros.
+ */
+static void
+usage(void)
+{
+#if HAVE_GETOPT_LONG_ONLY
+    fprintf(stderr, "\
+Usage: %s [jit assembler options] file [jit program options]\n\
+Jit assembler options:\n\
+  -help                    Display this information\n\
+  -v[0-3]                  Verbose output level\n\
+  -d                       Do not use a data buffer\n\
+  -D<macro>[=<val>]        Preprocessor options\n"
+#  if defined(__i386__) && __WORDSIZE == 32
+"  -mx87=1                  Force using x87 when sse2 available\n"
+#  endif
+#  if defined(__i386__) || defined(__x86_64__)
+"  -msse4_1=0               Do not use sse4_1 instructions when available\n"
+#  endif
+#  if defined(__arm__)
+"  -mcpu=<val>              Force cpu version (4, 5, 6 or 7)\n\
+  -mthumb[=0|1]            Enable or disable thumb\n\
+  -mvfp=<val>              Set vfp version (0 to disable)\n\
+  -mneon[=0|1]             Enable or disable neon\n"
+#  endif
+           , progname);
+#else
+    fprintf(stderr, "\
+Usage: %s [jit assembler options] file [jit program options]\n\
+Jit assembler options:\n\
+  -h                       Display this information\n\
+  -v                       Verbose output level\n\
+  -D<macro>[=<val>]        Preprocessor options\n", progname);
+#endif
+    finish_jit();
+    exit(1);
+}
+
+/*
+ * Abort through error() once LENGTH, the running total of snprintf()
+ * return values, no longer fits in a buffer of SIZE bytes.  Without this
+ * check a truncated append leaves the total past the end of the buffer,
+ * so the next "size - total" wraps around in unsigned arithmetic and
+ * snprintf() is handed an out of bounds pointer with a huge limit.
+ */
+static void
+check_cmdline(int length, size_t size)
+{
+    if (length < 0 || (size_t)length >= size)
+       error("preprocessor command line too long");
+}
+
+/*
+ * Entry point of the jit assembler used by the check suite:
+ *   1. parse the assembler options (getopt_long_only() when available,
+ *      plain getopt() otherwise);
+ *   2. take the next argument as the input file ("-" selects stdin) and
+ *      build a "cc -E -x c" command line from it, from the '-' prefixed
+ *      arguments that follow it and from a set of predefined macros;
+ *   3. read the preprocessor output through popen() and parse() it;
+ *   4. shift the remaining arguments down to argv[0] and execute() the
+ *      generated program with them.
+ * Returns 0; usage() and error() are the failure exits.
+ */
+int
+main(int argc, char *argv[])
+{
+#if HAVE_GETOPT_LONG_ONLY
+    static const char  *short_options = "dv::";
+    static struct option long_options[] = {
+       { "help",               0, 0, 'h' },
+       { "data",               2, 0, 'd' },
+#  if defined(__i386__) && __WORDSIZE == 32
+       { "mx87",               2, 0, '7' },
+#  endif
+#  if defined(__i386__) || defined(__x86_64__)
+       { "msse4_1",            2, 0, '4' },
+#  endif
+#  if defined(__arm__)
+       { "mcpu",               2, 0, 'c' },
+       { "mthumb",             2, 0, 't' },
+       { "mvfp",               2, 0, 'f' },
+       { "mneon",              2, 0, 'n' },
+#  endif
+       { 0,                    0, 0, 0   }
+    };
+#else
+#endif /* HAVE_GETOPT_LONG_ONLY */
+    int                         offset;
+    char               *endptr;
+    int                         opt_index;
+    int                         opt_short;
+    char                cmdline[8192];
+
+#if defined(__CYGWIN__)
+    /* Cause a compile warning about redefinition without dllimport
+     * attribute, *but* cause correct linkage if liblightning.a is
+     * linked to binutils (that happens to have an internal
+     * getopt* implementation and an apparently conflicting
+     * optind global variable) */
+    extern int          optind;
+    optind = 1;
+#endif
+
+    progname = argv[0];
+
+    init_jit(progname);
+
+#if defined(__sgi)
+    DL_HANDLE = dlopen(NULL, RTLD_LAZY);
+#endif
+
+    /* The data buffer is used unless -d is given */
+    flag_data = 1;
+#if HAVE_GETOPT_LONG_ONLY
+    for (;;) {
+       if ((opt_short = getopt_long_only(argc, argv, short_options,
+                                         long_options, &opt_index)) < 0)
+           break;
+       switch (opt_short) {
+           case 'h':
+           default:
+               usage();
+               break;
+           case 'v':
+               /* -v alone means level 1, -v<n> an explicit level */
+               if (optarg) {
+                   flag_verbose = strtol(optarg, &endptr, 10);
+                   if (*endptr || flag_verbose < 0)
+                       usage();
+               }
+               else
+                   flag_verbose = 1;
+               break;
+           case 'd':
+               flag_data = 0;
+               break;
+#if defined(__i386__) && __WORDSIZE == 32
+           case '7':
+               /* -mx87, -mx87= and -mx87=1 all turn sse2 off;
+                * -mx87=0 leaves the detected value alone */
+               if (optarg) {
+                   if (strcmp(optarg, "") == 0 || strcmp(optarg, "1") == 0)
+                       jit_cpu.sse2 = 0;
+                   else if (strcmp(optarg, "0"))
+                       usage();
+               }
+               else
+                   jit_cpu.sse2 = 0;
+               break;
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+           case '4':
+               /* NOTE(review): the option is spelled -msse4_1 but the
+                * field cleared below is sse4_2 -- confirm this is the
+                * intended cpu flag. */
+               if (optarg) {
+                   if (strcmp(optarg, "0") == 0)
+                       jit_cpu.sse4_2 = 0;
+                   else if (strcmp(optarg, "1"))
+                       usage();
+               }
+               break;
+#endif
+#if defined(__arm__)
+           case 'c':
+               /* The cpu version can only be lowered, never raised */
+               if (optarg) {
+                   offset = strtol(optarg, &endptr, 10);
+                   if (*endptr || offset < 0)
+                       usage();
+                   if (offset < jit_cpu.version)
+                       jit_cpu.version = offset;
+               }
+               break;
+           case 't':
+               if (optarg) {
+                   if (strcmp(optarg, "0") == 0)
+                       jit_cpu.thumb = 0;
+                   else if (strcmp(optarg, "1") && strcmp(optarg, "2"))
+                       usage();
+               }
+               break;
+           case 'f':
+#  if !defined(__ARM_PCS_VFP)
+               /* Do not allow overriding hard float abi */
+               if (optarg) {
+                   offset = strtol(optarg, &endptr, 10);
+                   if (*endptr || offset < 0)
+                       usage();
+                   if (offset < jit_cpu.vfp)
+                       jit_cpu.vfp = offset;
+               }
+#  endif
+               break;
+           case 'n':
+               if (optarg) {
+                   if (strcmp(optarg, "0") == 0)
+                       jit_cpu.neon = 0;
+                   else if (strcmp(optarg, "1"))
+                       usage();
+               }
+               break;
+#endif
+       }
+    }
+#else
+    while ((opt_short = getopt(argc, argv, "hvd")) >= 0) {
+       if (opt_short == 'v')
+           ++flag_verbose;
+       else if (opt_short == 'd')
+           flag_data = 0;
+       else
+           usage();
+    }
+#endif
+
+    /* From here on opt_index walks the non option arguments */
+    opt_index = optind;
+#if defined(__hpux)
+    /* Workaround */
+    if (opt_index < argc && argv[opt_index][0] == '-')
+       ++opt_index;
+#endif
+    if (opt_index < 0 || opt_index >= argc)
+       usage();
+    if (strcmp(argv[opt_index], "-") == 0)
+       strcpy(parser.name, "<stdin>");
+    else {
+       /* Keep only the basename of the input file for messages */
+       if ((endptr = strrchr(argv[opt_index], '/')) == NULL)
+           endptr = argv[opt_index];
+       else
+           ++endptr;
+       strncpy(parser.name, endptr, sizeof(parser.name));
+       parser.name[sizeof(parser.name) - 1] = '\0';
+    }
+#if __clang__
+#  define cc "clang"
+#else
+#  define cc "gcc"
+#endif
+    /* opt_short is reused as the number of bytes written to cmdline.
+     * NOTE(review): the argv strings are pasted unquoted into a shell
+     * command, so this driver must only be run on trusted arguments. */
+    opt_short = snprintf(cmdline, sizeof(cmdline), cc " -E -x c %s", argv[opt_index]);
+    check_cmdline(opt_short, sizeof(cmdline));
+    for (++opt_index; opt_index < argc; opt_index++) {
+       if (argv[opt_index][0] == '-') {
+           /* Arguments starting with '-' go to the preprocessor */
+           opt_short += snprintf(cmdline + opt_short,
+                                 sizeof(cmdline) - opt_short,
+                                 " %s", argv[opt_index]);
+           check_cmdline(opt_short, sizeof(cmdline));
+       }
+       else {
+           /* First jit program argument: step back one slot so that
+            * the element preceding it becomes argv[0] of the program */
+           --opt_index;
+           break;
+       }
+    }
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__WORDSIZE=%d", __WORDSIZE);
+    check_cmdline(opt_short, sizeof(cmdline));
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__LITTLE_ENDIAN=%d", __LITTLE_ENDIAN);
+    check_cmdline(opt_short, sizeof(cmdline));
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__BIG_ENDIAN=%d", __BIG_ENDIAN);
+    check_cmdline(opt_short, sizeof(cmdline));
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__BYTE_ORDER=%d", __BYTE_ORDER);
+    check_cmdline(opt_short, sizeof(cmdline));
+#if defined(__i386__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__i386__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__x86_64__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__x86_64__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__mips__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__mips__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__arm__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__arm__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__powerpc__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__ppc__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__sparc__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__sparc__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__ia64__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__ia64__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__hppa__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__hppa__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(_AIX)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D_AIX=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__sgi__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__sgi__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__aarch64__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__aarch64__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__s390__) || defined(__s390x__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__s390__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+#if defined(__alpha__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__alpha__=1");
+    check_cmdline(opt_short, sizeof(cmdline));
+#endif
+    /* The parser reads the preprocessed source from this pipe */
+    if ((parser.fp = popen(cmdline, "r")) == NULL)
+       error("cannot execute %s", cmdline);
+
+    parser.line = 1;
+    parser.string = (char *)xmalloc(parser.length = 4096);
+
+#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
+    /* double precision                0x200
+     * round nearest                   0x000
+     * invalid operation mask          0x001
+     * denormalized operand mask       0x002
+     * zero divide mask                0x004
+     * precision (inexact) mask        0x020
+     */
+    {
+       fpu_control_t fpu_control = 0x027f;
+       _FPU_SETCW(fpu_control);
+    }
+#endif
+
+    _jit = jit_new_state();
+
+    /* Register every entry of instr_vector in the instruction table */
+    instrs = new_hash();
+    for (offset = 0;
+        offset < (int)(sizeof(instr_vector) / sizeof(instr_vector[0]));
+        offset++)
+       put_hash(instrs, (entry_t *)(instr_vector + offset));
+
+    labels = new_hash();
+
+    parse();
+    pclose(parser.fp);
+    parser.fp = NULL;
+
+    /* Shift what is left of the command line down to argv[0] so the
+     * generated program receives its own argc/argv */
+    for (opt_short = 0; opt_index < argc; opt_short++, opt_index++)
+       argv[opt_short] = argv[opt_index];
+    argv[opt_short] = NULL;
+    argc = opt_short;
+    execute(argc, argv);
+
+    finish_jit();
+
+    return (0);
+}
diff --git a/deps/lightning/check/nodata.c b/deps/lightning/check/nodata.c
new file mode 100644 (file)
index 0000000..0e594c3
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Simple test of using an alternate buffer for the code.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+#  include <fcntl.h>
+#endif
+
+/* Use MAP_ANONYMOUS where the headers do not provide MAP_ANON; when
+ * neither macro exists MAP_ANONYMOUS is defined as 0, so the flag adds
+ * nothing to the mmap() flags used in main(). */
+#ifndef MAP_ANON
+#  define MAP_ANON                     MAP_ANONYMOUS
+#  ifndef MAP_ANONYMOUS
+#    define MAP_ANONYMOUS              0
+#  endif
+#endif
+
+/* Everywhere but on sgi the mapping is made with a constant fd of -1;
+ * on sgi mmap_fd is a variable assigned from open("/dev/zero") in main(). */
+#if !defined(__sgi)
+#define  mmap_fd                       -1
+#endif
+
+/* Buffer mapped in main() and offered to jit_set_data() by gencode() */
+jit_uint8_t             *data;
+/* Jit state created and destroyed by each gencode() call */
+jit_state_t             *_jit;
+/* Sizes written by jit_get_data() in gencode() */
+jit_word_t               data_length;
+jit_word_t               note_length;
+#if defined(__sgi)
+int                      mmap_fd;
+#endif
+/* Result of jit_emit(), called once per gencode() run */
+void                   (*function)(void);
+
+/*
+ * Generate, emit and run a jit function that calls printf("%f\n", 1.5).
+ *
+ * FLAGS is 0 or a combination of JIT_DISABLE_DATA and JIT_DISABLE_NOTE.
+ * When it is non zero the sizes reported by jit_get_data() for the parts
+ * left enabled are added up and the global `data' buffer (or NULL when
+ * nothing is left enabled) is passed to jit_set_data() together with
+ * FLAGS.  Any unexpected result ends the test with abort().
+ */
+void
+gencode(jit_word_t flags)
+{
+    jit_word_t         length;
+
+    _jit = jit_new_state();
+
+    jit_name("main");
+    jit_prolog();
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%f\n");
+    jit_ellipsis();
+    jit_pushargi_d(1.5);
+    jit_finishi(printf);
+    jit_note("nodata.c", __LINE__);
+
+    /* call to jit_realize() is only required when using an alternate
+     * code buffer. Note that not using mmap'ed memory may not work
+     * on several ports and/or operating system versions */
+    jit_realize();
+
+    /* Fetch the data and note sizes; a non NULL buffer pointer at this
+     * point, before jit_emit(), is treated as a failure */
+    if (jit_get_data(&data_length, &note_length) != NULL)
+       abort();
+
+    /* Bytes needed for the parts that flags leaves enabled */
+    length = 0;
+    if (!(flags & JIT_DISABLE_DATA))
+       length += data_length;
+    if (!(flags & JIT_DISABLE_NOTE))
+       length += note_length;
+
+    /* Install the alternate buffer only when something is disabled;
+     * with flags == 0 jit_set_data() is not called at all */
+    if (flags)
+       jit_set_data(length ? data : NULL, length, flags);
+
+    function = jit_emit();
+    if (function == NULL)
+       abort();
+
+    jit_clear_state();
+    (*function)();
+    jit_destroy_state();
+}
+
+/*
+ * Map a 4096 byte private anonymous region into `data', then run
+ * gencode() once for every combination of JIT_DISABLE_DATA and
+ * JIT_DISABLE_NOTE, starting with neither.  Returns 0.
+ */
+int
+main(int argc, char *argv[])
+{
+    /* Flag combinations, in the order they are exercised */
+    const jit_word_t    modes[] = {
+       0,
+       JIT_DISABLE_DATA,
+       JIT_DISABLE_NOTE,
+       JIT_DISABLE_DATA | JIT_DISABLE_NOTE
+    };
+    unsigned int        mode;
+
+#if defined(__sgi)
+    mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+
+    data = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+               MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+    assert(data != MAP_FAILED);
+#if defined(__sgi)
+    /* The descriptor is only needed to create the mapping */
+    close(mmap_fd);
+#endif
+
+    init_jit(argv[0]);
+
+    for (mode = 0; mode < sizeof(modes) / sizeof(modes[0]); mode++)
+       gencode(modes[mode]);
+
+    finish_jit();
+
+    munmap(data, 4096);
+
+    return (0);
+}
diff --git a/deps/lightning/check/put.ok b/deps/lightning/check/put.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/put.tst b/deps/lightning/check/put.tst
new file mode 100644 (file)
index 0000000..a7e39e1
--- /dev/null
@@ -0,0 +1,428 @@
+/* String handed to puts by main once every check has passed */
+.data  8
+ok:
+.c     "ok"
+
+.code
+/* Execution starts here: jump over the helpers to main */
+       jmpi main
+
+/*
+ * putr: exercises putargr/putargi on every integer width plus float and
+ * double arguments (17 arguments when __WORDSIZE == 64, 13 otherwise).
+ *  - each argument is read with the getarg variant of its type, negated
+ *    and written back with putargr; _putr then checks the new values
+ *    and jumps back to rputr;
+ *  - each argument is overwritten with an immediate (17 down to 1) using
+ *    putargi; _puti then checks those and jumps back to rputi.
+ * This function declares `frame 160' and reaches the checkers with jmpi;
+ * the checkers declare the matching `tramp 160'.
+ */
+       name putr
+putr:
+       prolog
+       frame 160
+       arg $ac
+       arg $auc
+       arg $as
+       arg $aus
+       arg $ai
+#if __WORDSIZE == 64
+       arg $aui
+       arg $al
+#endif
+       arg_f $af
+       arg_d $ad
+       arg $a
+#if __WORDSIZE == 64
+       arg $_l
+       arg $_ui
+#endif
+       arg $_i
+       arg $_us
+       arg $_s
+       arg $_uc
+       arg $_c
+/* Pass 1: negate every argument in place */
+       getarg_c %r0 $ac
+       negr %r0 %r0
+       putargr %r0 $ac
+       getarg_uc %r0 $auc
+       negr %r0 %r0
+       putargr %r0 $auc
+       getarg_s %r0 $as
+       negr %r0 %r0
+       putargr %r0 $as
+       getarg_us %r0 $aus
+       negr %r0 %r0
+       putargr %r0 $aus
+       getarg_i %r0 $ai
+       negr %r0 %r0
+       putargr %r0 $ai
+#if __WORDSIZE == 64
+       getarg_ui %r0 $aui
+       negr %r0 %r0
+       putargr %r0 $aui
+       getarg_l %r0 $al
+       negr %r0 %r0
+       putargr %r0 $al
+#endif
+       getarg_f %f0 $af
+       negr_f %f0 %f0
+       putargr_f %f0 $af
+       getarg_d %f0 $ad
+       negr_d %f0 %f0
+       putargr_d %f0 $ad
+       getarg %r0 $a
+       negr %r0 %r0
+       putargr %r0 $a
+#if __WORDSIZE == 64
+       getarg_l %r0 $_l
+       negr %r0 %r0
+       putargr %r0 $_l
+       getarg_ui %r0 $_ui
+       negr %r0 %r0
+       putargr %r0 $_ui
+#endif
+       getarg_i %r0 $_i
+       negr %r0 %r0
+       putargr %r0 $_i
+       getarg_us %r0 $_us
+       negr %r0 %r0
+       putargr %r0 $_us
+       getarg_s %r0 $_s
+       negr %r0 %r0
+       putargr %r0 $_s
+       getarg_uc %r0 $_uc
+       negr %r0 %r0
+       putargr %r0 $_uc
+       getarg_c %r0 $_c
+       negr %r0 %r0
+       putargr %r0 $_c
+/* Verify the negated values; _putr comes back at rputr */
+       jmpi _putr
+rputr:
+/* Pass 2: overwrite every argument with an immediate */
+       putargi 17 $ac
+       putargi 16 $auc
+       putargi 15 $as
+       putargi 14 $aus
+       putargi 13 $ai
+#if __WORDSIZE == 64
+       putargi 12 $aui
+       putargi 11 $al
+#endif
+       putargi_f 10 $af
+       putargi_d 9 $ad
+       putargi 8 $a
+#if __WORDSIZE == 64
+       putargi 7 $_l
+       putargi 6 $_ui
+#endif
+       putargi 5 $_i
+       putargi 4 $_us
+       putargi 3 $_s
+       putargi 2 $_uc
+       putargi 1 $_c
+/* Verify the immediates; _puti comes back at rputi */
+       jmpi _puti
+rputi:
+       ret
+       epilog
+
+/*
+ * _putr: checker entered from putr with jmpi (declares `tramp 160' and
+ * the same argument list as putr).  Every argument must hold the negated
+ * value of what main pushed; the unsigned variants are compared against
+ * the negated value masked to their width.  Any mismatch calls abort;
+ * on success control returns to putr at rputr.
+ */
+       name _putr
+_putr:
+       prolog
+       tramp 160
+       arg $ac
+       arg $auc
+       arg $as
+       arg $aus
+       arg $ai
+#if __WORDSIZE == 64
+       arg $aui
+       arg $al
+#endif
+       arg_f $af
+       arg_d $ad
+       arg $a
+#if __WORDSIZE == 64
+       arg $_l
+       arg $_ui
+#endif
+       arg $_i
+       arg $_us
+       arg $_s
+       arg $_uc
+       arg $_c
+       getarg_c %r0 $ac
+       beqi rac %r0 -1
+       calli @abort
+rac:
+       getarg_uc %r0 $auc
+       beqi rauc %r0 $(-2 & 0xff)
+       calli @abort
+rauc:
+       getarg_s %r0 $as
+       beqi ras %r0 -3
+       calli @abort
+ras:
+       getarg_us %r0 $aus
+       beqi raus %r0 $(-4 & 0xffff)
+       calli @abort
+raus:
+       getarg_i %r0 $ai
+       beqi rai %r0 -5
+       calli @abort
+rai:
+#if __WORDSIZE == 64
+       getarg_ui %r0 $aui
+       beqi raui %r0 $(-6 & 0xffffffff)
+       calli @abort
+raui:
+       getarg_l %r0 $al
+       beqi ral %r0 -7
+       calli @abort
+ral:
+#endif
+       getarg_f %f0 $af
+       beqi_f raf %f0 -8
+       calli @abort
+raf:
+       getarg_d %f0 $ad
+       beqi_d rad %f0 -9
+       calli @abort
+rad:
+       getarg %r0 $a
+       beqi ra %r0 -10
+       calli @abort
+ra:
+#if __WORDSIZE == 64
+       getarg %r0 $_l
+       beqi r_l %r0 -11
+       calli @abort
+r_l:
+       getarg_ui %r0 $_ui
+       beqi r_ui %r0 $(-12 & 0xffffffff)
+       calli @abort
+r_ui:
+#endif
+       getarg_i %r0 $_i
+       beqi r_i %r0 -13
+       calli @abort
+r_i:
+       getarg_us %r0 $_us
+       beqi r_us %r0 $(-14 & 0xffff)
+       calli @abort
+r_us:
+       getarg_s %r0 $_s
+       beqi r_s %r0 -15
+       calli @abort
+r_s:
+       getarg_uc %r0 $_uc
+       beqi r_uc %r0 $(-16 & 0xff)
+       calli @abort
+r_uc:
+       getarg_c %r0 $_c
+       beqi r_c %r0 -17
+       calli @abort
+r_c:
+/* All values matched: resume putr */
+       jmpi rputr
+       epilog
+
+/*
+ * _puti: checker entered from putr with jmpi after the putargi pass
+ * (declares `tramp 160' and the same argument list as putr).  The
+ * arguments must hold the immediates stored by putr, 17 for $ac down to
+ * 1 for $_c.  Any mismatch calls abort; on success control returns to
+ * putr at rputi.
+ */
+       name _puti
+_puti:
+       prolog
+       tramp 160
+       arg $ac
+       arg $auc
+       arg $as
+       arg $aus
+       arg $ai
+#if __WORDSIZE == 64
+       arg $aui
+       arg $al
+#endif
+       arg_f $af
+       arg_d $ad
+       arg $a
+#if __WORDSIZE == 64
+       arg $_l
+       arg $_ui
+#endif
+       arg $_i
+       arg $_us
+       arg $_s
+       arg $_uc
+       arg $_c
+       getarg_c %r0 $ac
+       beqi iac %r0 17
+       calli @abort
+iac:
+       getarg_uc %r0 $auc
+       beqi iauc %r0 16
+       calli @abort
+iauc:
+       getarg_s %r0 $as
+       beqi ias %r0 15
+       calli @abort
+ias:
+       getarg_us %r0 $aus
+       beqi iaus %r0 14
+       calli @abort
+iaus:
+       getarg_i %r0 $ai
+       beqi iai %r0 13
+       calli @abort
+iai:
+#if __WORDSIZE == 64
+       getarg_ui %r0 $aui
+       beqi iaui %r0 12
+       calli @abort
+iaui:
+       getarg_l %r0 $al
+       beqi ial %r0 11
+       calli @abort
+ial:
+#endif
+       getarg_f %f0 $af
+       beqi_f iaf %f0 10
+       calli @abort
+iaf:
+       getarg_d %f0 $ad
+       beqi_d iad %f0 9
+       calli @abort
+iad:
+       getarg %r0 $a
+       beqi ia %r0 8
+       calli @abort
+ia:
+#if __WORDSIZE == 64
+       getarg %r0 $_l
+       beqi i_l %r0 7
+       calli @abort
+i_l:
+       getarg_ui %r0 $_ui
+       beqi i_ui %r0 6
+       calli @abort
+i_ui:
+#endif
+       getarg_i %r0 $_i
+       beqi i_i %r0 5
+       calli @abort
+i_i:
+       getarg_us %r0 $_us
+       beqi i_us %r0 4
+       calli @abort
+i_us:
+       getarg_s %r0 $_s
+       beqi i_s %r0 3
+       calli @abort
+i_s:
+       getarg_uc %r0 $_uc
+       beqi i_uc %r0 2
+       calli @abort
+i_uc:
+       getarg_c %r0 $_c
+       beqi i_c %r0 1
+       calli @abort
+i_c:
+/* All values matched: resume putr */
+       jmpi rputi
+       epilog
+
+/*
+ * putf: same idea as putr for an argument list that interleaves word,
+ * float and double arguments.  $i1, $d1 and $f1 are incremented by 1,
+ * $f2 is decremented by 1, and $f3, $i2, $d2 are overwritten with the
+ * immediates -5, -6 and -7.  _putf checks the results and jumps back to
+ * rputf.
+ */
+       name putf
+putf:
+       prolog
+       frame 56
+       arg $i1
+       arg_d $d1
+       arg_f $f1
+       arg_f $f2
+       arg_f $f3
+       arg $i2
+       arg_d $d2
+       getarg %r0 $i1
+       addi %r0 %r0 1
+       putargr %r0 $i1
+       getarg_d %f0 $d1
+       addi_d %f0 %f0 1
+       putargr_d %f0 $d1
+       getarg_f %f0 $f1
+       addi_f %f0 %f0 1
+       putargr_f %f0 $f1
+       getarg_f %f0 $f2
+       subi_f %f0 %f0 1
+       putargr_f %f0 $f2
+       putargi_f -5 $f3
+       putargi -6 $i2
+       putargi_d -7 $d2
+       jmpi _putf
+rputf:
+       ret
+       epilog
+
+/*
+ * _putf: checker entered from putf with jmpi (declares `tramp 56' and
+ * the same argument list as putf).  With the values main pushes
+ * (1, 2, 3, 4, 5, 6, 7) the arguments must now read 2, 3, 4, 3, -5, -6
+ * and -7.  Any mismatch calls abort; on success control returns to putf
+ * at rputf.
+ */
+       name _putf
+_putf:
+       prolog
+       tramp 56
+       arg $i1
+       arg_d $d1
+       arg_f $f1
+       arg_f $f2
+       arg_f $f3
+       arg $i2
+       arg_d $d2
+       getarg %r0 $i1
+       beqi fi1 %r0 2
+       calli @abort
+fi1:
+       getarg_d %f0 $d1
+       beqi_d fd1 %f0 3
+       calli @abort
+fd1:
+       getarg_f %f0 $f1
+       beqi_f ff1 %f0 4
+       calli @abort
+ff1:
+       getarg_f %f0 $f2
+       beqi_f ff2 %f0 3
+       calli @abort
+ff2:
+       getarg_f %f0 $f3
+       beqi_f ff3 %f0 -5
+       calli @abort
+ff3:
+       getarg %r0 $i2
+       beqi fi2 %r0 -6
+       calli @abort
+fi2:
+       getarg_d %f0 $d2
+       beqi_d fd2 %f0 -7
+       calli @abort
+fd2:
+       jmpi rputf
+       epilog
+
+/*
+ * main: calls putr with the values 1 to 17 (6, 7, 11 and 12 only when
+ * __WORDSIZE == 64; 8 is passed as float and 9 as double), then putf
+ * with 1 to 7, and finally prints "ok" with puts.  The helpers abort on
+ * any failed check, so reaching puts means the test passed.
+ */
+       name main
+main:
+       prolog
+       prepare
+               pushargi 1
+               pushargi 2
+               pushargi 3
+               pushargi 4
+               pushargi 5
+#if __WORDSIZE == 64
+               pushargi 6
+               pushargi 7
+#endif
+               pushargi_f 8
+               pushargi_d 9
+               pushargi 10
+#if __WORDSIZE == 64
+               pushargi 11
+               pushargi 12
+#endif
+               pushargi 13
+               pushargi 14
+               pushargi 15
+               pushargi 16
+               pushargi 17
+       finishi putr
+       prepare
+               pushargi 1
+               pushargi_d 2
+               pushargi_f 3
+               pushargi_f 4
+               pushargi_f 5
+               pushargi 6
+               pushargi_d 7
+       finishi putf
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/qalu.inc b/deps/lightning/check/qalu.inc
new file mode 100644 (file)
index 0000000..9daca82
--- /dev/null
@@ -0,0 +1,122 @@
+/* Format string printed by the tests that include this file */
+.data  8
+ok:
+.c     "ok\n"
+
+/* r0,r1 = r2 op r3
+ * Register form with four distinct registers: loads I0 into R2 and I1
+ * into R3, runs OP##r##T, then aborts unless R0 == LO, R1 == HI and the
+ * source registers still hold I0 and I1.  N, T, OP and the register
+ * names are pasted into the labels to keep each expansion unique. */
+#define QALUR(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R2 I0                                     \
+       movi %R3 I1                                     \
+       OP##r##T %R0 %R1 %R2 %R3                        \
+       bnei OP##T##N##rlo##R0##R1##R2##R3 %R0 LO       \
+       bnei OP##T##N##rlo##R0##R1##R2##R3 %R1 HI       \
+       bnei OP##T##N##rlo##R0##R1##R2##R3 %R2 I0       \
+       beqi OP##T##N##rhi##R0##R1##R2##R3 %R3 I1       \
+OP##T##N##rlo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##rhi##R0##R1##R2##R3:
+
+/* r0,r1 = r2 op i0
+ * Immediate form: loads I0 into R2 and the expected HI into R3, runs
+ * OP##i##T with I1 as the immediate operand, then aborts unless
+ * R0 == LO, R1 == R3 and R2 still holds I0. */
+#define QALUI(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R2 I0                                     \
+       movi %R3 HI                                     \
+       OP##i##T %R0 %R1 %R2 I1                         \
+       bnei OP##T##N##ilo##R0##R1##R2##R3 %R0 LO       \
+       bner OP##T##N##ilo##R0##R1##R2##R3 %R1 %R3      \
+       beqi OP##T##N##ihi##R0##R1##R2##R3 %R2 I0       \
+OP##T##N##ilo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##ihi##R0##R1##R2##R3:
+
+/* r0,r1 = r0 op r1
+ * Both destinations alias the sources in the same order: I0 goes in R0,
+ * I1 in R1, and the expected LO/HI are kept in R2/R3 for the register
+ * compares that follow the operation. */
+#define QALUX(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R0 I0                                     \
+       movi %R1 I1                                     \
+       movi %R2 LO                                     \
+       movi %R3 HI                                     \
+       OP##r##T %R0 %R1 %R0 %R1                        \
+       bner OP##T##N##0lo##R0##R1##R2##R3 %R0 %R2      \
+       beqr OP##T##N##0hi##R0##R1##R2##R3 %R1 %R3      \
+OP##T##N##0lo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##0hi##R0##R1##R2##R3:
+
+/* r0,r1 = r1 op r0
+ * Both destinations alias the sources in swapped order: I0 goes in R1,
+ * I1 in R0, and the expected LO/HI are kept in R2/R3 for the register
+ * compares that follow the operation. */
+#define QALUY(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R1 I0                                     \
+       movi %R0 I1                                     \
+       movi %R2 LO                                     \
+       movi %R3 HI                                     \
+       OP##r##T %R0 %R1 %R1 %R0                        \
+       bner OP##T##N##1lo##R0##R1##R2##R3 %R0 %R2      \
+       beqr OP##T##N##1hi##R0##R1##R2##R3 %R1 %R3      \
+OP##T##N##1lo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##1hi##R0##R1##R2##R3:
+
+/* r0,r1 = r0 op r3
+ * Only the first destination aliases a source: I0 goes in R0, I1 in R3
+ * and the expected LO in R2.  Aborts unless R0 == R2, R1 == HI and R3
+ * still holds I1. */
+#define QALUZ(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R0 I0                                     \
+       movi %R3 I1                                     \
+       movi %R2 LO                                     \
+       OP##r##T %R0 %R1 %R0 %R3                        \
+       bner OP##T##N##2lo##R0##R1##R2##R3 %R0 %R2      \
+       bnei OP##T##N##2lo##R0##R1##R2##R3 %R1 HI       \
+       beqi OP##T##N##2hi##R0##R1##R2##R3 %R3 I1       \
+OP##T##N##2lo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##2hi##R0##R1##R2##R3:
+
+/* r0,r1 = r2 op r1
+ * Only the second destination aliases a source: I0 goes in R2, I1 in R1
+ * and the expected LO in R3.  Aborts unless R0 == R3, R1 == HI and R2
+ * still holds I0. */
+#define QALUW(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       movi %R2 I0                                     \
+       movi %R1 I1                                     \
+       movi %R3 LO                                     \
+       OP##r##T %R0 %R1 %R2 %R1                        \
+       bner OP##T##N##3lo##R0##R1##R2##R3 %R0 %R3      \
+       bnei OP##T##N##3lo##R0##R1##R2##R3 %R1 HI       \
+       beqi OP##T##N##3hi##R0##R1##R2##R3 %R2 I0       \
+OP##T##N##3lo##R0##R1##R2##R3:                         \
+       calli @abort                                    \
+OP##T##N##3hi##R0##R1##R2##R3:
+
+/* Run all six operand/aliasing variants (R, I, X, Y, Z, W) for one
+ * assignment of the four registers */
+#define QALU2(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       QALUR(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALUI(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALUX(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALUY(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALUZ(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALUW(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)
+
+/* Repeat QALU2 for each of the 24 orderings of the four registers given */
+#define QALU1(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3)        \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R1, R3, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R2, R1, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R2, R3, R1) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R3, R1, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R0, R3, R2, R1) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R0, R2, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R0, R3, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R2, R0, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R2, R3, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R3, R0, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R1, R3, R2, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R1, R0, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R1, R3, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R0, R1, R3) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R0, R3, R1) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R3, R1, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R2, R3, R0, R1) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R1, R2, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R1, R0, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R2, R1, R0) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R2, R0, R1) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R0, R1, R2) \
+       QALU2(N, T, OP, I0, I1, LO, HI, R3, R0, R2, R1)
+
+/* Test entry point: N numbers the test case (it only feeds the label
+ * names), T is the opcode suffix (empty or _u in the users of this
+ * file), OP the opcode stem, I0/I1 the operands and LO/HI the two
+ * expected results.  Runs QALU1 on six four-register selections taken
+ * from v0, v1, v2, r0, r1 and r2. */
+#define QALU(N, T, OP, I0, I1, LO, HI)                 \
+       QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r0) \
+       QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r1) \
+       QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r2) \
+       QALU1(N, T, OP, I0, I1, LO, HI, v1, v2, r0, r1) \
+       QALU1(N, T, OP, I0, I1, LO, HI, v1, v2, r0, r2) \
+       QALU1(N, T, OP, I0, I1, LO, HI, v2, r0, r1, r2)
diff --git a/deps/lightning/check/qalu_div.ok b/deps/lightning/check/qalu_div.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/qalu_div.tst b/deps/lightning/check/qalu_div.tst
new file mode 100644 (file)
index 0000000..198dfbb
--- /dev/null
@@ -0,0 +1,18 @@
+#include "qalu.inc"
+
+/* Checks qdiv (through QDIV) and its _u variant (through UQDIV).  In
+ * the vectors below I0 is the dividend, I1 the divisor, LO the expected
+ * quotient and HI the expected remainder, e.g. 10 / 3 gives 3 and 1.
+ * Prints "ok" when no check aborted. */
+.code
+       prolog
+#define QDIV(N, I0, I1, LO, HI)                QALU(N, , qdiv, I0, I1, LO, HI)
+#define UQDIV(N, I0, I1, LO, HI)       QALU(N, _u, qdiv, I0, I1, LO, HI)
+        QDIV(0, 10, 3, 3, 1)
+        QDIV(1, -33, 9, -3, -6)
+        QDIV(2, -41, -7, 5, -6)
+        QDIV(3, 65536, 4096, 16, 0)
+       UQDIV(4, -1, -2, 1, 1)
+       UQDIV(5, -2, -5, 1, 3)
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/qalu_mul.ok b/deps/lightning/check/qalu_mul.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/qalu_mul.tst b/deps/lightning/check/qalu_mul.tst
new file mode 100644 (file)
index 0000000..64b95a9
--- /dev/null
@@ -0,0 +1,31 @@
+#include "qalu.inc"
+
+/* Checks qmul (through QMUL) and its _u variant (through UQMUL).  In
+ * the vectors below I0 and I1 are the factors, LO the expected low word
+ * and HI the expected high word of the product, e.g. 1 * -1 gives -1 in
+ * both words.  The large operands are chosen per __WORDSIZE.  Prints
+ * "ok" when no check aborted. */
+.code
+       prolog
+#define QMUL(N, I0, I1, LO, HI)                QALU(N, , qmul, I0, I1, LO, HI)
+#define UQMUL(N, I0, I1, LO, HI)       QALU(N, _u, qmul, I0, I1, LO, HI)
+       QMUL(0, -2, -1, 2, 0)
+       QMUL(1, 0, -1, 0, 0)
+       QMUL(2, -1, 0, 0, 0)
+       QMUL(3, 1, -1, -1, -1)
+#if __WORDSIZE == 32
+        QMUL(4, 0x7ffff, 0x7ffff, 0xfff00001, 0x3f)
+       UQMUL(5, 0xffffff, 0xffffff, 0xfe000001, 0xffff)
+        QMUL(6, 0x80000000, -2, 0, 1)
+        QMUL(7, 0x80000000, 2, 0, -1)
+        QMUL(8, 0x80000001, 3, 0x80000003, -2)
+        QMUL(9, 0x80000001, -3, 0x7ffffffd, 1)
+#else
+        QMUL(4, 0x7ffffffff, 0x7ffffffff, 0xfffffff000000001, 0x3f)
+       UQMUL(5, 0xffffffffff, 0xffffffffff, 0xfffffe0000000001, 0xffff)
+        QMUL(6, 0x8000000000000000, -2, 0, 1)
+        QMUL(7, 0x8000000000000000, 2, 0, -1)
+        QMUL(8, 0x8000000000000001, 3, 0x8000000000000003, -2)
+        QMUL(9, 0x8000000000000001, -3, 0x7ffffffffffffffd, 1)
+#endif
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/range.ok b/deps/lightning/check/range.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/range.tst b/deps/lightning/check/range.tst
new file mode 100644 (file)
index 0000000..35ddf64
--- /dev/null
@@ -0,0 +1,504 @@
+#define M64    67108864        /* 1<<26, same value as aB26: size of buf before its 8-byte tail, and base address offset for the negative-offset accesses */
+
+#define aB1    (1<<1)  /* aBn = 1<<n for n = 1..26: positive powers of two */
+#define aB2    (1<<2)
+#define aB3    (1<<3)
+#define aB4    (1<<4)
+#define aB5    (1<<5)
+#define aB6    (1<<6)
+#define aB7    (1<<7)
+#define aB8    (1<<8)
+#define aB9    (1<<9)
+#define aB10   (1<<10)
+#define aB11   (1<<11)
+#define aB12   (1<<12)
+#define aB13   (1<<13)
+#define aB14   (1<<14)
+#define aB15   (1<<15)
+#define aB16   (1<<16)
+#define aB17   (1<<17)
+#define aB18   (1<<18)
+#define aB19   (1<<19)
+#define aB20   (1<<20)
+#define aB21   (1<<21)
+#define aB22   (1<<22)
+#define aB23   (1<<23)
+#define aB24   (1<<24)
+#define aB25   (1<<25)
+#define aB26   (1<<26)
+#define bB1    (-aB1)  /* bBn = -(1<<n) for n = 1..26: negated powers of two */
+#define bB2    (-aB2)
+#define bB3    (-aB3)
+#define bB4    (-aB4)
+#define bB5    (-aB5)
+#define bB6    (-aB6)
+#define bB7    (-aB7)
+#define bB8    (-aB8)
+#define bB9    (-aB9)
+#define bB10   (-aB10)
+#define bB11   (-aB11)
+#define bB12   (-aB12)
+#define bB13   (-aB13)
+#define bB14   (-aB14)
+#define bB15   (-aB15)
+#define bB16   (-aB16)
+#define bB17   (-aB17)
+#define bB18   (-aB18)
+#define bB19   (-aB19)
+#define bB20   (-aB20)
+#define bB21   (-aB21)
+#define bB22   (-aB22)
+#define bB23   (-aB23)
+#define bB24   (-aB24)
+#define bB25   (-aB25)
+#define bB26   (-aB26)
+#define cB1    (aB1-1) /* cBn = (1<<n) - 1 for n = 1..26: the n low bits set */
+#define cB2    (aB2-1)
+#define cB3    (aB3-1)
+#define cB4    (aB4-1)
+#define cB5    (aB5-1)
+#define cB6    (aB6-1)
+#define cB7    (aB7-1)
+#define cB8    (aB8-1)
+#define cB9    (aB9-1)
+#define cB10   (aB10-1)
+#define cB11   (aB11-1)
+#define cB12   (aB12-1)
+#define cB13   (aB13-1)
+#define cB14   (aB14-1)
+#define cB15   (aB15-1)
+#define cB16   (aB16-1)
+#define cB17   (aB17-1)
+#define cB18   (aB18-1)
+#define cB19   (aB19-1)
+#define cB20   (aB20-1)
+#define cB21   (aB21-1)
+#define cB22   (aB22-1)
+#define cB23   (aB23-1)
+#define cB24   (aB24-1)
+#define cB25   (aB25-1)
+#define cB26   (aB26-1)
+#define dB1    (-aB1+1)        /* dBn = -(1<<n) + 1 for n = 1..26 */
+#define dB2    (-aB2+1)
+#define dB3    (-aB3+1)
+#define dB4    (-aB4+1)
+#define dB5    (-aB5+1)
+#define dB6    (-aB6+1)
+#define dB7    (-aB7+1)
+#define dB8    (-aB8+1)
+#define dB9    (-aB9+1)
+#define dB10   (-aB10+1)
+#define dB11   (-aB11+1)
+#define dB12   (-aB12+1)
+#define dB13   (-aB13+1)
+#define dB14   (-aB14+1)
+#define dB15   (-aB15+1)
+#define dB16   (-aB16+1)
+#define dB17   (-aB17+1)
+#define dB18   (-aB18+1)
+#define dB19   (-aB19+1)
+#define dB20   (-aB20+1)
+#define dB21   (-aB21+1)
+#define dB22   (-aB22+1)
+#define dB23   (-aB23+1)
+#define dB24   (-aB24+1)
+#define dB25   (-aB25+1)
+#define dB26   (-aB26+1)
+/* Reference results: one $(...) expression per tested instruction name, so alu1 can spell the expected value as N(3, M##k); rsb swaps the operands (b - a). */
+#define add(a, b)              $(a + b)
+#define sub(a, b)              $(a - b)
+#define rsb(a, b)              $(b - a)
+#define mul(a, b)              $(a * b)
+#define div(a, b)              $(a / b)
+#define rem(a, b)              $(a % b)
+#define and(a, b)              $(a & b)
+#define or(a, b)               $(a | b)
+#define xor(a, b)              $(a ^ b)
+/* alu2(N, X, L, R, V): immediate-form check -- load L into %r1, run N##i on %r1 with immediate R into %r0, then beqi to label X when %r0 equals V; otherwise fall into calli @abort. */
+#define alu2(N, X, L, R, V)                                    \
+       movi %r1 L                                              \
+       N##i %r0 %r1 R                                          \
+       beqi X %r0 V                                            \
+       calli @abort                                            \
+X:
+#define alu1(N, M)     /* 26 alu2 checks: left operand 3, right operand M##k for k = 1..26, label N##M##k, expected N(3, M##k) */ \
+       alu2(N, N##M##1, 3, $(M##1), N(3, M##1))                \
+       alu2(N, N##M##2, 3, $(M##2), N(3, M##2))                \
+       alu2(N, N##M##3, 3, $(M##3), N(3, M##3))                \
+       alu2(N, N##M##4, 3, $(M##4), N(3, M##4))                \
+       alu2(N, N##M##5, 3, $(M##5), N(3, M##5))                \
+       alu2(N, N##M##6, 3, $(M##6), N(3, M##6))                \
+       alu2(N, N##M##7, 3, $(M##7), N(3, M##7))                \
+       alu2(N, N##M##8, 3, $(M##8), N(3, M##8))                \
+       alu2(N, N##M##9, 3, $(M##9), N(3, M##9))                \
+       alu2(N, N##M##10, 3, $(M##10), N(3, M##10))             \
+       alu2(N, N##M##11, 3, $(M##11), N(3, M##11))             \
+       alu2(N, N##M##12, 3, $(M##12), N(3, M##12))             \
+       alu2(N, N##M##13, 3, $(M##13), N(3, M##13))             \
+       alu2(N, N##M##14, 3, $(M##14), N(3, M##14))             \
+       alu2(N, N##M##15, 3, $(M##15), N(3, M##15))             \
+       alu2(N, N##M##16, 3, $(M##16), N(3, M##16))             \
+       alu2(N, N##M##17, 3, $(M##17), N(3, M##17))             \
+       alu2(N, N##M##18, 3, $(M##18), N(3, M##18))             \
+       alu2(N, N##M##19, 3, $(M##19), N(3, M##19))             \
+       alu2(N, N##M##20, 3, $(M##20), N(3, M##20))             \
+       alu2(N, N##M##21, 3, $(M##21), N(3, M##21))             \
+       alu2(N, N##M##22, 3, $(M##22), N(3, M##22))             \
+       alu2(N, N##M##23, 3, $(M##23), N(3, M##23))             \
+       alu2(N, N##M##24, 3, $(M##24), N(3, M##24))             \
+       alu2(N, N##M##25, 3, $(M##25), N(3, M##25))             \
+       alu2(N, N##M##26, 3, $(M##26), N(3, M##26))
+/* alu(N): run alu1 for instruction N against all four operand families (aB, bB, cB, dB), i.e. 4 * 26 checks. */
+#define alu(N)                                                 \
+       alu1(N, aB)                                             \
+       alu1(N, bB)                                             \
+       alu1(N, cB)                                             \
+       alu1(N, dB)
+/* Shift checks by N bits via alu2: _lsh shifts 3 left (label L##N), _rsh shifts $(1<<63) right (label R##N). NOTE(review): how $(1<<63) evaluates when __WORDSIZE == 32 is not visible here -- confirm. */
+#define _lsh(N)                                                        \
+       alu2(lsh, L##N, 3, N, $(3<<N))
+#define _rsh(N)                                                        \
+       alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))
+/* xsh64(X): shift counts 32..63 for _lsh/_rsh (X is l or r); expands to nothing when __WORDSIZE == 32. */
+#if __WORDSIZE == 32
+#  define xsh64(X)                     /**/
+#else
+#  define xsh64(X)                                             \
+       _##X##sh(32)                                            \
+       _##X##sh(33)                                            \
+       _##X##sh(34)                                            \
+       _##X##sh(35)                                            \
+       _##X##sh(36)                                            \
+       _##X##sh(37)                                            \
+       _##X##sh(38)                                            \
+       _##X##sh(39)                                            \
+       _##X##sh(40)                                            \
+       _##X##sh(41)                                            \
+       _##X##sh(42)                                            \
+       _##X##sh(43)                                            \
+       _##X##sh(44)                                            \
+       _##X##sh(45)                                            \
+       _##X##sh(46)                                            \
+       _##X##sh(47)                                            \
+       _##X##sh(48)                                            \
+       _##X##sh(49)                                            \
+       _##X##sh(50)                                            \
+       _##X##sh(51)                                            \
+       _##X##sh(52)                                            \
+       _##X##sh(53)                                            \
+       _##X##sh(54)                                            \
+       _##X##sh(55)                                            \
+       _##X##sh(56)                                            \
+       _##X##sh(57)                                            \
+       _##X##sh(58)                                            \
+       _##X##sh(59)                                            \
+       _##X##sh(60)                                            \
+       _##X##sh(61)                                            \
+       _##X##sh(62)                                            \
+       _##X##sh(63)
+#endif
+/* xsh(X): shift counts 0..31 for _lsh/_rsh (X is l or r), followed by xsh64(X) for the remaining counts. */
+#define xsh(X)                                                 \
+       _##X##sh(0)                                             \
+       _##X##sh(1)                                             \
+       _##X##sh(2)                                             \
+       _##X##sh(3)                                             \
+       _##X##sh(4)                                             \
+       _##X##sh(5)                                             \
+       _##X##sh(6)                                             \
+       _##X##sh(7)                                             \
+       _##X##sh(8)                                             \
+       _##X##sh(9)                                             \
+       _##X##sh(10)                                            \
+       _##X##sh(11)                                            \
+       _##X##sh(12)                                            \
+       _##X##sh(13)                                            \
+       _##X##sh(14)                                            \
+       _##X##sh(15)                                            \
+       _##X##sh(16)                                            \
+       _##X##sh(17)                                            \
+       _##X##sh(18)                                            \
+       _##X##sh(19)                                            \
+       _##X##sh(20)                                            \
+       _##X##sh(21)                                            \
+       _##X##sh(22)                                            \
+       _##X##sh(23)                                            \
+       _##X##sh(24)                                            \
+       _##X##sh(25)                                            \
+       _##X##sh(26)                                            \
+       _##X##sh(27)                                            \
+       _##X##sh(28)                                            \
+       _##X##sh(29)                                            \
+       _##X##sh(30)                                            \
+       _##X##sh(31)                                            \
+       xsh64(X)
+/* lsh() / rsh(): run every left / right shift-count check through xsh. */
+#define lsh()                                                  \
+       xsh(l)
+#define rsh()                                                  \
+       xsh(r)
+/* reset(V): call memset(buf, V, M64 + 8), refilling all M64 + 8 bytes reserved at buf with byte V. */
+#define reset(V)                                               \
+       prepare                                                 \
+               pushargi buf                                    \
+               pushargi V                                      \
+               pushargi $(M64 + 8)                             \
+       finishi @memset
+/* stx(T, N, O, V): load V into %r0 and store it with stxi##T at immediate offset O from %v0; N is accepted but unused in the expansion. */
+#define stx(T, N, O, V)                                                \
+       movi %r0 V                                              \
+       stxi##T O %v0 %r0
+#define stx8(T, M, V)  /* stores of V at offsets M##B3 .. M##B26 (magnitude 8 up to 1<<26) */ \
+       stx(T, 3, $(M##B3), V)                                  \
+       stx(T, 4, $(M##B4), V)                                  \
+       stx(T, 5, $(M##B5), V)                                  \
+       stx(T, 6, $(M##B6), V)                                  \
+       stx(T, 7, $(M##B7), V)                                  \
+       stx(T, 8, $(M##B8), V)                                  \
+       stx(T, 9, $(M##B9), V)                                  \
+       stx(T, 10, $(M##B10), V)                                \
+       stx(T, 11, $(M##B11), V)                                \
+       stx(T, 12, $(M##B12), V)                                \
+       stx(T, 13, $(M##B13), V)                                \
+       stx(T, 14, $(M##B14), V)                                \
+       stx(T, 15, $(M##B15), V)                                \
+       stx(T, 16, $(M##B16), V)                                \
+       stx(T, 17, $(M##B17), V)                                \
+       stx(T, 18, $(M##B18), V)                                \
+       stx(T, 19, $(M##B19), V)                                \
+       stx(T, 20, $(M##B20), V)                                \
+       stx(T, 21, $(M##B21), V)                                \
+       stx(T, 22, $(M##B22), V)                                \
+       stx(T, 23, $(M##B23), V)                                \
+       stx(T, 24, $(M##B24), V)                                \
+       stx(T, 25, $(M##B25), V)                                \
+       stx(T, 26, $(M##B26), V)
+#define stx4(T, M, V)  /* stx8 plus the offset M##B2 (magnitude 4) */ \
+       stx(T, 2, $(M##B2), V)                                  \
+       stx8(T, M, V)
+#define stx2(T, M, V)  /* stx4 plus the offset M##B1 (magnitude 2) */ \
+       stx(T, 1, $(M##B1), V)                                  \
+       stx4(T, M, V)
+#define ldx(T, N, M, O, V)     /* clear %r0, load with ldxi##T from offset O off %v0, abort unless the result equals V; T, N and M only form the label name */ \
+       movi %r0 0                                              \
+       ldxi##T %r0 %v0 O                                       \
+       beqi ldx##T##N##M %r0 V                                 \
+       calli @abort                                            \
+ldx##T##N##M:
+#define ldx8(T, M, V)  /* load-and-compare at offsets M##B3 .. M##B26, mirroring stx8 */ \
+       ldx(T, 3, M, $(M##B3), V)                               \
+       ldx(T, 4, M, $(M##B4), V)                               \
+       ldx(T, 5, M, $(M##B5), V)                               \
+       ldx(T, 6, M, $(M##B6), V)                               \
+       ldx(T, 7, M, $(M##B7), V)                               \
+       ldx(T, 8, M, $(M##B8), V)                               \
+       ldx(T, 9, M, $(M##B9), V)                               \
+       ldx(T, 10, M, $(M##B10), V)                             \
+       ldx(T, 11, M, $(M##B11), V)                             \
+       ldx(T, 12, M, $(M##B12), V)                             \
+       ldx(T, 13, M, $(M##B13), V)                             \
+       ldx(T, 14, M, $(M##B14), V)                             \
+       ldx(T, 15, M, $(M##B15), V)                             \
+       ldx(T, 16, M, $(M##B16), V)                             \
+       ldx(T, 17, M, $(M##B17), V)                             \
+       ldx(T, 18, M, $(M##B18), V)                             \
+       ldx(T, 19, M, $(M##B19), V)                             \
+       ldx(T, 20, M, $(M##B20), V)                             \
+       ldx(T, 21, M, $(M##B21), V)                             \
+       ldx(T, 22, M, $(M##B22), V)                             \
+       ldx(T, 23, M, $(M##B23), V)                             \
+       ldx(T, 24, M, $(M##B24), V)                             \
+       ldx(T, 25, M, $(M##B25), V)                             \
+       ldx(T, 26, M, $(M##B26), V)
+#define ldx4(T, M, V)  /* ldx8 plus the offset M##B2 (magnitude 4) */ \
+       ldx(T, 2, M, $(M##B2), V)                               \
+       ldx8(T, M, V)
+#define ldx2(T, M, V)  /* ldx4 plus the offset M##B1 (magnitude 2) */ \
+       ldx(T, 1, M, $(M##B1), V)                               \
+       ldx4(T, M, V)
+/* stf(T, N, O, V): float counterpart of stx -- movi##T loads V into %f0, stxi##T stores it at immediate offset O from %v0; N is unused in the expansion. */
+#define stf(T, N, O, V)                                                \
+       movi##T %f0 V                                           \
+       stxi##T O %v0 %f0
+#define stf8(T, M, V)  /* float stores of V at offsets M##B3 .. M##B26 (magnitude 8 up to 1<<26) */ \
+       stf(T, 3, $(M##B3), V)                                  \
+       stf(T, 4, $(M##B4), V)                                  \
+       stf(T, 5, $(M##B5), V)                                  \
+       stf(T, 6, $(M##B6), V)                                  \
+       stf(T, 7, $(M##B7), V)                                  \
+       stf(T, 8, $(M##B8), V)                                  \
+       stf(T, 9, $(M##B9), V)                                  \
+       stf(T, 10, $(M##B10), V)                                \
+       stf(T, 11, $(M##B11), V)                                \
+       stf(T, 12, $(M##B12), V)                                \
+       stf(T, 13, $(M##B13), V)                                \
+       stf(T, 14, $(M##B14), V)                                \
+       stf(T, 15, $(M##B15), V)                                \
+       stf(T, 16, $(M##B16), V)                                \
+       stf(T, 17, $(M##B17), V)                                \
+       stf(T, 18, $(M##B18), V)                                \
+       stf(T, 19, $(M##B19), V)                                \
+       stf(T, 20, $(M##B20), V)                                \
+       stf(T, 21, $(M##B21), V)                                \
+       stf(T, 22, $(M##B22), V)                                \
+       stf(T, 23, $(M##B23), V)                                \
+       stf(T, 24, $(M##B24), V)                                \
+       stf(T, 25, $(M##B25), V)                                \
+       stf(T, 26, $(M##B26), V)
+#define stf4(T, M, V)  /* stf8 plus the offset M##B2 (magnitude 4) */ \
+       stf(T, 2, $(M##B2), V)                                  \
+       stf8(T, M, V)
+#define ldf(T, N, M, O, V)     /* float counterpart of ldx: clear %f0, load with ldxi##T from offset O off %v0, abort unless beqi##T finds it equal to V */ \
+       movi##T %f0 0                                           \
+       ldxi##T %f0 %v0 O                                       \
+       beqi##T ldf##T##N##M %f0 V                              \
+       calli @abort                                            \
+ldf##T##N##M:
+#define ldf8(T, M, V)  /* float load-and-compare at offsets M##B3 .. M##B26, mirroring stf8 */ \
+       ldf(T, 3, M, $(M##B3), V)                               \
+       ldf(T, 4, M, $(M##B4), V)                               \
+       ldf(T, 5, M, $(M##B5), V)                               \
+       ldf(T, 6, M, $(M##B6), V)                               \
+       ldf(T, 7, M, $(M##B7), V)                               \
+       ldf(T, 8, M, $(M##B8), V)                               \
+       ldf(T, 9, M, $(M##B9), V)                               \
+       ldf(T, 10, M, $(M##B10), V)                             \
+       ldf(T, 11, M, $(M##B11), V)                             \
+       ldf(T, 12, M, $(M##B12), V)                             \
+       ldf(T, 13, M, $(M##B13), V)                             \
+       ldf(T, 14, M, $(M##B14), V)                             \
+       ldf(T, 15, M, $(M##B15), V)                             \
+       ldf(T, 16, M, $(M##B16), V)                             \
+       ldf(T, 17, M, $(M##B17), V)                             \
+       ldf(T, 18, M, $(M##B18), V)                             \
+       ldf(T, 19, M, $(M##B19), V)                             \
+       ldf(T, 20, M, $(M##B20), V)                             \
+       ldf(T, 21, M, $(M##B21), V)                             \
+       ldf(T, 22, M, $(M##B22), V)                             \
+       ldf(T, 23, M, $(M##B23), V)                             \
+       ldf(T, 24, M, $(M##B24), V)                             \
+       ldf(T, 25, M, $(M##B25), V)                             \
+       ldf(T, 26, M, $(M##B26), V)
+#define ldf4(T, M, V)  /* ldf8 plus the offset M##B2 (magnitude 4) */ \
+       ldf(T, 2, M, $(M##B2), V)                               \
+       ldf8(T, M, V)
+/* ldst_c(): _c round trip -- fill buf with 0xa5, store 0x5a at the positive aB offsets from buf and read each back; then refill and repeat with the negative bB offsets from buf + M64. */
+#define ldst_c()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_c, a, 0x5a)                                       \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_c, b, 0x5a)
+#define ldst_uc()      /* store 0x5a with stxi_c, read back with ldxi_uc: positive offsets from buf, then negative offsets from buf + M64 */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_uc, a, 0x5a)                                      \
+       reset(0xa5)     /* refill, as the other ldst_* macros do: the byte stored at buf + (1<<25) above would otherwise satisfy the load at offset -(1<<25) below */ \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_uc, b, 0x5a)
+#define ldst_s()       /* stxi_s / ldxi_s round trip of 0x5a5a, offsets of magnitude 2 and up */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_s, a, 0x5a5a)                                     \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_s, b, 0x5a5a)
+#define ldst_us()      /* as ldst_s, but reads back with ldxi_us */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_us, a, 0x5a5a)                                    \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_us, b, 0x5a5a)
+#define ldst_i()       /* stxi_i / ldxi_i round trip of 0x5a5a5a5a, offsets of magnitude 4 and up */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_i, a, 0x5a5a5a5a)                                 \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_i, b, 0x5a5a5a5a)
+#define ldst_ui()      /* as ldst_i, but reads back with ldxi_ui; invoked only when __WORDSIZE == 64 */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_ui, a, 0x5a5a5a5a)                                \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_ui, b, 0x5a5a5a5a)
+#define ldst_l()       /* stxi_l / ldxi_l round trip, offsets of magnitude 8 and up; invoked only when __WORDSIZE == 64 */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx8(_l, b, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+#define ldst_f()       /* stxi_f / ldxi_f round trip of 0.5, offsets of magnitude 4 and up */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf4(_f, a, 0.5)                                        \
+       ldf4(_f, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf4(_f, b, 0.5)                                        \
+       ldf4(_f, b, 0.5)
+#define ldst_d()       /* stxi_d / ldxi_d round trip of 0.5, offsets of magnitude 8 and up */ \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf8(_d, a, 0.5)                                        \
+       ldf8(_d, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf8(_d, b, 0.5)                                        \
+       ldf8(_d, b, 0.5)
+/* Data: buf reserves M64 + 8 bytes (the length reset() fills; the tail leaves room for an access at offset M64), then the string ok. 67112960 is M64 + 4096 -- presumably the total data-section size; confirm against the test driver. */
+.data          67112960
+buf:
+.size          M64
+.size          8
+ok:
+.c             "ok"
+/* Test body: each macro below expands to checks that call abort on a mismatch; reaching the end calls puts(ok). */
+.code
+       prolog
+
+       alu(add)
+       alu(sub)
+       alu(rsb)
+       alu(mul)
+       alu(div)
+       alu(rem)
+       lsh()
+       rsh()
+       alu(and)
+       alu(or)
+       alu(xor)
+       ldst_c()
+       ldst_uc()
+       ldst_s()
+       ldst_us()
+       ldst_i()
+#if __WORDSIZE == 64   /* ldst_ui and ldst_l run only with 64-bit words */
+       ldst_ui()
+       ldst_l()
+#endif
+       ldst_f()
+       ldst_d()
+
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/ranger.ok b/deps/lightning/check/ranger.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ranger.tst b/deps/lightning/check/ranger.tst
new file mode 100644 (file)
index 0000000..de80196
--- /dev/null
@@ -0,0 +1,507 @@
+#define M64    67108864
+
+#define aB1    (1<<1)
+#define aB2    (1<<2)
+#define aB3    (1<<3)
+#define aB4    (1<<4)
+#define aB5    (1<<5)
+#define aB6    (1<<6)
+#define aB7    (1<<7)
+#define aB8    (1<<8)
+#define aB9    (1<<9)
+#define aB10   (1<<10)
+#define aB11   (1<<11)
+#define aB12   (1<<12)
+#define aB13   (1<<13)
+#define aB14   (1<<14)
+#define aB15   (1<<15)
+#define aB16   (1<<16)
+#define aB17   (1<<17)
+#define aB18   (1<<18)
+#define aB19   (1<<19)
+#define aB20   (1<<20)
+#define aB21   (1<<21)
+#define aB22   (1<<22)
+#define aB23   (1<<23)
+#define aB24   (1<<24)
+#define aB25   (1<<25)
+#define aB26   (1<<26)
+#define bB1    (-aB1)
+#define bB2    (-aB2)
+#define bB3    (-aB3)
+#define bB4    (-aB4)
+#define bB5    (-aB5)
+#define bB6    (-aB6)
+#define bB7    (-aB7)
+#define bB8    (-aB8)
+#define bB9    (-aB9)
+#define bB10   (-aB10)
+#define bB11   (-aB11)
+#define bB12   (-aB12)
+#define bB13   (-aB13)
+#define bB14   (-aB14)
+#define bB15   (-aB15)
+#define bB16   (-aB16)
+#define bB17   (-aB17)
+#define bB18   (-aB18)
+#define bB19   (-aB19)
+#define bB20   (-aB20)
+#define bB21   (-aB21)
+#define bB22   (-aB22)
+#define bB23   (-aB23)
+#define bB24   (-aB24)
+#define bB25   (-aB25)
+#define bB26   (-aB26)
+#define cB1    (aB1-1)
+#define cB2    (aB2-1)
+#define cB3    (aB3-1)
+#define cB4    (aB4-1)
+#define cB5    (aB5-1)
+#define cB6    (aB6-1)
+#define cB7    (aB7-1)
+#define cB8    (aB8-1)
+#define cB9    (aB9-1)
+#define cB10   (aB10-1)
+#define cB11   (aB11-1)
+#define cB12   (aB12-1)
+#define cB13   (aB13-1)
+#define cB14   (aB14-1)
+#define cB15   (aB15-1)
+#define cB16   (aB16-1)
+#define cB17   (aB17-1)
+#define cB18   (aB18-1)
+#define cB19   (aB19-1)
+#define cB20   (aB20-1)
+#define cB21   (aB21-1)
+#define cB22   (aB22-1)
+#define cB23   (aB23-1)
+#define cB24   (aB24-1)
+#define cB25   (aB25-1)
+#define cB26   (aB26-1)
+#define dB1    (-aB1+1)
+#define dB2    (-aB2+1)
+#define dB3    (-aB3+1)
+#define dB4    (-aB4+1)
+#define dB5    (-aB5+1)
+#define dB6    (-aB6+1)
+#define dB7    (-aB7+1)
+#define dB8    (-aB8+1)
+#define dB9    (-aB9+1)
+#define dB10   (-aB10+1)
+#define dB11   (-aB11+1)
+#define dB12   (-aB12+1)
+#define dB13   (-aB13+1)
+#define dB14   (-aB14+1)
+#define dB15   (-aB15+1)
+#define dB16   (-aB16+1)
+#define dB17   (-aB17+1)
+#define dB18   (-aB18+1)
+#define dB19   (-aB19+1)
+#define dB20   (-aB20+1)
+#define dB21   (-aB21+1)
+#define dB22   (-aB22+1)
+#define dB23   (-aB23+1)
+#define dB24   (-aB24+1)
+#define dB25   (-aB25+1)
+#define dB26   (-aB26+1)
+/* Reference results: one $(...) expression per tested instruction name, so alu1 can spell the expected value as N(3, M##k); rsb swaps the operands (b - a). */
+#define add(a, b)              $(a + b)
+#define sub(a, b)              $(a - b)
+#define rsb(a, b)              $(b - a)
+#define mul(a, b)              $(a * b)
+#define div(a, b)              $(a / b)
+#define rem(a, b)              $(a % b)
+#define and(a, b)              $(a & b)
+#define or(a, b)               $(a | b)
+#define xor(a, b)              $(a ^ b)
+/* alu2(N, X, L, R, V): register-form check -- load L into %r1 and R into %r2, run N##r into %r0, then beqi to label X when %r0 equals V; otherwise fall into calli @abort. */
+#define alu2(N, X, L, R, V)                                    \
+       movi %r1 L                                              \
+       movi %r2 R                                              \
+       N##r %r0 %r1 %r2                                        \
+       beqi X %r0 V                                            \
+       calli @abort                                            \
+X:
+#define alu1(N, M)                                             \
+       alu2(N, N##M##1, 3, $(M##1), N(3, M##1))                \
+       alu2(N, N##M##2, 3, $(M##2), N(3, M##2))                \
+       alu2(N, N##M##3, 3, $(M##3), N(3, M##3))                \
+       alu2(N, N##M##4, 3, $(M##4), N(3, M##4))                \
+       alu2(N, N##M##5, 3, $(M##5), N(3, M##5))                \
+       alu2(N, N##M##6, 3, $(M##6), N(3, M##6))                \
+       alu2(N, N##M##7, 3, $(M##7), N(3, M##7))                \
+       alu2(N, N##M##8, 3, $(M##8), N(3, M##8))                \
+       alu2(N, N##M##9, 3, $(M##9), N(3, M##9))                \
+       alu2(N, N##M##10, 3, $(M##10), N(3, M##10))             \
+       alu2(N, N##M##11, 3, $(M##11), N(3, M##11))             \
+       alu2(N, N##M##12, 3, $(M##12), N(3, M##12))             \
+       alu2(N, N##M##13, 3, $(M##13), N(3, M##13))             \
+       alu2(N, N##M##14, 3, $(M##14), N(3, M##14))             \
+       alu2(N, N##M##15, 3, $(M##15), N(3, M##15))             \
+       alu2(N, N##M##16, 3, $(M##16), N(3, M##16))             \
+       alu2(N, N##M##17, 3, $(M##17), N(3, M##17))             \
+       alu2(N, N##M##18, 3, $(M##18), N(3, M##18))             \
+       alu2(N, N##M##19, 3, $(M##19), N(3, M##19))             \
+       alu2(N, N##M##20, 3, $(M##20), N(3, M##20))             \
+       alu2(N, N##M##21, 3, $(M##21), N(3, M##21))             \
+       alu2(N, N##M##22, 3, $(M##22), N(3, M##22))             \
+       alu2(N, N##M##23, 3, $(M##23), N(3, M##23))             \
+       alu2(N, N##M##24, 3, $(M##24), N(3, M##24))             \
+       alu2(N, N##M##25, 3, $(M##25), N(3, M##25))             \
+       alu2(N, N##M##26, 3, $(M##26), N(3, M##26))
+/* alu(N): run alu1 for instruction N against all four operand families (aB, bB, cB, dB), i.e. 4 * 26 checks. */
+#define alu(N)                                                 \
+       alu1(N, aB)                                             \
+       alu1(N, bB)                                             \
+       alu1(N, cB)                                             \
+       alu1(N, dB)
+/* Shift checks by N bits via alu2: _lsh shifts 3 left (label L##N), _rsh shifts $(1<<63) right (label R##N). NOTE(review): how $(1<<63) evaluates when __WORDSIZE == 32 is not visible here -- confirm. */
+#define _lsh(N)                                                        \
+       alu2(lsh, L##N, 3, N, $(3<<N))
+#define _rsh(N)                                                        \
+       alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))
+
+/*
+ * xsh64(X) expands _<X>sh(N) for the shift counts 32 through 63.
+ * When __WORDSIZE == 32 it expands to nothing, so those counts are
+ * not generated at all.
+ */
+#if __WORDSIZE == 32
+#  define xsh64(X)                     /**/
+#else
+#  define xsh64(X)                                             \
+       _##X##sh(32)                                            \
+       _##X##sh(33)                                            \
+       _##X##sh(34)                                            \
+       _##X##sh(35)                                            \
+       _##X##sh(36)                                            \
+       _##X##sh(37)                                            \
+       _##X##sh(38)                                            \
+       _##X##sh(39)                                            \
+       _##X##sh(40)                                            \
+       _##X##sh(41)                                            \
+       _##X##sh(42)                                            \
+       _##X##sh(43)                                            \
+       _##X##sh(44)                                            \
+       _##X##sh(45)                                            \
+       _##X##sh(46)                                            \
+       _##X##sh(47)                                            \
+       _##X##sh(48)                                            \
+       _##X##sh(49)                                            \
+       _##X##sh(50)                                            \
+       _##X##sh(51)                                            \
+       _##X##sh(52)                                            \
+       _##X##sh(53)                                            \
+       _##X##sh(54)                                            \
+       _##X##sh(55)                                            \
+       _##X##sh(56)                                            \
+       _##X##sh(57)                                            \
+       _##X##sh(58)                                            \
+       _##X##sh(59)                                            \
+       _##X##sh(60)                                            \
+       _##X##sh(61)                                            \
+       _##X##sh(62)                                            \
+       _##X##sh(63)
+#endif
+
+/*
+ * xsh(X) expands _<X>sh(N) for the shift counts 0 through 31 and then
+ * appends xsh64(X), which adds the counts 32 through 63 unless
+ * __WORDSIZE == 32.  X is "l" or "r" (see lsh() and rsh()).
+ */
+#define xsh(X)                                                 \
+       _##X##sh(0)                                             \
+       _##X##sh(1)                                             \
+       _##X##sh(2)                                             \
+       _##X##sh(3)                                             \
+       _##X##sh(4)                                             \
+       _##X##sh(5)                                             \
+       _##X##sh(6)                                             \
+       _##X##sh(7)                                             \
+       _##X##sh(8)                                             \
+       _##X##sh(9)                                             \
+       _##X##sh(10)                                            \
+       _##X##sh(11)                                            \
+       _##X##sh(12)                                            \
+       _##X##sh(13)                                            \
+       _##X##sh(14)                                            \
+       _##X##sh(15)                                            \
+       _##X##sh(16)                                            \
+       _##X##sh(17)                                            \
+       _##X##sh(18)                                            \
+       _##X##sh(19)                                            \
+       _##X##sh(20)                                            \
+       _##X##sh(21)                                            \
+       _##X##sh(22)                                            \
+       _##X##sh(23)                                            \
+       _##X##sh(24)                                            \
+       _##X##sh(25)                                            \
+       _##X##sh(26)                                            \
+       _##X##sh(27)                                            \
+       _##X##sh(28)                                            \
+       _##X##sh(29)                                            \
+       _##X##sh(30)                                            \
+       _##X##sh(31)                                            \
+       xsh64(X)
+
+/* Entry points used by the test body: all left-shift / right-shift checks. */
+#define lsh()                                                  \
+       xsh(l)
+#define rsh()                                                  \
+       xsh(r)
+
+/*
+ * reset(V): call memset(buf, V, M64 + 8), i.e. fill the test buffer
+ * (declared below as M64 bytes followed by 8 more) with the byte V.
+ */
+#define reset(V)                                               \
+       prepare                                                 \
+               pushargi buf                                    \
+               pushargi V                                      \
+               pushargi $(M64 + 8)                             \
+       finishi @memset
+
+/*
+ * Integer store helpers.
+ *
+ * stx(T, N, O, V): load V into %r0 and the offset O into %r1, then store
+ * %r0 with the register-indexed store stxr<T> at base %v0 plus %r1.
+ * N is not used in the expansion; it only mirrors the ldx() argument list.
+ *
+ * stx8(T, M, V) repeats stx() for the offsets <M>B3 .. <M>B26; stx4() adds
+ * <M>B2 and stx2() adds <M>B1 on top of that.  The <M>B<n> offsets are
+ * defined earlier in this file (not visible here); the 8/4/2 suffix
+ * presumably names the smallest offset granularity that is valid for the
+ * access width -- confirm against those definitions.
+ */
+#define stx(T, N, O, V)                                                \
+       movi %r0 V                                              \
+       movi %r1 O                                              \
+       stxr##T %r1 %v0 %r0
+#define stx8(T, M, V)                                          \
+       stx(T, 3, $(M##B3), V)                                  \
+       stx(T, 4, $(M##B4), V)                                  \
+       stx(T, 5, $(M##B5), V)                                  \
+       stx(T, 6, $(M##B6), V)                                  \
+       stx(T, 7, $(M##B7), V)                                  \
+       stx(T, 8, $(M##B8), V)                                  \
+       stx(T, 9, $(M##B9), V)                                  \
+       stx(T, 10, $(M##B10), V)                                \
+       stx(T, 11, $(M##B11), V)                                \
+       stx(T, 12, $(M##B12), V)                                \
+       stx(T, 13, $(M##B13), V)                                \
+       stx(T, 14, $(M##B14), V)                                \
+       stx(T, 15, $(M##B15), V)                                \
+       stx(T, 16, $(M##B16), V)                                \
+       stx(T, 17, $(M##B17), V)                                \
+       stx(T, 18, $(M##B18), V)                                \
+       stx(T, 19, $(M##B19), V)                                \
+       stx(T, 20, $(M##B20), V)                                \
+       stx(T, 21, $(M##B21), V)                                \
+       stx(T, 22, $(M##B22), V)                                \
+       stx(T, 23, $(M##B23), V)                                \
+       stx(T, 24, $(M##B24), V)                                \
+       stx(T, 25, $(M##B25), V)                                \
+       stx(T, 26, $(M##B26), V)
+#define stx4(T, M, V)                                          \
+       stx(T, 2, $(M##B2), V)                                  \
+       stx8(T, M, V)
+#define stx2(T, M, V)                                          \
+       stx(T, 1, $(M##B1), V)                                  \
+       stx4(T, M, V)
+/*
+ * Integer load-and-verify helpers.
+ *
+ * ldx(T, N, M, O, V): clear %r0, load it with the immediate-indexed load
+ * ldxi<T> from base %v0 plus offset O, and branch to the label
+ * ldx<T><N><M> when the result equals V; otherwise abort() is called.
+ * T, N and M together make the label unique for every expansion.
+ *
+ * ldx8(T, M, V) repeats ldx() for the offsets <M>B3 .. <M>B26; ldx4() adds
+ * <M>B2 and ldx2() adds <M>B1, matching stx8()/stx4()/stx2().
+ */
+#define ldx(T, N, M, O, V)                                     \
+       movi %r0 0                                              \
+       ldxi##T %r0 %v0 O                                       \
+       beqi ldx##T##N##M %r0 V                                 \
+       calli @abort                                            \
+ldx##T##N##M:
+#define ldx8(T, M, V)                                          \
+       ldx(T, 3, M, $(M##B3), V)                               \
+       ldx(T, 4, M, $(M##B4), V)                               \
+       ldx(T, 5, M, $(M##B5), V)                               \
+       ldx(T, 6, M, $(M##B6), V)                               \
+       ldx(T, 7, M, $(M##B7), V)                               \
+       ldx(T, 8, M, $(M##B8), V)                               \
+       ldx(T, 9, M, $(M##B9), V)                               \
+       ldx(T, 10, M, $(M##B10), V)                             \
+       ldx(T, 11, M, $(M##B11), V)                             \
+       ldx(T, 12, M, $(M##B12), V)                             \
+       ldx(T, 13, M, $(M##B13), V)                             \
+       ldx(T, 14, M, $(M##B14), V)                             \
+       ldx(T, 15, M, $(M##B15), V)                             \
+       ldx(T, 16, M, $(M##B16), V)                             \
+       ldx(T, 17, M, $(M##B17), V)                             \
+       ldx(T, 18, M, $(M##B18), V)                             \
+       ldx(T, 19, M, $(M##B19), V)                             \
+       ldx(T, 20, M, $(M##B20), V)                             \
+       ldx(T, 21, M, $(M##B21), V)                             \
+       ldx(T, 22, M, $(M##B22), V)                             \
+       ldx(T, 23, M, $(M##B23), V)                             \
+       ldx(T, 24, M, $(M##B24), V)                             \
+       ldx(T, 25, M, $(M##B25), V)                             \
+       ldx(T, 26, M, $(M##B26), V)
+#define ldx4(T, M, V)                                          \
+       ldx(T, 2, M, $(M##B2), V)                               \
+       ldx8(T, M, V)
+#define ldx2(T, M, V)                                          \
+       ldx(T, 1, M, $(M##B1), V)                               \
+       ldx4(T, M, V)
+
+/*
+ * Floating-point store helpers.
+ *
+ * stf(T, N, O, V): load V into %f0 with movi<T> and the offset O into %r0,
+ * then store %f0 with the register-indexed store stxr<T> at base %v0 plus
+ * %r0.  N is not used in the expansion.
+ *
+ * stf8(T, M, V) repeats stf() for the offsets <M>B3 .. <M>B26 and stf4()
+ * adds <M>B2.  There is no stf2().
+ */
+#define stf(T, N, O, V)                                                \
+       movi##T %f0 V                                           \
+       movi %r0 O                                              \
+       stxr##T %r0 %v0 %f0
+#define stf8(T, M, V)                                          \
+       stf(T, 3, $(M##B3), V)                                  \
+       stf(T, 4, $(M##B4), V)                                  \
+       stf(T, 5, $(M##B5), V)                                  \
+       stf(T, 6, $(M##B6), V)                                  \
+       stf(T, 7, $(M##B7), V)                                  \
+       stf(T, 8, $(M##B8), V)                                  \
+       stf(T, 9, $(M##B9), V)                                  \
+       stf(T, 10, $(M##B10), V)                                \
+       stf(T, 11, $(M##B11), V)                                \
+       stf(T, 12, $(M##B12), V)                                \
+       stf(T, 13, $(M##B13), V)                                \
+       stf(T, 14, $(M##B14), V)                                \
+       stf(T, 15, $(M##B15), V)                                \
+       stf(T, 16, $(M##B16), V)                                \
+       stf(T, 17, $(M##B17), V)                                \
+       stf(T, 18, $(M##B18), V)                                \
+       stf(T, 19, $(M##B19), V)                                \
+       stf(T, 20, $(M##B20), V)                                \
+       stf(T, 21, $(M##B21), V)                                \
+       stf(T, 22, $(M##B22), V)                                \
+       stf(T, 23, $(M##B23), V)                                \
+       stf(T, 24, $(M##B24), V)                                \
+       stf(T, 25, $(M##B25), V)                                \
+       stf(T, 26, $(M##B26), V)
+#define stf4(T, M, V)                                          \
+       stf(T, 2, $(M##B2), V)                                  \
+       stf8(T, M, V)
+/*
+ * Floating-point load-and-verify helpers.
+ *
+ * ldf(T, N, M, O, V): set %f0 to 0, load it with ldxi<T> from base %v0
+ * plus offset O, and branch to the label ldf<T><N><M> when the result
+ * equals V (beqi<T>); otherwise abort() is called.
+ *
+ * ldf8(T, M, V) repeats ldf() for the offsets <M>B3 .. <M>B26 and ldf4()
+ * adds <M>B2, matching stf8()/stf4().
+ */
+#define ldf(T, N, M, O, V)                                     \
+       movi##T %f0 0                                           \
+       ldxi##T %f0 %v0 O                                       \
+       beqi##T ldf##T##N##M %f0 V                              \
+       calli @abort                                            \
+ldf##T##N##M:
+#define ldf8(T, M, V)                                          \
+       ldf(T, 3, M, $(M##B3), V)                               \
+       ldf(T, 4, M, $(M##B4), V)                               \
+       ldf(T, 5, M, $(M##B5), V)                               \
+       ldf(T, 6, M, $(M##B6), V)                               \
+       ldf(T, 7, M, $(M##B7), V)                               \
+       ldf(T, 8, M, $(M##B8), V)                               \
+       ldf(T, 9, M, $(M##B9), V)                               \
+       ldf(T, 10, M, $(M##B10), V)                             \
+       ldf(T, 11, M, $(M##B11), V)                             \
+       ldf(T, 12, M, $(M##B12), V)                             \
+       ldf(T, 13, M, $(M##B13), V)                             \
+       ldf(T, 14, M, $(M##B14), V)                             \
+       ldf(T, 15, M, $(M##B15), V)                             \
+       ldf(T, 16, M, $(M##B16), V)                             \
+       ldf(T, 17, M, $(M##B17), V)                             \
+       ldf(T, 18, M, $(M##B18), V)                             \
+       ldf(T, 19, M, $(M##B19), V)                             \
+       ldf(T, 20, M, $(M##B20), V)                             \
+       ldf(T, 21, M, $(M##B21), V)                             \
+       ldf(T, 22, M, $(M##B22), V)                             \
+       ldf(T, 23, M, $(M##B23), V)                             \
+       ldf(T, 24, M, $(M##B24), V)                             \
+       ldf(T, 25, M, $(M##B25), V)                             \
+       ldf(T, 26, M, $(M##B26), V)
+#define ldf4(T, M, V)                                          \
+       ldf(T, 2, M, $(M##B2), V)                               \
+       ldf8(T, M, V)
+
+/*
+ * Signed-char round trip: fill the buffer with 0xa5, store 0x5a with the
+ * _c store and read it back with the _c load, first with %v0 = buf and the
+ * "a" offsets, then (after refilling) with %v0 = buf + M64 and the "b"
+ * offsets.
+ */
+#define ldst_c()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_c, a, 0x5a)                                       \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_c, b, 0x5a)
+/*
+ * Unsigned-char round trip: store 0x5a with the _c store and read it back
+ * with the _uc load, first with %v0 = buf and the "a" offsets, then with
+ * %v0 = buf + M64 and the "b" offsets.
+ * The buffer is refilled with 0xa5 before each half, exactly as every
+ * other ldst_*() macro does, so the second half starts from the same
+ * known contents as the first instead of from whatever the "a" stores
+ * left behind.
+ */
+#define ldst_uc()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_uc, a, 0x5a)                                      \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_uc, b, 0x5a)
+/* Signed-short round trip of 0x5a5a (_s store, _s load), "a" then "b" half. */
+#define ldst_s()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_s, a, 0x5a5a)                                     \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_s, b, 0x5a5a)
+/* Unsigned-short round trip of 0x5a5a (_s store, _us load), "a" then "b" half. */
+#define ldst_us()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_us, a, 0x5a5a)                                    \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_us, b, 0x5a5a)
+/*
+ * Signed-int round trip of 0x5a5a5a5a (_i store, _i load), "a" then "b"
+ * half.  Uses the stx4()/ldx4() offset sets, which omit <M>B1.
+ */
+#define ldst_i()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_i, a, 0x5a5a5a5a)                                 \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_i, b, 0x5a5a5a5a)
+/*
+ * Unsigned-int round trip of 0x5a5a5a5a (_i store, _ui load), "a" then
+ * "b" half.  The test body only invokes this when __WORDSIZE == 64.
+ */
+#define ldst_ui()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_ui, a, 0x5a5a5a5a)                                \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_ui, b, 0x5a5a5a5a)
+/*
+ * Long round trip of 0x5a5a5a5a5a5a5a5a (_l store, _l load), "a" then "b"
+ * half, using the stx8()/ldx8() offset sets.  The test body only invokes
+ * this when __WORDSIZE == 64.
+ */
+#define ldst_l()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx8(_l, b, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+/* Float round trip of 0.5 (_f store, _f load), "a" then "b" half. */
+#define ldst_f()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf4(_f, a, 0.5)                                        \
+       ldf4(_f, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf4(_f, b, 0.5)                                        \
+       ldf4(_f, b, 0.5)
+/* Double round trip of 0.5 (_d store, _d load), "a" then "b" half. */
+#define ldst_d()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf8(_d, a, 0.5)                                        \
+       ldf8(_d, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf8(_d, b, 0.5)                                        \
+       ldf8(_d, b, 0.5)
+
+// Data section.  67112960 is 64 * 1024 * 1024 + 4096; M64 is defined
+// earlier in this file (presumably as 64 MiB -- confirm).
+.data          67112960
+// Test buffer: M64 bytes plus 8 more, i.e. the M64 + 8 bytes that
+// reset() fills.
+buf:
+.size          M64
+.size          8
+// String printed when every check has passed.
+ok:
+.c             "ok"
+
+// Test body: every macro below expands to checks that call abort() on a
+// mismatch, so reaching the final puts() means all of them passed.
+.code
+       prolog
+
+       // arithmetic and logical operations (alu() is defined earlier)
+       alu(add)
+       alu(sub)
+       alu(rsb)
+       alu(mul)
+       alu(div)
+       alu(rem)
+       lsh()
+       rsh()
+       alu(and)
+       alu(or)
+       alu(xor)
+       // load/store round trips for each access type
+       ldst_c()
+       ldst_uc()
+       ldst_s()
+       ldst_us()
+       ldst_i()
+       // unsigned int and long accesses are only exercised on 64-bit words
+#if __WORDSIZE == 64
+       ldst_ui()
+       ldst_l()
+#endif
+       ldst_f()
+       ldst_d()
+
+       // puts("ok")
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/ret.ok b/deps/lightning/check/ret.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ret.tst b/deps/lightning/check/ret.tst
new file mode 100644 (file)
index 0000000..de1d82c
--- /dev/null
@@ -0,0 +1,51 @@
+.data  16
+// String printed by main once both checks have passed.
+ok:
+.c     "ok"
+
+.code
+       // the helper functions come first in the code; jump over them
+       jmpi main
+
+/*
+ * This test is very simple on purpose, because otherwise it would not
+ * trigger the bug where the retr %r0 or retr_d %f0 would be omitted
+ * because the argument was already the return register, but the register
+ * ended up clobbered by another instruction, like the div*, and the wrong
+ * value was returned because the retr* was removed and, this way, the
+ * information that the register was live at function exit was lost.
+ */
+
+// check_r0: put 1 in %r0, perform an unrelated division (10 / 3 into %r1),
+// then return %r0.  The caller expects to receive 1.
+check_r0:
+       prolog
+       movi %r0 1
+       movi %r2 10
+       // on x86 this changes %rax; on other arches it could use %r0 as a
+       // temporary
+       divi %r1 %r2 3
+       // %r0 must still be 1
+       retr %r0
+       epilog
+
+// check_f0: double-precision counterpart of check_r0.  Put 0.5 in %f0,
+// perform an unrelated division (10 / 3 into %f1), then return %f0.
+// The caller expects to receive 0.5.
+check_f0:
+       prolog
+       movi_d %f0 0.5
+       movi_d %f2 10
+       divi_d %f1 %f2 3
+       retr_d %f0
+       epilog
+
+// main: call both helpers, abort() if either returns an unexpected value,
+// and print "ok" otherwise.
+main:
+       prolog
+       // integer case: the result must be 1
+       calli check_r0
+       retval %r1
+       beqi r0_ok %r1 1
+       calli @abort
+r0_ok:
+       // double case: the result must be 0.5
+       calli check_f0
+       retval_d %f1
+       beqi_d f0_ok %f1 0.5
+       calli @abort
+f0_ok:
+       // puts("ok")
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/rpn.ok b/deps/lightning/check/rpn.ok
new file mode 100644 (file)
index 0000000..b686c13
--- /dev/null
@@ -0,0 +1,6 @@
+
+C:  0  10  20  30  40  50  60  70  80  90 100 
+F: 32  50  68  86 104 122 140 158 176 194 212 
+
+F: 32  50  68  86 104 122 140 158 176 194 212 
+C:  0  10  20  30  40  50  60  70  80  90 100 
diff --git a/deps/lightning/check/rpn.tst b/deps/lightning/check/rpn.tst
new file mode 100644 (file)
index 0000000..62ef8d6
--- /dev/null
@@ -0,0 +1,183 @@
+.data  256
+// $int is the slot size (4) used to step $index through the expression
+// stack of c2f and f2c.
+.$($int = 4)
+// Row headers, the per-value printf format and the row terminator.
+C:
+.c "\nC:"
+F:
+.c "\nF:"
+format:
+.c "%3d "
+newline:
+.c "\n"
+
+.code
+       // the helper functions come first in the code; jump over them
+       jmpi main
+
+// c2f(in): hand-expanded code for the RPN expression "32x9*5/+", where x
+// is the argument; it returns 32 + (in * 9) / 5.
+// %r0 holds the top of the expression stack and deeper entries live in
+// the 32 * $int bytes reserved with allocai; $index is the offset of the
+// next free slot and is adjusted at assembly time by the ". $(...)" lines.
+// Each "// ..." marker below shows the part of the expression that is
+// still to be processed.
+       name c2f
+c2f:
+       prolog
+       arg $in
+
+       allocai $(32 * $int) $index
+
+       getarg %r2 $in
+
+       // 32x9*5/+
+       movi %r0 32
+
+       // x9*5/+
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movr %r0 %r2
+
+       // 9*5/+
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movi %r0 9
+
+       // *5/+
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       mulr %r0 %r1 %r0
+
+       // 5/+
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movi %r0 5
+
+       // /+
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       divr %r0 %r1 %r0
+
+       // +
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       addr %r0 %r1 %r0
+
+       retr %r0
+       epilog
+
+// f2c(in): hand-expanded code for the RPN expression "x32-5*9/", where x
+// is the argument; it returns ((in - 32) * 5) / 9.
+// Same layout as c2f: %r0 is the top of the expression stack, deeper
+// entries are spilled to the allocai area at offset $index.
+       name f2c
+f2c:
+       prolog
+       arg $in
+
+       allocai $(32 * $int) $index
+
+       getarg %r2 $in
+
+       // x32-5*9/
+       // NOTE(review): this first push saves %r0 before the function has
+       // written it; the slot is never loaded back, so the value is unused.
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movr %r0 %r2
+
+       // 32-5*9/
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movi %r0 32
+
+       // -5*9/
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       subr %r0 %r1 %r0
+
+       // 5*9/
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movi %r0 5
+
+       // *9/
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       mulr %r0 %r1 %r0
+
+       // 9/
+       stxi_i $index %fp %r0
+.      $($index = $index + $int)
+       movi %r0 9
+
+       // /
+.      $($index = $index - $int)
+       ldxi_i %r1 %fp $index
+       divr %r0 %r1 %r0
+
+       retr %r0
+       epilog
+
+//-----------------------------------------------------------------------
+// main: print two small conversion tables.
+//   1. a "C:" row with 0, 10, ..., 100 and an "F:" row with c2f() of each
+//   2. an "F:" row with 32, 50, ..., 212 and a "C:" row with f2c() of each
+// %v0 is the loop counter in all four loops.
+       name main
+main:
+       prolog
+
+       // header of the Celsius row
+       prepare
+               pushargi C
+               ellipsis
+       finishi @printf
+       movi %v0 0
+loopC:
+       // print %v0, then step by 10 while %v0 <= 100
+       prepare
+               pushargi format
+               ellipsis
+               pushargr %v0
+       finishi @printf
+       addi %v0 %v0 10
+       blei loopC %v0 100
+       // header of the converted (Fahrenheit) row
+       prepare
+               pushargi F
+               ellipsis
+       finishi @printf
+       movi %v0 0
+loopC2F:
+       // print c2f(%v0) for the same sequence of values
+       prepare
+               pushargr %v0
+       finishi c2f
+       retval %r0
+       prepare
+               pushargi format
+               ellipsis
+               pushargr %r0
+       finishi @printf
+       addi %v0 %v0 10
+       blei loopC2F %v0 100
+       prepare
+               pushargi newline
+               ellipsis
+       finishi @printf
+
+       // second table: header of the Fahrenheit row
+       prepare
+               pushargi F
+               ellipsis
+       finishi @printf
+       movi %v0 32
+loopF:
+       // print %v0, then step by 18 while %v0 <= 212
+       prepare
+               pushargi format
+               ellipsis
+               pushargr %v0
+       finishi @printf
+       addi %v0 %v0 18
+       blei loopF %v0 212
+       // header of the converted (Celsius) row
+       prepare
+               pushargi C
+               ellipsis
+       finishi @printf
+       movi %v0 32
+loopF2C:
+       // print f2c(%v0) for the same sequence of values
+       prepare
+               pushargr %v0
+       finishi f2c
+       retval %r0
+       prepare
+               pushargi format
+               ellipsis
+               pushargr %r0
+       finishi @printf
+       addi %v0 %v0 18
+       blei loopF2C %v0 212
+       prepare
+               pushargi newline
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/run-test b/deps/lightning/check/run-test
new file mode 100755 (executable)
index 0000000..e8369d6
--- /dev/null
@@ -0,0 +1,10 @@
+#! /bin/sh
+
+ok=`echo $1 | sed -e 's@\.\(x87\|arm\|swf\)@@'`
+$1 | tr -d \\r > $1.log
+if cmp -s $srcdir/$ok.ok $1.log; then
+  rm $1.log
+else
+  diff $srcdir/$ok.ok $1.log
+  exit 1
+fi
diff --git a/deps/lightning/check/self.c b/deps/lightning/check/self.c
new file mode 100644 (file)
index 0000000..7cfbb94
--- /dev/null
@@ -0,0 +1,136 @@
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+
+int
+main(int argc, char *argv[])
+{
+    jit_state_t                *_jit;
+
+    /* Same JIT_XY are not constants */
+    init_jit(argv[0]);
+
+    _jit = jit_new_state();
+    assert(JIT_R0 == jit_r(0));
+    (void)jit_callee_save_p(JIT_R0);
+    assert(JIT_R1 == jit_r(1));
+    (void)jit_callee_save_p(JIT_R1);
+    assert(JIT_R2 == jit_r(2));
+    (void)jit_callee_save_p(JIT_R2);
+#if defined(JIT_R3)
+    assert(JIT_R3 == jit_r(3));
+    (void)jit_callee_save_p(JIT_R3);
+#  if defined(JIT_R4)
+    assert(JIT_R4 == jit_r(4));
+    (void)jit_callee_save_p(JIT_R4);
+#    if defined(JIT_R5)
+    assert(JIT_R5 == jit_r(5));
+    (void)jit_callee_save_p(JIT_R5);
+#      if defined(JIT_R6)
+    assert(JIT_R6 == jit_r(6));
+    (void)jit_callee_save_p(JIT_R6);
+#        if defined(JIT_R7)
+    assert(JIT_R7 == jit_r(7));
+    (void)jit_callee_save_p(JIT_R7);
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+    assert(JIT_V0 == jit_v(0));
+    assert(jit_callee_save_p(JIT_V0));
+    assert(JIT_V1 == jit_v(1));
+    assert(jit_callee_save_p(JIT_V1));
+    assert(JIT_V2 == jit_v(2));
+    assert(jit_callee_save_p(JIT_V2));
+#if defined(JIT_V3)
+    assert(JIT_V3 == jit_v(3));
+    assert(jit_callee_save_p(JIT_V3));
+#  if defined(JIT_V4)
+    assert(JIT_V4 == jit_v(4));
+    assert(jit_callee_save_p(JIT_V4));
+#    if defined(JIT_V5)
+    assert(JIT_V5 == jit_v(5));
+    assert(jit_callee_save_p(JIT_V5));
+#      if defined(JIT_V6)
+    assert(JIT_V6 == jit_v(6));
+    assert(jit_callee_save_p(JIT_V6));
+#        if defined(JIT_V7)
+    assert(JIT_V7 == jit_v(7));
+    assert(jit_callee_save_p(JIT_V7));
+#          if defined(JIT_V8)
+    assert(JIT_V8 == jit_v(8));
+    assert(jit_callee_save_p(JIT_V8));
+#            if defined(JIT_V9)
+    assert(JIT_V9 == jit_v(9));
+    assert(jit_callee_save_p(JIT_V9));
+#              if defined(JIT_V10)
+    assert(JIT_V10 == jit_v(10));
+    assert(jit_callee_save_p(JIT_V10));
+#                if defined(JIT_V11)
+    assert(JIT_V11 == jit_v(11));
+    assert(jit_callee_save_p(JIT_V11));
+#                  if defined(JIT_V12)
+    assert(JIT_V12 == jit_v(12));
+    assert(jit_callee_save_p(JIT_V12));
+#                    if defined(JIT_V13)
+    assert(JIT_V13 == jit_v(13));
+    assert(jit_callee_save_p(JIT_V13));
+#                      if defined(JIT_V14)
+    assert(JIT_V14 == jit_v(14));
+    assert(jit_callee_save_p(JIT_V14));
+#                        if defined(JIT_V15)
+    assert(JIT_V15 == jit_v(15));
+    assert(jit_callee_save_p(JIT_V15));
+#                          if defined(JIT_V16)
+    assert(JIT_V16 == jit_v(16));
+    assert(jit_callee_save_p(JIT_V16));
+#                          endif
+#                        endif
+#                      endif
+#                    endif
+#                  endif
+#                endif
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+    assert(JIT_F0 == jit_f(0));
+    (void)jit_callee_save_p(JIT_F0);
+    assert(JIT_F1 == jit_f(1));
+    (void)jit_callee_save_p(JIT_F1);
+    assert(JIT_F2 == jit_f(2));
+    (void)jit_callee_save_p(JIT_F2);
+    assert(JIT_F3 == jit_f(3));
+    (void)jit_callee_save_p(JIT_F3);
+    assert(JIT_F4 == jit_f(4));
+    (void)jit_callee_save_p(JIT_F4);
+    assert(JIT_F5 == jit_f(5));
+    (void)jit_callee_save_p(JIT_F5);
+#if defined(JIT_F6)
+    assert(JIT_F6 == jit_f(6));
+    (void)jit_callee_save_p(JIT_F6);
+#  if defined(JIT_F7)
+    assert(JIT_F7 == jit_f(7));
+    (void)jit_callee_save_p(JIT_F7);
+#    if defined(JIT_F8)
+    assert(JIT_F8 == jit_f(8));
+    (void)jit_callee_save_p(JIT_F8);
+#      if defined(JIT_F9)
+    assert(JIT_F9 == jit_f(9));
+    (void)jit_callee_save_p(JIT_F9);
+#      endif
+#    endif
+#  endif
+#endif
+
+    jit_clear_state();
+    jit_destroy_state();
+    finish_jit();
+
+    return (0);
+}
diff --git a/deps/lightning/check/setcode.c b/deps/lightning/check/setcode.c
new file mode 100644 (file)
index 0000000..0047f34
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Simple test of using an alternate buffer for the code.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+#  include <fcntl.h>
+#endif
+
+/* Portability fallbacks: use MAP_ANONYMOUS when MAP_ANON is missing, and
+ * define MAP_ANONYMOUS as 0 when that is missing as well. */
+#ifndef MAP_ANON
+#  define MAP_ANON                     MAP_ANONYMOUS
+#  ifndef MAP_ANONYMOUS
+#    define MAP_ANONYMOUS              0
+#  endif
+#endif
+
+/* Everywhere except __sgi the descriptor passed to mmap() is the constant
+ * -1; on __sgi main() declares a real mmap_fd and opens /dev/zero. */
+#if !defined(__sgi)
+#define  mmap_fd                       -1
+#endif
+
+/*
+ * Emits a tiny function that prints "ok" into a caller-supplied code
+ * buffer.  The buffer is first offered with a size of 4 bytes, which must
+ * make jit_emit() fail, and then with its full size, which must succeed.
+ */
+int
+main(int argc, char *argv[])
+{
+    enum { buffer_size = 1024 * 1024, tiny_size = 4 };
+    jit_state_t                 *_jit;
+    jit_uint8_t                 *buffer;
+    jit_word_t           needed = 0;
+    void               (*entry)(void);
+#if defined(__sgi)
+    int                          mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+
+    /* readable, writable and executable private mapping for the code */
+    buffer = mmap(NULL, buffer_size, PROT_EXEC | PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+    assert(buffer != MAP_FAILED);
+#if defined(__sgi)
+    close(mmap_fd);
+#endif
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    /* body of the generated function: printf("%s\n", "ok") */
+    jit_prolog();
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%s\n");
+    jit_ellipsis();
+    jit_pushargi((jit_word_t)"ok");
+    jit_finishi(printf);
+
+    /* jit_realize() only has to be called explicitly when an alternate
+     * code buffer is used.  Memory that was not obtained from mmap may
+     * not work on several ports and/or operating system versions. */
+    jit_realize();
+
+    /* no code address may be reported yet, and the reported length must
+     * be larger than the deliberately undersized buffer used next */
+    if (jit_get_code(&needed) != NULL || needed <= tiny_size)
+       abort();
+
+    /* a buffer that is too small must make emission fail ... */
+    jit_set_code(buffer, tiny_size);
+    entry = jit_emit();
+    if (entry != NULL)
+       abort();
+
+    /* ... and a second attempt with enough room must succeed */
+    jit_set_code(buffer, buffer_size);
+    entry = jit_emit();
+    if (entry == NULL)
+       abort();
+
+    jit_clear_state();
+    entry();
+    jit_destroy_state();
+    finish_jit();
+
+    munmap(buffer, buffer_size);
+
+    return (0);
+}
diff --git a/deps/lightning/check/stack.ok b/deps/lightning/check/stack.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/stack.tst b/deps/lightning/check/stack.tst
new file mode 100644 (file)
index 0000000..e699719
--- /dev/null
@@ -0,0 +1,358 @@
+#define szof_c                 1
+#define szof_uc                        szof_c
+#define szof_s                 2
+#define szof_us                        szof_s
+#define szof_i                 4
+#if __WORDSIZE == 64
+#  define szof_ui              szof_i
+#  define szof_l               8
+#endif
+#define szof_f                 4
+#define szof_d                 8
+
+#define FILL(T) /* defines fill<T>(ptr, n): stores i at ptr[i], 0 <= i < n */  \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                /* %v0: write cursor */ \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T            /* count -> bytes */    \
+       addr %v1 %v0 %r0                /* %v1: buffer end */   \
+       movi %r0 0                      /* %r0: next value */   \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1      /* cursor >= end */     \
+       str##T %v0 %r0                                          \
+       addi %r0 %r0 1                                          \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+#define FILLF(T) /* float/double fill<T>(ptr, n): stores 0.0, 1.0, ... at ptr[0..n-1] */ \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                /* %v0: write cursor */ \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T            /* count -> bytes */    \
+       addr %v1 %v0 %r0                /* %v1: buffer end */   \
+       movi##T %f0 0.0                 /* %f0: next value */   \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1      /* cursor >= end */     \
+       str##T %v0 %f0                                          \
+       addi##T %f0 %f0 1.0                                     \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+
+#define fill_uc                fill_c
+#define fill_us                fill_s
+#define fill_ui                fill_i
+
+#define ARG(  T, N)                    arg    $arg##T##N
+#define ARGF( T, N)                    arg##T $arg##T##N
+#define ARG1( K, T)                    ARG##K(T, 0)
+#define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
+#define ARG3( K, T)    ARG2( K, T)     ARG##K(T, 2)
+#define ARG4( K, T)    ARG3( K, T)     ARG##K(T, 3)
+#define ARG5( K, T)    ARG4( K, T)     ARG##K(T, 4)
+#define ARG6( K, T)    ARG5( K, T)     ARG##K(T, 5)
+#define ARG7( K, T)    ARG6( K, T)     ARG##K(T, 6)
+#define ARG8( K, T)    ARG7( K, T)     ARG##K(T, 7)
+#define ARG9( K, T)    ARG8( K, T)     ARG##K(T, 8)
+#define ARG10(K, T)    ARG9( K, T)     ARG##K(T, 9)
+#define ARG11(K, T)    ARG10(K, T)     ARG##K(T, 10)
+#define ARG12(K, T)    ARG11(K, T)     ARG##K(T, 11)
+#define ARG13(K, T)    ARG12(K, T)     ARG##K(T, 12)
+#define ARG14(K, T)    ARG13(K, T)     ARG##K(T, 13)
+#define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
+#define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
+#define ARG_c(N)                       ARG##N( , _c)
+#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_s(N)                       ARG##N( , _s)
+#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_i(N)                       ARG##N( , _i)
+#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_l(N)                       ARG##N( , _l)
+#define ARG_f(N)                       ARG##N(F, _f)
+#define ARG_d(N)                       ARG##N(F, _d)
+
+#define CHK(N, T, V) /* abort unless argument V equals element V of the buffer at %v0 */ \
+       getarg %r0 $arg##T##V                                   \
+       ldxi##T %r1 %v0 $(V * szof##T)                          \
+       beqr N##T##V %r0 %r1                                    \
+       calli @abort                                            \
+N##T##V:
+#define CHKF(N, T, V) /* same check as CHK, using %f0/%f1 and the float/double opcodes */ \
+       getarg##T %f0 $arg##T##V                                \
+       ldxi##T %f1 %v0 $(V * szof##T)                          \
+       beqr##T N##T##V %f0 %f1                                 \
+       calli @abort                                            \
+N##T##V:
+
+#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
+#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
+#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
+#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
+#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
+#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
+#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
+#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
+#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
+#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
+#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
+#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
+#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
+#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
+#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
+#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+
+#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
+#define GET_us(N, M)           GET##N( , us##N, _us, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+
+#define PUSH(  T, V)           pushargi    V
+#define PUSHF( T, V)           pushargi##T V
+#define PUSH0( K, T)           /**/
+#define PUSH1( K, T)                                   PUSH##K(T, 0)
+#define PUSH2( K, T)           PUSH1( K, T)            PUSH##K(T, 1)
+#define PUSH3( K, T)           PUSH2( K, T)            PUSH##K(T, 2)
+#define PUSH4( K, T)           PUSH3( K, T)            PUSH##K(T, 3)
+#define PUSH5( K, T)           PUSH4( K, T)            PUSH##K(T, 4)
+#define PUSH6( K, T)           PUSH5( K, T)            PUSH##K(T, 5)
+#define PUSH7( K, T)           PUSH6( K, T)            PUSH##K(T, 6)
+#define PUSH8( K, T)           PUSH7( K, T)            PUSH##K(T, 7)
+#define PUSH9( K, T)           PUSH8( K, T)            PUSH##K(T, 8)
+#define PUSH10(K, T)           PUSH9( K, T)            PUSH##K(T, 9)
+#define PUSH11(K, T)           PUSH10(K, T)            PUSH##K(T, 10)
+#define PUSH12(K, T)           PUSH11(K, T)            PUSH##K(T, 11)
+#define PUSH13(K, T)           PUSH12(K, T)            PUSH##K(T, 12)
+#define PUSH14(K, T)           PUSH13(K, T)            PUSH##K(T, 13)
+#define PUSH15(K, T)           PUSH14(K, T)            PUSH##K(T, 14)
+#define PUSH16(K, T)           PUSH15(K, T)            PUSH##K(T, 15)
+
+#define PUSH_c( N)             PUSH##N( , _c)
+#define PUSH_uc(N)             PUSH##N( , _uc)
+#define PUSH_s( N)             PUSH##N( , _s)
+#define PUSH_us(N)             PUSH##N( , _us)
+#define PUSH_i( N)             PUSH##N( , _i)
+#define PUSH_ui(N)             PUSH##N( , _ui)
+#define PUSH_l( N)             PUSH##N( , _l)
+#define PUSH_f( N)             PUSH##N(F, _f)
+#define PUSH_d( N)             PUSH##N(F, _d)
+
+/* bottom function: test<T>_0 is the last link of the call chain and returns immediately */
+#define DEF0(T)                                                        \
+       name test##T##_0                                        \
+test##T##_0:                                                   \
+       prolog                                                  \
+       ret                                                     \
+       epilog
+
+#define DEFN(N, M, T) /* test<T>_N: checks its N arguments, then calls test<T>_M */ \
+       name test##T##_##N                                      \
+test##T##_##N:                                                 \
+       prolog                                                  \
+       arg $argp                                               \
+       /* stack buffer in %v0 */                               \
+       getarg %v0 $argp                                        \
+       ARG##T(N)                                               \
+       /* validate arguments */                                \
+       GET##T(N, M)                                            \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(N * szof##T)                         \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* copy stack buffer to heap buffer */                  \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi MEMCPY                                          \
+       /* stack buffer for next function in %v2 */             \
+       allocai $(M * szof##T) $index                           \
+       addi %v2 %fp $index                                     \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi M                                      \
+       finishi fill##T                                         \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(M)                                      \
+       finishi test##T##_##M                                   \
+       /* validate stack buffer (must still match heap copy) */ \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_##N##_done %r0 0                         \
+       calli @abort                                            \
+test##T##_##N##_done:                                          \
+       /* release heap buffer */                               \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+/* top function: test<T>_17 takes no arguments and starts the chain by calling test<T>_16 */
+#define DEFX(T)                                                        \
+       name test##T##_17                                       \
+test##T##_17:                                                  \
+       prolog                                                  \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(16 * szof##T)                        \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* stack buffer for next function in %v2 */             \
+       allocai $(16 * szof##T) $index                          \
+       addi %v2 %fp $index                                     \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi 16                                     \
+       finishi fill##T                                         \
+       /* copy stack buffer to heap buffer */                  \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi MEMCPY                                          \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(16)                                     \
+       finishi test##T##_16                                    \
+       /* validate stack buffer (must still match heap copy) */ \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_17_done %r0 0                            \
+       calli @abort                                            \
+test##T##_17_done:                                             \
+       /* release heap buffer */                               \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+#define DEF(  T)                                               \
+       DEF0( T)                                                \
+       DEFN( 1,  0, T)                                         \
+       DEFN( 2,  1, T)                                         \
+       DEFN( 3,  2, T)                                         \
+       DEFN( 4,  3, T)                                         \
+       DEFN( 5,  4, T)                                         \
+       DEFN( 6,  5, T)                                         \
+       DEFN( 7,  6, T)                                         \
+       DEFN( 8,  7, T)                                         \
+       DEFN( 9,  8, T)                                         \
+       DEFN(10,  9, T)                                         \
+       DEFN(11, 10, T)                                         \
+       DEFN(12, 11, T)                                         \
+       DEFN(13, 12, T)                                         \
+       DEFN(14, 13, T)                                         \
+       DEFN(15, 14, T)                                         \
+       DEFN(16, 15, T)                                         \
+       DEFX(T)
+
+#define CALL(T)                        calli test##T##_17
+
+.data  16
+ok:
+.c     "ok\n"
+.code
+       jmpi main
+
+#if _AIX
+#  define MEMCPY               memcpy
+/* AIX: use this JIT-coded memcpy; "error: Function not implemented (memcpy)" is presumably what @memcpy gave there -- TODO confirm */
+       name memcpy
+memcpy:
+       prolog
+       arg $dst
+       arg $src
+       arg $len
+       getarg %r0 $dst
+       getarg %r1 $src
+       getarg %r2 $len
+       movr %v1 %r0                    /* keep dst as the return value */
+       blti memcpy_done %r2 1          /* nothing to copy when len < 1 */
+memcpy_loop:                           /* copies one byte per pass, from index len-1 down to 0 */
+       subi %r2 %r2 1
+       ldxr_c %v0 %r1 %r2
+       stxr_c %r2 %r0 %v0
+       bgti memcpy_loop %r2 0
+memcpy_done:
+       retr %v1
+       epilog
+#else
+#  define MEMCPY               @memcpy
+#endif
+
+       FILL(_c)
+       FILL(_s)
+       FILL(_i)
+#if __WORDSIZE == 64
+       FILL(_l)
+#endif
+       FILLF(_f)
+       FILLF(_d)
+
+       DEF(_c)
+       DEF(_uc)
+       DEF(_s)
+       DEF(_us)
+       DEF(_i)
+#if __WORDSIZE == 64
+       DEF(_ui)
+       DEF(_l)
+#endif
+       DEF(_f)
+       DEF(_d)
+
+       name main
+main:
+       prolog
+       /* run every per-type call chain, entered at test<T>_17 */
+       CALL(_c)
+       CALL(_uc)
+       CALL(_s)
+       CALL(_us)
+       CALL(_i)
+#if __WORDSIZE == 64
+       CALL(_ui)
+       CALL(_l)
+#endif
+       CALL(_f)
+       CALL(_d)
+       /* reached only if no check in the chains called abort */
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/check/tramp.ok b/deps/lightning/check/tramp.ok
new file mode 100644 (file)
index 0000000..6adb29f
--- /dev/null
@@ -0,0 +1 @@
+xfibs(32) = 7049155
diff --git a/deps/lightning/check/tramp.tst b/deps/lightning/check/tramp.tst
new file mode 100644 (file)
index 0000000..faf63d2
--- /dev/null
@@ -0,0 +1,111 @@
+#if __WORDSIZE == 32
+#  define SIZE 4
+#else
+#  define SIZE 8
+#endif
+.data  8192
+fmt:
+.c     "xfibs(%d) = %d\n"
+/* Simulate a language specific stack */
+.align SIZE
+top:
+/* Top, or base of the stack */
+.size  SIZE
+stk:
+.size  8160
+
+.code
+       jmpi main
+
+/* Usually a trampoline is created before the code that uses it, but
+ * for testing purposes it is not required.
+ * In this test case, it would mean "main" would be converted to native
+ * code in a different jit_state_t, before xfibs was known.
+ */
+       name xfibs
+xfibs:
+       /* return address is in %r0 */
+       /* argument and return value in %v0 */
+       prolog
+       tramp 64
+       blti_u out %v0 2
+       subi %v1 %v0 1          /* V1 = N-1 */
+       subi %v2 %v0 2          /* V2 = N-2 */
+
+       /* save return address */
+       ldi %r1 top
+       stxi $(SIZE * 0) %r1 %r0
+       /* save operands */
+       stxi $(SIZE * 1) %r1 %v0
+       stxi $(SIZE * 2) %r1 %v1
+       stxi $(SIZE * 3) %r1 %v2
+       /* adjust "language" stack */
+       addi %r1 %r1 $(SIZE * 4)
+       sti top %r1
+
+       /* return address */
+       movi %r0 ret1
+       /* argument */
+       movr %v0 %v1
+       /* indirect goto */
+       jmpi xfibs
+ret1:
+       movr %v1 %v0            /* V1 = xfibs(N-1) */
+       /* save V1 over its saved copy (slot 2 of this frame) */
+       ldi %r1 top
+       stxi $(-SIZE * 2) %r1 %v1
+
+       /* reload V2 */
+       ldxi %v2 %r1 $(-SIZE * 1)
+
+       /* return address */
+       movi %r0 ret2
+       /* argument */
+       movr %v0 %v2
+       /* indirect goto */
+       jmpi xfibs
+ret2:
+       movr %v2 %v0            /* V2 = xfibs(N-2) */
+
+       /* reload return address */
+       ldi %r1 top
+       subi %r1 %r1 $(SIZE * 4)
+       ldxi %r0 %r1 $(SIZE * 0)
+       /* reload operands */
+       ldxi %v0 %r1 $(SIZE * 1)
+       ldxi %v1 %r1 $(SIZE * 2)
+       /* V2 already loaded */
+       /* update "language" stack */
+       sti top %r1
+
+       addi %v1 %v1 1
+       addr %v0 %v1 %v2        /* result = xfibs(N-1) + 1 + xfibs(N-2) */
+       jmpr %r0
+out:
+       movi %v0 1              /* N < 2 (unsigned compare): result is 1 */
+       jmpr %r0
+       epilog
+
+       name main
+main:
+       prolog
+       frame 64                /* same size as the "tramp 64" in xfibs */
+
+       /* Initialize language stack */
+       movi %r0 stk
+       sti top %r0
+
+       /* return address */
+       movi %r0 done
+       /* argument */
+       movi %v0 32
+       jmpi xfibs
+done:                          /* xfibs comes back here via jmpr %r0, result in %v0 */
+       prepare
+               pushargi fmt
+               ellipsis
+               pushargi 32
+               pushargr %v0
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/va_list.ok b/deps/lightning/check/va_list.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/va_list.tst b/deps/lightning/check/va_list.tst
new file mode 100644 (file)
index 0000000..ad704c9
--- /dev/null
@@ -0,0 +1,743 @@
+.data  8
+ok:
+.c     "ok\n"
+.code
+       jmpi main
+
+#define BEGIN(L) /* opens a function at label L */             \
+L:                                                             \
+       prolog
+#define VA_START() /* marks the variadic part and starts the va_list in %v0 */ \
+       ellipsis                                                \
+       va_start %v0
+#define VARG(L,N) /* next integer vararg must equal N, else abort */ \
+       va_arg %r0 %v0                                          \
+       beqi L##N %r0 N                                         \
+       calli @abort                                            \
+L##N:
+#define VARGD(L,N) /* next double vararg must equal N, else abort */ \
+       va_arg_d %f0 %v0                                        \
+       beqi_d L##N %f0 N                                       \
+       calli @abort                                            \
+L##N:
+#define VA_END() /* ends the va_list in %v0 and closes the function */ \
+       va_end %v0                                              \
+       ret                                                     \
+       epilog
+#define ARG(N)                 arg $arg##N
+#define ARGD(N)                        arg_d $arg##N
+#define GET(L,N) /* named integer argument N must equal N, else abort */ \
+       getarg %r0 $arg##N                                      \
+       beqi L##N %r0 N                                         \
+       calli @abort                                            \
+L##N:
+#define GETD(L,N) /* named double argument N must equal N, else abort */ \
+       getarg_d %f0 $arg##N                                    \
+       beqi_d L##N %f0 N                                       \
+       calli @abort                                            \
+L##N:
+
+#define ARG1()                         ARG(1)
+#define ARG2()         ARG1()          ARG(2)
+#define ARG3()         ARG2()          ARG(3)
+#define ARG4()         ARG3()          ARG(4)
+#define ARG5()         ARG4()          ARG(5)
+#define ARG6()         ARG5()          ARG(6)
+#define ARG7()         ARG6()          ARG(7)
+#define ARG8()         ARG7()          ARG(8)
+#define ARG9()         ARG8()          ARG(9)
+#define GET1(L)                                GET(L,1)
+#define GET2(L)                GET1(L)         GET(L,2)
+#define GET3(L)                GET2(L)         GET(L,3)
+#define GET4(L)                GET3(L)         GET(L,4)
+#define GET5(L)                GET4(L)         GET(L,5)
+#define GET6(L)                GET5(L)         GET(L,6)
+#define GET7(L)                GET6(L)         GET(L,7)
+#define GET8(L)                GET7(L)         GET(L,8)
+#define GET9(L)                GET8(L)         GET(L,9)
+#define ARGD1()                                ARGD(1)
+#define ARGD2()                ARGD1()         ARGD(2)
+#define ARGD3()                ARGD2()         ARGD(3)
+#define ARGD4()                ARGD3()         ARGD(4)
+#define ARGD5()                ARGD4()         ARGD(5)
+#define ARGD6()                ARGD5()         ARGD(6)
+#define ARGD7()                ARGD6()         ARGD(7)
+#define ARGD8()                ARGD7()         ARGD(8)
+#define ARGD9()                ARGD8()         ARGD(9)
+#define GETD1(L)                       GETD(L,1)
+#define GETD2(L)       GETD1(L)        GETD(L,2)
+#define GETD3(L)       GETD2(L)        GETD(L,3)
+#define GETD4(L)       GETD3(L)        GETD(L,4)
+#define GETD5(L)       GETD4(L)        GETD(L,5)
+#define GETD6(L)       GETD5(L)        GETD(L,6)
+#define GETD7(L)       GETD6(L)        GETD(L,7)
+#define GETD8(L)       GETD7(L)        GETD(L,8)
+#define GETD9(L)       GETD8(L)        GETD(L,9)
+#define IDARG1()                       ARG(1)
+#define IDARG2()       IDARG1()        ARGD(2)
+#define IDARG3()       IDARG2()        ARG(3)
+#define IDARG4()       IDARG3()        ARGD(4)
+#define IDARG5()       IDARG4()        ARG(5)
+#define IDARG6()       IDARG5()        ARGD(6)
+#define IDARG7()       IDARG6()        ARG(7)
+#define IDARG8()       IDARG7()        ARGD(8)
+#define IDARG9()       IDARG8()        ARG(9)
+#define IDGET1(L)                      GET(L,1)
+#define IDGET2(L)      IDGET1(L)       GETD(L,2)
+#define IDGET3(L)      IDGET2(L)       GET(L,3)
+#define IDGET4(L)      IDGET3(L)       GETD(L,4)
+#define IDGET5(L)      IDGET4(L)       GET(L,5)
+#define IDGET6(L)      IDGET5(L)       GETD(L,6)
+#define IDGET7(L)      IDGET6(L)       GET(L,7)
+#define IDGET8(L)      IDGET7(L)       GETD(L,8)
+#define IDGET9(L)      IDGET8(L)       GET(L,9)
+#define DIARG1()                       ARGD(1)
+#define DIARG2()       DIARG1()        ARG(2)
+#define DIARG3()       DIARG2()        ARGD(3)
+#define DIARG4()       DIARG3()        ARG(4)
+#define DIARG5()       DIARG4()        ARGD(5)
+#define DIARG6()       DIARG5()        ARG(6)
+#define DIARG7()       DIARG6()        ARGD(7)
+#define DIARG8()       DIARG7()        ARG(8)
+#define DIARG9()       DIARG8()        ARGD(9)
+#define DIGET1(L)                      GETD(L,1)
+#define DIGET2(L)      DIGET1(L)       GET(L,2)
+#define DIGET3(L)      DIGET2(L)       GETD(L,3)
+#define DIGET4(L)      DIGET3(L)       GET(L,4)
+#define DIGET5(L)      DIGET4(L)       GETD(L,5)
+#define DIGET6(L)      DIGET5(L)       GET(L,6)
+#define DIGET7(L)      DIGET6(L)       GETD(L,7)
+#define DIGET8(L)      DIGET7(L)       GET(L,8)
+#define DIGET9(L)      DIGET8(L)       GETD(L,9)
+
+#define VARG1(L)                                               \
+       VARG(L, 10)
+#define VARG2(L)                                               \
+       VARG(L, 9)                                              \
+       VARG1(L)
+#define VARG3(L)                                               \
+       VARG(L, 8)                                              \
+       VARG2(L)
+#define VARG4(L)                                               \
+       VARG(L, 7)                                              \
+       VARG3(L)
+#define VARG5(L)                                               \
+       VARG(L, 6)                                              \
+       VARG4(L)
+#define VARG6(L)                                               \
+       VARG(L, 5)                                              \
+       VARG5(L)
+#define VARG7(L)                                               \
+       VARG(L, 4)                                              \
+       VARG6(L)
+#define VARG8(L)                                               \
+       VARG(L, 3)                                              \
+       VARG7(L)
+#define VARG9(L)                                               \
+       VARG(L, 2)                                              \
+       VARG8(L)
+#define VARG10(L)                                              \
+       VARG(L, 1)                                              \
+       VARG9(L)
+#define VARGD1(L)                                              \
+       VARGD(L, 10)
+#define VARGD2(L)                                              \
+       VARGD(L, 9)                                             \
+       VARGD1(L)
+#define VARGD3(L)                                              \
+       VARGD(L, 8)                                             \
+       VARGD2(L)
+#define VARGD4(L)                                              \
+       VARGD(L, 7)                                             \
+       VARGD3(L)
+#define VARGD5(L)                                              \
+       VARGD(L, 6)                                             \
+       VARGD4(L)
+#define VARGD6(L)                                              \
+       VARGD(L, 5)                                             \
+       VARGD5(L)
+#define VARGD7(L)                                              \
+       VARGD(L, 4)                                             \
+       VARGD6(L)
+#define VARGD8(L)                                              \
+       VARGD(L, 3)                                             \
+       VARGD7(L)
+#define VARGD9(L)                                              \
+       VARGD(L, 2)                                             \
+       VARGD8(L)
+#define VARGD10(L)                                             \
+       VARGD(L, 1)                                             \
+       VARGD9(L)
+#define IDVARG1(L)                                             \
+       VARGD(L, 10)
+#define IDVARG2(L)                                             \
+       VARG(L, 9)                                              \
+       IDVARG1(L)
+#define IDVARG3(L)                                             \
+       VARGD(L, 8)                                             \
+       IDVARG2(L)
+#define IDVARG4(L)                                             \
+       VARG(L, 7)                                              \
+       IDVARG3(L)
+#define IDVARG5(L)                                             \
+       VARGD(L, 6)                                             \
+       IDVARG4(L)
+#define IDVARG6(L)                                             \
+       VARG(L, 5)                                              \
+       IDVARG5(L)
+#define IDVARG7(L)                                             \
+       VARGD(L, 4)                                             \
+       IDVARG6(L)
+#define IDVARG8(L)                                             \
+       VARG(L, 3)                                              \
+       IDVARG7(L)
+#define IDVARG9(L)                                             \
+       VARGD(L, 2)                                             \
+       IDVARG8(L)
+#define IDVARG10(L)                                            \
+       VARG(L, 1)                                              \
+       IDVARG9(L)
+#define DIVARG1(L)                                             \
+       VARG(L, 10)
+#define DIVARG2(L)                                             \
+       VARGD(L, 9)                                             \
+       DIVARG1(L)
+#define DIVARG3(L)                                             \
+       VARG(L, 8)                                              \
+       DIVARG2(L)
+#define DIVARG4(L)                                             \
+       VARGD(L, 7)                                             \
+       DIVARG3(L)
+#define DIVARG5(L)                                             \
+       VARG(L, 6)                                              \
+       DIVARG4(L)
+#define DIVARG6(L)                                             \
+       VARGD(L, 5)                                             \
+       DIVARG5(L)
+#define DIVARG7(L)                                             \
+       VARG(L, 4)                                              \
+       DIVARG6(L)
+#define DIVARG8(L)                                             \
+       VARGD(L, 3)                                             \
+       DIVARG7(L)
+#define DIVARG9(L)                                             \
+       VARG(L, 2)                                              \
+       DIVARG8(L)
+#define DIVARG10(L)                                            \
+       VARGD(L, 1)                                             \
+       DIVARG9(L)
+
+BEGIN(_iiiiiiiiii)
+       VA_START()
+       VARG10(_iiiiiiiiii)
+       VA_END()
+BEGIN(i_iiiiiiiii)
+       ARG1()
+       GET1(i_iiiiiiiii)
+       VA_START()
+       VARG9(i_iiiiiiiii)
+       VA_END()
+BEGIN(ii_iiiiiiii)
+       ARG2()
+       GET2(ii_iiiiiiii)
+       VA_START()
+       VARG8(ii_iiiiiiii)
+       VA_END()
+BEGIN(iii_iiiiiii)
+       ARG3()
+       GET3(iii_iiiiiii)
+       VA_START()
+       VARG7(iii_iiiiiii)
+       VA_END()
+BEGIN(iiii_iiiiii)
+       ARG4()
+       GET4(iiii_iiiiii)
+       VA_START()
+       VARG6(iiii_iiiiii)
+       VA_END()
+BEGIN(iiiii_iiiii)
+       ARG5()
+       GET5(iiiii_iiiii)
+       VA_START()
+       VARG5(iiiii_iiiii)
+       VA_END()
+BEGIN(iiiiii_iiii)
+       ARG6()
+       GET6(iiiiii_iiii)
+       VA_START()
+       VARG4(iiiiii_iiii)
+       VA_END()
+BEGIN(iiiiiii_iii)
+       ARG7()
+       GET7(iiiiiii_iii)
+       VA_START()
+       VARG3(iiiiiii_iii)
+       VA_END()
+BEGIN(iiiiiiii_ii)
+       ARG8()
+       GET8(iiiiiiii_ii)
+       VA_START()
+       VARG2(iiiiiiii_ii)
+       VA_END()
+BEGIN(iiiiiiiii_i)
+       ARG9()
+       GET9(iiiiiiiii_i)
+       VA_START()
+       VARG1(iiiiiiiii_i)
+       VA_END()
+BEGIN(_dddddddddd)
+       VA_START()
+       VARGD10(_dddddddddd)
+       VA_END()
+BEGIN(d_ddddddddd)
+       ARGD1()
+       GETD1(d_ddddddddd)
+       VA_START()
+       VARGD9(d_ddddddddd)
+       VA_END()
+BEGIN(dd_dddddddd)
+       ARGD2()
+       GETD2(dd_dddddddd)
+       VA_START()
+       VARGD8(dd_dddddddd)
+       VA_END()
+BEGIN(ddd_ddddddd)
+       ARGD3()
+       GETD3(ddd_ddddddd)
+       VA_START()
+       VARGD7(ddd_ddddddd)
+       VA_END()
+BEGIN(dddd_dddddd)
+       ARGD4()
+       GETD4(dddd_dddddd)
+       VA_START()
+       VARGD6(dddd_dddddd)
+       VA_END()
+BEGIN(ddddd_ddddd)
+       ARGD5()
+       GETD5(ddddd_ddddd)
+       VA_START()
+       VARGD5(ddddd_ddddd)
+       VA_END()
+BEGIN(dddddd_dddd)
+       ARGD6()
+       GETD6(dddddd_dddd)
+       VA_START()
+       VARGD4(dddddd_dddd)
+       VA_END()
+BEGIN(ddddddd_ddd)
+       ARGD7()
+       GETD7(ddddddd_ddd)
+       VA_START()
+       VARGD3(ddddddd_ddd)
+       VA_END()
+BEGIN(dddddddd_dd)
+       ARGD8()
+       GETD8(dddddddd_dd)
+       VA_START()
+       VARGD2(dddddddd_dd)
+       VA_END()
+BEGIN(ddddddddd_d)
+       ARGD9()
+       GETD9(ddddddddd_d)
+       VA_START()
+       VARGD1(ddddddddd_d)
+       VA_END()
+BEGIN(_ididididid)
+       VA_START()
+       IDVARG10(_ididididid)
+       VA_END()
+BEGIN(i_didididid)
+       IDARG1()
+       IDGET1(i_didididid)
+       VA_START()
+       IDVARG9(i_didididid)
+       VA_END()
+BEGIN(id_idididid)
+       IDARG2()
+       IDGET2(id_idididid)
+       VA_START()
+       IDVARG8(id_idididid)
+       VA_END()
+BEGIN(idi_dididid)
+       IDARG3()
+       IDGET3(idi_dididid)
+       VA_START()
+       IDVARG7(idi_dididid)
+       VA_END()
+BEGIN(idid_ididid)
+       IDARG4()
+       IDGET4(idid_ididid)
+       VA_START()
+       IDVARG6(idid_ididid)
+       VA_END()
+BEGIN(ididi_didid)
+       IDARG5()
+       IDGET5(ididi_didid)
+       VA_START()
+       IDVARG5(ididi_didid)
+       VA_END()
+BEGIN(ididid_idid)
+       IDARG6()
+       IDGET6(ididid_idid)
+       VA_START()
+       IDVARG4(ididid_idid)
+       VA_END()
+BEGIN(idididi_did)
+       IDARG7()
+       IDGET7(idididi_did)
+       VA_START()
+       IDVARG3(idididi_did)
+       VA_END()
+BEGIN(idididid_id)
+       IDARG8()
+       IDGET8(idididid_id)
+       VA_START()
+       IDVARG2(idididid_id)
+       VA_END()
+BEGIN(ididididi_d)
+       IDARG9()
+       IDGET9(ididididi_d)
+       VA_START()
+       IDVARG1(ididididi_d)
+       VA_END()
+BEGIN(_dididididi)
+       VA_START()
+       DIVARG10(_dididididi)
+       VA_END()
+BEGIN(d_ididididi)
+       DIARG1()
+       DIGET1(d_ididididi)
+       VA_START()
+       DIVARG9(d_ididididi)
+       VA_END()
+BEGIN(di_didididi)
+       DIARG2()
+       DIGET2(di_didididi)
+       VA_START()
+       DIVARG8(di_didididi)
+       VA_END()
+BEGIN(did_idididi)
+       DIARG3()
+       DIGET3(did_idididi)
+       VA_START()
+       DIVARG7(did_idididi)
+       VA_END()
+BEGIN(didi_dididi)
+       DIARG4()
+       DIGET4(didi_dididi)
+       VA_START()
+       DIVARG6(didi_dididi)
+       VA_END()
+BEGIN(didid_ididi)
+       DIARG5()
+       DIGET5(didid_ididi)
+       VA_START()
+       DIVARG5(didid_ididi)
+       VA_END()
+BEGIN(dididi_didi)
+       DIARG6()
+       DIGET6(dididi_didi)
+       VA_START()
+       DIVARG4(dididi_didi)
+       VA_END()
+BEGIN(dididid_idi)
+       DIARG7()
+       DIGET7(dididid_idi)
+       VA_START()
+       DIVARG3(dididid_idi)
+       VA_END()
+BEGIN(didididi_di)
+       DIARG8()
+       DIGET8(didididi_di)
+       VA_START()
+       DIVARG2(didididi_di)
+       VA_END()
+BEGIN(didididid_i)
+       DIARG9()
+       DIGET9(didididid_i)
+       VA_START()
+       DIVARG1(didididid_i)
+       VA_END()
+
+#define PUSH1()                pushargi 1
+#define PUSH2()                PUSH1()         pushargi 2
+#define PUSH3()                PUSH2()         pushargi 3
+#define PUSH4()                PUSH3()         pushargi 4
+#define PUSH5()                PUSH4()         pushargi 5
+#define PUSH6()                PUSH5()         pushargi 6
+#define PUSH7()                PUSH6()         pushargi 7
+#define PUSH8()                PUSH7()         pushargi 8
+#define PUSH9()                PUSH8()         pushargi 9
+#define VPUSH1()       pushargi 1      VPUSH2()
+#define VPUSH2()       pushargi 2      VPUSH3()
+#define VPUSH3()       pushargi 3      VPUSH4()
+#define VPUSH4()       pushargi 4      VPUSH5()
+#define VPUSH5()       pushargi 5      VPUSH6()
+#define VPUSH6()       pushargi 6      VPUSH7()
+#define VPUSH7()       pushargi 7      VPUSH8()
+#define VPUSH8()       pushargi 8      VPUSH9()
+#define VPUSH9()       pushargi 9      VPUSH10()
+#define VPUSH10()      pushargi 10
+#define PUSHD1()       pushargi_d 1
+#define PUSHD2()       PUSHD1()        pushargi_d 2
+#define PUSHD3()       PUSHD2()        pushargi_d 3
+#define PUSHD4()       PUSHD3()        pushargi_d 4
+#define PUSHD5()       PUSHD4()        pushargi_d 5
+#define PUSHD6()       PUSHD5()        pushargi_d 6
+#define PUSHD7()       PUSHD6()        pushargi_d 7
+#define PUSHD8()       PUSHD7()        pushargi_d 8
+#define PUSHD9()       PUSHD8()        pushargi_d 9
+#define VPUSHD1()      pushargi_d 1    VPUSHD2()
+#define VPUSHD2()      pushargi_d 2    VPUSHD3()
+#define VPUSHD3()      pushargi_d 3    VPUSHD4()
+#define VPUSHD4()      pushargi_d 4    VPUSHD5()
+#define VPUSHD5()      pushargi_d 5    VPUSHD6()
+#define VPUSHD6()      pushargi_d 6    VPUSHD7()
+#define VPUSHD7()      pushargi_d 7    VPUSHD8()
+#define VPUSHD8()      pushargi_d 8    VPUSHD9()
+#define VPUSHD9()      pushargi_d 9    VPUSHD10()
+#define VPUSHD10()     pushargi_d 10
+#define IDPUSH1()      pushargi 1
+#define IDPUSH2()      IDPUSH1()       pushargi_d 2
+#define IDPUSH3()      IDPUSH2()       pushargi 3
+#define IDPUSH4()      IDPUSH3()       pushargi_d 4
+#define IDPUSH5()      IDPUSH4()       pushargi 5
+#define IDPUSH6()      IDPUSH5()       pushargi_d 6
+#define IDPUSH7()      IDPUSH6()       pushargi 7
+#define IDPUSH8()      IDPUSH7()       pushargi_d 8
+#define IDPUSH9()      IDPUSH8()       pushargi 9
+#define IDVPUSH1()     pushargi 1      IDVPUSH2()
+#define IDVPUSH2()     pushargi_d 2    IDVPUSH3()
+#define IDVPUSH3()     pushargi 3      IDVPUSH4()
+#define IDVPUSH4()     pushargi_d 4    IDVPUSH5()
+#define IDVPUSH5()     pushargi 5      IDVPUSH6()
+#define IDVPUSH6()     pushargi_d 6    IDVPUSH7()
+#define IDVPUSH7()     pushargi 7      IDVPUSH8()
+#define IDVPUSH8()     pushargi_d 8    IDVPUSH9()
+#define IDVPUSH9()     pushargi 9      IDVPUSH10()
+#define IDVPUSH10()    pushargi_d 10
+#define DIPUSH1()      pushargi_d 1
+#define DIPUSH2()      DIPUSH1()       pushargi 2
+#define DIPUSH3()      DIPUSH2()       pushargi_d 3
+#define DIPUSH4()      DIPUSH3()       pushargi 4
+#define DIPUSH5()      DIPUSH4()       pushargi_d 5
+#define DIPUSH6()      DIPUSH5()       pushargi 6
+#define DIPUSH7()      DIPUSH6()       pushargi_d 7
+#define DIPUSH8()      DIPUSH7()       pushargi 8
+#define DIPUSH9()      DIPUSH8()       pushargi_d 9
+#define DIVPUSH1()     pushargi_d 1    DIVPUSH2()
+#define DIVPUSH2()     pushargi 2      DIVPUSH3()
+#define DIVPUSH3()     pushargi_d 3    DIVPUSH4()
+#define DIVPUSH4()     pushargi 4      DIVPUSH5()
+#define DIVPUSH5()     pushargi_d 5    DIVPUSH6()
+#define DIVPUSH6()     pushargi 6      DIVPUSH7()
+#define DIVPUSH7()     pushargi_d 7    DIVPUSH8()
+#define DIVPUSH8()     pushargi 8      DIVPUSH9()
+#define DIVPUSH9()     pushargi_d 9    DIVPUSH10()
+#define DIVPUSH10()    pushargi 10
+
+main:
+       prolog
+       prepare
+               ellipsis
+               VPUSH1()
+       finishi _iiiiiiiiii
+       prepare
+               PUSH1()
+               ellipsis
+               VPUSH2()
+       finishi i_iiiiiiiii
+       prepare
+               PUSH2()
+               ellipsis
+               VPUSH3()
+       finishi ii_iiiiiiii
+       prepare
+               PUSH3()
+               ellipsis
+               VPUSH4()
+       finishi iii_iiiiiii
+       prepare
+               PUSH4()
+               ellipsis
+               VPUSH5()
+       finishi iiii_iiiiii
+       prepare
+               PUSH5()
+               ellipsis
+               VPUSH6()
+       finishi iiiii_iiiii
+       prepare
+               PUSH6()
+               ellipsis
+               VPUSH7()
+       finishi iiiiii_iiii
+       prepare
+               PUSH7()
+               ellipsis
+               VPUSH8()
+       finishi iiiiiii_iii
+       prepare
+               PUSH8()
+               ellipsis
+               VPUSH9()
+       finishi iiiiiiii_ii
+       prepare
+               PUSH9()
+               ellipsis
+               VPUSH10()
+       finishi iiiiiiiii_i
+       prepare
+               ellipsis
+               VPUSHD1()
+       finishi _dddddddddd
+       prepare
+               PUSHD1()
+               ellipsis
+               VPUSHD2()
+       finishi d_ddddddddd
+       prepare
+               PUSHD2()
+               ellipsis
+               VPUSHD3()
+       finishi dd_dddddddd
+       prepare
+               PUSHD3()
+               ellipsis
+               VPUSHD4()
+       finishi ddd_ddddddd
+       prepare
+               PUSHD4()
+               ellipsis
+               VPUSHD5()
+       finishi dddd_dddddd
+       prepare
+               PUSHD5()
+               ellipsis
+               VPUSHD6()
+       finishi ddddd_ddddd
+       prepare
+               PUSHD6()
+               ellipsis
+               VPUSHD7()
+       finishi dddddd_dddd
+       prepare
+               PUSHD7()
+               ellipsis
+               VPUSHD8()
+       finishi ddddddd_ddd
+       prepare
+               PUSHD8()
+               ellipsis
+               VPUSHD9()
+       finishi dddddddd_dd
+       prepare
+               PUSHD9()
+               ellipsis
+               VPUSHD10()
+       finishi ddddddddd_d
+       prepare
+               ellipsis
+               IDVPUSH1()
+       finishi _ididididid
+       prepare
+               IDPUSH1()
+               ellipsis
+               IDVPUSH2()
+       finishi i_didididid
+       prepare
+               IDPUSH2()
+               ellipsis
+               IDVPUSH3()
+       finishi id_idididid
+       prepare
+               IDPUSH3()
+               ellipsis
+               IDVPUSH4()
+       finishi idi_dididid
+       prepare
+               IDPUSH4()
+               ellipsis
+               IDVPUSH5()
+       finishi idid_ididid
+       prepare
+               IDPUSH5()
+               ellipsis
+               IDVPUSH6()
+       finishi ididi_didid
+       prepare
+               IDPUSH6()
+               ellipsis
+               IDVPUSH7()
+       finishi ididid_idid
+       prepare
+               IDPUSH7()
+               ellipsis
+               IDVPUSH8()
+       finishi idididi_did
+       prepare
+               IDPUSH8()
+               ellipsis
+               IDVPUSH9()
+       finishi idididid_id
+       prepare
+               IDPUSH9()
+               ellipsis
+               IDVPUSH10()
+       finishi ididididi_d
+       prepare
+               ellipsis
+               DIVPUSH1()
+       finishi _dididididi
+       prepare
+               DIPUSH1()
+               ellipsis
+               DIVPUSH2()
+       finishi d_ididididi
+       prepare
+               DIPUSH2()
+               ellipsis
+               DIVPUSH3()
+       finishi di_didididi
+       prepare
+               DIPUSH3()
+               ellipsis
+               DIVPUSH4()
+       finishi did_idididi
+       prepare
+               DIPUSH4()
+               ellipsis
+               DIVPUSH5()
+       finishi didi_dididi
+       prepare
+               DIPUSH5()
+               ellipsis
+               DIVPUSH6()
+       finishi didid_ididi
+       prepare
+               DIPUSH6()
+               ellipsis
+               DIVPUSH7()
+       finishi dididi_didi
+       prepare
+               DIPUSH7()
+               ellipsis
+               DIVPUSH8()
+       finishi dididid_idi
+       prepare
+               DIPUSH8()
+               ellipsis
+               DIVPUSH9()
+       finishi didididi_di
+       prepare
+               DIPUSH9()
+               ellipsis
+               DIVPUSH10()
+       finishi didididid_i
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/check/varargs.ok b/deps/lightning/check/varargs.ok
new file mode 100644 (file)
index 0000000..e103283
--- /dev/null
@@ -0,0 +1,4 @@
+0 1 2 3 4 5 6 7 8 9
+0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0
+0 0.0 1 1.0 2 2.0 3 3.0 4 4.0 5 5.0 6 6.0 7 7.0 8 8.0 9 9.0
+0.0 0 1.0 1 2.0 2 3.0 3 4.0 4 5.0 5 6.0 6 7.0 7 8.0 8 9.0 9
diff --git a/deps/lightning/check/varargs.tst b/deps/lightning/check/varargs.tst
new file mode 100644 (file)
index 0000000..11131d9
--- /dev/null
@@ -0,0 +1,398 @@
+.data  1024
+ifmt:
+.c     "%d %d %d %d %d %d %d %d %d %d\n"
+.align 4
+ichk:
+.i     9 8 7 6 5 4 3 2 1 0
+dfmt:
+.c     "%.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n"
+lfmt:
+.c     "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf\n"
+.align 8
+dchk:
+.d     9.0 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0
+idfmt:
+.c     "%d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f\n"
+ldfmt:
+.c     "%d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf\n"
+difmt:
+.c     "%.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d %.1f %d\n"
+dlfmt:
+.c     "%lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d %lf %d\n"
+.align 8
+buff:
+.size  256
+
+.code
+       prolog
+
+       /*
+               sprintf(buff, "%d %d %d %d %d %d %d %d %d %d\n",
+                       0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+        */
+       prepare
+               pushargi buff
+               pushargi ifmt
+               ellipsis
+               pushargi 0
+               pushargi 1
+               pushargi 2
+               pushargi 3
+               pushargi 4
+               pushargi 5
+               pushargi 6
+               pushargi 7
+               pushargi 8
+               pushargi 9
+       finishi @sprintf
+
+       /*
+               sscanf(buff, "%d %d %d %d %d %d %d %d %d %d\n",
+                      ichk+0, ichk+1, ichk+2, ichk+3, ichk+4,
+                      ichk+5, ichk+6, ichk+7, ichk+8, ichk+9);
+        */
+       movi %v0 ichk
+       prepare
+               pushargi buff
+               pushargi ifmt
+               ellipsis
+               pushargr %v0            /* 0 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 1 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 2 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 3 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 4 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 5 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 6 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 7 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 8 */
+               addi %v0 %v0 4
+               pushargr %v0            /* 9 */
+       finishi @sscanf
+
+       movi %v0 ichk                   /* %v0 walks the ichk array */
+       movi %r0 0                      /* %r0 = expected value, starts at 0 */
+loopi:
+       ldr_i %r1 %v0                   /* %r1 = int stored at %v0 */
+       beqr nexti %r0 %r1              /* stored value matches expected: continue */
+       calli @abort                    /* mismatch: abort the test */
+nexti:
+       addi %r0 %r0 1                  /* next expected value */
+       bgei outi %r0 10                /* all ten entries checked */
+       addi %v0 %v0 4                  /* advance to next entry (4 byte step) */
+       jmpi loopi
+outi:
+
+       prepare
+               pushargi buff
+               ellipsis
+       finishi @printf
+
+       /*
+               sprintf(buff,
+                       "%.1f %.1f %.1f %.1f %.1f "
+                       "%.1f %.1f %.1f %.1f %.1f\n",
+                       0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+        */
+       prepare
+               pushargi buff
+               pushargi dfmt
+               ellipsis
+               pushargi_d 0.0
+               pushargi_d 1.0
+               pushargi_d 2.0
+               pushargi_d 3.0
+               pushargi_d 4.0
+               pushargi_d 5.0
+               pushargi_d 6.0
+               pushargi_d 7.0
+               pushargi_d 8.0
+               pushargi_d 9.0
+       finishi @sprintf
+
+       /*
+               sscanf(buff, "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
+                      dchk+0, dchk+1, dchk+2, dchk+3, dchk+4,
+                      dchk+5, dchk+6, dchk+7, dchk+8, dchk+9);
+        */
+       movi %v0 dchk
+       prepare
+               pushargi buff
+               pushargi lfmt
+               ellipsis
+               pushargr %v0            /* 0 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 1 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 2 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 3 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 4 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 5 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 6 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 7 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 8 */
+               addi %v0 %v0 8
+               pushargr %v0            /* 9 */
+       finishi @sscanf
+
+       movi %v0 dchk                   /* %v0 walks the dchk array */
+       movi_d %f0 0.0                  /* %f0 = expected value, starts at 0.0 */
+loopd:
+       ldr_d %f1 %v0                   /* %f1 = double stored at %v0 */
+       beqr_d nextd %f0 %f1            /* stored value matches expected: continue */
+       calli @abort                    /* mismatch: abort the test */
+nextd:
+       addi_d %f0 %f0 1.0              /* next expected value */
+       bgei_d outd %f0 10.0            /* all ten entries checked */
+       addi %v0 %v0 8                  /* advance to next entry (8 byte step) */
+       jmpi loopd
+outd:
+
+       prepare
+               pushargi buff
+               ellipsis
+       finishi @printf
+
+       /*
+           sprintf(buff,
+                  "%d %.1f %d %.1f %d %.1f %d %.1f %d %.1f "
+                  "%d %.1f %d %.1f %d %.1f %d %.1f %d %.1f\n",
+                  0, 0.0, 1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0,
+                  5, 5.0, 6, 6.0, 7, 7.0, 8, 8.0, 9, 9.0);
+        */
+       prepare
+               pushargi buff
+               pushargi idfmt
+               ellipsis
+               pushargi 0
+               pushargi_d 0.0
+               pushargi 1
+               pushargi_d 1.0
+               pushargi 2
+               pushargi_d 2.0
+               pushargi 3
+               pushargi_d 3.0
+               pushargi 4
+               pushargi_d 4.0
+               pushargi 5
+               pushargi_d 5.0
+               pushargi 6
+               pushargi_d 6.0
+               pushargi 7
+               pushargi_d 7.0
+               pushargi 8
+               pushargi_d 8.0
+               pushargi 9
+               pushargi_d 9.0
+       finishi @sprintf
+
+       /*
+               sscanf(buff,
+                     "%d %lf %d %lf %d %lf %d %lf %d %lf "
+                     "%d %lf %d %lf %d %lf %d %lf %d %lf\n",
+                     ichk+0, dchk+0, ichk+1, dchk+1, ichk+2,
+                     dchk+2, ichk+3, dchk+3, ichk+4, dchk+4,
+                     ichk+5, dchk+5, ichk+6, dchk+6, ichk+7,
+                     dchk+7, ichk+8, dchk+8, ichk+9, dchk+9);
+        */
+       movi %v0 ichk
+       movi %v1 dchk
+       prepare
+               pushargi buff
+               pushargi ldfmt
+               ellipsis
+               pushargr %v0            /* 0 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 1 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 2 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 3 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 4 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 5 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 6 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 7 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 8 */
+               addi %v0 %v0 4
+               pushargr %v1
+               addi %v1 %v1 8
+               pushargr %v0            /* 9 */
+               pushargr %v1
+       finishi @sscanf
+
+       movi %v0 ichk                   /* %v0 walks the ichk array */
+       movi %v1 dchk                   /* %v1 walks the dchk array */
+       movi %r0 0                      /* %r0 = expected int, starts at 0 */
+       movi_d %f0 0.0                  /* %f0 = expected double, starts at 0.0 */
+loopid:
+       ldr_i %r1 %v0                   /* %r1 = int stored at %v0 */
+       beqr checkd %r0 %r1             /* int matches: go check the double */
+       calli @abort                    /* int mismatch: abort the test */
+checkd:
+       ldr_d %f1 %v1                   /* %f1 = double stored at %v1 */
+       beqr_d nextid %f0 %f1           /* double matches: continue */
+       calli @abort                    /* double mismatch: abort the test */
+nextid:
+       addi %r0 %r0 1                  /* next expected int */
+       addi_d %f0 %f0 1.0              /* next expected double */
+       bgei outid %r0 10               /* all ten pairs checked */
+       addi %v0 %v0 4                  /* next ichk entry (4 byte step) */
+       addi %v1 %v1 8                  /* next dchk entry (8 byte step) */
+       jmpi loopid
+outid:
+
+       prepare
+               pushargi buff
+               ellipsis
+       finishi @printf
+
+       /*
+           sprintf(buff,
+                  "%.1f %d %.1f %d %.1f %d %.1f %d %.1f %d "
+                  "%.1f %d %.1f %d %.1f %d %.1f %d %.1f %d\n",
+                  0.0, 0, 1.0, 1, 2.0, 2, 3.0, 3, 4.0, 4,
+                  5.0, 5, 6.0, 6, 7.0, 7, 8.0, 8, 9.0, 9);
+        */
+       prepare
+               pushargi buff
+               pushargi difmt
+               ellipsis
+               pushargi_d 0.0
+               pushargi 0
+               pushargi_d 1.0
+               pushargi 1
+               pushargi_d 2.0
+               pushargi 2
+               pushargi_d 3.0
+               pushargi 3
+               pushargi_d 4.0
+               pushargi 4
+               pushargi_d 5.0
+               pushargi 5
+               pushargi_d 6.0
+               pushargi 6
+               pushargi_d 7.0
+               pushargi 7
+               pushargi_d 8.0
+               pushargi 8
+               pushargi_d 9.0
+               pushargi 9
+       finishi @sprintf
+
+       /*
+               sscanf(buff,
+                     "%lf %d %lf %d %lf %d %lf %d %lf %d "
+                     "%lf %d %lf %d %lf %d %lf %d %lf %d\n",
+                     dchk+0, ichk+0, dchk+1, ichk+1, dchk+2,
+                     ichk+2, dchk+3, ichk+3, dchk+4, ichk+4,
+                     dchk+5, ichk+5, dchk+6, ichk+6, dchk+7,
+                     ichk+7, dchk+8, ichk+8, dchk+9, ichk+9);
+        */
+       movi %v0 dchk
+       movi %v1 ichk
+       prepare
+               pushargi buff
+               pushargi dlfmt
+               ellipsis
+               pushargr %v0            /* 0 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 1 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 2 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 3 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 4 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 5 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 6 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 7 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 8 */
+               addi %v0 %v0 8
+               pushargr %v1
+               addi %v1 %v1 4
+               pushargr %v0            /* 9 */
+               pushargr %v1
+       finishi @sscanf
+
+       movi %v0 ichk                   /* %v0 walks the ichk array */
+       movi %v1 dchk                   /* %v1 walks the dchk array */
+       movi %r0 0                      /* %r0 = expected int, starts at 0 */
+       movi_d %f0 0.0                  /* %f0 = expected double, starts at 0.0 */
+loopdi:
+       ldr_i %r1 %v0                   /* %r1 = int stored at %v0 */
+       beqr check_d %r0 %r1            /* int matches: go check the double */
+       calli @abort                    /* int mismatch: abort the test */
+check_d:
+       ldr_d %f1 %v1                   /* %f1 = double stored at %v1 */
+       beqr_d nextdi %f0 %f1           /* double matches: continue */
+       calli @abort                    /* double mismatch: abort the test */
+nextdi:
+       addi %r0 %r0 1                  /* next expected int */
+       addi_d %f0 %f0 1.0              /* next expected double */
+       bgei outdi %r0 10               /* all ten pairs checked */
+       addi %v0 %v0 4                  /* next ichk entry (4 byte step) */
+       addi %v1 %v1 8                  /* next dchk entry (8 byte step) */
+       jmpi loopdi
+outdi:
+
+       prepare
+               pushargi buff
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac
new file mode 100644 (file)
index 0000000..9261255
--- /dev/null
@@ -0,0 +1,299 @@
+dnl
+dnl Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+dnl
+dnl This file is part of GNU lightning.
+dnl
+dnl GNU lightning is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3, or (at your option)
+dnl any later version.
+dnl 
+dnl GNU lightning is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl License for more details.
+dnl
+
+AC_PREREQ(2.57)
+AC_INIT([GNU lightning], 2.1.3, pcpa@gnu.org, lightning)
+AC_CANONICAL_TARGET
+AC_CONFIG_SRCDIR([Makefile.am])
+AM_INIT_AUTOMAKE([dist-bzip2])
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+AC_CONFIG_MACRO_DIR(m4)
+
+AC_CONFIG_HEADERS(config.h)
+
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_LIBTOOL
+
+case "$target_cpu" in
+    ia64)
+       case "$host_os" in
+           # Only supported mode
+           *hpux*)
+               LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -mlp64"             ;;
+           *)                                  ;;
+       esac                                    ;;
+    *mips*)
+       case "$host_os" in
+           # (Hack) Flags to pass configure with gcc 3.x
+           # Should not set LIGHTNING_CFLAGS
+           *irix*)
+               CFLAGS="$CFLAGS -D__c99 -Drestrict=";;
+           *)                                  ;;
+       esac                                    ;;
+    alpha*)
+       case "$host_os" in
+           osf*)
+               # Get proper varargs and mmap prototypes and definitions
+               CFLAGS="$CFLAGS -D_ANSI_C_SOURCE -D_XOPEN_SOURCE_EXTENDED -D_OSF_SOURCE -D_POSIX_C_SOURCE=199309L"
+               # Want to generate NaN with 0.0/0.0 and Inf with 1.0/0.0
+               if test x$GCC = "xyes"; then
+                   CFLAGS="$CFLAGS -mieee"
+               else
+                   CFLAGS="$CFLAGS -ieee_with_no_inexact"
+               fi                              ;;
+           *)                                  ;;
+       esac                                    ;;
+    *)                                         ;;
+esac
+
+AC_CHECK_FUNCS(mremap ffsl getopt_long_only isnan isinf,,)
+
+AC_CHECK_HEADERS([getopt.h stdint.h],,,)
+
+AC_ARG_ENABLE(disassembler,
+             AS_HELP_STRING([--enable-disassembler],
+                            [Enable jit disassembler using binutils]),
+             [DISASSEMBLER=$enableval], [DISASSEMBLER=auto])
+if test "x$DISASSEMBLER" != "xno"; then
+    # FIXME need to check for libiberty first or will fail to link
+    AC_CHECK_LIB(iberty, htab_try_create, ,
+                [HAVE_IBERTY="no"])
+    AC_CHECK_LIB(bfd, bfd_init, ,
+                [HAVE_BFD="no"])
+    AC_CHECK_LIB(z, compressBound, ,
+                [HAVE_Z="no"])
+    AC_CHECK_LIB(opcodes, init_disassemble_info, ,
+                [HAVE_OPCODES="no"])
+    if test "x$HAVE_IBERTY"  = "xno" -o \
+           "x$HAVE_BFD"     = "xno" -o \
+           "x$HAVE_Z"       = "xno" -o \
+           "x$HAVE_OPCODES" = "xno"; then
+       if test "x$DISASSEMBLER" != "xauto"; then
+           AC_MSG_ERROR([binutils not found, see http://www.gnu.org/software/binutils/])
+       else
+           AC_MSG_WARN([binutils not found, see http://www.gnu.org/software/binutils/])
+           DISASSEMBLER="no"
+       fi
+    fi
+fi
+AM_CONDITIONAL(with_disassembler, [test "x$DISASSEMBLER" != "xno"])
+if test "x$DISASSEMBLER" != "xno"; then
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDISASSEMBLER=1"
+    save_CFLAGS=$CFLAGS
+    CFLAGS="$CFLAGS -I$PWD/include -D_GNU_SOURCE"
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+       #include <lightning.h>
+       #include <lightning/jit_private.h>
+       #include <dis-asm.h>
+       int main(int argc, char *argv[])
+       {
+               disassembler_ftype       print;
+               bfd                     *abfd;
+               print = disassembler(abfd);
+               return 0;
+       }
+    )], [ac_cv_test_new_disassembler=no],,)
+    CFLAGS="$save_CFLAGS"
+    if test "x$ac_cv_test_new_disassembler" != "xno"; then
+       LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_29=1"
+    fi
+fi
+
+AC_ARG_ENABLE(devel-disassembler,
+             AS_HELP_STRING([--enable-devel-disassembler],
+                            [Enable extra disassembly options]),
+             [DEVEL_DISASSEMBLER=$enableval], [DEVEL_DISASSEMBLER=no])
+if test "x$DEVEL_DISASSEMBLER" != "xno"; then
+    if test "x$DISASSEMBLER" = "xno"; then
+       AC_MSG_ERROR(devel-disassembler needs disassembler enabled)
+    fi
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1"
+fi
+
+AC_ARG_ENABLE(assertions,
+             AS_HELP_STRING([--enable-assertions],
+                            [Enable runtime code generation assertions]),
+             [DEBUG=$enableval], [DEBUG=auto])
+if test "x$DEBUG" = xyes; then
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
+else
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
+    DEBUG=no
+fi
+
+# This option is only useful during development.
+AC_ARG_ENABLE(devel-get-jit-size,
+             AS_HELP_STRING([--enable-devel-get-jit-size],
+                            [Devel mode to regenerate jit size information]),
+             [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
+AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+
+case "$host_os" in
+    *bsd*|osf*)                SHLIB=""        ;;
+    *hpux*)            SHLIB="-ldld"   ;;
+    *)                 SHLIB="-ldl"    ;;
+esac
+AC_SUBST(SHLIB)
+
+# Map $target_cpu onto the backend name stored in $cpu.  The first matching
+# pattern wins, and $cpu stays empty when no pattern matches.
+cpu=
+case "$target_cpu" in
+    i?86|x86_64|amd64) cpu=x86         ;;
+    *arm*)             cpu=arm         ;;
+    *mips*)            cpu=mips        ;;
+    *powerpc*)         cpu=ppc         ;;
+    *sparc*)           cpu=sparc       ;;
+    ia64)              cpu=ia64        ;;
+    hppa*)             cpu=hppa        ;;
+    aarch64)           cpu=aarch64     ;;
+    s390*)             cpu=s390        ;;
+    alpha*)            cpu=alpha       ;;
+    riscv*)            cpu=riscv       ;;
+    *)                                 ;;
+esac
+# One Automake conditional per backend; at most one of them is true.
+AM_CONDITIONAL(cpu_arm,     [test cpu-$cpu = cpu-arm])
+AM_CONDITIONAL(cpu_mips,    [test cpu-$cpu = cpu-mips])
+AM_CONDITIONAL(cpu_ppc,     [test cpu-$cpu = cpu-ppc])
+AM_CONDITIONAL(cpu_sparc,   [test cpu-$cpu = cpu-sparc])
+AM_CONDITIONAL(cpu_x86,     [test cpu-$cpu = cpu-x86])
+AM_CONDITIONAL(cpu_ia64,    [test cpu-$cpu = cpu-ia64])
+AM_CONDITIONAL(cpu_hppa,    [test cpu-$cpu = cpu-hppa])
+AM_CONDITIONAL(cpu_aarch64, [test cpu-$cpu = cpu-aarch64])
+AM_CONDITIONAL(cpu_s390,    [test cpu-$cpu = cpu-s390])
+AM_CONDITIONAL(cpu_alpha,   [test cpu-$cpu = cpu-alpha])
+AM_CONDITIONAL(cpu_riscv,   [test cpu-$cpu = cpu-riscv])
+
+# Test x87 if both, x87 and sse2 available
+ac_cv_test_x86_x87=
+# Test arm instruction set if thumb instruction set available
+ac_cv_test_arm_arm=
+# Test software float if vfp available and not using hard float abi
+ac_cv_test_arm_swf=
+
+save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -I$PWD/include -D_GNU_SOURCE"
+if test x$cpu = x; then
+    AC_MSG_ERROR([cpu $target_cpu not supported])
+elif test $cpu = x86; then
+    AC_RUN_IFELSE([AC_LANG_SOURCE([[
+    #include <lightning.h>
+    /* Exit status 0 selects the extra x87 tests.  That only happens on a
+       build that is not 64-bit, on a processor that accepts cpuid and
+       reports the feature bit checked at the end.  When cross compiling
+       the result is forced to no instead of running this program. */
+    int main(void) {
+       int                 ac, flags;
+       unsigned int        eax, ebx, ecx, edx;
+       if (__WORDSIZE == 64)
+           return 1;
+       /* Save EFLAGS in flags, try to flip bits 18 and 21, read the register
+          back and restore the saved value.  Afterwards ac holds the bits
+          that really changed. */
+       __asm__ volatile ("pushfl;\n\t"
+                         "popl %0;\n\t"
+                         "movl \$0x240000, %1;\n\t"
+                         "xorl %0, %1;\n\t"
+                         "pushl %1;\n\t"
+                         "popfl;\n\t"
+                         "pushfl;\n\t"
+                         "popl %1;\n\t"
+                         "xorl %0, %1;\n\t"
+                         "pushl %0;\n\t"
+                         "popfl"
+                         : "=r" (flags), "=r" (ac));
+       /* Bit 21 could not be toggled: give up before executing cpuid. */
+       if ((ac & (1 << 21)) == 0)
+           return 1;
+       /* cpuid with eax set to 1.  ebx is exchanged with an output operand
+          before and after the instruction, so ebx itself ends up unchanged. */
+       __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+                         : "=a" (eax), "=r" (ebx),
+                         "=c" (ecx), "=d" (edx)
+                         : "0" (1));
+       /* Bit 26 of edx; presumably the SSE2 feature flag, as the configure
+          comment about x87 and sse2 suggests.  Confirm against the CPUID
+          documentation. */
+       return (edx & 1 << 26) ? 0 : 1;
+    }
+    ]])],[ac_cv_test_x86_x87=yes],[],[ac_cv_test_x86_x87=no])
+elif test $cpu = arm; then
+    AC_RUN_IFELSE([AC_LANG_SOURCE([[
+    #include <stdio.h>
+    #include <string.h>
+    /* Exit status 0 selects the extra arm instruction set tests: on Linux
+       when the Features line of /proc/cpuinfo mentions thumb, elsewhere
+       when the compiler defines __thumb2__.  string.h is required because
+       strncmp and strstr are used; compilers that reject implicit function
+       declarations would otherwise fail to build this probe and the result
+       would never be yes. */
+    int main(void) {
+    #if defined(__linux__)
+       FILE    *fp;
+       char     buf[128];
+       if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
+           return 1;
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "Features\t:", 10) == 0 &&
+               strstr(buf + 10, "thumb")) {
+               fclose(fp);
+               return 0;
+           }
+       }
+       fclose(fp);
+    #elif defined(__thumb2__)
+       return 0;
+    #endif
+       return 1;
+    }
+    ]])],[ac_cv_test_arm_arm=yes],[],[ac_cv_test_arm_arm=no])
+    AC_RUN_IFELSE([AC_LANG_SOURCE([[
+    #include <stdio.h>
+    #include <string.h>
+    /* Exit status 0 selects the extra software float tests: only on Linux,
+       only when __ARM_PCS_VFP is not defined, and only when the Features
+       line of /proc/cpuinfo mentions vfp.  string.h is required because
+       strncmp and strstr are used; compilers that reject implicit function
+       declarations would otherwise fail to build this probe and the result
+       would never be yes. */
+    int main(void) {
+    #if defined(__linux__)
+       FILE    *fp;
+       char     buf[128];
+    #  if !defined(__ARM_PCS_VFP)
+       if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
+           return 1;
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "Features\t:", 10) == 0 &&
+               strstr(buf + 10, "vfp")) {
+               fclose(fp);
+               return 0;
+           }
+       }
+       fclose(fp);
+    #  endif
+    #endif
+       return 1;
+    }
+    ]])],[ac_cv_test_arm_swf=yes],[],[ac_cv_test_arm_swf=no])
+elif test $cpu = ppc; then
+    if test "x$DISASSEMBLER" != "xno"; then
+       save_LIBS="$LIBS"
+       LIBS="$LIBS $SHLIB"
+       AC_CHECK_FUNCS(disassemble_init_for_target disassemble_init_powerpc)
+       LIBS="$save_LIBS"
+    fi
+fi
+CFLAGS=$save_CFLAGS
+
+AM_CONDITIONAL(test_x86_x87, [test x$ac_cv_test_x86_x87 = xyes])
+AM_CONDITIONAL(test_arm_arm, [test x$ac_cv_test_arm_arm = xyes])
+AM_CONDITIONAL(test_arm_swf, [test x$ac_cv_test_arm_swf = xyes])
+
+# Automake conditional test_nodata: true for the mips, ppc, sparc, x86, ia64,
+# hppa, s390 and alpha backends.  Separate test commands joined with || are
+# used because the -o operator of test is not portable across shells.
+AM_CONDITIONAL(test_nodata,
+    [test cpu-$cpu = cpu-mips  || test cpu-$cpu = cpu-ppc  ||
+     test cpu-$cpu = cpu-sparc || test cpu-$cpu = cpu-x86  ||
+     test cpu-$cpu = cpu-ia64  || test cpu-$cpu = cpu-hppa ||
+     test cpu-$cpu = cpu-s390  || test cpu-$cpu = cpu-alpha])
+
+# On the arm backend sqrtf from libm is mandatory: configuration aborts when
+# it is missing.  The third argument of AC_CHECK_LIB is left empty, so the
+# macro's default action on success applies.
+if test $cpu = arm; then
+     AC_CHECK_LIB(m, sqrtf, ,
+        [AC_MSG_ERROR([sqrtf required but not available])])
+fi
+AC_SUBST(cpu)
+
+AC_SUBST([LIGHTNING_CFLAGS])
+
+# Substitute the stdint.h include line only when the header check succeeded.
+# The operand is quoted so that an unset cache variable yields a false test
+# rather than a malformed test command.
+if test "x$ac_cv_header_stdint_h" = xyes; then
+    AC_SUBST(MAYBE_INCLUDE_STDINT_H, ["#include <stdint.h>"])
+fi
+
+# Register the files to instantiate, then generate and run config.status.
+# AC_OUTPUT with arguments is obsolete; AC_CONFIG_FILES followed by a bare
+# AC_OUTPUT is the documented equivalent.
+AC_CONFIG_FILES([Makefile
+                 lightning.pc
+                 doc/Makefile
+                 include/Makefile
+                 include/lightning/Makefile
+                 include/lightning.h
+                 lib/Makefile
+                 check/Makefile])
+AC_OUTPUT
diff --git a/deps/lightning/doc/.cvsignore b/deps/lightning/doc/.cvsignore
new file mode 100644 (file)
index 0000000..01e2da8
--- /dev/null
@@ -0,0 +1,3 @@
+*.info*
+stamp-*
+version.texi
diff --git a/deps/lightning/doc/.gitignore b/deps/lightning/doc/.gitignore
new file mode 100644 (file)
index 0000000..f62c13f
--- /dev/null
@@ -0,0 +1,2 @@
+*.info*
+stamp-*
diff --git a/deps/lightning/doc/Makefile.am b/deps/lightning/doc/Makefile.am
new file mode 100644 (file)
index 0000000..20d4456
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+# Compile the programs below against the headers in the source tree.
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+
+# Texinfo manual, with the extra files it includes listed in
+# lightning_TEXINFOS.
+info_TEXINFOS = lightning.texi
+MOSTLYCLEANFILES = lightning.tmp
+
+lightning_TEXINFOS = body.texi version.texi
+
+# The noinst_ prefix means these programs are built but not installed.
+noinst_PROGRAMS = incr printf rpn rfib ifib fact
+
+# Build the library on demand from this directory.  The commands are chained
+# with && so that $(MAKE) is never started in the current directory when the
+# cd fails.
+$(top_builddir)/lib/liblightning.la:
+       cd $(top_builddir)/lib && $(MAKE) $(AM_MAKEFLAGS) liblightning.la
+
+# Every program in noinst_PROGRAMS links against the same libraries, so the
+# directory-wide LDADD default stands in for six identical per-program
+# settings.
+LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+
+# One C source file per program.
+incr_SOURCES = incr.c
+printf_SOURCES = printf.c
+rpn_SOURCES = rpn.c
+rfib_SOURCES = rfib.c
+ifib_SOURCES = ifib.c
+fact_SOURCES = fact.c
diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi
new file mode 100644 (file)
index 0000000..4aef7a3
--- /dev/null
@@ -0,0 +1,1680 @@
+@ifnottex
+@dircategory Software development
+@direntry
+* lightning: (lightning).       Library for dynamic code generation.
+@end direntry
+@end ifnottex
+
+@ifnottex
+@node Top
+@top @lightning{}
+
+@iftex
+@macro comma
+@verbatim{|,|}
+@end macro
+@end iftex
+
+@ifnottex
+@macro comma
+@verb{|,|}
+@end macro
+@end ifnottex
+
+This document describes @value{TOPIC} the @lightning{} library for
+dynamic code generation.
+
+@menu
+* Overview::                What GNU lightning is
+* Installation::            Configuring and installing GNU lightning
+* The instruction set::     The RISC instruction set used in GNU lightning
+* GNU lightning examples::  GNU lightning's examples
+* Reentrancy::              Re-entrant usage of GNU lightning
+* Customizations::          Advanced code generation customizations
+* Acknowledgements::        Acknowledgements for GNU lightning
+@end menu
+@end ifnottex
+
+@node Overview
+@chapter Introduction to @lightning{}
+
+@iftex
+This document describes @value{TOPIC} the @lightning{} library for
+dynamic code generation.
+@end iftex
+
+Dynamic code generation is the generation of machine code 
+at runtime. It is typically used to strip a layer of interpretation 
+by allowing compilation to occur at runtime.  One of the most
+well-known applications of dynamic code generation is perhaps that
+of interpreters that compile source code to an intermediate bytecode
+form, which is then recompiled to machine code at run-time: this
+approach effectively combines the portability of bytecode
+representations with the speed of machine code.  Another common
+application of dynamic code generation is in the field of hardware
+simulators and binary emulators, which can use the same techniques
+to translate simulated instructions to the instructions of the 
+underlying machine.
+
+Yet other applications come to mind: for example, windowing
+@dfn{bitblt} operations, matrix manipulations, and network packet
+filters.  Albeit very powerful and relatively well known within the
+compiler community, dynamic code generation techniques are rarely
+exploited to their full potential and, with the exception of the
+two applications described above, have remained curiosities because
+of their portability and functionality barriers: binary instructions
+are generated, so programs using dynamic code generation must be
+retargeted for each machine; in addition, coding a run-time code
+generator is a tedious and error-prone task more than a difficult one.
+
+@lightning{} provides a portable, fast and easily retargetable dynamic
+code generation system. 
+
+To be portable, @lightning{} abstracts over current architectures'
+quirks and unorthogonalities.  The interface that it exposes is that
+of a standardized RISC architecture loosely based on the SPARC and MIPS
+chips.  There are a few general-purpose registers (six, not including
+those used to receive and pass parameters between subroutines), and
+arithmetic operations involve three operands---either three registers
+or two registers and an arbitrarily sized immediate value.
+
+On one hand, this architecture is general enough that it is possible to
+generate pretty efficient code even on CISC architectures such as the
+Intel x86 or the Motorola 68k families.  On the other hand, it matches
+real architectures closely enough that, most of the time, the
+compiler's constant folding pass ends up generating code which
+assembles machine instructions without further tests.
+
+@node Installation
+@chapter Configuring and installing @lightning{}
+
+The first thing to do to use @lightning{} is to configure the
+program, picking the set of macros to be used on the host
+architecture; this configuration is automatically performed by
+the @file{configure} shell script; to run it, merely type:
+@example
+     ./configure
+@end example
+
+@lightning{} supports the @code{--enable-disassembler} option, which
+enables linking to GNU binutils and optionally printing a human-readable
+disassembly of the jit code. This option can be disabled by the
+@code{--disable-disassembler} option.
+
+Another option that @file{configure} accepts is
+@code{--enable-assertions}, which enables several consistency checks in
+the run-time assemblers.  These are not usually needed, so you can
+decide to simply forget about it; also remember that these consistency
+checks tend to slow down your code generator.
+
+After you've configured @lightning{}, run @file{make} as usual.
+
+@lightning{} has an extensive set of tests to validate that it is working
+correctly on the build host. To run them, type:
+@example
+    make check
+@end example
+
+The next important step is:
+@example
+    make install
+@end example
+
+This ends the process of installing @lightning{}.
+
+@node The instruction set
+@chapter @lightning{}'s instruction set
+
+@lightning{}'s instruction set was designed by deriving instructions
+that closely match those of most existing RISC architectures, or
+that can be easily synthesized if absent.  Each instruction is composed
+of:
+@itemize @bullet
+@item
+an operation, like @code{sub} or @code{mul}
+
+@item
+most times, a register/immediate flag (@code{r} or @code{i})
+
+@item
+an unsigned modifier (@code{u}), a type identifier or two, when applicable.
+@end itemize
+
+Examples of legal mnemonics are @code{addr} (integer add, with three
+register operands) and @code{muli} (integer multiply, with two
+register operands and an immediate operand).  Each instruction takes
+two or three operands; in most cases, one of them can be an immediate
+value instead of a register.
+
+Most @lightning{} integer operations are signed wordsize operations,
+with the exception of operations that convert types, or load or store
+values to/from memory. When applicable, the types and C types are as
+follows:
+
+@example
+     _c         @r{signed char}
+     _uc        @r{unsigned char}
+     _s         @r{short}
+     _us        @r{unsigned short}
+     _i         @r{int}
+     _ui        @r{unsigned int}
+     _l         @r{long}
+     _f         @r{float}
+     _d         @r{double}
+@end example
+
+Most integer operations do not need a type modifier, and when loading or
+storing values to memory there is an alias to the proper operation
+using wordsize operands, that is, if omitted, the type is @r{int} on
+32-bit architectures and @r{long} on 64-bit architectures.  Note
+that lightning also expects @code{sizeof(void*)} to match the wordsize.
+
+When the result of an unsigned operation differs from that of the equivalent
+signed operation, the @code{_u} modifier selects the unsigned variant.
+
+There are at least seven integer registers, of which six are
+general-purpose, while the last is used to contain the frame pointer
+(@code{FP}).  The frame pointer can be used to allocate and access local
+variables on the stack, using the @code{allocai} or @code{allocar}
+instruction.
+
+Of the general-purpose registers, at least three are guaranteed to be
+preserved across function calls (@code{V0}, @code{V1} and
+@code{V2}) and at least three are not (@code{R0}, @code{R1} and
+@code{R2}).  Six registers are not very much, but this
+restriction was forced by the need to target CISC architectures
+which, like the x86, are poor of registers; anyway, backends can
+specify the actual number of available registers with the calls
+@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
+(for callee-save registers).
+
+There are at least six floating-point registers, named @code{F0} to
+@code{F5}.  These are usually caller-save and are separate from the integer
+registers on the supported architectures; on Intel architectures,
+in 32 bit mode if SSE2 is not available or use of X87 is forced,
+the register stack is mapped to a flat register file.  As for the
+integer registers, the macro @code{JIT_F_NUM} yields the number of
+floating-point registers.
+
+The complete instruction set follows; as you can see, most non-memory
+operations only take integers (either signed or unsigned) as operands;
+this was done in order to reduce the instruction set, and because most
+architectures only provide word and long word operations on registers.
+There are instructions that allow operands to be extended to fit a larger
+data type, both in a signed and in an unsigned way.
+
+@table @b
+@item Binary ALU operations
+These accept three operands; the last one can be an immediate.
+@code{addx} operations must directly follow @code{addc}, and
+@code{subx} must follow @code{subc}; otherwise, results are undefined.
+Most, if not all, architectures do not support @r{float} or @r{double}
+immediate operands; lightning emulates those operations by moving the
+immediate to a temporary register and emitting the call with only
+register operands.
+@example
+addr         _f  _d  O1 = O2 + O3
+addi         _f  _d  O1 = O2 + O3
+addxr                O1 = O2 + (O3 + carry)
+addxi                O1 = O2 + (O3 + carry)
+addcr                O1 = O2 + O3, set carry
+addci                O1 = O2 + O3, set carry
+subr         _f  _d  O1 = O2 - O3
+subi         _f  _d  O1 = O2 - O3
+subxr                O1 = O2 - (O3 + carry)
+subxi                O1 = O2 - (O3 + carry)
+subcr                O1 = O2 - O3, set carry
+subci                O1 = O2 - O3, set carry
+rsbr         _f  _d  O1 = O3 - O2
+rsbi         _f  _d  O1 = O3 - O2
+mulr         _f  _d  O1 = O2 * O3
+muli         _f  _d  O1 = O2 * O3
+divr     _u  _f  _d  O1 = O2 / O3
+divi     _u  _f  _d  O1 = O2 / O3
+remr     _u          O1 = O2 % O3
+remi     _u          O1 = O2 % O3
+andr                 O1 = O2 & O3
+andi                 O1 = O2 & O3
+orr                  O1 = O2 | O3
+ori                  O1 = O2 | O3
+xorr                 O1 = O2 ^ O3
+xori                 O1 = O2 ^ O3
+lshr                 O1 = O2 << O3
+lshi                 O1 = O2 << O3
+rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+@end example
+
+@item Four operand binary ALU operations
+These accept two result registers, and two operands; the last one can
+be an immediate. The first two arguments cannot be the same register.
+
+@code{qmul} stores the low word of the result in @code{O1} and the
+high word in @code{O2}. For unsigned multiplication, @code{O2} zero
+means there was no overflow. For signed multiplication, no overflow
+check is based on sign, and can be detected if @code{O2} is zero or
+minus one.
+
+@code{qdiv} stores the quotient in @code{O1} and the remainder in
+@code{O2}. It can be used as quick way to check if a division is
+exact, in which case the remainder is zero.
+
+@example
+qmulr    _u       O1 O2 = O3 * O4
+qmuli    _u       O1 O2 = O3 * O4
+qdivr    _u       O1 O2 = O3 / O4
+qdivi    _u       O1 O2 = O3 / O4
+@end example
+
+@item Unary ALU operations
+These accept two operands, both of which must be registers.
+@example
+negr         _f  _d  O1 = -O2
+comr                 O1 = ~O2
+@end example
+
+These unary ALU operations are only defined for float operands.
+@example
+absr         _f  _d  O1 = fabs(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
+@end example
+
+These operations always require the @code{r} modifier: there are no unary
+operations with an immediate operand.
+
+@item Compare instructions
+These accept three operands; again, the last can be an immediate.
+The last two operands are compared, and the first operand, that must be
+an integer register, is set to either 0 or 1, according to whether the
+given condition was met or not.
+
+The conditions given below are for the standard behavior of C,
+where the ``unordered'' comparison result is mapped to false.
+
+@example
+ltr       _u  _f  _d  O1 =  (O2 <  O3)
+lti       _u  _f  _d  O1 =  (O2 <  O3)
+ler       _u  _f  _d  O1 =  (O2 <= O3)
+lei       _u  _f  _d  O1 =  (O2 <= O3)
+gtr       _u  _f  _d  O1 =  (O2 >  O3)
+gti       _u  _f  _d  O1 =  (O2 >  O3)
+ger       _u  _f  _d  O1 =  (O2 >= O3)
+gei       _u  _f  _d  O1 =  (O2 >= O3)
+eqr           _f  _d  O1 =  (O2 == O3)
+eqi           _f  _d  O1 =  (O2 == O3)
+ner           _f  _d  O1 =  (O2 != O3)
+nei           _f  _d  O1 =  (O2 != O3)
+unltr         _f  _d  O1 = !(O2 >= O3)
+unler         _f  _d  O1 = !(O2 >  O3)
+ungtr         _f  _d  O1 = !(O2 <= O3)
+unger         _f  _d  O1 = !(O2 <  O3)
+uneqr         _f  _d  O1 = !(O2 <  O3) && !(O2 >  O3)
+ltgtr         _f  _d  O1 = !(O2 >= O3) || !(O2 <= O3)
+ordr          _f  _d  O1 =  (O2 == O2) &&  (O3 == O3)
+unordr        _f  _d  O1 =  (O2 != O2) ||  (O3 != O3)
+@end example
+
+@item Transfer operations
+These accept two operands; for @code{ext} both of them must be
+registers, while @code{mov} accepts an immediate value as the second
+operand.
+
+Unlike @code{movr} and @code{movi}, the other instructions are used
+to truncate a wordsize operand to a smaller integer data type or to
+convert float data types. You can also use @code{extr} to convert an
+integer to a floating point value: the usual options are @code{extr_f}
+and @code{extr_d}.
+
+@example
+movr                                 _f  _d  O1 = O2
+movi                                 _f  _d  O1 = O2
+extr      _c  _uc  _s  _us  _i  _ui  _f  _d  O1 = O2
+truncr                               _f  _d  O1 = trunc(O2)
+@end example
+
+In 64-bit architectures it may be required to use @code{truncr_f_i},
+@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match
+the equivalent C code.  Only the @code{_i} modifier is available in
+32-bit architectures.
+
+@example
+truncr_f_i    = <int> O1 = <float> O2
+truncr_f_l    = <long>O1 = <float> O2
+truncr_d_i    = <int> O1 = <double>O2
+truncr_d_l    = <long>O1 = <double>O2
+@end example
+
+The float conversion operations are @emph{destination first,
+source second}, but the order of the types is reversed.  This happens
+for historical reasons.
+
+@example
+extr_f_d    = <double>O1 = <float> O2
+extr_d_f    = <float> O1 = <double>O2
+@end example
+
+@item Network extensions
+These accept two operands, both of which must be registers; these
+two instructions actually perform the same task, yet they are
+assigned to two mnemonics for the sake of convenience and
+completeness.  As usual, the first operand is the destination and
+the second is the source.
+The @code{_ul} variant is only available in 64-bit architectures.
+@example
+htonr    _us _ui _ul @r{Host-to-network (big endian) order}
+ntohr    _us _ui _ul @r{Network-to-host order }
+@end example
+
+@item Load operations
+@code{ld} accepts two operands while @code{ldx} accepts three;
+in both cases, the last can be either a register or an immediate
+value. Values are extended (with or without sign, according to
+the data type specification) to fit a whole register.
+The @code{_ui} and @code{_l} types are only available in 64-bit
+architectures.  For convenience, there is a version without a
+type modifier for integer or pointer operands that uses the
+appropriate wordsize call.
+@example
+ldr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+@end example
+
+@item Store operations
+@code{st} accepts two operands while @code{stx} accepts three; in
+both cases, the first can be either a register or an immediate
+value. Values are sign-extended to fit a whole register.
+@example
+str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+@end example
+As for the load operations, the @code{_ui} and @code{_l} types are
+only available in 64-bit architectures, and for convenience, there
+is a version without a type modifier for integer or pointer operands
+that uses the appropriate wordsize call.
+
+@item Argument management
+These are:
+@example
+prepare     (not specified)
+va_start    (not specified)
+pushargr                                   _f  _d
+pushargi                                   _f  _d
+va_push     (not specified)
+arg                                        _f  _d
+getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+va_arg                                         _d
+putargr                                    _f  _d
+putargi                                    _f  _d
+ret         (not specified)
+retr                                       _f  _d
+reti                                       _f  _d
+va_end      (not specified)
+retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+epilog      (not specified)
+@end example
+As with other operations that use a type modifier, the @code{_ui} and
+@code{_l} types are only available in 64-bit architectures, but there
+are operations without a type modifier that alias to the appropriate
+integer operation with wordsize operands.
+
+@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller,
+while @code{arg}, @code{getarg} and @code{ret} are used by the callee.
+A code snippet that wants to call another procedure and has to pass
+arguments must, in order: use the @code{prepare} instruction and use
+the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
+left to right order}; and use @code{finish} or @code{call} (explained below)
+to perform the actual call.
+
+@code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
+arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
+@code{va_push} is required when passing a @code{va_list} to another function,
+because not all architectures expect it as a single pointer. A known case
+is DEC Alpha, which requires it as a structure passed by value.
+
+@code{arg}, @code{getarg} and @code{putarg} are used by the callee.
+@code{arg} is different from other instructions in that it does not
+actually generate any code: instead, it is a function which returns
+a value to be passed to @code{getarg} or @code{putarg}. @footnote{``Return
+a value'' means that @lightning{} code that compiles these
+instructions returns a value when expanded.} You should call
+@code{arg} as soon as possible, before any function call or, more
+easily, right after the @code{prolog} instructions
+(which is treated later).
+
+@code{getarg} accepts a register argument and a value returned by
+@code{arg}, and will move that argument to the register, extending
+it (with or without sign, according to the data type specification)
+to fit a whole register.  These instructions are more intimately
+related to the usage of the @lightning{} instruction set in code
+that generates other code, so they will be treated more
+specifically in @ref{GNU lightning examples, , Generating code at
+run-time}.
+
+@code{putarg} is a mix of @code{getarg} and @code{pusharg} in that
+it accepts as first argument a register or immediate, and as
+second argument a value returned by @code{arg}. It allows changing,
+or restoring an argument to the current function, and is a
+construct required to implement tail call optimization. Note that
+arguments in registers are very cheap, but will be overwritten
+at any moment, including on some operations, for example division,
+that on several ports is implemented as a function call.
+
+Finally, the @code{retval} instruction fetches the return value of a
+called function in a register.  The @code{retval} instruction takes a
+register argument and copies the return value of the previously called
+function in that register.  A function with a return value should use
+@code{retr} or @code{reti} to put the return value in the return register
+before returning.  @xref{Fibonacci, the Fibonacci numbers}, for an example.
+
+@code{epilog} is an optional call, that marks the end of a function
+body. It is automatically generated by @lightning{} if starting a new
+function (which should be done after a @code{ret} call) or finishing
+generating jit.
+It is very important to note that the fact that @code{epilog} is
+optional may lead to a common mistake. Consider this:
+@example
+fun1:
+    prolog
+    ...
+    ret
+fun2:
+    prolog
+@end example
+Because @code{epilog} is only added when a new @code{prolog} is found,
+the @code{fun2} label will actually be placed before the
+return from @code{fun1}; @lightning{} will understand the
+code as:
+@example
+fun1:
+    prolog
+    ...
+    ret
+fun2:
+    epilog
+    prolog
+@end example
+
+You should observe a few rules when using these macros.  First of
+all, if calling a varargs function, you should use the @code{ellipsis}
+call to mark the position of the ellipsis in the C prototype.
+
+You should not nest calls to @code{prepare} inside a
+@code{prepare/finish} block.  Doing this will result in undefined
+behavior. Note that for functions with zero arguments you can use
+just @code{call}.
+
+@item Branch instructions
+Like @code{arg}, these also return a value which, in this case,
+is to be used to compile forward branches as explained in
+@ref{Fibonacci, , Fibonacci numbers}.  They accept two operands to be
+compared; of these, the last can be either a register or an immediate.
+They are:
+@example
+bltr      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+blti      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+bler      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+blei      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+bgtr      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bgti      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bger      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+bgei      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+beqr          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+beqi          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+bner          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+bnei          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+
+bunltr        _f  _d  @r{if }!(O2 >= O3)@r{ goto }O1
+bunler        _f  _d  @r{if }!(O2 >  O3)@r{ goto }O1
+bungtr        _f  _d  @r{if }!(O2 <= O3)@r{ goto }O1
+bunger        _f  _d  @r{if }!(O2 <  O3)@r{ goto }O1
+buneqr        _f  _d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
+bltgtr        _f  _d  @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
+bordr         _f  _d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
+bunordr       _f  _d  @r{if } (O2 != O2) ||  (O3 != O3)@r{ goto }O1
+
+bmsr                  @r{if }O2 &  O3@r{ goto }O1
+bmsi                  @r{if }O2 &  O3@r{ goto }O1
+bmcr                  @r{if }!(O2 & O3)@r{ goto }O1
+bmci                  @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
+boaddr    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+boaddi    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+bxaddr    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bxaddi    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bosubr    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bosubi    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+@end example
+
+@item Jump and return operations
+These accept one argument except @code{ret} and @code{jmpi} which
+have none; the difference between @code{finishi} and @code{calli}
+is that the latter does not clean the stack from pushed parameters
+(if any) and the former must @strong{always} follow a @code{prepare}
+instruction.
+@example
+callr     (not specified)                @r{function call to register O1}
+calli     (not specified)                @r{function call to immediate O1}
+finishr   (not specified)                @r{function call to register O1}
+finishi   (not specified)                @r{function call to immediate O1}
+jmpr      (not specified)                @r{unconditional jump to register}
+jmpi      (not specified)                @r{unconditional jump}
+ret       (not specified)                @r{return from subroutine}
+retr      _c _uc _s _us _i _ui _l _f _d
+reti      _c _uc _s _us _i _ui _l _f _d
+retval    _c _uc _s _us _i _ui _l _f _d  @r{move return value}
+                                         @r{to register}
+@end example
+
+Like the branch instructions, @code{jmpi} also returns a value which is to
+be used to compile forward branches. @xref{Fibonacci, , Fibonacci
+numbers}.
+
+@item Labels
+There are 3 @lightning{} instructions to create labels:
+@example
+label     (not specified)                @r{simple label}
+forward   (not specified)                @r{forward label}
+indirect  (not specified)                @r{special simple label}
+@end example
+
+@code{label} is normally used as @code{patch_at} argument for backward
+jumps.
+
+@example
+        jit_node_t *jump, *label;
+label = jit_label();
+        ...
+        jump = jit_beqr(JIT_R0, JIT_R1);
+        jit_patch_at(jump, label);
+@end example
+
+@code{forward} is used to patch code generation before the actual
+position of the label is known.
+
+@example
+        jit_node_t *jump, *label;
+label = jit_forward();
+        jump = jit_beqr(JIT_R0, JIT_R1);
+        jit_patch_at(jump, label);
+        ...
+        jit_link(label);
+@end example
+
+@code{indirect} is useful when creating jump tables, and tells
+@lightning{} to not optimize out a label that is not the target of
+any jump, because an indirect jump may land where it is defined.
+
+@example
+        jit_node_t *jump, *label;
+        ...
+        jmpr(JIT_R0);                    @rem{/* may jump to label */}
+        ...
+label = jit_indirect();
+@end example
+
+@code{indirect} is a special case of @code{note} and @code{name}
+because it is a valid argument to @code{address}.
+
+Note that the usual idiom to write the previous example is
+@example
+        jit_node_t *addr, *jump;
+addr  = jit_movi(JIT_R0, 0);             @rem{/* immediate is ignored */}
+        ...
+        jmpr(JIT_R0);
+        ...
+        jit_patch(addr);                 @rem{/* implicit label added */}
+@end example
+
+that automatically binds the implicit label added by @code{patch} with
+the @code{movi}, but on some special conditions it is required to create
+an "unbound" label.
+
+@item Function prolog
+
+These macros are used to set up a function prolog.  The @code{allocai}
+call accepts a single integer argument and returns an offset value
+for stack storage access.  The @code{allocar} call accepts two register
+arguments, the first is set to the offset for stack access, and the
+second is the size in bytes argument.
+
+@example
+prolog    (not specified)                @r{function prolog}
+allocai   (not specified)                @r{reserve space on the stack}
+allocar   (not specified)                @r{allocate space on the stack}
+@end example
+
+@code{allocai} receives the number of bytes to allocate and returns
+the offset from the frame pointer register @code{FP} to the base of
+the area.
+
+@code{allocar} receives two register arguments.  The first is where
+to store the offset from the frame pointer register @code{FP} to the
+base of the area.  The second argument is the size in bytes.  Note
+that @code{allocar} is dynamic allocation, and special attention
+should be taken when using it.  If called in a loop, every iteration
+will allocate stack space.  Stack space is aligned from 8 to 64 bytes
+depending on backend requirements, even if allocating only one byte.
+It is advisable to not use it with @code{frame} and @code{tramp}; it
+should work with @code{frame} with special care to call only once,
+but is not supported if used in @code{tramp}, even if called only
+once.
+
+As a small appetizer, here is a small function that adds 1 to the input
+parameter (an @code{int}).  I'm using an assembly-like syntax here which
+is a bit different from the one used when writing real subroutines with
+@lightning{}; the real syntax will be introduced in @ref{GNU lightning
+examples, , Generating code at run-time}.
+
+@example
+incr:
+     prolog
+in = arg                     @rem{! We have an integer argument}
+     getarg    R0, in        @rem{! Move it to R0}
+     addi      R0, R0, 1     @rem{! Add 1}
+     retr      R0            @rem{! And return the result}
+@end example
+
+And here is another function which uses the @code{printf} function from
+the standard C library to write a number in hexadecimal notation:
+
+@example
+printhex:
+     prolog
+in = arg                     @rem{! Same as above}
+     getarg    R0, in
+     prepare                 @rem{! Begin call sequence for printf}
+     pushargi  "%x"          @rem{! Push format string}
+     ellipsis                @rem{! Varargs start here}
+     pushargr  R0            @rem{! Push second argument}
+     finishi   printf        @rem{! Call printf}
+     ret                     @rem{! Return to caller}
+@end example
+
+@item Trampolines, continuations and tail call optimization
+
+Frequently it is required to generate jit code that must jump to
+code generated later, possibly from another @code{jit_context_t}.
+These require compatible stack frames.
+
+@lightning{} provides two primitives from where trampolines,
+continuations and tail call optimization can be implemented.
+
+@example
+frame   (not specified)                  @r{create stack frame}
+tramp   (not specified)                  @r{assume stack frame}
+@end example
+
+@code{frame} receives an integer argument@footnote{It is not
+automatically computed because it does not know about the
+requirement of later generated code.} that defines the size in
+bytes for the stack frame of the current, @code{C} callable,
+jit function. To calculate this value, a good formula is maximum
+number of arguments to any called native function times
+eight@footnote{Times eight so that it works for double arguments.
+And would not need conditionals for ports that pass arguments in
+the stack.}, plus the sum of the arguments to any call to
+@code{jit_allocai}. @lightning{} automatically adjusts this value
+for any backend specific stack memory it may need, or any
+alignment constraint.
+
+@code{frame} also instructs @lightning{} to save all callee
+save registers in the prolog and reload in the epilog.
+
+@example
+main:                        @rem{! jit entry point}
+     prolog                  @rem{! function prolog}
+     frame  256              @rem{! save all callee save registers and}
+                             @rem{! reserve at least 256 bytes in stack}
+main_loop:
+     ...
+     jmpi   handler          @rem{! jumps to external code}
+     ...
+     ret                     @rem{! return to the caller}
+@end example
+
+@code{tramp} differs from @code{frame} only in that a prolog and epilog
+will not be generated. Note that @code{prolog} must still be used.
+The code under @code{tramp} must be ready to be entered with a jump
+at the prolog position, and instead of a return, it must end with
+an unconditional jump. @code{tramp} exists solely for the fact
+that it allows optimizing out prolog and epilog code that would
+never be executed.
+
+@example
+handler:                     @rem{! handler entry point}
+     prolog                  @rem{! function prolog}
+     tramp  256              @rem{! assumes all callee save registers}
+                             @rem{! are saved and there is at least}
+                             @rem{! 256 bytes in stack}
+     ...
+     jmpi   main_loop        @rem{! return to the main loop}
+@end example
+
+@lightning{} only supports Tail Call Optimization using the
+@code{tramp} construct. Any other way is not guaranteed to
+work on all ports.
+
+An example of a simple (recursive) tail call optimization:
+
+@example
+factorial:                   @rem{! Entry point of the factorial function}
+     prolog
+in = arg                     @rem{! Receive an integer argument}
+     getarg R0, in           @rem{! Move argument to R0}
+     prepare
+         pushargi 1          @rem{! This is the accumulator}
+         pushargr R0         @rem{! This is the argument}
+     finishi fact            @rem{! Call the tail call optimized function}
+     retval R0               @rem{! Fetch the result}
+     retr R0                 @rem{! Return it}
+     epilog                  @rem{! Epilog *before* label before prolog}
+
+fact:                        @rem{! Entry point of the helper function}
+     prolog
+     frame 16                @rem{! Reserve 16 bytes in the stack}
+fact_entry:                  @rem{! This is the tail call entry point}
+ac = arg                     @rem{! The accumulator is the first argument}
+in = arg                     @rem{! The factorial argument}
+     getarg R0, ac           @rem{! Move the accumulator to R0}
+     getarg R1, in           @rem{! Move the argument to R1}
+     blei fact_out, R1, 1    @rem{! Done if argument is one or less}
+     mulr R0, R0, R1         @rem{! accumulator *= argument}
+     putargr R0, ac          @rem{! Update the accumulator}
+     subi R1, R1, 1          @rem{! argument -= 1}
+     putargr R1, in          @rem{! Update the argument}
+     jmpi fact_entry         @rem{! Tail Call Optimize it!}
+fact_out:
+     retr R0                 @rem{! Return the accumulator}
+@end example
+
+@item Predicates
+@example
+forward_p      (not specified)           @r{forward label predicate}
+indirect_p     (not specified)           @r{indirect label predicate}
+target_p       (not specified)           @r{used label predicate}
+arg_register_p (not specified)           @r{argument kind predicate}
+callee_save_p  (not specified)           @r{callee save predicate}
+pointer_p      (not specified)           @r{pointer predicate}
+@end example
+
+@code{forward_p} expects a @code{jit_node_t*} argument, and
+returns non zero if it is a forward label reference, that is,
+a label returned by @code{forward}, that still needs a
+@code{link} call.
+
+@code{indirect_p} expects a @code{jit_node_t*} argument, and returns
+non zero if it is an indirect label reference, that is, a label that
+was returned by @code{indirect}.
+
+@code{target_p} expects a @code{jit_node_t*} argument, that is any
+kind of label, and will return non zero if there is at least one
+jump or move referencing it.
+
+@code{arg_register_p} expects a @code{jit_node_t*} argument, that must
+have been returned by @code{arg}, @code{arg_f} or @code{arg_d}, and
+will return non zero if the argument lives in a register. This call
+is useful to know the live range of register arguments, as those
+are very fast to read and write, but have volatile values.
+
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{JIT_Fn}, and will return non zero if the register is callee
+save. This call is useful because on several ports, the @code{JIT_Rn}
+and @code{JIT_Fn} registers are actually callee save; no need
+to save and load the values when making function calls.
+
+@code{pointer_p} expects a pointer argument, and will return non
+zero if the pointer is inside the generated jit code. Must be
+called after @code{jit_emit} and before @code{jit_destroy_state}.
+@end table
+
+@node GNU lightning examples
+@chapter Generating code at run-time
+
+To use @lightning{}, you should include the @file{lightning.h} file that
+is put in your include directory by the @samp{make install} command.
+
+Each of the instructions above translates to a macro or function call.
+All you have to do is prepend @code{jit_} (lowercase) to opcode names
+and @code{JIT_} (uppercase) to register names.  Of course, parameters
+are to be put between parentheses.
+
+This small tutorial presents four examples:
+
+@iftex
+@itemize @bullet
+@item
+The @code{incr} function found in @ref{The instruction set, ,
+@lightning{}'s instruction set}:
+
+@item
+A simple function call to @code{printf}
+
+@item
+An RPN calculator.
+
+@item
+Fibonacci numbers
+@end itemize
+@end iftex
+@ifnottex
+@menu
+* incr::             A function which increments a number by one
+* printf::           A simple function call to printf
+* RPN calculator::   A more complex example, an RPN calculator
+* Fibonacci::        Calculating Fibonacci numbers
+@end menu
+@end ifnottex
+
+@node incr
+@section A function which increments a number by one
+
+Let's see how to create and use the sample @code{incr} function created
+in @ref{The instruction set, , @lightning{}'s instruction set}:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  jit_node_t  *in;
+  pifi         incr;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    @rem{/* @t{     prolog             } */}
+  in = jit_arg();                  @rem{/* @t{     in = arg           } */}
+  jit_getarg(JIT_R0, in);          @rem{/* @t{     getarg R0          } */}
+  jit_addi(JIT_R0, JIT_R0, 1);     @rem{/* @t{     addi   R0@comma{} R0@comma{} 1   } */}
+  jit_retr(JIT_R0);                @rem{/* @t{     retr   R0          } */}
+
+  incr = jit_emit();
+  jit_clear_state();
+
+  @rem{/* call the generated code@comma{} passing 5 as an argument */}
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+@}
+@end example
+
+Let's examine the code line by line (well, almost@dots{}):
+
+@table @t
+@item #include <lightning.h>
+You already know about this.  It defines all of @lightning{}'s macros.
+
+@item static jit_state_t *_jit;
+You might wonder about what is @code{jit_state_t}.  It is a structure
+that stores jit code generation information.  The name @code{_jit} is
+special, because since multiple jit generators can run at the same
+time, you must either @r{#define _jit my_jit_state} or name it
+@code{_jit}.
+
+@item typedef int (*pifi)(int);
+Just a handy typedef for a pointer to a function that takes an
+@code{int} and returns another.
+
+@item jit_node_t  *in;
+Declares a variable to hold an identifier for a function argument. It
+is an opaque pointer, that will hold the return of a call to @code{arg}
+and be used as argument to @code{getarg}.
+
+@item pifi         incr;
+Declares a function pointer variable to a function that receives an
+@code{int} and returns an @code{int}.
+
+@item init_jit(argv[0]);
+You must call this function before creating a @code{jit_state_t}
+object. This function does global state initialization, and may need
+to detect CPU or Operating System features.  It receives a string
+argument that is later used to read symbols from a shared object using
+GNU binutils if disassembly was enabled at configure time. If no
+disassembly will be performed a NULL pointer can be used as argument.
+
+@item _jit = jit_new_state();
+This call initializes a @lightning{} jit state.
+
+@item jit_prolog();
+Ok, so we start generating code for our beloved function@dots{}
+
+@item in = jit_arg();
+@itemx jit_getarg(JIT_R0, in);
+We retrieve the first (and only) argument, an integer, and store it
+into the general-purpose register @code{R0}.
+
+@item jit_addi(JIT_R0, JIT_R0, 1);
+We add one to the content of the register.
+
+@item jit_retr(JIT_R0);
+This instruction generates a standard function epilog that returns
+the contents of the @code{R0} register.
+
+@item incr = jit_emit();
+This instruction is very important.  It actually translates the
+@lightning{} macros used before to machine code, flushes the generated
+code area out of the processor's instruction cache and returns a
+pointer to the start of the code.
+
+@item jit_clear_state();
+This call cleans up any data not required for jit execution. Note
+that it must be called after any call to @code{jit_print} or
+@code{jit_address}, as this call destroys the @lightning{}
+intermediate representation.
+
+@item printf("%d + 1 = %d\n", 5, incr(5));
+Calling our function is this simple---it is not distinguishable from
+a normal C function call, the only difference being that @code{incr}
+is a variable.
+
+@item jit_destroy_state();
+Releases all memory associated with the jit context. It should be
+called once it is known that the jit code will no longer be called.
+
+@item finish_jit();
+This call cleans up any global state held by @lightning{}, and it is
+advisable to call it once jit code will no longer be generated.
+@end table
+
+@lightning{} abstracts two phases of dynamic code generation: selecting
+instructions that map the standard representation, and emitting binary
+code for these instructions.  The client program has the responsibility
+of describing the code to be generated using the standard @lightning{}
+instruction set.
+
+Let's examine the code generated for @code{incr} on the SPARC and x86_64
+architectures (on the right is the code that an assembly-language
+programmer would write):
+
+@table @b
+@item SPARC
+@example
+      save  %sp, -112, %sp
+      mov  %i0, %g2                 retl
+      inc  %g2                      inc %o0
+      mov  %g2, %i0
+      restore 
+      retl 
+      nop 
+@end example
+In this case, @lightning{} introduces overhead to create a register
+window (not knowing that the procedure is a leaf procedure) and to
+move the argument to the general purpose register @code{R0} (which
+maps to @code{%g2} on the SPARC).
+@end table
+
+@table @b
+@item x86_64
+@example
+    sub   $0x30,%rsp
+    mov   %rbp,(%rsp)
+    mov   %rsp,%rbp
+    sub   $0x18,%rsp
+    mov   %rdi,%rax            mov %rdi, %rax
+    add   $0x1,%rax            inc %rax
+    mov   %rbp,%rsp
+    mov   (%rsp),%rbp
+    add   $0x30,%rsp
+    retq                       retq
+@end example
+In this case, the main overhead is due to the function's prolog and
+epilog, and stack alignment after reserving stack space for word
+to/from float conversions or moving data from/to x87 to/from SSE.
+Note that besides allocating space to save callee saved registers,
+no registers are saved/restored because @lightning{} notices those
+registers are not modified. There is currently no logic to detect
+if it needs to allocate stack space for type conversions, nor
+proper leaf function detection, but these are subject to change
+(FIXME).
+@end table
+
+@node printf
+@section A simple function call to @code{printf}
+
+Again, here is the code for the example:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);      @rem{/* Pointer to Void Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pvfi          myFunction;             @rem{/* ptr to generated code */}
+  jit_node_t    *start, *end;           @rem{/* a couple of labels */}
+  jit_node_t    *in;                    @rem{/* to get the argument */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  start = jit_note(__FILE__, __LINE__);
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);
+  jit_prepare();
+  jit_pushargi((jit_word_t)"generated %d bytes\n");
+  jit_ellipsis();
+  jit_pushargr(JIT_R1);
+  jit_finishi(printf);
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);
+
+  myFunction = jit_emit();
+
+  @rem{/* call the generated code@comma{} passing its size as argument */}
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+  jit_clear_state();
+
+  jit_disassemble();
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The function shows how many bytes were generated.  Most of the code
+is not very interesting, as it resembles very closely the program
+presented in @ref{incr, , A function which increments a number by one}.
+
+For this reason, we're going to concentrate on just a few statements.
+
+@table @t
+@item start = jit_note(__FILE__, __LINE__);
+@itemx @r{@dots{}}
+@itemx end = jit_note(__FILE__, __LINE__);
+These two instructions call the @code{jit_note} macro, which creates
+a note in the jit code; arguments to @code{jit_note} usually are a
+filename string and line number integer, but using NULL for the
+string argument is perfectly valid if you only need to create a simple
+marker in the code.
+
+@item jit_ellipsis();
+@code{ellipsis} usually is only required if calling varargs functions
+with double arguments, but it is a good practice to properly describe
+the @r{@dots{}} in the call sequence.
+
+@item jit_pushargi((jit_word_t)"generated %d bytes\n");
+Note the use of the @code{(jit_word_t)} cast, that is used only
+to avoid a compiler warning, due to using a pointer where a
+wordsize integer type was expected.
+
+@item jit_prepare();
+@itemx @r{@dots{}}
+@itemx jit_finishi(printf);
+Once the arguments to @code{printf} have been pushed, which means
+moving them to stack or register arguments, the @code{printf}
+function is called and the stack cleaned.  Note how @lightning{}
+abstracts the differences between different architectures and
+ABI's -- the client program does not know how parameter passing
+works on the host architecture.
+
+@item jit_epilog();
+Usually it is not required to call @code{epilog}, but because it
+is implicitly called when noticing the end of a function, if the
+@code{end} variable was set with a @code{note} call after the
+@code{ret}, it would not consider the function epilog.
+
+@item myFunction((char*)jit_address(end) - (char*)jit_address(start));
+This calls the generated jit function, passing as argument the offset
+difference between the @code{start} and @code{end} notes. The @code{address}
+call must be done after the @code{emit} call, otherwise either a fatal error
+will happen (if @lightning{} is built with assertions enabled) or an
+undefined value will be returned.
+
+@item jit_clear_state();
+Note that @code{jit_clear_state} was called after executing jit in
+this example. It was done because it must be called after any call
+to @code{jit_address} or @code{jit_print}.
+
+@item jit_disassemble();
+@code{disassemble} will dump the generated code to standard output,
+unless @lightning{} was built with the disassembler disabled, in which
+case no output will be shown.
+@end table
+
+@node RPN calculator
+@section A more complex example, an RPN calculator
+
+We create a small stack-based RPN calculator which applies a series
+of operators to a given parameter and to other numeric operands.
+Unlike previous examples, the code generator is fully parameterized
+and is able to compile different formulas to different functions.
+Here is the code for the expression compiler; a sample usage will
+follow.
+
+Since @lightning{} does not provide push/pop instructions, this
+example uses a stack-allocated area to store the data.  Such an
+area can be allocated using the macro @code{allocai}, which
+receives the number of bytes to allocate and returns the offset
+from the frame pointer register @code{FP} to the base of the
+area.
+
+Usually, you will use the @code{ldxi} and @code{stxi} instructions
+to access stack-allocated variables.  However, it is possible to
+use operations such as @code{add} to compute the address of the
+variables, and pass the address around.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+@{
+  jit_stxi_i (*sp, JIT_FP, reg);
+  *sp += sizeof (int);
+@}
+
+void stack_pop(int reg, int *sp)
+@{
+  *sp -= sizeof (int);
+  jit_ldxi_i (reg, JIT_FP, *sp);
+@}
+
+jit_node_t *compile_rpn(char *expr)
+@{
+  jit_node_t *in, *fn;
+  int stack_base, stack_ptr;
+
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
+
+  jit_getarg_i(JIT_R2, in);
+
+  while (*expr) @{
+    char buf[32];
+    int n;
+    if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
+      expr += n - 1;
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movi(JIT_R0, atoi(buf));
+    @} else if (*expr == 'x') @{
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    @} else if (*expr == '+') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '-') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '*') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '/') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);
+    @} else @{
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    @}
+    ++expr;
+  @}
+  jit_retr(JIT_R0);
+  jit_epilog();
+  return fn;
+@}
+@end example
+
+The principle on which the calculator is based is easy: the stack top
+is held in R0, while the remaining items of the stack are held in the
+memory area that we allocate with @code{allocai}.  Compiling a numeric
+operand or the argument @code{x} pushes the old stack top onto the
+stack and moves the operand into R0; compiling an operator pops the
+second operand off the stack into R1, and compiles the operation so
+that the result goes into R0, thus becoming the new stack top.
+
+This example allocates a fixed area for 32 @code{int}s.  This is not
+a problem when the function is a leaf like in this case; in a full-blown
+compiler you will want to analyze the input and determine the number
+of needed stack slots---a very simple example of register allocation.
+The area is then managed like a stack using @code{stack_push} and
+@code{stack_pop}.
+
+Source code for the client (which lies in the same source file) follows:
+
+@example
+int main(int argc, char *argv[])
+@{
+  jit_node_t *nc, *nf;
+  pifi c2f, f2c;
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");
+  nf = compile_rpn("x32-5*9/");
+  (void)jit_emit();
+  c2f = (pifi)jit_address(nc);
+  f2c = (pifi)jit_address(nf);
+  jit_clear_state();
+
+  printf("\nC:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The client displays a conversion table between Celsius and Fahrenheit
+degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
+formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
+respectively.
+
+Providing the formula as an argument to @code{compile_rpn} effectively
+parameterizes code generation, making it possible to use the same code
+to compile different functions; this is what makes dynamic code
+generation so powerful.
+
+@node Fibonacci
+@section Fibonacci numbers
+
+The code in this section calculates the Fibonacci sequence. That is
+modeled by the recurrence relation:
+@display
+     f(0) = 0
+     f(1) = f(2) = 1
+     f(n) = f(n-1) + f(n-2)
+@end display
+
+The purpose of this example is to introduce branches.  There are two
+kinds of branches: backward branches and forward branches.  We'll
+present the calculation in a recursive and iterative form; the
+former only uses forward branches, while the latter uses both.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *label;
+  jit_node_t *call;
+  jit_node_t *in;                 @rem{/* offset of the argument */}
+  jit_node_t *ref;                @rem{/* to patch the forward reference */}
+  jit_node_t *zero;               @rem{/* to patch the forward reference */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R0, in);              @rem{/* R0 = n */}
+ zero = jit_beqi     (JIT_R0, 0);
+        jit_movr     (JIT_V0, JIT_R0);          /* V0 = R0 */
+        jit_movi     (JIT_R0, 1);
+  ref = jit_blei     (JIT_V0, 2);
+        jit_subi     (JIT_V1, JIT_V0, 1);       @rem{/* V1 = n-1 */}
+        jit_subi     (JIT_V2, JIT_V0, 2);       @rem{/* V2 = n-2 */}
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_V1);                     @rem{/* V1 = fib(n-1) */}
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_R0);                     @rem{/* R0 = fib(n-2) */}
+        jit_addr(JIT_R0, JIT_R0, JIT_V1);       @rem{/* R0 = R0 + V1 */}
+
+  jit_patch(ref);                               @rem{/* patch jump */}
+  jit_patch(zero);                              @rem{/* patch jump */}
+        jit_retr(JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 32 as an argument */}
+  fib = jit_emit();
+  jit_clear_state();
+  printf("fib(%d) = %d\n", 32, fib(32));
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+@}
+@end example
+
+As said above, this is the first example of dynamically compiling
+branches.  Branch instructions have two operands containing the
+values to be compared, and return a @code{jit_node_t *} object
+to be patched.
+
+Because the final addresses of labels are only known after calling
+@code{emit}, it is required to call @code{patch} or @code{patch_at}, which
+tells @lightning{} that the target to patch is actually a pointer to
+a @code{jit_node_t *} object; otherwise, it would assume that it is
+a pointer to a C function. Note that conditional branches do not
+receive a label argument, so they must be patched.
+
+You need to call @code{patch_at} on the return value of @code{calli}
+or @code{finishi} if it is actually referencing a label
+in the jit code. No branch instruction receives a label
+argument. Note that @code{movi} is a special case, and patching it
+is usually done to get the final address of a label, usually to later
+call @code{jmpr}.
+
+Now, here is the iterative version:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *in;               @rem{/* offset of the argument */}
+  jit_node_t *ref;              @rem{/* to patch the forward reference */}
+  jit_node_t *zero;             @rem{/* to patch the forward reference */}
+  jit_node_t *jump;             @rem{/* jump to start of loop */}
+  jit_node_t *loop;             @rem{/* start of the loop */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R0, in);              @rem{/* R0 = n */}
+ zero = jit_beqi     (JIT_R0, 0);
+        jit_movr     (JIT_R1, JIT_R0);
+        jit_movi     (JIT_R0, 1);
+  ref = jit_blei     (JIT_R1, 2);
+        jit_subi     (JIT_R2, JIT_R1, 2);
+        jit_movr     (JIT_R1, JIT_R0);
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
+        jit_movr     (JIT_V0, JIT_R0);          /* V0 = R0 */
+        jit_addr     (JIT_R0, JIT_R0, JIT_R1);  /* R0 = R0 + R1 */
+        jit_movr     (JIT_R1, JIT_V0);          /* R1 = V0 */
+  jump= jit_bnei     (JIT_R2, 0);               /* if (R2) goto loop; */
+  jit_patch_at(jump, loop);
+
+  jit_patch(ref);                               @rem{/* patch forward jump */}
+  jit_patch(zero);                              @rem{/* patch forward jump */}
+        jit_retr     (JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 36 as an argument */}
+  fib = jit_emit();
+  jit_clear_state();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+@}
+@end example
+
+This code calculates the recurrence relation using iteration (a
+@code{for} loop in high-level languages).  There are no function
+calls anymore: instead, there is a backward jump (the @code{bnei} at
+the end of the loop).
+
+Note that the program must remember the address for backward jumps;
+for forward jumps it is only required to remember the jump code,
+and call @code{patch} for the implicit label.
+
+@node Reentrancy
+@chapter Re-entrant usage of @lightning{}
+
+@lightning{} uses the special @code{_jit} identifier. To be able
+to use multiple jit generation states at the same
+time, it is required to use code similar to:
+
+@example
+    struct jit_state lightning;
+    #define _jit lightning
+@end example
+
+This will cause the symbol defined to @code{_jit} to be passed as
+the first argument to the underlying @lightning{} implementation,
+that is usually a function with an @code{_} (underscore) prefix
+and with an argument named @code{_jit}, in the pattern:
+
+@example
+    static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t);
+    #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v);
+@end example
+
+The reason for this is to use the same syntax as the initial lightning
+implementation and to avoid needing the user to keep adding an extra
+argument to every call, as multiple jit states generating code in
+parallel should be very uncommon.
+
+@section Registers
+@chapter Accessing the whole register file
+
+As mentioned earlier in this chapter, all @lightning{} back-ends are
+guaranteed to have at least six general-purpose integer registers and
+six floating-point registers, but many back-ends will have more.
+
+To access the entire register files, you can use the
+@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros.  They
+accept a parameter that identifies the register number, which
+must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
+and @code{JIT_F_NUM} respectively; the number need not be
+constant.  Of course, expressions like @code{JIT_R0} and
+@code{JIT_R(0)} denote the same register, and likewise for
+integer callee-saved, or floating-point, registers.
+
+@node Customizations
+@chapter Customizations
+
+Frequently it is desirable to have more control over how code is
+generated or how memory is used during jit generation or execution.
+
+@section Memory functions
+To aid in complete control of memory allocation and deallocation
+@lightning{} provides wrappers that default to standard @code{malloc},
+@code{realloc} and @code{free}. These are loosely based on the
+GNU GMP counterparts, with the difference that they use the same
+prototype of the system allocation functions, that is, no @code{size}
+for @code{free} or @code{old_size} for @code{realloc}.
+
+@deftypefun void jit_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t), @* void (*@var{free_func_ptr}) (void *))
+@lightning{} guarantees that memory is only allocated or released
+using these wrapped functions, but you must note that if lightning
+was linked to GNU binutils, malloc will probably be called multiple
+times from there when initializing the disassembler.
+
+Because @code{init_jit} may call memory functions, if you need to call
+@code{jit_set_memory_functions}, it must be called before @code{init_jit},
+otherwise, when calling @code{finish_jit}, a pointer allocated with the
+previous or default wrappers will be passed.
+@end deftypefun
+
+@deftypefun void jit_get_memory_functions (@* void *(**@var{alloc_func_ptr}) (size_t), @* void *(**@var{realloc_func_ptr}) (void *, size_t), @* void (**@var{free_func_ptr}) (void *))
+Get the current memory allocation functions. Also, unlike the GNU GMP
+counterpart, it is an error to pass @code{NULL} pointers as arguments.
+@end deftypefun
+
+@section Alternate code buffer
+To instruct @lightning{} to use an alternate code buffer it is required
+to call @code{jit_realize} before @code{jit_emit}, and then query states
+and customize as appropriate.
+
+@deftypefun void jit_realize ()
+Must be called once, before @code{jit_emit}, to instruct @lightning{}
+that no other @code{jit_xyz} call will be made.
+@end deftypefun
+
+@deftypefun jit_pointer_t jit_get_code (jit_word_t *@var{code_size})
+Returns NULL or the previous value set with @code{jit_set_code}, and
+sets the @var{code_size} argument to an appropriate value.
+If @code{jit_get_code} is called before @code{jit_emit}, the
+@var{code_size} argument is set to the expected amount of bytes
+required to generate code.
+If @code{jit_get_code} is called after @code{jit_emit}, the
+@var{code_size} argument is set to the exact amount of bytes used
+by the code.
+@end deftypefun
+
+@deftypefun void jit_set_code (jit_pointer_t @var{code}, jit_word_t @var{size})
+Instructs @lightning{} to output to the @var{code} argument and
+use @var{size} as a guard to not write to invalid memory. If during
+@code{jit_emit} @lightning{} finds out that the code would not fit
+in @var{size} bytes, it halts code emit and returns @code{NULL}.
+@end deftypefun
+
+A simple example of a loop using an alternate buffer is:
+
+@example
+  jit_uint8_t   *code;
+  int          (*func)(int);       @rem{/* function pointer */}
+  jit_word_t     code_size;
+  jit_word_t     real_code_size;
+  @rem{...}
+  jit_realize();                   @rem{/* ready to generate code */}
+  jit_get_code(&code_size);        @rem{/* get expected code size */}
+  code_size = (code_size + 4095) & -4096;
+  do @{
+    code = mmap(NULL, code_size, PROT_EXEC | PROT_READ | PROT_WRITE,
+                MAP_PRIVATE | MAP_ANON, -1, 0);
+    jit_set_code(code, code_size);
+    if ((func = jit_emit()) == NULL) @{
+      munmap(code, code_size);
+      code_size += 4096;
+    @}
+  @} while (func == NULL);
+  jit_get_code(&real_code_size);   @rem{/* query exact size of the code */}
+@end example
+
+The first call to @code{jit_get_code} should return @code{NULL} and set
+the @code{code_size} argument to the expected amount of bytes required
+to emit code.
+The second call to @code{jit_get_code} is after a successful call to
+@code{jit_emit}, and will return the value previously set with
+@code{jit_set_code} and set the @code{real_code_size} argument to the
+exact amount of bytes used to emit the code.
+
+@section Alternate data buffer
+Sometimes it may be desirable to customize how @lightning{} uses, or
+to prevent it from using, an extra buffer for constants or debug
+annotation. This is usually done when also using an alternate code buffer.
+
+@deftypefun jit_pointer_t jit_get_data (jit_word_t *@var{data_size}, jit_word_t *@var{note_size})
+Returns @code{NULL} or the previous value set with @code{jit_set_data},
+and sets the @var{data_size} argument to how many bytes are required
+for the constants data buffer, and @var{note_size} to how many bytes
+are required to store the debug note information.
+Note that it always preallocates one debug note entry even if
+@code{jit_name} or @code{jit_note} are never called, but will return
+zero in the @var{data_size} argument if no constant is required;
+constants are only used for the @code{float} and @code{double} operations
+that have an immediate argument, and not in all @lightning{} ports.
+@end deftypefun
+
+@deftypefun void jit_set_data (jit_pointer_t @var{data}, jit_word_t @var{size}, jit_word_t @var{flags})
+
+@var{data} can be NULL if disabling constants and annotations, otherwise,
+a valid pointer must be passed. An assertion is done that the data will
+fit in @var{size} bytes (but that is a noop if @lightning{} was built
+with @code{-DNDEBUG}).
+
+@var{size} tells the space in bytes available in @var{data}.
+
+@var{flags} can be zero to tell to just use the alternate data buffer,
+or a composition of @code{JIT_DISABLE_DATA} and @code{JIT_DISABLE_NOTE}:
+
+@table @t
+@item JIT_DISABLE_DATA
+@cindex JIT_DISABLE_DATA
+Instructs @lightning{} to not use a constant table, but to use an
+alternate method to synthesize those, usually with a larger code
+sequence using stack space to transfer the value from a GPR to a
+FPR register.
+
+@item JIT_DISABLE_NOTE
+@cindex JIT_DISABLE_NOTE
+Instructs @lightning{} to not store file or function name, and
+line numbers in the constant buffer.
+@end table
+@end deftypefun
+
+A simple example of preventing usage of a data buffer is:
+
+@example
+  @rem{...}
+  jit_realize();                        @rem{/* ready to generate code */}
+  jit_get_data(NULL, NULL);
+  jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
+  @rem{...}
+@end example
+
+Or to only use a data buffer, if required:
+
+@example
+  jit_uint8_t   *data;
+  jit_word_t     data_size;
+  @rem{...}
+  jit_realize();                        @rem{/* ready to generate code */}
+  jit_get_data(&data_size, NULL);
+  if (data_size)
+    data = malloc(data_size);
+  else
+    data = NULL;
+  jit_set_data(data, data_size, JIT_DISABLE_NOTE);
+  @rem{...}
+  if (data)
+    free(data);
+  @rem{...}
+@end example
+
+@node Acknowledgements
+@chapter Acknowledgements
+
+As far as I know, the first general-purpose portable dynamic code
+generator is @sc{dcg}, by Dawson R.@: Engler and T.@: A.@: Proebsting.
+Further work by Dawson R. Engler resulted in the @sc{vcode} system;
+unlike @sc{dcg}, @sc{vcode} used no intermediate representation and
+directly inspired @lightning{}.
+
+Thanks go to Ian Piumarta, who kindly accepted to release his own
+program @sc{ccg} under the GNU General Public License, thereby allowing
+@lightning{} to use the run-time assemblers he had written for @sc{ccg}.
+@sc{ccg} provides a way to dynamically assemble programs written in the
+underlying architecture's assembly language.  So it is not portable,
+yet very interesting.
+
+I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{}
+was first developed as a tool to be used in GNU Smalltalk's dynamic
+translator from bytecodes to native code.
diff --git a/deps/lightning/doc/fact.c b/deps/lightning/doc/fact.c
new file mode 100644 (file)
index 0000000..375905b
--- /dev/null
@@ -0,0 +1,75 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef long (*pwfw_t)(long);          /* Pointer to Long Function of Long */
+
+int main(int argc, char *argv[])      /* JIT a tail-call factorial, run it on argv[1] (default 5), print the result */
+{
+    pwfw_t      factorial;             /* Generated code, as returned by jit_emit() */
+    long        arg;                   /* Value passed to the generated function */
+    jit_node_t *ac;                    /* Accumulator */
+    jit_node_t *in;                    /* Argument */
+    jit_node_t *call;                  /* Call site in the wrapper, patched to fact */
+    jit_node_t *fact;                  /* Forward label of the helper function */
+    jit_node_t *jump;                  /* Backward jump that implements the tail call */
+    jit_node_t *fact_entry;            /* Label the tail call jumps back to */
+    jit_node_t *fact_out;              /* Forward branch taken when the argument is <= 1 */
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    /* declare a forward label */
+    fact = jit_forward();
+
+    jit_prolog();                      /* Entry point of the factorial function */
+    in = jit_arg();                    /* Receive an integer argument */
+    jit_getarg(JIT_R0, in);            /* Move argument to R0 */
+    jit_prepare();
+    jit_pushargi(1);                   /* This is the accumulator */
+    jit_pushargr(JIT_R0);              /* This is the argument */
+    call = jit_finishi(NULL);          /* Call the tail call optimized function */
+    jit_patch_at(call, fact);          /* Patch call to forward defined function */
+    /* the above could have been written as:
+     *         jit_patch_at(jit_finishi(NULL), fact);
+     */
+    jit_retval(JIT_R0);                        /* Fetch the result */
+    jit_retr(JIT_R0);                  /* Return it */
+    jit_epilog();                      /* Close the wrapper before the label and prolog of the helper */
+
+    /* define the forward label */
+    jit_link(fact);                    /* Entry point of the helper function */
+    jit_prolog();
+    jit_frame(16);                     /* Reserve 16 bytes in the stack */
+    fact_entry = jit_label();          /* This is the tail call entry point */
+    ac = jit_arg();                    /* The accumulator is the first argument */
+    in = jit_arg();                    /* The factorial argument */
+    jit_getarg(JIT_R0, ac);            /* Move the accumulator to R0 */
+    jit_getarg(JIT_R1, in);            /* Move the argument to R1 */
+    fact_out = jit_blei(JIT_R1, 1);    /* Done if argument is one or less */
+    jit_mulr(JIT_R0, JIT_R0, JIT_R1);  /* accumulator *= argument */
+    jit_putargr(JIT_R0, ac);           /* Update the accumulator */
+    jit_subi(JIT_R1, JIT_R1, 1);       /* argument -= 1 */
+    jit_putargr(JIT_R1, in);           /* Update the argument */
+    jump = jit_jmpi();                 /* Unconditional jump, target patched on the next line */
+    jit_patch_at(jump, fact_entry);    /* Tail Call Optimize it! */
+    jit_patch(fact_out);               /* The early-out branch lands here */
+    jit_retr(JIT_R0);                  /* Return the accumulator */
+
+    factorial = jit_emit();
+    /* no need to query information about resolved addresses */
+    jit_clear_state();
+
+    if (argc == 2)
+       arg = atoi(argv[1]);            /* NOTE(review): argv[1] is not validated */
+    else
+       arg = 5;
+
+    /* call the generated code */
+    printf("factorial(%ld) = %ld\n", arg, factorial(arg));
+    /* release all memory associated with the _jit identifier */
+    jit_destroy_state();
+    finish_jit();
+    return 0;
+}
diff --git a/deps/lightning/doc/ifib.c b/deps/lightning/doc/ifib.c
new file mode 100644 (file)
index 0000000..745c80b
--- /dev/null
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])        /* JIT an iterative Fibonacci function and print fib(36) */
+{
+  pifi       fib;                       /* generated code, as returned by jit_emit() */
+  jit_node_t *in;               /* offset of the argument */
+  jit_node_t *ref;              /* to patch the forward reference */
+  jit_node_t *zero;             /* to patch the forward reference */
+  jit_node_t *jump;             /* jump to start of loop */
+  jit_node_t *loop;             /* start of the loop */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R0, in);              /* R0 = n */
+ zero = jit_beqi     (JIT_R0, 0);               /* n == 0: return R0, which is 0 */
+        jit_movr     (JIT_R1, JIT_R0);          /* R1 = n */
+        jit_movi     (JIT_R0, 1);               /* R0 = 1 */
+  ref = jit_blei     (JIT_R1, 2);               /* n <= 2: return R0, which is 1 */
+        jit_subi     (JIT_R2, JIT_R1, 2);       /* R2 = n - 2, the loop counter */
+        jit_movr     (JIT_R1, JIT_R0);          /* R1 = 1 */
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       /* decr. counter */
+        jit_movr     (JIT_V0, JIT_R0);          /* V0 = R0 */
+        jit_addr     (JIT_R0, JIT_R0, JIT_R1);  /* R0 = R0 + R1 */
+        jit_movr     (JIT_R1, JIT_V0);          /* R1 = V0 */
+  jump= jit_bnei     (JIT_R2, 0);               /* if (R2) goto loop; */
+  jit_patch_at(jump, loop);
+
+  jit_patch(ref);                               /* patch forward jump */
+  jit_patch(zero);                              /* patch forward jump */
+        jit_retr     (JIT_R0);                  /* common exit: return R0 */
+
+  /* call the generated code, passing 36 as an argument */
+  fib = jit_emit();
+  jit_clear_state();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+}
diff --git a/deps/lightning/doc/incr.c b/deps/lightning/doc/incr.c
new file mode 100644 (file)
index 0000000..88859a8
--- /dev/null
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])   /* JIT a function returning its argument plus one; print incr(5) */
+{
+  jit_node_t  *in;                 /* handle of the incoming argument */
+  pifi         incr;               /* generated code, as returned by jit_emit() */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    /* prolog             */
+  in = jit_arg();                  /* in = arg           */
+  jit_getarg(JIT_R0, in);          /* getarg R0          */
+  jit_addi(JIT_R0, JIT_R0, 1);     /* addi   R0, R0, 1   */
+  jit_retr(JIT_R0);                /* retr   R0          */
+
+  incr = jit_emit();
+  jit_clear_state();
+
+  /* call the generated code, passing 5 as an argument */
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+}
diff --git a/deps/lightning/doc/lightning.texi b/deps/lightning/doc/lightning.texi
new file mode 100644 (file)
index 0000000..c7d8f98
--- /dev/null
@@ -0,0 +1,78 @@
+\input texinfo.tex  @c -*- texinfo -*-
+@c %**start of header (This is for running Texinfo on a region.)
+
+@setfilename lightning.info
+
+@set TITLE       Using @sc{gnu} @i{lightning}
+@set TOPIC       installing and using
+
+@settitle @value{TITLE}
+
+@c ---------------------------------------------------------------------
+@c Common macros
+@c ---------------------------------------------------------------------
+
+@macro bulletize{a}
+@item
+\a\
+@end macro
+
+@macro rem{a}
+@r{@i{\a\}}
+@end macro
+
+@macro gnu{}
+@sc{gnu}
+@end macro
+
+@macro lightning{}
+@gnu{} @i{lightning}
+@end macro
+
+@c ---------------------------------------------------------------------
+@c Macros for Texinfo 3.1/4.0 compatibility
+@c ---------------------------------------------------------------------
+
+@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
+@c compatibility
+@macro hlink{url, link}
+\link\ (\url\)
+@end macro
+
+@c ifhtml can only be true in Texinfo 4.0, which has uref
+@ifhtml
+@unmacro hlink
+
+@macro hlink{url, link}
+@uref{\url\, \link\}
+@end macro
+
+@macro email{mail}
+@uref{mailto:\mail\, , \mail\}
+@end macro
+
+@macro url{url}
+@uref{\url\}
+@end macro
+@end ifhtml
+
+@c ---------------------------------------------------------------------
+@c References to the other half of the manual
+@c ---------------------------------------------------------------------
+
+@macro usingref{node, name}
+@ref{\node\, , \name\}
+@end macro
+
+@c ---------------------------------------------------------------------
+@c End of macro section
+@c ---------------------------------------------------------------------
+
+@include version.texi
+@include body.texi
+
+@c %**end of header (This is for running Texinfo on a region.)
+
+@c ***********************************************************************
+
+@bye
diff --git a/deps/lightning/doc/printf.c b/deps/lightning/doc/printf.c
new file mode 100644 (file)
index 0000000..b36eec0
--- /dev/null
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);    /* Pointer to Void Function of Int */
+
+int main(int argc, char *argv[])        /* JIT a function that printf()s its int argument; call it with its own code size */
+{
+  pvfi          myFunction;             /* ptr to generated code */
+  jit_node_t    *start, *end;           /* a couple of labels */
+  jit_node_t    *in;                    /* to get the argument */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  start = jit_note(__FILE__, __LINE__);   /* note placed before the function */
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);                 /* R1 = the int argument */
+  jit_prepare();
+  jit_pushargi((jit_word_t)"generated %d bytes\n");   /* first argument: the format string */
+  jit_ellipsis();                         /* presumably marks the following arguments as variadic - confirm in the manual */
+  jit_pushargr(JIT_R1);                   /* value for %d */
+  jit_finishi(printf);                    /* call printf */
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);     /* note placed after the function */
+
+  myFunction = jit_emit();
+
+  /* call the generated code, passing its size as argument */
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+  jit_clear_state();
+
+  jit_disassemble();                      /* presumably dumps the generated code - confirm in the manual */
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+}
diff --git a/deps/lightning/doc/rfib.c b/deps/lightning/doc/rfib.c
new file mode 100644 (file)
index 0000000..f14da42
--- /dev/null
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])  /* JIT a recursive Fibonacci function and print fib(32) */
+{
+  pifi       fib;                 /* generated code, as returned by jit_emit() */
+  jit_node_t *label;              /* start of the function, target of the recursive calls */
+  jit_node_t *call;               /* call site being patched to label */
+  jit_node_t *in;                 /* offset of the argument */
+  jit_node_t *ref;                /* to patch the forward reference */
+  jit_node_t *zero;             /* to patch the forward reference */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R0, in);              /* R0 = n */
+ zero = jit_beqi     (JIT_R0, 0);               /* n == 0: return R0, which is 0 */
+        jit_movr     (JIT_V0, JIT_R0);          /* V0 = R0 */
+        jit_movi     (JIT_R0, 1);               /* R0 = 1 */
+  ref = jit_blei     (JIT_V0, 2);               /* n <= 2: return R0, which is 1 */
+        jit_subi     (JIT_V1, JIT_V0, 1);       /* V1 = n-1 */
+        jit_subi     (JIT_V2, JIT_V0, 2);       /* V2 = n-2 */
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);               /* target filled in by the patch below */
+        jit_patch_at(call, label);              /* recursive call: fib(n-1) */
+        jit_retval(JIT_V1);                     /* V1 = fib(n-1) */
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);               /* target filled in by the patch below */
+        jit_patch_at(call, label);              /* recursive call: fib(n-2) */
+        jit_retval(JIT_R0);                     /* R0 = fib(n-2) */
+        jit_addr(JIT_R0, JIT_R0, JIT_V1);       /* R0 = R0 + V1 */
+
+  jit_patch(ref);                               /* patch jump */
+  jit_patch(zero);                              /* patch jump */
+        jit_retr(JIT_R0);                       /* common exit: return R0 */
+
+  /* call the generated code, passing 32 as an argument */
+  fib = jit_emit();
+  jit_clear_state();
+  printf("fib(%d) = %d\n", 32, fib(32));
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+}
diff --git a/deps/lightning/doc/rpn.c b/deps/lightning/doc/rpn.c
new file mode 100644 (file)
index 0000000..8131484
--- /dev/null
@@ -0,0 +1,96 @@
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)   /* emit a store of reg into the slot at *sp, then advance *sp */
+{
+  jit_stxi_i(sp[0], JIT_FP, reg);   /* slot is addressed as JIT_FP + current offset */
+  sp[0] = sp[0] + sizeof (int);     /* next free slot is one int further on */
+}
+
+void stack_pop(int reg, int *sp)    /* step *sp back one slot, then emit a load of that slot into reg */
+{
+  sp[0] = sp[0] - sizeof (int);     /* back up to the most recently pushed slot */
+  jit_ldxi_i(reg, JIT_FP, sp[0]);   /* slot is addressed as JIT_FP + updated offset */
+}
+
+jit_node_t *compile_rpn(char *expr)     /* emit a function of one int (x) that evaluates the RPN string expr */
+{
+  jit_node_t *in, *fn;                   /* argument handle; note marking the function start (returned) */
+  int stack_base, stack_ptr;             /* frame offsets of the spill area: base and next free slot */
+
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));  /* 32 int slots; NOTE(review): depth is never checked */
+
+  jit_getarg_i(JIT_R2, in);              /* R2 holds x for the whole function */
+
+  while (*expr) {                        /* R0 is the top of the expression stack; older values are spilled */
+    char buf[32];                        /* digits of one literal; the width below leaves room for the NUL */
+    int n;
+    if (sscanf(expr, "%31[0-9]%n", buf, &n) == 1) {   /* bounded scan: cannot overrun buf */
+      expr += n - 1;                     /* leave expr on the last digit; ++expr below steps past it */
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movi(JIT_R0, atoi(buf));
+    } else if (*expr == 'x') {
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    } else if (*expr == '+') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '-') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);  /* R0 = second-from-top - top */
+    } else if (*expr == '*') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '/') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);  /* R0 = second-from-top / top */
+    } else {
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    }
+    ++expr;
+  }
+  jit_retr(JIT_R0);                      /* result is whatever is left on top */
+  jit_epilog();
+  return fn;
+}
+
+int main(int argc, char *argv[])   /* JIT two RPN temperature converters and print conversion tables */
+{
+  jit_node_t *nc, *nf;             /* start notes of the two compiled functions */
+  pifi c2f, f2c;                   /* their addresses after jit_emit() */
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");    /* 32 + x*9/5 */
+  nf = compile_rpn("x32-5*9/");    /* (x-32)*5/9 */
+  (void)jit_emit();                /* both functions are emitted together; entry points come from the notes */
+  c2f = (pifi)jit_address(nc);
+  f2c = (pifi)jit_address(nf);
+  jit_clear_state();
+
+  printf("\nC:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  jit_destroy_state();
+  finish_jit();
+  return 0;
+}
diff --git a/deps/lightning/doc/version.texi b/deps/lightning/doc/version.texi
new file mode 100644 (file)
index 0000000..b4a0c22
--- /dev/null
@@ -0,0 +1,4 @@
+@set UPDATED 3 October 2017
+@set UPDATED-MONTH October 2017
+@set EDITION 2.1.3
+@set VERSION 2.1.3
diff --git a/deps/lightning/include/Makefile.am b/deps/lightning/include/Makefile.am
new file mode 100644 (file)
index 0000000..8f91594
--- /dev/null
@@ -0,0 +1,20 @@
+#
+# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+SUBDIRS =              \
+       lightning
+
+nodist_include_HEADERS = lightning.h
diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in
new file mode 100644 (file)
index 0000000..70560c9
--- /dev/null
@@ -0,0 +1,1092 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _lightning_h
+#define _lightning_h
+
+#include <unistd.h>
+#include <stdlib.h>
+@MAYBE_INCLUDE_STDINT_H@
+#include <string.h>
+
+#if defined(__hpux) && defined(__hppa__)
+#  include <machine/param.h>
+#endif
+#if defined(__alpha__) && defined(__osf__)
+#  include <machine/endian.h>
+#endif
+
+#ifndef __WORDSIZE                             /* not provided by the system: derive the word size in bits */
+#  if defined(WORDSIZE)                                /* ppc darwin */
+#    define __WORDSIZE         WORDSIZE
+#  elif defined(__SIZEOF_POINTER__)            /* ppc aix */
+#    define __WORDSIZE         (__SIZEOF_POINTER__ << 3)
+#  elif defined(_ILP32)                                /* hppa hp-ux */
+#    define __WORDSIZE         32
+#  elif defined(_LP64)                         /* ia64 hp-ux (with cc +DD64) */
+#    define __WORDSIZE         64
+#  elif defined(_MIPS_SZPTR)                   /* mips irix */
+#    if _MIPS_SZPTR == 32
+#      define __WORDSIZE       32
+#    else
+#      define __WORDSIZE       64
+#    endif
+#  else                                                /* From FreeBSD 9.1 stdint.h */
+#    if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \
+       (UINTPTR_MAX == UINT64_MAX)
+#      define __WORDSIZE       64
+#    else
+#      define __WORDSIZE       32              /* last resort when nothing above matched */
+#    endif
+#  endif
+#endif
+#ifndef __LITTLE_ENDIAN                        /* constant that __BYTE_ORDER is compared against */
+#  if defined(LITTLE_ENDIAN)                   /* ppc darwin */
+#    define __LITTLE_ENDIAN    LITTLE_ENDIAN
+#  elif defined(__ORDER_LITTLE_ENDIAN__)       /* ppc aix */
+#    define __LITTLE_ENDIAN    __ORDER_LITTLE_ENDIAN__
+#  else
+#    define __LITTLE_ENDIAN    1234
+#  endif
+#endif
+#ifndef __BIG_ENDIAN                           /* constant that __BYTE_ORDER is compared against */
+#  if defined(BIG_ENDIAN)                      /* ppc darwin */
+#    define __BIG_ENDIAN       BIG_ENDIAN
+#  elif defined(__ORDER_BIG_ENDIAN__)          /* ppc aix */
+#    define __BIG_ENDIAN       __ORDER_BIG_ENDIAN__
+#  else
+#    define __BIG_ENDIAN       4321
+#  endif
+#endif
+#ifndef __BYTE_ORDER                           /* byte order of the target; build fails if it cannot be determined */
+#  if defined(BYTE_ORDER)                      /* ppc darwin */
+#    define __BYTE_ORDER       BYTE_ORDER
+#  elif defined(__BYTE_ORDER__)                        /* ppc aix */
+#    define __BYTE_ORDER       __BYTE_ORDER__
+#  elif defined(_BIG_ENDIAN)                   /* hppa hp-ux */
+#    define __BYTE_ORDER       __BIG_ENDIAN
+#  elif defined(__BIG_ENDIAN__)                        /* ia64 hp-ux */
+#    define __BYTE_ORDER       __BIG_ENDIAN
+#  elif defined(__i386__)                      /* 32 bit x86 solaris */
+#    define __BYTE_ORDER       __LITTLE_ENDIAN
+#  elif defined(__x86_64__)                    /* 64 bit x86 solaris */
+#    define __BYTE_ORDER       __LITTLE_ENDIAN
+#  elif defined(__MIPSEB)                      /* mips irix */
+#    define __BYTE_ORDER       __BIG_ENDIAN
+#  else
+#    error cannot figure __BYTE_ORDER
+#  endif
+#endif
+
+typedef signed char            jit_int8_t;     /* fixed-width integer aliases used by the jit_* API */
+typedef unsigned char          jit_uint8_t;
+typedef signed short           jit_int16_t;
+typedef unsigned short         jit_uint16_t;
+typedef signed int             jit_int32_t;
+typedef unsigned int           jit_uint32_t;
+#if __WORDSIZE == 32                           /* 32-bit target: jit_word_t is the 32-bit type */
+typedef signed long long       jit_int64_t;
+typedef unsigned long long     jit_uint64_t;
+typedef jit_int32_t            jit_word_t;
+typedef jit_uint32_t           jit_uword_t;
+#elif (_WIN32 && !__CYGWIN__)                  /* non-32-bit native Windows: 64-bit type is long long, not long */
+typedef signed long long       jit_int64_t;
+typedef unsigned long long     jit_uint64_t;
+typedef jit_int64_t            jit_word_t;
+typedef jit_uint64_t           jit_uword_t;
+#else                                          /* everything else: 64-bit type is long */
+typedef signed long            jit_int64_t;
+typedef unsigned long          jit_uint64_t;
+typedef jit_int64_t            jit_word_t;
+typedef jit_uint64_t           jit_uword_t;
+#endif
+typedef float                  jit_float32_t;
+typedef double                 jit_float64_t;
+typedef void*                  jit_pointer_t;
+typedef jit_int32_t            jit_bool_t;
+typedef jit_int32_t            jit_gpr_t;      /* register identifiers are plain 32-bit integers */
+typedef jit_int32_t            jit_fpr_t;
+
+#if defined(__i386__) || defined(__x86_64__)   /* pick the backend header for the target architecture */
+#  include <lightning/jit_x86.h>
+#elif defined(__mips__)
+#  include <lightning/jit_mips.h>
+#elif defined(__arm__)
+#  include <lightning/jit_arm.h>
+#elif defined(__powerpc__)
+#  include <lightning/jit_ppc.h>
+#elif defined(__sparc__)
+#  include <lightning/jit_sparc.h>
+#elif defined(__ia64__)
+#  include <lightning/jit_ia64.h>
+#elif defined(__hppa__)
+#  include <lightning/jit_hppa.h>
+#elif defined(__aarch64__)
+#  include <lightning/jit_aarch64.h>
+#elif defined(__s390__) || defined(__s390x__)
+#  include <lightning/jit_s390.h>
+#elif defined(__alpha__)
+#  include <lightning/jit_alpha.h>
+#elif defined(__riscv)
+#  include <lightning/jit_riscv.h>
+#endif                                         /* NOTE(review): no #else/#error, an unlisted architecture includes no backend header */
+
+#define jit_flag_node          0x0001  /* patch node not absolute */
+#define jit_flag_patch         0x0002  /* jump already patched */
+#define jit_flag_data          0x0004  /* data in the constant pool */
+#define jit_flag_use           0x0008  /* do not remove marker label */
+#define jit_flag_synth         0x0010  /* synthesized instruction */
+#define jit_flag_head          0x1000  /* label reached by normal flow */
+#define jit_flag_varargs       0x2000  /* call{r,i} to varargs function */
+/* Indexed access to registers and the register counts; all forward to jit_r/jit_v/jit_f and jit_*_num. */
+#define JIT_R(index)           jit_r(index)
+#define JIT_V(index)           jit_v(index)
+#define JIT_F(index)           jit_f(index)
+#define JIT_R_NUM              jit_r_num()
+#define JIT_V_NUM              jit_v_num()
+#define JIT_F_NUM              jit_f_num()
+/* Bit values that may be or'ed together. */
+#define JIT_DISABLE_DATA       1       /* force synthesize of constants */
+#define JIT_DISABLE_NOTE       2       /* disable debug info generation */
+/* Register class bits sit in the upper 16 bits of a register value; the number is in the low 15 bits. */
+#define jit_class_chk          0x02000000      /* just checking */
+#define jit_class_arg          0x08000000      /* argument register */
+#define jit_class_sav          0x10000000      /* callee save */
+#define jit_class_gpr          0x20000000      /* general purpose */
+#define jit_class_fpr          0x40000000      /* float */
+#define jit_class(reg)         ((reg) & 0xffff0000)
+#define jit_regno(reg)         ((reg) & 0x00007fff)
+/* Forward declarations only; the struct bodies are not defined in this part of the header. */
+typedef struct jit_node                jit_node_t;
+typedef struct jit_state       jit_state_t;
+
+typedef enum {
+    jit_code_data,
+#define jit_live(u)            jit_new_node_w(jit_code_live, u)
+#define jit_align(u)           jit_new_node_w(jit_code_align, u)
+    jit_code_live,             jit_code_align,
+    jit_code_save,             jit_code_load,
+#define jit_name(u)            _jit_name(_jit,u)
+    jit_code_name,
+#define jit_note(u, v)         _jit_note(_jit, u, v)
+#define jit_label()            _jit_label(_jit)
+#define jit_forward()          _jit_forward(_jit)
+#define jit_indirect()         _jit_indirect(_jit)
+#define jit_link(u)            _jit_link(_jit,u)
+    jit_code_note,             jit_code_label,
+
+#define jit_prolog()           _jit_prolog(_jit)
+    jit_code_prolog,
+
+#define jit_ellipsis()         _jit_ellipsis(_jit)
+    jit_code_ellipsis,
+#define jit_va_push(u)         _jit_va_push(_jit,u)
+    jit_code_va_push,
+#define jit_allocai(u)         _jit_allocai(_jit,u)
+#define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
+    jit_code_allocai,          jit_code_allocar,
+
+#define jit_arg()              _jit_arg(_jit)
+    jit_code_arg,
+#define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
+#define jit_getarg_uc(u,v)     _jit_getarg_uc(_jit,u,v)
+    jit_code_getarg_c,         jit_code_getarg_uc,
+#define jit_getarg_s(u,v)      _jit_getarg_s(_jit,u,v)
+#define jit_getarg_us(u,v)     _jit_getarg_us(_jit,u,v)
+    jit_code_getarg_s,         jit_code_getarg_us,
+#define jit_getarg_i(u,v)      _jit_getarg_i(_jit,u,v)
+#if __WORDSIZE == 32
+#  define jit_getarg(u,v)      jit_getarg_i(u,v)
+#else
+#  define jit_getarg(u,v)      jit_getarg_l(u,v)
+#  define jit_getarg_ui(u,v)   _jit_getarg_ui(_jit,u,v)
+#  define jit_getarg_l(u,v)    _jit_getarg_l(_jit,u,v)
+#endif
+    jit_code_getarg_i,         jit_code_getarg_ui,
+    jit_code_getarg_l,
+#  define jit_putargr(u,v)     _jit_putargr(_jit,u,v)
+#  define jit_putargi(u,v)     _jit_putargi(_jit,u,v)
+    jit_code_putargr,          jit_code_putargi,
+
+#define jit_va_start(u)                jit_new_node_w(jit_code_va_start, u)
+    jit_code_va_start,
+#define jit_va_arg(u, v)       jit_new_node_ww(jit_code_va_arg, u, v)
+#define jit_va_arg_d(u, v)     jit_new_node_ww(jit_code_va_arg_d, u, v)
+    jit_code_va_arg,           jit_code_va_arg_d,
+#define jit_va_end(u)          jit_new_node_w(jit_code_va_end, u)
+    jit_code_va_end,
+
+#define jit_addr(u,v,w)                jit_new_node_www(jit_code_addr,u,v,w)
+#define jit_addi(u,v,w)                jit_new_node_www(jit_code_addi,u,v,w)
+    jit_code_addr,             jit_code_addi,
+#define jit_addcr(u,v,w)       jit_new_node_www(jit_code_addcr,u,v,w)
+#define jit_addci(u,v,w)       jit_new_node_www(jit_code_addci,u,v,w)
+    jit_code_addcr,            jit_code_addci,
+#define jit_addxr(u,v,w)       jit_new_node_www(jit_code_addxr,u,v,w)
+#define jit_addxi(u,v,w)       jit_new_node_www(jit_code_addxi,u,v,w)
+    jit_code_addxr,            jit_code_addxi,
+#define jit_subr(u,v,w)                jit_new_node_www(jit_code_subr,u,v,w)
+#define jit_subi(u,v,w)                jit_new_node_www(jit_code_subi,u,v,w)
+    jit_code_subr,             jit_code_subi,
+#define jit_subcr(u,v,w)       jit_new_node_www(jit_code_subcr,u,v,w)
+#define jit_subci(u,v,w)       jit_new_node_www(jit_code_subci,u,v,w)
+    jit_code_subcr,            jit_code_subci,
+#define jit_subxr(u,v,w)       jit_new_node_www(jit_code_subxr,u,v,w)
+#define jit_subxi(u,v,w)       jit_new_node_www(jit_code_subxi,u,v,w)
+    jit_code_subxr,            jit_code_subxi,
+#define jit_rsbr(u,v,w)                jit_subr(u,w,v)
+#define jit_rsbi(u,v,w)                jit_new_node_www(jit_code_rsbi,u,v,w)
+    jit_code_rsbi,
+#define jit_mulr(u,v,w)                jit_new_node_www(jit_code_mulr,u,v,w)
+#define jit_muli(u,v,w)                jit_new_node_www(jit_code_muli,u,v,w)
+    jit_code_mulr,             jit_code_muli,
+#define jit_qmulr(l,h,v,w)     jit_new_node_qww(jit_code_qmulr,l,h,v,w)
+#define jit_qmuli(l,h,v,w)     jit_new_node_qww(jit_code_qmuli,l,h,v,w)
+    jit_code_qmulr,            jit_code_qmuli,
+#define jit_qmulr_u(l,h,v,w)   jit_new_node_qww(jit_code_qmulr_u,l,h,v,w)
+#define jit_qmuli_u(l,h,v,w)   jit_new_node_qww(jit_code_qmuli_u,l,h,v,w)
+    jit_code_qmulr_u,          jit_code_qmuli_u,
+#define jit_divr(u,v,w)                jit_new_node_www(jit_code_divr,u,v,w)
+#define jit_divi(u,v,w)                jit_new_node_www(jit_code_divi,u,v,w)
+    jit_code_divr,             jit_code_divi,
+#define jit_divr_u(u,v,w)      jit_new_node_www(jit_code_divr_u,u,v,w)
+#define jit_divi_u(u,v,w)      jit_new_node_www(jit_code_divi_u,u,v,w)
+    jit_code_divr_u,           jit_code_divi_u,
+#define jit_qdivr(l,h,v,w)     jit_new_node_qww(jit_code_qdivr,l,h,v,w)
+#define jit_qdivi(l,h,v,w)     jit_new_node_qww(jit_code_qdivi,l,h,v,w)
+    jit_code_qdivr,            jit_code_qdivi,
+#define jit_qdivr_u(l,h,v,w)   jit_new_node_qww(jit_code_qdivr_u,l,h,v,w)
+#define jit_qdivi_u(l,h,v,w)   jit_new_node_qww(jit_code_qdivi_u,l,h,v,w)
+    jit_code_qdivr_u,          jit_code_qdivi_u,
+#define jit_remr(u,v,w)                jit_new_node_www(jit_code_remr,u,v,w)
+#define jit_remi(u,v,w)                jit_new_node_www(jit_code_remi,u,v,w)
+    jit_code_remr,             jit_code_remi,
+#define jit_remr_u(u,v,w)      jit_new_node_www(jit_code_remr_u,u,v,w)
+#define jit_remi_u(u,v,w)      jit_new_node_www(jit_code_remi_u,u,v,w)
+    jit_code_remr_u,           jit_code_remi_u,
+
+#define jit_andr(u,v,w)                jit_new_node_www(jit_code_andr,u,v,w)
+#define jit_andi(u,v,w)                jit_new_node_www(jit_code_andi,u,v,w)
+    jit_code_andr,             jit_code_andi,
+#define jit_orr(u,v,w)         jit_new_node_www(jit_code_orr,u,v,w)
+#define jit_ori(u,v,w)         jit_new_node_www(jit_code_ori,u,v,w)
+    jit_code_orr,              jit_code_ori,
+#define jit_xorr(u,v,w)                jit_new_node_www(jit_code_xorr,u,v,w)
+#define jit_xori(u,v,w)                jit_new_node_www(jit_code_xori,u,v,w)
+    jit_code_xorr,             jit_code_xori,
+
+#define jit_lshr(u,v,w)                jit_new_node_www(jit_code_lshr,u,v,w)
+#define jit_lshi(u,v,w)                jit_new_node_www(jit_code_lshi,u,v,w)
+    jit_code_lshr,             jit_code_lshi,
+#define jit_rshr(u,v,w)                jit_new_node_www(jit_code_rshr,u,v,w)
+#define jit_rshi(u,v,w)                jit_new_node_www(jit_code_rshi,u,v,w)
+    jit_code_rshr,             jit_code_rshi,
+#define jit_rshr_u(u,v,w)      jit_new_node_www(jit_code_rshr_u,u,v,w)
+#define jit_rshi_u(u,v,w)      jit_new_node_www(jit_code_rshi_u,u,v,w)
+    jit_code_rshr_u,           jit_code_rshi_u,
+
+#define jit_negr(u,v)          jit_new_node_ww(jit_code_negr,u,v)
+#define jit_comr(u,v)          jit_new_node_ww(jit_code_comr,u,v)
+    jit_code_negr,             jit_code_comr,
+
+#define jit_ltr(u,v,w)         jit_new_node_www(jit_code_ltr,u,v,w)
+#define jit_lti(u,v,w)         jit_new_node_www(jit_code_lti,u,v,w)
+    jit_code_ltr,              jit_code_lti,
+#define jit_ltr_u(u,v,w)       jit_new_node_www(jit_code_ltr_u,u,v,w)
+#define jit_lti_u(u,v,w)       jit_new_node_www(jit_code_lti_u,u,v,w)
+    jit_code_ltr_u,            jit_code_lti_u,
+#define jit_ler(u,v,w)         jit_new_node_www(jit_code_ler,u,v,w)
+#define jit_lei(u,v,w)         jit_new_node_www(jit_code_lei,u,v,w)
+    jit_code_ler,              jit_code_lei,
+#define jit_ler_u(u,v,w)       jit_new_node_www(jit_code_ler_u,u,v,w)
+#define jit_lei_u(u,v,w)       jit_new_node_www(jit_code_lei_u,u,v,w)
+    jit_code_ler_u,            jit_code_lei_u,
+#define jit_eqr(u,v,w)         jit_new_node_www(jit_code_eqr,u,v,w)
+#define jit_eqi(u,v,w)         jit_new_node_www(jit_code_eqi,u,v,w)
+    jit_code_eqr,              jit_code_eqi,
+#define jit_ger(u,v,w)         jit_new_node_www(jit_code_ger,u,v,w)
+#define jit_gei(u,v,w)         jit_new_node_www(jit_code_gei,u,v,w)
+    jit_code_ger,              jit_code_gei,
+#define jit_ger_u(u,v,w)       jit_new_node_www(jit_code_ger_u,u,v,w)
+#define jit_gei_u(u,v,w)       jit_new_node_www(jit_code_gei_u,u,v,w)
+    jit_code_ger_u,            jit_code_gei_u,
+#define jit_gtr(u,v,w)         jit_new_node_www(jit_code_gtr,u,v,w)
+#define jit_gti(u,v,w)         jit_new_node_www(jit_code_gti,u,v,w)
+    jit_code_gtr,              jit_code_gti,
+#define jit_gtr_u(u,v,w)       jit_new_node_www(jit_code_gtr_u,u,v,w)
+#define jit_gti_u(u,v,w)       jit_new_node_www(jit_code_gti_u,u,v,w)
+    jit_code_gtr_u,            jit_code_gti_u,
+#define jit_ner(u,v,w)         jit_new_node_www(jit_code_ner,u,v,w)
+#define jit_nei(u,v,w)         jit_new_node_www(jit_code_nei,u,v,w)
+    jit_code_ner,              jit_code_nei,
+
+#define jit_movr(u,v)          jit_new_node_ww(jit_code_movr,u,v)
+#define jit_movi(u,v)          jit_new_node_ww(jit_code_movi,u,v)
+    jit_code_movr,             jit_code_movi,
+#define jit_extr_c(u,v)                jit_new_node_ww(jit_code_extr_c,u,v)
+#define jit_extr_uc(u,v)       jit_new_node_ww(jit_code_extr_uc,u,v)
+    jit_code_extr_c,           jit_code_extr_uc,
+#define jit_extr_s(u,v)                jit_new_node_ww(jit_code_extr_s,u,v)
+#define jit_extr_us(u,v)       jit_new_node_ww(jit_code_extr_us,u,v)
+    jit_code_extr_s,           jit_code_extr_us,
+#if __WORDSIZE == 64
+#  define jit_extr_i(u,v)      jit_new_node_ww(jit_code_extr_i,u,v)
+#  define jit_extr_ui(u,v)     jit_new_node_ww(jit_code_extr_ui,u,v)
+#endif
+    jit_code_extr_i,           jit_code_extr_ui,
+
+#define jit_htonr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
+#define jit_ntohr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
+    jit_code_htonr_us,
+#define jit_htonr_ui(u,v)      jit_new_node_ww(jit_code_htonr_ui,u,v)
+#define jit_ntohr_ui(u,v)      jit_new_node_ww(jit_code_htonr_ui,u,v)
+#if __WORDSIZE == 32
+#  define jit_htonr(u,v)       jit_new_node_ww(jit_code_htonr_ui,u,v)
+#  define jit_ntohr(u,v)       jit_new_node_ww(jit_code_htonr_ui,u,v)
+#else
+#define jit_htonr_ul(u,v)      jit_new_node_ww(jit_code_htonr_ul,u,v)
+#define jit_ntohr_ul(u,v)      jit_new_node_ww(jit_code_htonr_ul,u,v)
+#  define jit_htonr(u,v)       jit_new_node_ww(jit_code_htonr_ul,u,v)
+#  define jit_ntohr(u,v)       jit_new_node_ww(jit_code_htonr_ul,u,v)
+#endif
+    jit_code_htonr_ui,         jit_code_htonr_ul,
+
+#define jit_ldr_c(u,v)         jit_new_node_ww(jit_code_ldr_c,u,v)
+#define jit_ldi_c(u,v)         jit_new_node_wp(jit_code_ldi_c,u,v)
+    jit_code_ldr_c,            jit_code_ldi_c,
+#define jit_ldr_uc(u,v)                jit_new_node_ww(jit_code_ldr_uc,u,v)
+#define jit_ldi_uc(u,v)                jit_new_node_wp(jit_code_ldi_uc,u,v)
+    jit_code_ldr_uc,           jit_code_ldi_uc,
+#define jit_ldr_s(u,v)         jit_new_node_ww(jit_code_ldr_s,u,v)
+#define jit_ldi_s(u,v)         jit_new_node_wp(jit_code_ldi_s,u,v)
+    jit_code_ldr_s,            jit_code_ldi_s,
+#define jit_ldr_us(u,v)                jit_new_node_ww(jit_code_ldr_us,u,v)
+#define jit_ldi_us(u,v)                jit_new_node_wp(jit_code_ldi_us,u,v)
+    jit_code_ldr_us,           jit_code_ldi_us,
+#define jit_ldr_i(u,v)         jit_new_node_ww(jit_code_ldr_i,u,v)
+#define jit_ldi_i(u,v)         jit_new_node_wp(jit_code_ldi_i,u,v)
+    jit_code_ldr_i,            jit_code_ldi_i,
+#if __WORDSIZE == 32
+#  define jit_ldr(u,v)         jit_ldr_i(u,v)
+#  define jit_ldi(u,v)         jit_ldi_i(u,v)
+#else
+#  define jit_ldr(u,v)         jit_ldr_l(u,v)
+#  define jit_ldi(u,v)         jit_ldi_l(u,v)
+#  define jit_ldr_ui(u,v)      jit_new_node_ww(jit_code_ldr_ui,u,v)
+#  define jit_ldi_ui(u,v)      jit_new_node_wp(jit_code_ldi_ui,u,v)
+#define jit_ldr_l(u,v)         jit_new_node_ww(jit_code_ldr_l,u,v)
+#define jit_ldi_l(u,v)         jit_new_node_wp(jit_code_ldi_l,u,v)
+#endif
+    jit_code_ldr_ui,           jit_code_ldi_ui,
+    jit_code_ldr_l,            jit_code_ldi_l,
+
+#define jit_ldxr_c(u,v,w)      jit_new_node_www(jit_code_ldxr_c,u,v,w)
+#define jit_ldxi_c(u,v,w)      jit_new_node_www(jit_code_ldxi_c,u,v,w)
+    jit_code_ldxr_c,           jit_code_ldxi_c,
+#define jit_ldxr_uc(u,v,w)     jit_new_node_www(jit_code_ldxr_uc,u,v,w)
+#define jit_ldxi_uc(u,v,w)     jit_new_node_www(jit_code_ldxi_uc,u,v,w)
+    jit_code_ldxr_uc,          jit_code_ldxi_uc,
+#define jit_ldxr_s(u,v,w)      jit_new_node_www(jit_code_ldxr_s,u,v,w)
+#define jit_ldxi_s(u,v,w)      jit_new_node_www(jit_code_ldxi_s,u,v,w)
+    jit_code_ldxr_s,           jit_code_ldxi_s,
+#define jit_ldxr_us(u,v,w)     jit_new_node_www(jit_code_ldxr_us,u,v,w)
+#define jit_ldxi_us(u,v,w)     jit_new_node_www(jit_code_ldxi_us,u,v,w)
+    jit_code_ldxr_us,          jit_code_ldxi_us,
+#define jit_ldxr_i(u,v,w)      jit_new_node_www(jit_code_ldxr_i,u,v,w)
+#define jit_ldxi_i(u,v,w)      jit_new_node_www(jit_code_ldxi_i,u,v,w)
+    jit_code_ldxr_i,           jit_code_ldxi_i,
+#if __WORDSIZE == 32
+#  define jit_ldxr(u,v,w)      jit_ldxr_i(u,v,w)
+#  define jit_ldxi(u,v,w)      jit_ldxi_i(u,v,w)
+#else
+#  define jit_ldxr_ui(u,v,w)   jit_new_node_www(jit_code_ldxr_ui,u,v,w)
+#  define jit_ldxi_ui(u,v,w)   jit_new_node_www(jit_code_ldxi_ui,u,v,w)
+#  define jit_ldxr_l(u,v,w)    jit_new_node_www(jit_code_ldxr_l,u,v,w)
+#  define jit_ldxi_l(u,v,w)    jit_new_node_www(jit_code_ldxi_l,u,v,w)
+#  define jit_ldxr(u,v,w)      jit_ldxr_l(u,v,w)
+#  define jit_ldxi(u,v,w)      jit_ldxi_l(u,v,w)
+#endif
+    jit_code_ldxr_ui,          jit_code_ldxi_ui,
+    jit_code_ldxr_l,           jit_code_ldxi_l,
+
+#define jit_str_c(u,v)         jit_new_node_ww(jit_code_str_c,u,v)
+#define jit_sti_c(u,v)         jit_new_node_pw(jit_code_sti_c,u,v)
+    jit_code_str_c,            jit_code_sti_c,
+#define jit_str_s(u,v)         jit_new_node_ww(jit_code_str_s,u,v)
+#define jit_sti_s(u,v)         jit_new_node_pw(jit_code_sti_s,u,v)
+    jit_code_str_s,            jit_code_sti_s,
+#define jit_str_i(u,v)         jit_new_node_ww(jit_code_str_i,u,v)
+#define jit_sti_i(u,v)         jit_new_node_pw(jit_code_sti_i,u,v)
+    jit_code_str_i,            jit_code_sti_i,
+#if __WORDSIZE == 32
+#  define jit_str(u,v)         jit_str_i(u,v)
+#  define jit_sti(u,v)         jit_sti_i(u,v)
+#else
+#  define jit_str(u,v)         jit_str_l(u,v)
+#  define jit_sti(u,v)         jit_sti_l(u,v)
+#  define jit_str_l(u,v)       jit_new_node_ww(jit_code_str_l,u,v)
+#  define jit_sti_l(u,v)       jit_new_node_pw(jit_code_sti_l,u,v)
+#endif
+    jit_code_str_l,            jit_code_sti_l,
+
+#define jit_stxr_c(u,v,w)      jit_new_node_www(jit_code_stxr_c,u,v,w)
+#define jit_stxi_c(u,v,w)      jit_new_node_www(jit_code_stxi_c,u,v,w)
+    jit_code_stxr_c,           jit_code_stxi_c,
+#define jit_stxr_s(u,v,w)      jit_new_node_www(jit_code_stxr_s,u,v,w)
+#define jit_stxi_s(u,v,w)      jit_new_node_www(jit_code_stxi_s,u,v,w)
+    jit_code_stxr_s,           jit_code_stxi_s,
+#define jit_stxr_i(u,v,w)      jit_new_node_www(jit_code_stxr_i,u,v,w)
+#define jit_stxi_i(u,v,w)      jit_new_node_www(jit_code_stxi_i,u,v,w)
+    jit_code_stxr_i,           jit_code_stxi_i,
+#if __WORDSIZE == 32
+#  define jit_stxr(u,v,w)      jit_stxr_i(u,v,w)
+#  define jit_stxi(u,v,w)      jit_stxi_i(u,v,w)
+#else
+#  define jit_stxr(u,v,w)      jit_stxr_l(u,v,w)
+#  define jit_stxi(u,v,w)      jit_stxi_l(u,v,w)
+#  define jit_stxr_l(u,v,w)    jit_new_node_www(jit_code_stxr_l,u,v,w)
+#  define jit_stxi_l(u,v,w)    jit_new_node_www(jit_code_stxi_l,u,v,w)
+#endif
+    jit_code_stxr_l,           jit_code_stxi_l,
+
+#define jit_bltr(v,w)          jit_new_node_pww(jit_code_bltr,NULL,v,w)
+#define jit_blti(v,w)          jit_new_node_pww(jit_code_blti,NULL,v,w)
+    jit_code_bltr,             jit_code_blti,
+#define jit_bltr_u(v,w)                jit_new_node_pww(jit_code_bltr_u,NULL,v,w)
+#define jit_blti_u(v,w)                jit_new_node_pww(jit_code_blti_u,NULL,v,w)
+    jit_code_bltr_u,           jit_code_blti_u,
+#define jit_bler(v,w)          jit_new_node_pww(jit_code_bler,NULL,v,w)
+#define jit_blei(v,w)          jit_new_node_pww(jit_code_blei,NULL,v,w)
+    jit_code_bler,             jit_code_blei,
+#define jit_bler_u(v,w)                jit_new_node_pww(jit_code_bler_u,NULL,v,w)
+#define jit_blei_u(v,w)                jit_new_node_pww(jit_code_blei_u,NULL,v,w)
+    jit_code_bler_u,           jit_code_blei_u,
+#define jit_beqr(v,w)          jit_new_node_pww(jit_code_beqr,NULL,v,w)
+#define jit_beqi(v,w)          jit_new_node_pww(jit_code_beqi,NULL,v,w)
+    jit_code_beqr,             jit_code_beqi,
+#define jit_bger(v,w)          jit_new_node_pww(jit_code_bger,NULL,v,w)
+#define jit_bgei(v,w)          jit_new_node_pww(jit_code_bgei,NULL,v,w)
+    jit_code_bger,             jit_code_bgei,
+#define jit_bger_u(v,w)                jit_new_node_pww(jit_code_bger_u,NULL,v,w)
+#define jit_bgei_u(v,w)                jit_new_node_pww(jit_code_bgei_u,NULL,v,w)
+    jit_code_bger_u,           jit_code_bgei_u,
+#define jit_bgtr(v,w)          jit_new_node_pww(jit_code_bgtr,NULL,v,w)
+#define jit_bgti(v,w)          jit_new_node_pww(jit_code_bgti,NULL,v,w)
+    jit_code_bgtr,             jit_code_bgti,
+#define jit_bgtr_u(v,w)                jit_new_node_pww(jit_code_bgtr_u,NULL,v,w)
+#define jit_bgti_u(v,w)                jit_new_node_pww(jit_code_bgti_u,NULL,v,w)
+    jit_code_bgtr_u,           jit_code_bgti_u,
+#define jit_bner(v,w)          jit_new_node_pww(jit_code_bner,NULL,v,w)
+#define jit_bnei(v,w)          jit_new_node_pww(jit_code_bnei,NULL,v,w)
+    jit_code_bner,             jit_code_bnei,
+
+#define jit_bmsr(v,w)          jit_new_node_pww(jit_code_bmsr,NULL,v,w)
+#define jit_bmsi(v,w)          jit_new_node_pww(jit_code_bmsi,NULL,v,w)
+    jit_code_bmsr,             jit_code_bmsi,
+#define jit_bmcr(v,w)          jit_new_node_pww(jit_code_bmcr,NULL,v,w)
+#define jit_bmci(v,w)          jit_new_node_pww(jit_code_bmci,NULL,v,w)
+    jit_code_bmcr,             jit_code_bmci,
+
+#define jit_boaddr(v,w)                jit_new_node_pww(jit_code_boaddr,NULL,v,w)
+#define jit_boaddi(v,w)                jit_new_node_pww(jit_code_boaddi,NULL,v,w)
+    jit_code_boaddr,           jit_code_boaddi,
+#define jit_boaddr_u(v,w)      jit_new_node_pww(jit_code_boaddr_u,NULL,v,w)
+#define jit_boaddi_u(v,w)      jit_new_node_pww(jit_code_boaddi_u,NULL,v,w)
+    jit_code_boaddr_u,         jit_code_boaddi_u,
+#define jit_bxaddr(v,w)                jit_new_node_pww(jit_code_bxaddr,NULL,v,w)
+#define jit_bxaddi(v,w)                jit_new_node_pww(jit_code_bxaddi,NULL,v,w)
+    jit_code_bxaddr,           jit_code_bxaddi,
+#define jit_bxaddr_u(v,w)      jit_new_node_pww(jit_code_bxaddr_u,NULL,v,w)
+#define jit_bxaddi_u(v,w)      jit_new_node_pww(jit_code_bxaddi_u,NULL,v,w)
+    jit_code_bxaddr_u,         jit_code_bxaddi_u,
+#define jit_bosubr(v,w)                jit_new_node_pww(jit_code_bosubr,NULL,v,w)
+#define jit_bosubi(v,w)                jit_new_node_pww(jit_code_bosubi,NULL,v,w)
+    jit_code_bosubr,           jit_code_bosubi,
+#define jit_bosubr_u(v,w)      jit_new_node_pww(jit_code_bosubr_u,NULL,v,w)
+#define jit_bosubi_u(v,w)      jit_new_node_pww(jit_code_bosubi_u,NULL,v,w)
+    jit_code_bosubr_u,         jit_code_bosubi_u,
+#define jit_bxsubr(v,w)                jit_new_node_pww(jit_code_bxsubr,NULL,v,w)
+#define jit_bxsubi(v,w)                jit_new_node_pww(jit_code_bxsubi,NULL,v,w)
+    jit_code_bxsubr,           jit_code_bxsubi,
+#define jit_bxsubr_u(v,w)      jit_new_node_pww(jit_code_bxsubr_u,NULL,v,w)
+#define jit_bxsubi_u(v,w)      jit_new_node_pww(jit_code_bxsubi_u,NULL,v,w)
+    jit_code_bxsubr_u,         jit_code_bxsubi_u,
+
+#define jit_jmpr(u)            jit_new_node_w(jit_code_jmpr,u)
+#define jit_jmpi()             jit_new_node_p(jit_code_jmpi,NULL)
+    jit_code_jmpr,             jit_code_jmpi,
+#define jit_callr(u)           jit_new_node_w(jit_code_callr,u)
+#define jit_calli(u)           jit_new_node_p(jit_code_calli,u)
+    jit_code_callr,            jit_code_calli,
+
+#define jit_prepare()          _jit_prepare(_jit)
+    jit_code_prepare,
+#define jit_pushargr(u)                _jit_pushargr(_jit,u)
+#define jit_pushargi(u)                _jit_pushargi(_jit,u)
+    jit_code_pushargr,         jit_code_pushargi,
+#define jit_finishr(u)         _jit_finishr(_jit,u)
+#define jit_finishi(u)         _jit_finishi(_jit,u)
+    jit_code_finishr,          jit_code_finishi,
+#define jit_ret()              _jit_ret(_jit)
+    jit_code_ret,
+#define jit_retr(u)            _jit_retr(_jit,u)
+#define jit_reti(u)            _jit_reti(_jit,u)
+    jit_code_retr,             jit_code_reti,
+#define jit_retval_c(u)                _jit_retval_c(_jit,u)
+#define jit_retval_uc(u)       _jit_retval_uc(_jit,u)
+    jit_code_retval_c,         jit_code_retval_uc,
+#define jit_retval_s(u)                _jit_retval_s(_jit,u)
+#define jit_retval_us(u)       _jit_retval_us(_jit,u)
+    jit_code_retval_s,         jit_code_retval_us,
+#define jit_retval_i(u)                _jit_retval_i(_jit,u)
+#if __WORDSIZE == 32
+#  define jit_retval(u)                jit_retval_i(u)
+#else
+#  define jit_retval(u)                jit_retval_l(u)
+#  define jit_retval_ui(u)     _jit_retval_ui(_jit,u)
+#  define jit_retval_l(u)      _jit_retval_l(_jit,u)
+#endif
+    jit_code_retval_i,         jit_code_retval_ui,
+    jit_code_retval_l,
+
+#define jit_epilog()           _jit_epilog(_jit)
+    jit_code_epilog,
+
+#define jit_arg_f()            _jit_arg_f(_jit)
+    jit_code_arg_f,
+#define jit_getarg_f(u,v)      _jit_getarg_f(_jit,u,v)
+    jit_code_getarg_f,
+#define jit_putargr_f(u,v)     _jit_putargr_f(_jit,u,v)
+#define jit_putargi_f(u,v)     _jit_putargi_f(_jit,u,v)
+    jit_code_putargr_f,                jit_code_putargi_f,
+
+#define jit_addr_f(u,v,w)      jit_new_node_www(jit_code_addr_f,u,v,w)
+#define jit_addi_f(u,v,w)      jit_new_node_wwf(jit_code_addi_f,u,v,w)
+    jit_code_addr_f,           jit_code_addi_f,
+#define jit_subr_f(u,v,w)      jit_new_node_www(jit_code_subr_f,u,v,w)
+#define jit_subi_f(u,v,w)      jit_new_node_wwf(jit_code_subi_f,u,v,w)
+    jit_code_subr_f,           jit_code_subi_f,
+#define jit_rsbr_f(u,v,w)      jit_subr_f(u,w,v)
+#define jit_rsbi_f(u,v,w)      jit_new_node_wwf(jit_code_rsbi_f,u,v,w)
+    jit_code_rsbi_f,
+#define jit_mulr_f(u,v,w)      jit_new_node_www(jit_code_mulr_f,u,v,w)
+#define jit_muli_f(u,v,w)      jit_new_node_wwf(jit_code_muli_f,u,v,w)
+    jit_code_mulr_f,           jit_code_muli_f,
+#define jit_divr_f(u,v,w)      jit_new_node_www(jit_code_divr_f,u,v,w)
+#define jit_divi_f(u,v,w)      jit_new_node_wwf(jit_code_divi_f,u,v,w)
+    jit_code_divr_f,           jit_code_divi_f,
+#define jit_negr_f(u,v)                jit_new_node_ww(jit_code_negr_f,u,v)
+#define jit_absr_f(u,v)                jit_new_node_ww(jit_code_absr_f,u,v)
+#define jit_sqrtr_f(u,v)       jit_new_node_ww(jit_code_sqrtr_f,u,v)
+    jit_code_negr_f,           jit_code_absr_f,        jit_code_sqrtr_f,
+
+#define jit_ltr_f(u,v,w)       jit_new_node_www(jit_code_ltr_f,u,v,w)
+#define jit_lti_f(u,v,w)       jit_new_node_wwf(jit_code_lti_f,u,v,w)
+    jit_code_ltr_f,            jit_code_lti_f,
+#define jit_ler_f(u,v,w)       jit_new_node_www(jit_code_ler_f,u,v,w)
+#define jit_lei_f(u,v,w)       jit_new_node_wwf(jit_code_lei_f,u,v,w)
+    jit_code_ler_f,            jit_code_lei_f,
+#define jit_eqr_f(u,v,w)       jit_new_node_www(jit_code_eqr_f,u,v,w)
+#define jit_eqi_f(u,v,w)       jit_new_node_wwf(jit_code_eqi_f,u,v,w)
+    jit_code_eqr_f,            jit_code_eqi_f,
+#define jit_ger_f(u,v,w)       jit_new_node_www(jit_code_ger_f,u,v,w)
+#define jit_gei_f(u,v,w)       jit_new_node_wwf(jit_code_gei_f,u,v,w)
+    jit_code_ger_f,            jit_code_gei_f,
+#define jit_gtr_f(u,v,w)       jit_new_node_www(jit_code_gtr_f,u,v,w)
+#define jit_gti_f(u,v,w)       jit_new_node_wwf(jit_code_gti_f,u,v,w)
+    jit_code_gtr_f,            jit_code_gti_f,
+#define jit_ner_f(u,v,w)       jit_new_node_www(jit_code_ner_f,u,v,w)
+#define jit_nei_f(u,v,w)       jit_new_node_wwf(jit_code_nei_f,u,v,w)
+    jit_code_ner_f,            jit_code_nei_f,
+#define jit_unltr_f(u,v,w)     jit_new_node_www(jit_code_unltr_f,u,v,w)
+#define jit_unlti_f(u,v,w)     jit_new_node_wwf(jit_code_unlti_f,u,v,w)
+    jit_code_unltr_f,          jit_code_unlti_f,
+#define jit_unler_f(u,v,w)     jit_new_node_www(jit_code_unler_f,u,v,w)
+#define jit_unlei_f(u,v,w)     jit_new_node_wwf(jit_code_unlei_f,u,v,w)
+    jit_code_unler_f,          jit_code_unlei_f,
+#define jit_uneqr_f(u,v,w)     jit_new_node_www(jit_code_uneqr_f,u,v,w)
+#define jit_uneqi_f(u,v,w)     jit_new_node_wwf(jit_code_uneqi_f,u,v,w)
+    jit_code_uneqr_f,          jit_code_uneqi_f,
+#define jit_unger_f(u,v,w)     jit_new_node_www(jit_code_unger_f,u,v,w)
+#define jit_ungei_f(u,v,w)     jit_new_node_wwf(jit_code_ungei_f,u,v,w)
+    jit_code_unger_f,          jit_code_ungei_f,
+#define jit_ungtr_f(u,v,w)     jit_new_node_www(jit_code_ungtr_f,u,v,w)
+#define jit_ungti_f(u,v,w)     jit_new_node_wwf(jit_code_ungti_f,u,v,w)
+    jit_code_ungtr_f,          jit_code_ungti_f,
+#define jit_ltgtr_f(u,v,w)     jit_new_node_www(jit_code_ltgtr_f,u,v,w)
+#define jit_ltgti_f(u,v,w)     jit_new_node_wwf(jit_code_ltgti_f,u,v,w)
+    jit_code_ltgtr_f,          jit_code_ltgti_f,
+#define jit_ordr_f(u,v,w)      jit_new_node_www(jit_code_ordr_f,u,v,w)
+#define jit_ordi_f(u,v,w)      jit_new_node_wwf(jit_code_ordi_f,u,v,w)
+    jit_code_ordr_f,           jit_code_ordi_f,
+#define jit_unordr_f(u,v,w)    jit_new_node_www(jit_code_unordr_f,u,v,w)
+#define jit_unordi_f(u,v,w)    jit_new_node_wwf(jit_code_unordi_f,u,v,w)
+    jit_code_unordr_f,         jit_code_unordi_f,
+
+#define jit_truncr_f_i(u,v)    jit_new_node_ww(jit_code_truncr_f_i,u,v)
+    jit_code_truncr_f_i,
+#if __WORDSIZE == 32
+#  define jit_truncr_f(u,v)    jit_truncr_f_i(u,v)
+#else
+#  define jit_truncr_f(u,v)    jit_truncr_f_l(u,v)
+#  define jit_truncr_f_l(u,v)  jit_new_node_ww(jit_code_truncr_f_l,u,v)
+#endif
+    jit_code_truncr_f_l,
+#define jit_extr_f(u,v)                jit_new_node_ww(jit_code_extr_f,u,v)
+#define jit_extr_d_f(u,v)      jit_new_node_ww(jit_code_extr_d_f,u,v)
+    jit_code_extr_f,           jit_code_extr_d_f,
+#define jit_movr_f(u,v)                jit_new_node_ww(jit_code_movr_f,u,v)
+#define jit_movi_f(u,v)                jit_new_node_wf(jit_code_movi_f,u,v)
+    jit_code_movr_f,           jit_code_movi_f,
+
+#define jit_ldr_f(u,v)         jit_new_node_ww(jit_code_ldr_f,u,v)
+#define jit_ldi_f(u,v)         jit_new_node_wp(jit_code_ldi_f,u,v)
+    jit_code_ldr_f,            jit_code_ldi_f,
+#define jit_ldxr_f(u,v,w)      jit_new_node_www(jit_code_ldxr_f,u,v,w)
+#define jit_ldxi_f(u,v,w)      jit_new_node_www(jit_code_ldxi_f,u,v,w)
+    jit_code_ldxr_f,           jit_code_ldxi_f,
+#define jit_str_f(u,v)         jit_new_node_ww(jit_code_str_f,u,v)
+#define jit_sti_f(u,v)         jit_new_node_pw(jit_code_sti_f,u,v)
+    jit_code_str_f,            jit_code_sti_f,
+#define jit_stxr_f(u,v,w)      jit_new_node_www(jit_code_stxr_f,u,v,w)
+#define jit_stxi_f(u,v,w)      jit_new_node_www(jit_code_stxi_f,u,v,w)
+    jit_code_stxr_f,           jit_code_stxi_f,
+
+#define jit_bltr_f(v,w)                jit_new_node_pww(jit_code_bltr_f,NULL,v,w)
+#define jit_blti_f(v,w)                jit_new_node_pwf(jit_code_blti_f,NULL,v,w)
+    jit_code_bltr_f,           jit_code_blti_f,
+#define jit_bler_f(v,w)                jit_new_node_pww(jit_code_bler_f,NULL,v,w)
+#define jit_blei_f(v,w)                jit_new_node_pwf(jit_code_blei_f,NULL,v,w)
+    jit_code_bler_f,           jit_code_blei_f,
+#define jit_beqr_f(v,w)                jit_new_node_pww(jit_code_beqr_f,NULL,v,w)
+#define jit_beqi_f(v,w)                jit_new_node_pwf(jit_code_beqi_f,NULL,v,w)
+    jit_code_beqr_f,           jit_code_beqi_f,
+#define jit_bger_f(v,w)                jit_new_node_pww(jit_code_bger_f,NULL,v,w)
+#define jit_bgei_f(v,w)                jit_new_node_pwf(jit_code_bgei_f,NULL,v,w)
+    jit_code_bger_f,           jit_code_bgei_f,
+#define jit_bgtr_f(v,w)                jit_new_node_pww(jit_code_bgtr_f,NULL,v,w)
+#define jit_bgti_f(v,w)                jit_new_node_pwf(jit_code_bgti_f,NULL,v,w)
+    jit_code_bgtr_f,           jit_code_bgti_f,
+#define jit_bner_f(v,w)                jit_new_node_pww(jit_code_bner_f,NULL,v,w)
+#define jit_bnei_f(v,w)                jit_new_node_pwf(jit_code_bnei_f,NULL,v,w)
+    jit_code_bner_f,           jit_code_bnei_f,
+#define jit_bunltr_f(v,w)      jit_new_node_pww(jit_code_bunltr_f,NULL,v,w)
+#define jit_bunlti_f(v,w)      jit_new_node_pwf(jit_code_bunlti_f,NULL,v,w)
+    jit_code_bunltr_f,         jit_code_bunlti_f,
+#define jit_bunler_f(v,w)      jit_new_node_pww(jit_code_bunler_f,NULL,v,w)
+#define jit_bunlei_f(v,w)      jit_new_node_pwf(jit_code_bunlei_f,NULL,v,w)
+    jit_code_bunler_f,         jit_code_bunlei_f,
+#define jit_buneqr_f(v,w)      jit_new_node_pww(jit_code_buneqr_f,NULL,v,w)
+#define jit_buneqi_f(v,w)      jit_new_node_pwf(jit_code_buneqi_f,NULL,v,w)
+    jit_code_buneqr_f,         jit_code_buneqi_f,
+#define jit_bunger_f(v,w)      jit_new_node_pww(jit_code_bunger_f,NULL,v,w)
+#define jit_bungei_f(v,w)      jit_new_node_pwf(jit_code_bungei_f,NULL,v,w)
+    jit_code_bunger_f,         jit_code_bungei_f,
+#define jit_bungtr_f(v,w)      jit_new_node_pww(jit_code_bungtr_f,NULL,v,w)
+#define jit_bungti_f(v,w)      jit_new_node_pwf(jit_code_bungti_f,NULL,v,w)
+    jit_code_bungtr_f,         jit_code_bungti_f,
+#define jit_bltgtr_f(v,w)      jit_new_node_pww(jit_code_bltgtr_f,NULL,v,w)
+#define jit_bltgti_f(v,w)      jit_new_node_pwf(jit_code_bltgti_f,NULL,v,w)
+    jit_code_bltgtr_f,         jit_code_bltgti_f,
+#define jit_bordr_f(v,w)       jit_new_node_pww(jit_code_bordr_f,NULL,v,w)
+#define jit_bordi_f(v,w)       jit_new_node_pwf(jit_code_bordi_f,NULL,v,w)
+    jit_code_bordr_f,          jit_code_bordi_f,
+#define jit_bunordr_f(v,w)     jit_new_node_pww(jit_code_bunordr_f,NULL,v,w)
+#define jit_bunordi_f(v,w)     jit_new_node_pwf(jit_code_bunordi_f,NULL,v,w)
+    jit_code_bunordr_f,                jit_code_bunordi_f,
+
+#define jit_pushargr_f(u)      _jit_pushargr_f(_jit,u)
+#define jit_pushargi_f(u)      _jit_pushargi_f(_jit,u)
+    jit_code_pushargr_f,       jit_code_pushargi_f,
+#define jit_retr_f(u)          _jit_retr_f(_jit,u)
+#define jit_reti_f(u)          _jit_reti_f(_jit,u)
+    jit_code_retr_f,           jit_code_reti_f,
+#define jit_retval_f(u)                _jit_retval_f(_jit,u)
+    jit_code_retval_f,
+
+#define jit_arg_d()            _jit_arg_d(_jit)
+    jit_code_arg_d,
+#define jit_getarg_d(u,v)      _jit_getarg_d(_jit,u,v)
+    jit_code_getarg_d,
+#define jit_putargr_d(u,v)     _jit_putargr_d(_jit,u,v)
+#define jit_putargi_d(u,v)     _jit_putargi_d(_jit,u,v)
+    jit_code_putargr_d,                jit_code_putargi_d,
+
+#define jit_addr_d(u,v,w)      jit_new_node_www(jit_code_addr_d,u,v,w)
+#define jit_addi_d(u,v,w)      jit_new_node_wwd(jit_code_addi_d,u,v,w)
+    jit_code_addr_d,           jit_code_addi_d,
+#define jit_subr_d(u,v,w)      jit_new_node_www(jit_code_subr_d,u,v,w)
+#define jit_subi_d(u,v,w)      jit_new_node_wwd(jit_code_subi_d,u,v,w)
+    jit_code_subr_d,           jit_code_subi_d,
+#define jit_rsbr_d(u,v,w)      jit_subr_d(u,w,v)
+#define jit_rsbi_d(u,v,w)      jit_new_node_wwd(jit_code_rsbi_d,u,v,w)
+    jit_code_rsbi_d,
+#define jit_mulr_d(u,v,w)      jit_new_node_www(jit_code_mulr_d,u,v,w)
+#define jit_muli_d(u,v,w)      jit_new_node_wwd(jit_code_muli_d,u,v,w)
+    jit_code_mulr_d,           jit_code_muli_d,
+#define jit_divr_d(u,v,w)      jit_new_node_www(jit_code_divr_d,u,v,w)
+#define jit_divi_d(u,v,w)      jit_new_node_wwd(jit_code_divi_d,u,v,w)
+    jit_code_divr_d,           jit_code_divi_d,
+
+#define jit_negr_d(u,v)                jit_new_node_ww(jit_code_negr_d,u,v)
+#define jit_absr_d(u,v)                jit_new_node_ww(jit_code_absr_d,u,v)
+#define jit_sqrtr_d(u,v)       jit_new_node_ww(jit_code_sqrtr_d,u,v)
+    jit_code_negr_d,           jit_code_absr_d,        jit_code_sqrtr_d,
+
+#define jit_ltr_d(u,v,w)       jit_new_node_www(jit_code_ltr_d,u,v,w)
+#define jit_lti_d(u,v,w)       jit_new_node_wwd(jit_code_lti_d,u,v,w)
+    jit_code_ltr_d,            jit_code_lti_d,
+#define jit_ler_d(u,v,w)       jit_new_node_www(jit_code_ler_d,u,v,w)
+#define jit_lei_d(u,v,w)       jit_new_node_wwd(jit_code_lei_d,u,v,w)
+    jit_code_ler_d,            jit_code_lei_d,
+#define jit_eqr_d(u,v,w)       jit_new_node_www(jit_code_eqr_d,u,v,w)
+#define jit_eqi_d(u,v,w)       jit_new_node_wwd(jit_code_eqi_d,u,v,w)
+    jit_code_eqr_d,            jit_code_eqi_d,
+#define jit_ger_d(u,v,w)       jit_new_node_www(jit_code_ger_d,u,v,w)
+#define jit_gei_d(u,v,w)       jit_new_node_wwd(jit_code_gei_d,u,v,w)
+    jit_code_ger_d,            jit_code_gei_d,
+#define jit_gtr_d(u,v,w)       jit_new_node_www(jit_code_gtr_d,u,v,w)
+#define jit_gti_d(u,v,w)       jit_new_node_wwd(jit_code_gti_d,u,v,w)
+    jit_code_gtr_d,            jit_code_gti_d,
+#define jit_ner_d(u,v,w)       jit_new_node_www(jit_code_ner_d,u,v,w)
+#define jit_nei_d(u,v,w)       jit_new_node_wwd(jit_code_nei_d,u,v,w)
+    jit_code_ner_d,            jit_code_nei_d,
+#define jit_unltr_d(u,v,w)     jit_new_node_www(jit_code_unltr_d,u,v,w)
+#define jit_unlti_d(u,v,w)     jit_new_node_wwd(jit_code_unlti_d,u,v,w)
+    jit_code_unltr_d,          jit_code_unlti_d,
+#define jit_unler_d(u,v,w)     jit_new_node_www(jit_code_unler_d,u,v,w)
+#define jit_unlei_d(u,v,w)     jit_new_node_wwd(jit_code_unlei_d,u,v,w)
+    jit_code_unler_d,          jit_code_unlei_d,
+#define jit_uneqr_d(u,v,w)     jit_new_node_www(jit_code_uneqr_d,u,v,w)
+#define jit_uneqi_d(u,v,w)     jit_new_node_wwd(jit_code_uneqi_d,u,v,w)
+    jit_code_uneqr_d,          jit_code_uneqi_d,
+#define jit_unger_d(u,v,w)     jit_new_node_www(jit_code_unger_d,u,v,w)
+#define jit_ungei_d(u,v,w)     jit_new_node_wwd(jit_code_ungei_d,u,v,w)
+    jit_code_unger_d,          jit_code_ungei_d,
+#define jit_ungtr_d(u,v,w)     jit_new_node_www(jit_code_ungtr_d,u,v,w)
+#define jit_ungti_d(u,v,w)     jit_new_node_wwd(jit_code_ungti_d,u,v,w)
+    jit_code_ungtr_d,          jit_code_ungti_d,
+#define jit_ltgtr_d(u,v,w)     jit_new_node_www(jit_code_ltgtr_d,u,v,w)
+#define jit_ltgti_d(u,v,w)     jit_new_node_wwd(jit_code_ltgti_d,u,v,w)
+    jit_code_ltgtr_d,          jit_code_ltgti_d,
+#define jit_ordr_d(u,v,w)      jit_new_node_www(jit_code_ordr_d,u,v,w)
+#define jit_ordi_d(u,v,w)      jit_new_node_wwd(jit_code_ordi_d,u,v,w)
+    jit_code_ordr_d,           jit_code_ordi_d,
+#define jit_unordr_d(u,v,w)    jit_new_node_www(jit_code_unordr_d,u,v,w)
+#define jit_unordi_d(u,v,w)    jit_new_node_wwd(jit_code_unordi_d,u,v,w)
+    jit_code_unordr_d,         jit_code_unordi_d,
+
+#define jit_truncr_d_i(u,v)    jit_new_node_ww(jit_code_truncr_d_i,u,v)
+    jit_code_truncr_d_i,
+#if __WORDSIZE == 32
+#  define jit_truncr_d(u,v)    jit_truncr_d_i(u,v)
+#else
+#  define jit_truncr_d(u,v)    jit_truncr_d_l(u,v)
+#  define jit_truncr_d_l(u,v)  jit_new_node_ww(jit_code_truncr_d_l,u,v)
+#endif
+    jit_code_truncr_d_l,
+#define jit_extr_d(u,v)                jit_new_node_ww(jit_code_extr_d,u,v)
+#define jit_extr_f_d(u,v)      jit_new_node_ww(jit_code_extr_f_d,u,v)
+    jit_code_extr_d,           jit_code_extr_f_d,
+#define jit_movr_d(u,v)                jit_new_node_ww(jit_code_movr_d,u,v)
+#define jit_movi_d(u,v)                jit_new_node_wd(jit_code_movi_d,u,v)
+    jit_code_movr_d,           jit_code_movi_d,
+
+#define jit_ldr_d(u,v)         jit_new_node_ww(jit_code_ldr_d,u,v)
+#define jit_ldi_d(u,v)         jit_new_node_wp(jit_code_ldi_d,u,v)
+    jit_code_ldr_d,            jit_code_ldi_d,
+#define jit_ldxr_d(u,v,w)      jit_new_node_www(jit_code_ldxr_d,u,v,w)
+#define jit_ldxi_d(u,v,w)      jit_new_node_www(jit_code_ldxi_d,u,v,w)
+    jit_code_ldxr_d,           jit_code_ldxi_d,
+#define jit_str_d(u,v)         jit_new_node_ww(jit_code_str_d,u,v)
+#define jit_sti_d(u,v)         jit_new_node_pw(jit_code_sti_d,u,v)
+    jit_code_str_d,            jit_code_sti_d,
+#define jit_stxr_d(u,v,w)      jit_new_node_www(jit_code_stxr_d,u,v,w)
+#define jit_stxi_d(u,v,w)      jit_new_node_www(jit_code_stxi_d,u,v,w)
+    jit_code_stxr_d,           jit_code_stxi_d,
+
+#define jit_bltr_d(v,w)                jit_new_node_pww(jit_code_bltr_d,NULL,v,w)
+#define jit_blti_d(v,w)                jit_new_node_pwd(jit_code_blti_d,NULL,v,w)
+    jit_code_bltr_d,           jit_code_blti_d,
+#define jit_bler_d(v,w)                jit_new_node_pww(jit_code_bler_d,NULL,v,w)
+#define jit_blei_d(v,w)                jit_new_node_pwd(jit_code_blei_d,NULL,v,w)
+    jit_code_bler_d,           jit_code_blei_d,
+#define jit_beqr_d(v,w)                jit_new_node_pww(jit_code_beqr_d,NULL,v,w)
+#define jit_beqi_d(v,w)                jit_new_node_pwd(jit_code_beqi_d,NULL,v,w)
+    jit_code_beqr_d,           jit_code_beqi_d,
+#define jit_bger_d(v,w)                jit_new_node_pww(jit_code_bger_d,NULL,v,w)
+#define jit_bgei_d(v,w)                jit_new_node_pwd(jit_code_bgei_d,NULL,v,w)
+    jit_code_bger_d,           jit_code_bgei_d,
+#define jit_bgtr_d(v,w)                jit_new_node_pww(jit_code_bgtr_d,NULL,v,w)
+#define jit_bgti_d(v,w)                jit_new_node_pwd(jit_code_bgti_d,NULL,v,w)
+    jit_code_bgtr_d,           jit_code_bgti_d,
+#define jit_bner_d(v,w)                jit_new_node_pww(jit_code_bner_d,NULL,v,w)
+#define jit_bnei_d(v,w)                jit_new_node_pwd(jit_code_bnei_d,NULL,v,w)
+    jit_code_bner_d,           jit_code_bnei_d,
+#define jit_bunltr_d(v,w)      jit_new_node_pww(jit_code_bunltr_d,NULL,v,w)
+#define jit_bunlti_d(v,w)      jit_new_node_pwd(jit_code_bunlti_d,NULL,v,w)
+    jit_code_bunltr_d,         jit_code_bunlti_d,
+#define jit_bunler_d(v,w)      jit_new_node_pww(jit_code_bunler_d,NULL,v,w)
+#define jit_bunlei_d(v,w)      jit_new_node_pwd(jit_code_bunlei_d,NULL,v,w)
+    jit_code_bunler_d,         jit_code_bunlei_d,
+#define jit_buneqr_d(v,w)      jit_new_node_pww(jit_code_buneqr_d,NULL,v,w)
+#define jit_buneqi_d(v,w)      jit_new_node_pwd(jit_code_buneqi_d,NULL,v,w)
+    jit_code_buneqr_d,         jit_code_buneqi_d,
+#define jit_bunger_d(v,w)      jit_new_node_pww(jit_code_bunger_d,NULL,v,w)
+#define jit_bungei_d(v,w)      jit_new_node_pwd(jit_code_bungei_d,NULL,v,w)
+    jit_code_bunger_d,         jit_code_bungei_d,
+#define jit_bungtr_d(v,w)      jit_new_node_pww(jit_code_bungtr_d,NULL,v,w)
+#define jit_bungti_d(v,w)      jit_new_node_pwd(jit_code_bungti_d,NULL,v,w)
+    jit_code_bungtr_d,         jit_code_bungti_d,
+#define jit_bltgtr_d(v,w)      jit_new_node_pww(jit_code_bltgtr_d,NULL,v,w)
+#define jit_bltgti_d(v,w)      jit_new_node_pwd(jit_code_bltgti_d,NULL,v,w)
+    jit_code_bltgtr_d,         jit_code_bltgti_d,
+#define jit_bordr_d(v,w)       jit_new_node_pww(jit_code_bordr_d,NULL,v,w)
+#define jit_bordi_d(v,w)       jit_new_node_pwd(jit_code_bordi_d,NULL,v,w)
+    jit_code_bordr_d,          jit_code_bordi_d,
+#define jit_bunordr_d(v,w)     jit_new_node_pww(jit_code_bunordr_d,NULL,v,w)
+#define jit_bunordi_d(v,w)     jit_new_node_pwd(jit_code_bunordi_d,NULL,v,w)
+    jit_code_bunordr_d,                jit_code_bunordi_d,
+
+#define jit_pushargr_d(u)      _jit_pushargr_d(_jit,u)
+#define jit_pushargi_d(u)      _jit_pushargi_d(_jit,u)
+    jit_code_pushargr_d,       jit_code_pushargi_d,
+#define jit_retr_d(u)          _jit_retr_d(_jit,u)
+#define jit_reti_d(u)          _jit_reti_d(_jit,u)
+    jit_code_retr_d,           jit_code_reti_d,
+#define jit_retval_d(u)                _jit_retval_d(_jit,u)
+    jit_code_retval_d,
+
+    /* Special internal backend specific codes */
+    jit_code_movr_w_f,         jit_code_movr_ww_d,     /* w* -> f|d */
+#define jit_movr_w_f(u, v)     jit_new_node_ww(jit_code_movr_w_f, u, v)
+#define jit_movr_ww_d(u, v, w) jit_new_node_www(jit_code_movr_ww_d, u, v, w)
+    jit_code_movr_w_d,                                 /* w -> d */
+#define jit_movr_w_d(u, v)     jit_new_node_ww(jit_code_movr_w_d, u, v)
+
+    jit_code_movr_f_w,         jit_code_movi_f_w,      /* f|d -> w* */
+#define jit_movr_f_w(u, v)     jit_new_node_ww(jit_code_movr_f_w, u, v)
+#define jit_movi_f_w(u, v)     jit_new_node_wf(jit_code_movi_f_w, u, v)
+    jit_code_movr_d_ww,                jit_code_movi_d_ww,
+#define jit_movr_d_ww(u, v, w) jit_new_node_www(jit_code_movr_d_ww, u, v, w)
+#define jit_movi_d_ww(u, v, w) jit_new_node_wwd(jit_code_movi_d_ww, u, v, w)
+
+    jit_code_movr_d_w,         jit_code_movi_d_w,      /* d -> w */
+#define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
+#define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
+
+    jit_code_last_code
+} jit_code_t;
+
+typedef void* (*jit_alloc_func_ptr)    (size_t);       /* hook types taken by jit_set_memory_functions(); same shape as malloc() */
+typedef void* (*jit_realloc_func_ptr)  (void*, size_t);        /* same shape as realloc() */
+typedef void  (*jit_free_func_ptr)     (void*);        /* same shape as free() */
+
+/*
+ * Prototypes
+ */
+extern void init_jit(const char*);
+extern void finish_jit(void);
+
+extern jit_state_t *jit_new_state(void);
+#define jit_clear_state()      _jit_clear_state(_jit)
+extern void _jit_clear_state(jit_state_t*);
+#define jit_destroy_state()    _jit_destroy_state(_jit)
+extern void _jit_destroy_state(jit_state_t*);
+
+#define jit_address(node)      _jit_address(_jit, node)
+extern jit_pointer_t _jit_address(jit_state_t*, jit_node_t*);
+extern jit_node_t *_jit_name(jit_state_t*, const char*);
+extern jit_node_t *_jit_note(jit_state_t*, const char*, int);
+extern jit_node_t *_jit_label(jit_state_t*);
+extern jit_node_t *_jit_forward(jit_state_t*);
+extern jit_node_t *_jit_indirect(jit_state_t*);
+extern void _jit_link(jit_state_t*, jit_node_t*);
+#define jit_forward_p(u)       _jit_forward_p(_jit,u)
+extern jit_bool_t _jit_forward_p(jit_state_t*,jit_node_t*);
+#define jit_indirect_p(u)      _jit_indirect_p(_jit,u)
+extern jit_bool_t _jit_indirect_p(jit_state_t*,jit_node_t*);
+#define jit_target_p(u)                _jit_target_p(_jit,u)
+extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*);
+
+extern void _jit_prolog(jit_state_t*);
+
+extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
+extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
+extern void _jit_ellipsis(jit_state_t*);
+
+extern jit_node_t *_jit_arg(jit_state_t*);
+extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_us(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*);
+#if __WORDSIZE == 64
+extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
+#endif
+extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*);
+
+extern void _jit_prepare(jit_state_t*);
+extern void _jit_ellipsis(jit_state_t*);       /* NOTE(review): repeats the declaration that follows _jit_allocar above */
+extern void _jit_va_push(jit_state_t*, jit_gpr_t);
+extern void _jit_pushargr(jit_state_t*, jit_gpr_t);
+extern void _jit_pushargi(jit_state_t*, jit_word_t);
+extern void _jit_finishr(jit_state_t*, jit_gpr_t);
+extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
+extern void _jit_ret(jit_state_t*);
+extern void _jit_retr(jit_state_t*, jit_gpr_t);
+extern void _jit_reti(jit_state_t*, jit_word_t);
+extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_us(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_i(jit_state_t*, jit_gpr_t);
+#if __WORDSIZE == 64
+extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
+#endif
+extern void _jit_epilog(jit_state_t*);
+
+#define jit_patch(u)           _jit_patch(_jit,u)
+extern void _jit_patch(jit_state_t*, jit_node_t*);
+#define jit_patch_at(u,v)      _jit_patch_at(_jit,u,v)
+extern void _jit_patch_at(jit_state_t*, jit_node_t*, jit_node_t*);
+#define jit_patch_abs(u,v)     _jit_patch_abs(_jit,u,v)
+extern void _jit_patch_abs(jit_state_t*, jit_node_t*, jit_pointer_t);
+#define jit_realize()          _jit_realize(_jit)
+extern void _jit_realize(jit_state_t*);
+#define jit_get_code(u)                _jit_get_code(_jit,u)
+extern jit_pointer_t _jit_get_code(jit_state_t*, jit_word_t*);
+#define jit_set_code(u,v)      _jit_set_code(_jit,u,v)
+extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t);
+#define jit_get_data(u,v)      _jit_get_data(_jit,u,v)
+extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*);
+#define jit_set_data(u,v,w)    _jit_set_data(_jit,u,v,w)
+extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t);
+#define jit_frame(u)           _jit_frame(_jit,u)
+extern void _jit_frame(jit_state_t*, jit_int32_t);
+#define jit_tramp(u)           _jit_tramp(_jit,u)
+extern void _jit_tramp(jit_state_t*, jit_int32_t);
+#define jit_emit()             _jit_emit(_jit)
+extern jit_pointer_t _jit_emit(jit_state_t*);
+
+#define jit_print()            _jit_print(_jit)
+extern void _jit_print(jit_state_t*);
+
+extern jit_node_t *_jit_arg_f(jit_state_t*);
+extern void _jit_getarg_f(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargr_f(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargi_f(jit_state_t*, jit_float32_t, jit_node_t*);
+extern void _jit_pushargr_f(jit_state_t*, jit_fpr_t);
+extern void _jit_pushargi_f(jit_state_t*, jit_float32_t);
+extern void _jit_retr_f(jit_state_t*, jit_fpr_t);
+extern void _jit_reti_f(jit_state_t*, jit_float32_t);
+extern void _jit_retval_f(jit_state_t*, jit_fpr_t);
+
+extern jit_node_t *_jit_arg_d(jit_state_t*);
+extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargr_d(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargi_d(jit_state_t*, jit_float64_t, jit_node_t*);
+extern void _jit_pushargr_d(jit_state_t*, jit_fpr_t);
+extern void _jit_pushargi_d(jit_state_t*, jit_float64_t);
+extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
+extern void _jit_reti_d(jit_state_t*, jit_float64_t);
+extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
+
+#define jit_new_node(c)                _jit_new_node(_jit,c)
+extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
+#define jit_new_node_w(c,u)    _jit_new_node_w(_jit,c,u)
+extern jit_node_t *_jit_new_node_w(jit_state_t*, jit_code_t,
+                                  jit_word_t);
+#define jit_new_node_f(c,u)    _jit_new_node_f(_jit,c,u)
+extern jit_node_t *_jit_new_node_f(jit_state_t*, jit_code_t,
+                                  jit_float32_t);
+#define jit_new_node_d(c,u)    _jit_new_node_d(_jit,c,u)
+extern jit_node_t *_jit_new_node_d(jit_state_t*, jit_code_t,
+                                  jit_float64_t);
+#define jit_new_node_p(c,u)    _jit_new_node_p(_jit,c,u)
+extern jit_node_t *_jit_new_node_p(jit_state_t*, jit_code_t,
+                                  jit_pointer_t);
+#define jit_new_node_ww(c,u,v) _jit_new_node_ww(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_ww(jit_state_t*,jit_code_t,
+                                   jit_word_t, jit_word_t);
+#define jit_new_node_wp(c,u,v) _jit_new_node_wp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wp(jit_state_t*,jit_code_t,
+                                   jit_word_t, jit_pointer_t);
+#define jit_new_node_fp(c,u,v) _jit_new_node_fp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_fp(jit_state_t*,jit_code_t,
+                                   jit_float32_t, jit_pointer_t);
+#define jit_new_node_dp(c,u,v) _jit_new_node_dp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_dp(jit_state_t*,jit_code_t,
+                                   jit_float64_t, jit_pointer_t);
+#define jit_new_node_pw(c,u,v) _jit_new_node_pw(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_pw(jit_state_t*,jit_code_t,
+                                   jit_pointer_t, jit_word_t);
+#define jit_new_node_wf(c,u,v) _jit_new_node_wf(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wf(jit_state_t*, jit_code_t,
+                                   jit_word_t, jit_float32_t);
+#define jit_new_node_wd(c,u,v) _jit_new_node_wd(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wd(jit_state_t*, jit_code_t,
+                                   jit_word_t, jit_float64_t);
+#define jit_new_node_www(c,u,v,w) _jit_new_node_www(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
+                                    jit_word_t, jit_word_t, jit_word_t);
+#define jit_new_node_qww(c,l,h,v,w) _jit_new_node_qww(_jit,c,l,h,v,w)
+extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
+                                    jit_int32_t, jit_int32_t,
+                                    jit_word_t, jit_word_t);
+#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
+                                    jit_word_t, jit_word_t, jit_float32_t);
+#define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t,
+                                    jit_word_t, jit_word_t, jit_float64_t);
+#define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t,
+                                    jit_pointer_t, jit_word_t, jit_word_t);
+#define jit_new_node_pwf(c,u,v,w) _jit_new_node_pwf(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pwf(jit_state_t*, jit_code_t,
+                                    jit_pointer_t, jit_word_t, jit_float32_t);
+#define jit_new_node_pwd(c,u,v,w) _jit_new_node_pwd(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pwd(jit_state_t*, jit_code_t,
+                                    jit_pointer_t, jit_word_t, jit_float64_t);
+
+#define jit_arg_register_p(u)          _jit_arg_register_p(_jit,u)
+extern jit_bool_t _jit_arg_register_p(jit_state_t*, jit_node_t*);
+#define jit_callee_save_p(u)           _jit_callee_save_p(_jit,u)
+extern jit_bool_t _jit_callee_save_p(jit_state_t*, jit_int32_t);
+#define jit_pointer_p(u)               _jit_pointer_p(_jit,u)
+extern jit_bool_t _jit_pointer_p(jit_state_t*,jit_pointer_t);
+
+#define jit_get_note(n,u,v,w)  _jit_get_note(_jit,n,u,v,w)
+extern jit_bool_t _jit_get_note(jit_state_t*,jit_pointer_t,char**,char**,int*);
+
+#define jit_disassemble()              _jit_disassemble(_jit)
+extern void _jit_disassemble(jit_state_t*);
+
+extern void jit_set_memory_functions(jit_alloc_func_ptr,
+                                    jit_realloc_func_ptr,
+                                    jit_free_func_ptr);
+extern void jit_get_memory_functions(jit_alloc_func_ptr*,
+                                    jit_realloc_func_ptr*,
+                                    jit_free_func_ptr*);
+
+#endif /* _lightning_h */
diff --git a/deps/lightning/include/lightning/Makefile.am b/deps/lightning/include/lightning/Makefile.am
new file mode 100644 (file)
index 0000000..9b1b3e6
--- /dev/null
@@ -0,0 +1,65 @@
+#
+# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+lightning_includedir = $(includedir)/lightning
+
+EXTRA_DIST =           \
+       jit_private.h
+
+if cpu_arm
+lightning_include_HEADERS =    \
+       jit_arm.h
+endif
+if cpu_mips
+lightning_include_HEADERS =    \
+       jit_mips.h
+endif
+if cpu_ppc
+lightning_include_HEADERS =    \
+       jit_ppc.h
+endif
+if cpu_sparc
+lightning_include_HEADERS =    \
+       jit_sparc.h
+endif
+if cpu_x86
+lightning_include_HEADERS =    \
+       jit_x86.h
+endif
+if cpu_ia64
+lightning_include_HEADERS =    \
+       jit_ia64.h
+endif
+if cpu_hppa
+lightning_include_HEADERS =    \
+       jit_hppa.h
+endif
+if cpu_aarch64
+lightning_include_HEADERS =    \
+       jit_aarch64.h
+endif
+if cpu_s390
+lightning_include_HEADERS =    \
+       jit_s390.h
+endif
+if cpu_alpha
+lightning_include_HEADERS =    \
+       jit_alpha.h
+endif
+if cpu_riscv
+lightning_include_HEADERS =    \
+       jit_riscv.h
+endif
diff --git a/deps/lightning/include/lightning/jit_aarch64.h b/deps/lightning/include/lightning/jit_aarch64.h
new file mode 100644 (file)
index 0000000..6e7d8be
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_aarch64_h
+#define _jit_aarch64_h
+
+#define JIT_HASH_CONSTS                0
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _R29
+typedef enum { /* enumerator values are positions in this list, not hardware register numbers (_R8 comes first) */
+#define jit_r(i)               (_R9 + (i))     /* relies on _R9.._R15 being consecutive below */
+#define jit_r_num()            7       /* JIT_R0..JIT_R6 */
+#define jit_v(i)               (_R19 + (i))    /* relies on _R19.._R28 being consecutive below */
+#define jit_v_num()            10      /* JIT_V0..JIT_V9 */
+#define jit_f(i)               (_V8 + (i))     /* relies on _V8.._V15 being listed in ascending order below */
+#define jit_f_num()            8       /* JIT_F0..JIT_F7 */
+#define JIT_R0                 _R9
+#define JIT_R1                 _R10
+#define JIT_R2                 _R11
+#define JIT_R3                 _R12
+#define JIT_R4                 _R13
+#define JIT_R5                 _R14
+#define JIT_R6                 _R15
+    _R8,                               /* indirect result */
+    _R18,                              /* platform register */
+    _R17,                              /* IP1 */
+    _R16,                              /* IP0 */
+    _R9,       _R10,   _R11,   _R12,   /* temporaries */
+    _R13,      _R14,   _R15,
+#define JIT_V0                 _R19
+#define JIT_V1                 _R20
+#define JIT_V2                 _R21
+#define JIT_V3                 _R22
+#define JIT_V4                 _R23
+#define JIT_V5                 _R24
+#define JIT_V6                 _R25
+#define JIT_V7                 _R26
+#define JIT_V8                 _R27
+#define JIT_V9                 _R28
+    _R19,      _R20,   _R21,   _R22,   /* callee save */
+    _R23,      _R24,   _R25,   _R26,
+    _R27,      _R28,
+    _SP,                               /* stack pointer */
+    _R30,                              /* link register */
+    _R29,                              /* frame pointer */
+    _R7,       _R6,    _R5,    _R4,    /* arguments */
+    _R3,       _R2,    _R1,    _R0,
+#define JIT_F0                 _V8
+#define JIT_F1                 _V9
+#define JIT_F2                 _V10
+#define JIT_F3                 _V11
+#define JIT_F4                 _V12
+#define JIT_F5                 _V13
+#define JIT_F6                 _V14
+#define JIT_F7                 _V15
+    _V31,      _V30,   _V29,   _V28,   /* temporaries */
+    _V27,      _V26,   _V25,   _V24,
+    _V23,      _V22,   _V21,   _V20,
+    _V19,      _V18,   _V17,   _V16,
+    /* callee save */
+    _V8,       _V9,    _V10,   _V11,
+    _V12,      _V13,   _V14,   _V15,
+    _V7,       _V6,    _V5,    _V4,    /* arguments */
+    _V3,       _V2,    _V1,    _V0,
+    _NOREG,    /* invalid register identifier */
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+#endif /* _jit_aarch64_h */
diff --git a/deps/lightning/include/lightning/jit_alpha.h b/deps/lightning/include/lightning/jit_alpha.h
new file mode 100644 (file)
index 0000000..9bae343
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2014-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_alpha_h
+#define _jit_alpha_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _FP
+typedef enum { /* enumerator values are positions in this list, not hardware register numbers (_AT comes first) */
+#define jit_r(i)               (_S0 + (i))     /* relies on _S0.._S2 being consecutive below */
+#define jit_r_num()            3       /* JIT_R0..JIT_R2 */
+#define jit_v(i)               (_S3 + (i))     /* relies on _S3.._S5 being consecutive below */
+#define jit_v_num()            3       /* JIT_V0..JIT_V2 */
+#define jit_f(i)               (_F2 + (i))     /* relies on _F2.._F9 being consecutive below */
+#define jit_f_num()            8       /* JIT_F0..JIT_F7 */
+    /* Volatile - Assembly temporary register */
+    _AT,
+    /* Volatile - Return value register */
+    _V0,
+    /* Volatile - Temporary registers */
+    _T0,               _T1,            _T2,            _T3,
+    _T4,               _T5,            _T6,            _T7,
+    _T8,               _T9,            _T10,           _T11,
+    /* FIXME: JIT_R0-JIT_R2 are mapped to callee save registers (instead
+     * of offering 12+ JIT_Rn and 6 JIT_Vn) because division must call a
+     * function.  The fix would be to write helpers that do not clobber
+     * registers and inject/inline them in the jit. */
+#define JIT_R0                 _S0
+#define JIT_R1                 _S1
+#define JIT_R2                 _S2
+#define JIT_V0                 _S3
+#define JIT_V1                 _S4
+#define JIT_V2                 _S5
+    /* Nonvolatile - Saved registers */
+    _S0,               _S1,            _S2,            _S3,
+    _S4,               _S5,
+    /* Nonvolatile - Frame pointer */
+    _FP,
+    /* Volatile - Argument registers */
+    _A5,               _A4,            _A3,            _A2,
+    _A1,               _A0,
+    /* Volatile - Return address register */
+    _RA,
+    /* Volatile - Temporary register */
+    _PV,
+    /* Nonvolatile - Global pointer */
+    _GP,
+    /* Nonvolatile - Stack pointer */
+    _SP,
+    /* Constant - RAZ / writes ignored */
+    _ZERO,
+#define JIT_F0                 _F2
+#define JIT_F1                 _F3
+#define JIT_F2                 _F4
+#define JIT_F3                 _F5
+#define JIT_F4                 _F6
+#define JIT_F5                 _F7
+#define JIT_F6                 _F8
+#define JIT_F7                 _F9
+    /* Volatile - Return value register (real part) */
+    _F0,
+    /* Volatile - Return value register (imaginary part) */
+    _F1,
+    /* Nonvolatile - Saved registers */
+    _F2,               _F3,            _F4,            _F5,
+    _F6,               _F7,            _F8,            _F9,
+    /* Volatile - Temporary registers */
+    _F10,              _F11,           _F12,           _F13,
+    _F14,              _F15,
+    /* Volatile - Argument registers */
+    _F21,              _F20,           _F19,           _F18,
+    _F17,              _F16,
+    /* Volatile - Temporary registers (_R24 is kept as an alias: it was the original misspelling of _F24) */
+    _F22,              _F23,           _F24,   _R24 = _F24,    _F25,
+    _F26,              _F27,           _F28,           _F29,
+    _F30,
+    /* Constant - RAZ / writes ignored */
+    _F31,
+    /* Lightning internal invalid register identifier */
+    _NOREG,
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+#endif /* _jit_alpha_h */
diff --git a/deps/lightning/include/lightning/jit_arm.h b/deps/lightning/include/lightning/jit_arm.h
new file mode 100644 (file)
index 0000000..81451f1
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_arm_h
+#define _jit_arm_h
+
+#define JIT_HASH_CONSTS                0
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define jit_swf_p()            (jit_cpu.vfp == 0)
+#define jit_hardfp_p()         jit_cpu.abi
+#define jit_ldrt_strt_p()      jit_cpu.ldrt_strt
+
+#define JIT_FP                 _R11
+typedef enum { /* enumerator values are positions in this list, not hardware register numbers (_R12 comes first) */
+#define jit_r(i)               (_R4 + (i))     /* relies on _R4.._R6 being consecutive below */
+#define jit_r_num()            3       /* JIT_R0..JIT_R2 */
+#define jit_v(i)               (_R7 + (i))     /* relies on _R7.._R9 being consecutive below */
+#define jit_v_num()            3       /* JIT_V0..JIT_V2 */
+#define jit_f(i)               (jit_cpu.abi ? _D8 + ((i)<<1) : _D0 - ((i)<<1)) /* step of 2: successive _Dn are two _S enumerators apart; minus because _S15.._S0 are listed descending */
+#define jit_f_num()            8       /* JIT_F0..JIT_F7 */
+    _R12,                      /* ip - temporary */
+#define JIT_R0                 _R4
+#define JIT_R1                 _R5
+#define JIT_R2                 _R6
+    _R4,                       /* r4 - variable */
+    _R5,                       /* r5 - variable */
+    _R6,                       /* r6 - variable */
+#define JIT_V0                 _R7
+#define JIT_V1                 _R8
+#define JIT_V2                 _R9
+    _R7,                       /* r7 - variable */
+    _R8,                       /* r8 - variable */
+    _R9,                       /* r9 - variable */
+    _R10,                      /* sl - stack limit */
+    _R11,                      /* fp - frame pointer */
+    _R13,                      /* sp - stack pointer */
+    _R14,                      /* lr - link register */
+    _R15,                      /* pc - program counter */
+    _R3,                       /* r3 - argument/result */
+    _R2,                       /* r2 - argument/result */
+    _R1,                       /* r1 - argument/result */
+    _R0,                       /* r0 - argument/result */
+#define JIT_F0                 (jit_hardfp_p() ? _D8 : _D0)    /* JIT_Fn are chosen at run time from jit_cpu.abi; they are not constant expressions */
+#define JIT_F1                 (jit_hardfp_p() ? _D9 : _D1)
+#define JIT_F2                 (jit_hardfp_p() ? _D10 : _D2)
+#define JIT_F3                 (jit_hardfp_p() ? _D11 : _D3)
+#define JIT_F4                 (jit_hardfp_p() ? _D12 : _D4)
+#define JIT_F5                 (jit_hardfp_p() ? _D13 : _D5)
+#define JIT_F6                 (jit_hardfp_p() ? _D14 : _D6)
+#define JIT_F7                 (jit_hardfp_p() ? _D15 : _D7)
+    _S16,      _D8 = _S16,     _Q4 = _D8,      /* _Dn and _Qn are aliases of an _S enumerator; they do not add new values */
+    _S17,
+    _S18,      _D9 = _S18,
+    _S19,
+    _S20,      _D10 = _S20,    _Q5 = _D10,
+    _S21,
+    _S22,      _D11 = _S22,
+    _S23,
+    _S24,      _D12 = _S24,    _Q6 = _D12,
+    _S25,
+    _S26,      _D13 = _S26,
+    _S27,
+    _S28,      _D14 = _S28,    _Q7 = _D14,
+    _S29,
+    _S30,      _D15 = _S30,
+    _S31,
+    _S15,      /* from here down the _S registers are listed in descending order */
+    _S14,      _D7 = _S14,
+    _S13,
+    _S12,      _D6 = _S12,     _Q3 = _D6,
+    _S11,
+    _S10,      _D5 = _S10,
+    _S9,
+    _S8,       _D4 = _S8,      _Q2 = _D4,
+    _S7,
+    _S6,       _D3 = _S6,
+    _S5,
+    _S4,       _D2 = _S4,      _Q1 = _D2,
+    _S3,
+    _S2,       _D1 = _S2,
+    _S1,
+    _S0,       _D0 = _S0,      _Q0 = _D0,
+    _NOREG,    /* invalid register identifier */
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+typedef struct {       /* cpu feature flags, read through the global jit_cpu */
+    jit_uint32_t version       : 4;
+    jit_uint32_t extend                : 1;
+    /* only generate thumb instructions for thumb2 */
+    jit_uint32_t thumb         : 1;
+    jit_uint32_t vfp           : 3;    /* 0 makes jit_swf_p() true */
+    jit_uint32_t neon          : 1;
+    jit_uint32_t abi           : 2;    /* non-zero makes jit_hardfp_p() true and selects _D8.. in jit_f() */
+    /* Use strt+offset instead of str.w?
+     * In special cases it causes a SIGILL, at least on qemu, probably
+     * because some memory ordering constraint is not respected, so it
+     * is disabled by default. */
+    jit_uint32_t ldrt_strt     : 1;    /* read by jit_ldrt_strt_p() */
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
+#endif /* _jit_arm_h */
diff --git a/deps/lightning/include/lightning/jit_hppa.h b/deps/lightning/include/lightning/jit_hppa.h
new file mode 100644 (file)
index 0000000..ddc3950
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_hppa_h
+#define _jit_hppa_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+typedef enum {
+    _R0,                       /* Always zero */
+    _R1,                       /* ADDIL implicit target */
+    _R2,                       /* B,L implicit target */
+#define jit_r_num()            4
+#define jit_r(n)               ((n) < 3 ? _R4 + (n) : _R10 + (n) - 3)
+#define jit_v_num()            11
+#define jit_v(n)               ((n) < 3 ? _R7 + (n) : _R11 + (n) - 3)
+#define jit_f_num()            8
+#define jit_f(n)               (_F12 - (n))
+#define JIT_FP                 _R3
+#define JIT_R0                 _R4
+#define JIT_R1                 _R5
+#define JIT_R2                 _R6
+#define JIT_R3                 _R10
+#define JIT_V0                 _R7
+#define JIT_V1                 _R8
+#define JIT_V2                 _R9
+#define JIT_V3                 _R11
+#define JIT_V4                 _R12
+#define JIT_V5                 _R13
+#define JIT_V6                 _R14
+#define JIT_V7                 _R15
+#define JIT_V8                 _R16
+#define JIT_V9                 _R17
+#define JIT_V10                        _R18
+    _R3,
+    _R19,
+    _R20,
+    _R21,
+    _R22,
+    _R29,                      /* ret1 */
+    _R28,                      /* ret0 */
+    _R4,
+    _R5,
+    _R6,
+    _R7,
+    _R8,
+    _R9,
+    _R10,
+    _R11,
+    _R12,
+    _R13,
+    _R14,
+    _R15,
+    _R16,
+    _R17,
+    _R18,
+    _R23,                      /* arg3 */
+    _R24,                      /* arg2 */
+    _R25,                      /* arg1 */
+    _R26,                      /* arg0 */
+    _R27,                      /* Data Pointer */
+    _R30,                      /* Stack Pointer */
+    _R31,                      /* Link register */
+#define JIT_F0                 _F12
+#define JIT_F1                 _F13
+#define JIT_F2                 _F14
+#define JIT_F3                 _F15
+#define JIT_F4                 _F16
+#define JIT_F5                 _F17
+#define JIT_F6                 _F18
+#define JIT_F7                 _F19
+#define JIT_F8                 _F20
+#define JIT_F9                 _F21
+    /* Caller Saves */
+    _F31,
+    _F30,
+    _F29,
+    _F28,
+    _F27,
+    _F26,
+    _F25,
+    _F24,
+    _F23,
+    _F22,
+    _F11,
+    _F10,
+    _F9,
+    _F8,
+    /* Arguments */
+    _F7,                       /* farg3 */
+    _F6,                       /* farg2 */
+    _F5,                       /* farg1 */
+    _F4,                       /* farg0 */
+    /* Callee Saves */
+    _F21,
+    _F20,
+    _F19,
+    _F18,
+    _F17,
+    _F16,
+    _F15,
+    _F14,
+    _F13,
+    _F12,
+    /* Floating-Point Status and Exception */
+    _F0,
+    _F1,
+    _F2,
+    _F3,
+#define JIT_NOREG              _NOREG
+    _NOREG,
+} jit_reg_t;
+
+#endif /* _jit_hppa_h */
diff --git a/deps/lightning/include/lightning/jit_ia64.h b/deps/lightning/include/lightning/jit_ia64.h
new file mode 100644 (file)
index 0000000..718f191
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_ia64_h
+#define _jit_ia64_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _R4     /* Not abi specific */
+typedef enum {
+#define JIT_R0         _R40
+#define JIT_R1         _R41
+#define JIT_R2         _R42
+#define JIT_R3         _R43
+#define JIT_V0         _R44
+#define JIT_V1         _R45
+#define JIT_V2         _R46
+#define JIT_V3         _R47
+#define jit_r_num()    4
+#define jit_r(n)       (_R40 + (n))
+#define jit_v_num()    4
+#define jit_v(n)       (_R44 + (n))
+    _R0,       /* constant - Always 0 */
+    _R1,       /* special - Global Data pointer (gp) */
+    /* r2-r3    - scratch - Use with 22-bit immediate add - scratch */
+    _R2,       _R3,
+    /* r4-r7 - preserved */
+    _R4,       _R5,    _R6,    _R7,
+    _R8,       /* scratch - Return value; structure/union return pointer */
+    /* r9-r11   - scratch - Return values */
+    _R9,       _R10,   _R11,
+    _R12,      /* special - Memory stack pointer (sp) */
+    _R13,      /* special - Reserved as a thread pointer (tp)*/
+    /* r14-r31  - scratch */
+    _R31,      _R30,
+    _R29,      _R28,   _R27,   _R26,   _R25,   _R24,   _R23,   _R22,
+    _R21,      _R20,   _R19,   _R18,   _R17,   _R16,   _R15,   _R14,
+    /* r32-r39  - aka in0-in7 - Incoming register arguments */
+    _R32,      _R33,   _R34,   _R35,   _R36,   _R37,   _R38,   _R39,
+    /* r40-r127         - loc0...locn,out0...outn */
+    _R40,      _R41,   _R42,   _R43,   _R44,   _R45,   _R46,   _R47,
+    _R48,      _R49,   _R50,   _R51,   _R52,   _R53,   _R54,   _R55,
+    _R56,      _R57,   _R58,   _R59,   _R60,   _R61,   _R62,   _R63,
+    _R64,      _R65,   _R66,   _R67,   _R68,   _R69,   _R70,   _R71,
+    _R72,      _R73,   _R74,   _R75,   _R76,   _R77,   _R78,   _R79,
+    _R80,      _R81,   _R82,   _R83,   _R84,   _R85,   _R86,   _R87,
+    _R88,      _R89,   _R90,   _R91,   _R92,   _R93,   _R94,   _R95,
+    _R96,      _R97,   _R98,   _R99,   _R100,  _R101,  _R102,  _R103,
+    _R104,     _R105,  _R106,  _R107,  _R108,  _R109,  _R110,  _R111,
+    _R112,     _R113,  _R114,  _R115,  _R116,  _R117,  _R118,  _R119,
+    _R120,     _R121,  _R122,  _R123,  _R124,  _R125,  _R126,  _R127,
+#define JIT_F0         _F16
+#define JIT_F1         _F17
+#define JIT_F2         _F18
+#define JIT_F3         _F19
+#define JIT_F4         _F20
+#define JIT_F5         _F21
+#define jit_f_num()    6
+#define jit_f(n)       (_F16 + (n))
+    _F0,       /* constant - Always 0.0 */
+    _F1,       /* constant - Always 1.0 */
+    /* f2-f5    - preserved */
+    _F2,       _F3,    _F4,    _F5,
+    /* f6-f7    - scratch */
+    _F6,       _F7,
+    /* f8-f15   - scratch - Argument/return registers */
+    _F8,       _F9,    _F10,   _F11,   _F12,   _F13,   _F14,   _F15,
+    /* f16-f31  - preserved */
+    _F16,      _F17,   _F18,   _F19,   _F20,   _F21,   _F22,   _F23,
+    _F24,      _F25,   _F26,   _F27,   _F28,   _F29,   _F30,   _F31,
+    /* f32-f127         - scratch - Rotating registers or scratch */
+    _F32,      _F33,   _F34,   _F35,   _F36,   _F37,   _F38,   _F39,
+    _F40,      _F41,   _F42,   _F43,   _F44,   _F45,   _F46,   _F47,
+    _F48,      _F49,   _F50,   _F51,   _F52,   _F53,   _F54,   _F55,
+    _F56,      _F57,   _F58,   _F59,   _F60,   _F61,   _F62,   _F63,
+    _F64,      _F65,   _F66,   _F67,   _F68,   _F69,   _F70,   _F71,
+    _F72,      _F73,   _F74,   _F75,   _F76,   _F77,   _F78,   _F79,
+    _F80,      _F81,   _F82,   _F83,   _F84,   _F85,   _F86,   _F87,
+    _F88,      _F89,   _F90,   _F91,   _F92,   _F93,   _F94,   _F95,
+    _F96,      _F97,   _F98,   _F99,   _F100,  _F101,  _F102,  _F103,
+    _F104,     _F105,  _F106,  _F107,  _F108,  _F109,  _F110,  _F111,
+    _F112,     _F113,  _F114,  _F115,  _F116,  _F117,  _F118,  _F119,
+
+#if 0
+    /* Do not list these to not need an unique identifier larger
+     * than 255 for jit_regset_t */
+    _F120,     _F121,  _F122,  _F123,  _F124,  _F125,  _F126,  _F127,
+#endif
+
+    /* Fake registers. Required because while "in" parameters start at r32,
+     * "out" parameters start *after* the registers allocated for temporaries,
+     * which are supposed to be kept alive (which is desirable, that is, to
+     * not spill/reload them in memory) */
+    _OUT0,     _OUT1,  _OUT2,  _OUT3,  _OUT4,  _OUT5,  _OUT6,  _OUT7,
+
+#define JIT_NOREG              _NOREG
+    _NOREG,
+} jit_reg_t;
+
+#endif /* _jit_ia64_h */
diff --git a/deps/lightning/include/lightning/jit_mips.h b/deps/lightning/include/lightning/jit_mips.h
new file mode 100644 (file)
index 0000000..eb7d783
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_mips_h
+#define _jit_mips_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+#if _MIPS_SIM != _ABIO32
+#    define NEW_ABI            1
+#endif
+
+/*
+ * Types
+ */
+#define JIT_FP                 _FP
+typedef enum {
+#define jit_r(i)               (_V0 + (i))
+#if NEW_ABI
+#  define jit_r_num()          7
+#else
+#  define jit_r_num()          11
+#endif
+#define jit_v(i)               (_S0 + (i))
+#define jit_v_num()            8
+#define jit_f(i)               (_F0 + (i))
+#if NEW_ABI
+#  define jit_f_num()          6
+#else
+#  define jit_f_num()          8
+#endif
+    _AT,
+#define JIT_R0                 _V0
+#define JIT_R1                 _V1
+#if NEW_ABI
+#  define JIT_R2               _T4
+#  define JIT_R3               _T5
+#  define JIT_R4               _T6
+#  define JIT_R5               _T7
+#  define JIT_R6               _T8
+#else
+#  define JIT_R2               _T0
+#  define JIT_R3               _T1
+#  define JIT_R4               _T2
+#  define JIT_R5               _T3
+#  define JIT_R6               _T4
+#  define JIT_R7               _T5
+#  define JIT_R8               _T6
+#  define JIT_R9               _T7
+#  define JIT_R10              _T8
+#endif
+    _V0, _V1,
+#if !NEW_ABI
+    _T0, _T1, _T2, _T3,
+#endif
+    _T4, _T5, _T6, _T7, _T8, _T9,
+#define JIT_V0                 _S0
+#define JIT_V1                 _S1
+#define JIT_V2                 _S2
+#define JIT_V3                 _S3
+#define JIT_V4                 _S4
+#define JIT_V5                 _S5
+#define JIT_V6                 _S6
+#define JIT_V7                 _S7
+    _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7,
+    _ZERO, _K0, _K1, _RA,
+    _GP,
+    _SP, _FP,
+#if NEW_ABI
+    _A7, _A6, _A5, _A4,
+#endif
+    _A3, _A2, _A1, _A0,
+#define JIT_F0                 _F0
+#define JIT_F1                 _F2
+#define JIT_F2                 _F4
+#define JIT_F3                 _F6
+#define JIT_F4                 _F8
+#define JIT_F5                 _F10
+#if !NEW_ABI
+#  define JIT_F6               _F16
+#  define JIT_F7               _F18
+#endif
+    _F0, _F2, _F4, _F6, _F8, _F10,
+    /* callee save float registers */
+#if !NEW_ABI
+    _F16, _F18,
+#endif
+    _F20, _F22, _F24, _F26, _F28, _F30,
+#if NEW_ABI
+    _F19, _F18, _F17, _F16, _F15, _F14, _F13, _F12,
+#else
+    _F14, _F12,
+#endif
+#define JIT_NOREG              _NOREG
+    _NOREG,
+} jit_reg_t;
+
+#endif /* _jit_mips_h */
diff --git a/deps/lightning/include/lightning/jit_ppc.h b/deps/lightning/include/lightning/jit_ppc.h
new file mode 100644 (file)
index 0000000..f1bdbcb
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_ppc_h
+#define _jit_ppc_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+typedef enum {
+#define jit_r(i)               (_R28 + (i))
+#define jit_r_num()            3
+#define jit_v(i)               (_R27 - (i))
+#define jit_v_num()            14
+#define jit_f(i)               (_F14 + (i))
+#define jit_f_num()            8
+    _R0,
+#define JIT_R0                 _R28
+#define JIT_R1                 _R29
+#define JIT_R2                 _R30
+    _R11,      _R12,   _R13,   _R2,
+#define JIT_V0                 jit_v(0)
+#define JIT_V1                 jit_v(1)
+#define JIT_V2                 jit_v(2)
+#define JIT_V3                 jit_v(3)
+#define JIT_V4                 jit_v(4)
+#define JIT_V5                 jit_v(5)
+#define JIT_V6                 jit_v(6)
+#define JIT_V7                 jit_v(7)
+#define JIT_V8                 jit_v(8)
+#define JIT_V9                 jit_v(9)
+#define JIT_V10                        jit_v(10)
+#define JIT_V11                        jit_v(11)
+#define JIT_V12                        jit_v(12)
+#define JIT_V13                        jit_v(13)
+    _R14,      _R15,   _R16,   _R17,   _R18,   _R19,   _R20,   _R21,
+    _R22,      _R23,   _R24,   _R25,   _R26,   _R27,   _R28,   _R29,
+    _R30,
+    _R1,
+#define JIT_FP                 _R31
+    _R31,
+    _R10,      _R9,    _R8,    _R7,    _R6,    _R5,    _R4,    _R3,
+    _F0,
+    _F14,      _F15,   _F16,   _F17,   _F18,   _F19,   _F20,   _F21,
+#define JIT_F0                 _F14
+#define JIT_F1                 _F15
+#define JIT_F2                 _F16
+#define JIT_F3                 _F17
+#define JIT_F4                 _F18
+#define JIT_F5                 _F19
+#define JIT_F6                 _F20
+#define JIT_F7                 _F21
+    /* FIXME _F20-_F31 not (easily) accessible and only _F14-_F21
+     * saved/restored (if used) */
+    _F22,      _F23,   _F24,   _F25,   _F26,   _F27,   _F28,   _F29,
+    _F30,      _F31,
+    _F13,      _F12,   _F11,   _F10,   _F9,    _F8,    _F7,    _F6,
+    _F5,       _F4,    _F3,    _F2,    _F1,
+    _NOREG,
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+#endif /* _jit_ppc_h */
diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h
new file mode 100644 (file)
index 0000000..8c05853
--- /dev/null
@@ -0,0 +1,773 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_private_h
+#define _jit_private_h
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#ifdef STDC_HEADERS
+#  include <stddef.h>
+#else
+#  if !defined(offsetof)
+#    define offsetof(type, field) ((char *)&((type *)0)->field - (char *)0)
+#  endif
+#endif
+
+#if defined(__GNUC__)         /* GCC-compatible: attributes + branch hints */
+#  define maybe_unused         __attribute__ ((unused))
+#  define unlikely(exprn)      __builtin_expect(!!(exprn), 0)
+#  define likely(exprn)                __builtin_expect(!!(exprn), 1)
+#  if (__GNUC__ >= 4)         /* symbol visibility attributes */
+#    define PUBLIC             __attribute__ ((visibility("default")))
+#    define HIDDEN             __attribute__ ((visibility("hidden")))
+#  else
+#    define PUBLIC             /**/
+#    define HIDDEN             /**/
+#  endif
+#else                         /* fallbacks: same 0/1 value as the GCC forms */
+#  define maybe_unused         /**/
+#  define unlikely(exprn)      (!!(exprn))
+#  define likely(exprn)                (!!(exprn))
+#  define PUBLIC               /**/
+#  define HIDDEN               /**/
+#endif
+
+#define rc(value)              jit_class_##value
+#define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
+
+#if defined(__i386__) || defined(__x86_64__)
+#  define JIT_SP               _RSP
+#  define JIT_RET              _RAX
+#  if __X32
+#    define JIT_FRET           _ST0
+typedef jit_uint32_t           jit_regset_t;
+#  else
+#    if __CYGWIN__ || _WIN32
+#      define JIT_RA0          _RCX
+#    else
+#      define JIT_RA0          _RDI
+#    endif
+#    define JIT_FA0            _XMM0
+#    define JIT_FRET           _XMM0
+typedef jit_uint64_t           jit_regset_t;
+#  endif
+#elif defined(__mips__)
+#  define JIT_RA0              _A0
+#  define JIT_FA0              _F12
+#  define JIT_SP               _SP
+#  define JIT_RET              _V0
+#  define JIT_FRET             _F0
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__arm__)
+#  define JIT_RA0              _R0
+#  define JIT_FA0              _D0
+#  define JIT_SP               _R13
+#  define JIT_RET              _R0
+#  if defined(__ARM_PCS_VFP)
+#    define JIT_FRET           _D0
+#  else
+#    define JIT_FRET           _R0
+#  endif
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__powerpc__)
+#  define JIT_RA0              _R3
+#  define JIT_FA0              _F1
+#  define JIT_SP               _R1
+#  define JIT_RET              _R3
+#  define JIT_FRET             _F1
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__sparc__)
+#  define JIT_SP               _SP
+#  define JIT_RET              _I0
+#  define JIT_FRET             _F0
+#  if __WORDSIZE == 32
+typedef jit_uint64_t           jit_regset_t;
+#  else
+typedef struct {
+    jit_uint64_t       rl;
+    jit_uint64_t       rh;
+} jit_regset_t;
+#  endif
+#elif defined(__ia64__)
+#  define JIT_SP               _R12
+#  define JIT_RET              _R8
+#  define JIT_FRET             _F8
+typedef struct {
+    jit_uint64_t       rl;
+    jit_uint64_t       rh;
+    jit_uint64_t       fl;
+    jit_uint64_t       fh;
+} jit_regset_t;
+#elif defined(__hppa__)
+#  define JIT_SP               _R30
+#  define JIT_RET              _R28
+#  define JIT_FRET             _F4
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__aarch64__)
+#  define JIT_RA0              _R0
+#  define JIT_FA0              _V0
+#  define JIT_SP               _SP
+#  define JIT_RET              _R0
+#  define JIT_FRET             _V0
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__s390__) || defined(__s390x__)
+#  define JIT_SP               _R15
+#  define JIT_RET              _R2
+#  define JIT_FRET             _F0
+typedef jit_uint32_t           jit_regset_t;
+#elif defined(__alpha__)
+#  define JIT_SP               _SP
+#  define JIT_RET              _V0
+#  define JIT_FRET             _F0
+typedef jit_uint64_t           jit_regset_t;
+#elif defined(__riscv)
+#  define JIT_RA0              _A0
+#  define JIT_FA0              _FA0
+#  define JIT_SP               _SP
+#  define JIT_RET              _A0
+#  define JIT_FRET             _FA0
+typedef jit_uint64_t           jit_regset_t;
+#endif
+
+#define jit_data(u,v,w)                _jit_data(_jit,u,v,w)
+extern jit_node_t *_jit_data(jit_state_t*, const void*,
+                            jit_word_t, jit_int32_t);
+
+#define jit_size(vector)       (sizeof(vector) / sizeof((vector)[0]))
+
+#define jit_reg_free_p(regno)                                          \
+    (!jit_regset_tstbit(&_jitc->reglive, regno) &&                     \
+     !jit_regset_tstbit(&_jitc->regarg, regno) &&                      \
+     !jit_regset_tstbit(&_jitc->regsav, regno))
+
+#define jit_reg_free_if_spill_p(regno)                                 \
+    (!jit_regset_tstbit(&_jitc->regarg, regno) &&                      \
+     !jit_regset_tstbit(&_jitc->regsav, regno))
+
+#define jit_inc_synth(code)                                            \
+    do {                                                               \
+       (void)jit_new_node(jit_code_##code);                            \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_w(code, u)                                       \
+    do {                                                               \
+       (void)jit_new_node_w(jit_code_##code, u);                       \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_f(code, u)                                       \
+    do {                                                               \
+       (void)jit_new_node_f(jit_code_##code, u);                       \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_d(code, u)                                       \
+    do {                                                               \
+       (void)jit_new_node_d(jit_code_##code, u);                       \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_ww(code, u, v)                                   \
+    do {                                                               \
+       (void)jit_new_node_ww(jit_code_##code, u, v);                   \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_wp(code, u, v)                                   \
+    do {                                                               \
+       (void)jit_new_node_wp(jit_code_##code, u, v);                   \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_fp(code, u, v)                                   \
+    do {                                                               \
+       (void)jit_new_node_fp(jit_code_##code, u, v);                   \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_inc_synth_dp(code, u, v)                                   \
+    do {                                                               \
+       (void)jit_new_node_dp(jit_code_##code, u, v);                   \
+       jit_synth_inc();                                                \
+    } while (0)
+#define jit_dec_synth()                jit_synth_dec()
+
+#define jit_link_prolog()                                              \
+    do {                                                               \
+       _jitc->tail->link = _jitc->function->prolog->link;              \
+       _jitc->function->prolog->link = _jitc->tail;                    \
+    } while (0)
+#define jit_link_prepare()                                             \
+    do {                                                               \
+       _jitc->tail->link = _jitc->prepare->link;                       \
+       _jitc->prepare->link = _jitc->tail;                             \
+    } while (0)
+#define jit_link_reverse(where)        /* reverse a ->link chain in place */   \
+    do {       /* `where` is evaluated repeatedly: pass a plain lvalue */ \
+       jit_node_t      *next, *prev = 0;                               \
+       while (where) {                                                 \
+           next = (where)->link;                                       \
+           (where)->link = prev;                                       \
+           prev = (where);                                             \
+           (where) = next;                                             \
+       }                                                               \
+       (where) = prev;         /* old last node becomes the head */    \
+    } while (0)
+
+/*
+ * Private jit_class bitmasks
+ */
+#define jit_class_named                0x00400000      /* hint must be the named reg */
+#define jit_class_nospill      0x00800000      /* hint to fail if need spill */
+#define jit_class_sft          0x01000000      /* not a hardware register */
+#define jit_class_rg8          0x04000000      /* x86 8 bits */
+#define jit_class_xpr          0x80000000      /* float / vector */
+/* Used on sparc64 where %f0-%f31 can be encoded for single float
+ * but %f32 to %f62 only as double precision */
+#define jit_class_sng          0x10000000      /* Single precision float */
+#define jit_class_dbl          0x20000000      /* Only double precision float */
+#define jit_regno_patch                0x00008000      /* this is a register
+                                                * returned by a "user" call
+                                                * to jit_get_reg() */
+
+#define jit_call_default       0
+#define jit_call_varargs       1
+
+#define jit_kind_register      1
+#define jit_kind_code          2
+#define jit_kind_word          3
+#define jit_kind_float32       4
+#define jit_kind_float64       5
+
+#define jit_cc_a0_reg          0x00000001      /* arg0 is a register */
+#define jit_cc_a0_chg          0x00000002      /* arg0 is modified */
+#define jit_cc_a0_jmp          0x00000004      /* arg0 is a jump target */
+#define jit_cc_a0_rlh          0x00000008      /* arg0 is a register pair */
+#define jit_cc_a0_int          0x00000010      /* arg0 is immediate word */
+#define jit_cc_a0_flt          0x00000020      /* arg0 is immediate float */
+#define jit_cc_a0_dbl          0x00000040      /* arg0 is immediate double */
+#define jit_cc_a0_arg          0x00000080      /* arg0 is an argument int id */
+#define jit_cc_a1_reg          0x00000100      /* arg1 is a register */
+#define jit_cc_a1_chg          0x00000200      /* arg1 is modified */
+#define jit_cc_a1_int          0x00001000      /* arg1 is immediate word */
+#define jit_cc_a1_flt          0x00002000      /* arg1 is immediate float */
+#define jit_cc_a1_dbl          0x00004000      /* arg1 is immediate double */
+#define jit_cc_a1_arg          0x00008000      /* arg1 is an argument node */
+#define jit_cc_a2_reg          0x00010000      /* arg2 is a register */
+#define jit_cc_a2_chg          0x00020000      /* arg2 is modified */
+#define jit_cc_a2_int          0x00100000      /* arg2 is immediate word */
+#define jit_cc_a2_flt          0x00200000      /* arg2 is immediate float */
+#define jit_cc_a2_dbl          0x00400000      /* arg2 is immediate double */
+
+#if __ia64__ || (__sparc__ && __WORDSIZE == 64)
+extern void
+jit_regset_com(jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_and(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_ior(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_xor(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_set(jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_set_mask(jit_regset_t*, jit_int32_t);
+
+extern jit_bool_t
+jit_regset_cmp_ui(jit_regset_t*, jit_word_t);
+
+extern void
+jit_regset_set_ui(jit_regset_t*, jit_word_t);
+
+extern jit_bool_t
+jit_regset_set_p(jit_regset_t*);
+
+extern void
+jit_regset_clrbit(jit_regset_t*, jit_int32_t);
+
+extern void
+jit_regset_setbit(jit_regset_t*, jit_int32_t);
+
+extern jit_bool_t
+jit_regset_tstbit(jit_regset_t*, jit_int32_t);
+#  if __sparc__ && __WORDSIZE == 64
+#    define jit_regset_new(set)                                                \
+    do { (set)->rl = (set)->rh = 0; } while (0)
+#    define jit_regset_del(set)                                                \
+    do { (set)->rl = (set)->rh = 0; } while (0)
+#  else
+#    define jit_regset_new(set)                                                \
+    do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
+#    define jit_regset_del(set)                                                \
+    do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
+#  endif
+#else
+#  define jit_regset_com(u, v)         (*(u) = ~*(v))
+#  define jit_regset_and(u, v, w)      (*(u) = *(v) & *(w))
+#  define jit_regset_ior(u, v, w)      (*(u) = *(v) | *(w))
+#  define jit_regset_xor(u, v, w)      (*(u) = *(v) ^ *(w))
+#  define jit_regset_set(u, v)         (*(u) = *(v))
+#  define jit_regset_set_mask(u, v)    (*(u) = (1ULL << (v)) - 1) /* v < 64 */
+#  define jit_regset_cmp_ui(u, v)      (*(u) != (v))
+#  define jit_regset_set_ui(u, v)      (*(u) = (v))
+#  define jit_regset_set_p(set)                (*(set))
+#  define jit_regset_clrbit(set, bit)  (*(set) &= ~(1ULL << (bit)))
+#  define jit_regset_setbit(set, bit)  (*(set) |= 1ULL << (bit)) /* unsigned: bit 63 ok */
+#  define jit_regset_tstbit(set, bit)  (*(set) & (1ULL << (bit)))
+#  define jit_regset_new(set)          (*(set) = 0)
+#  define jit_regset_del(set)          (*(set) = 0)
+#endif
+extern unsigned long
+jit_regset_scan1(jit_regset_t*, jit_int32_t);
+
+#define jit_reglive_setup()                                            \
+    do {                                                               \
+       jit_regset_set_ui(&_jitc->reglive, 0);                          \
+       jit_regset_set_ui(&_jitc->regmask, 0);                          \
+    } while (0)
+
+/*
+ * Types
+ */
+typedef union jit_data         jit_data_t;
+typedef struct jit_note                jit_note_t;
+typedef struct jit_line                jit_line_t;
+typedef struct jit_block       jit_block_t;
+typedef struct jit_value       jit_value_t;
+typedef struct jit_compiler    jit_compiler_t;
+typedef struct jit_function    jit_function_t;
+typedef struct jit_register    jit_register_t;
+#if __arm__
+#  if DISASSEMBLER
+typedef struct jit_data_info   jit_data_info_t;
+#  endif
+#endif
+
+union jit_data {
+    struct {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       jit_int32_t      l;
+       jit_int32_t      h;
+#else
+       jit_int32_t      h;
+       jit_int32_t      l;
+#endif
+    } q;
+    jit_word_t          w;
+    jit_float32_t       f;
+    jit_float64_t       d;
+    jit_pointer_t       p;
+    jit_node_t         *n;
+};
+
+struct jit_note {
+    jit_uint8_t                *code;
+    char               *name;
+    jit_line_t         *lines;
+    jit_word_t          length;
+    jit_word_t          size;          /* of code */
+};
+
+struct jit_line {
+    char               *file;
+    jit_int32_t                *linenos;
+    jit_int32_t                *offsets;
+    jit_word_t          length;
+};
+
+struct jit_node {
+    jit_node_t         *next;
+    jit_code_t          code;
+    jit_uint16_t        flag;
+    jit_uint16_t        offset;        /* Used if DEVEL_DISASSEMBLER */
+    jit_data_t          u;
+    jit_data_t          v;
+    jit_data_t          w;
+    jit_node_t         *link;
+};
+
+struct jit_block {     /* element type of jit_compiler.blocks ("basic blocks") */
+    jit_node_t         *label; /* label node associated with the block */
+    jit_regset_t        reglive;       /* same name as jit_compiler.reglive; presumably the per-block copy -- confirm */
+    jit_regset_t        regmask;       /* same name as jit_compiler.regmask; presumably the per-block copy -- confirm */
+};
+
+struct jit_value {     /* element type of the temporary jit_compiler.values vector */
+    jit_int32_t                kind;   /* NOTE(review): value classification; constants defined outside this chunk */
+    jit_code_t         code;   /* NOTE(review): presumably the code that produced the value -- confirm */
+    jit_data_t         base;   /* NOTE(review): presumably base operand of a base+disp pair -- confirm */
+    jit_data_t         disp;   /* NOTE(review): presumably the displacement -- confirm */
+};
+
+typedef struct {       /* element type of jit_compiler.patches ("forward patch information") */
+#if __arm__
+    jit_word_t          kind;  /* extra member present on ARM builds only */
+#endif
+    jit_word_t          inst;  /* NOTE(review): presumably the location of the instruction to patch -- confirm */
+    jit_node_t         *node;  /* node the patch refers to */
+} jit_patch_t;
+
+#if __arm__ && DISASSEMBLER    /* element type of jit_compiler.data_info ("constant pools information") */
+struct jit_data_info {
+    jit_uword_t                  code;         /* pointer in code buffer */
+    jit_word_t           length;       /* length of constant vector */
+};
+#endif /* __arm__ && DISASSEMBLER */
+
+struct jit_function {  /* element type of jit_compiler.functions; also pointed to by jit_compiler.function */
+    struct {
+       jit_int32_t      argi;
+       jit_int32_t      argf;
+       jit_int32_t      size;
+       jit_int32_t      aoff;
+       jit_int32_t      alen;
+       jit_int32_t      call;
+       jit_int32_t      argn;          /* for debug output */
+    } self;                    /* NOTE(review): presumably state of the function being defined -- confirm */
+    struct {
+       jit_int32_t      argi;
+       jit_int32_t      argf;
+       jit_int32_t      size;
+       jit_int32_t      call;
+    } call;                    /* NOTE(review): presumably state of the call being prepared -- confirm */
+    jit_node_t         *prolog;        /* prolog node of this function */
+    jit_node_t         *epilog;        /* epilog node of this function */
+    jit_int32_t                *regoff;        /* vector of offsets; presumably one per register -- confirm */
+    jit_regset_t        regset;
+    jit_int32_t                 stack;
+
+    /* Helper for common jit generation pattern, used in GNU Smalltalk
+     * and possibly others, where a static frame layout is required or
+     * assumed. */
+    jit_int32_t                 frame;
+    jit_uint32_t        define_frame : 1;
+    jit_uint32_t        assume_frame : 1;
+
+    /* offset of the alloca offset (presumably where aoff is kept) -- TODO confirm */
+    jit_int32_t                 aoffoff;
+    /* uses allocar flag */
+    jit_uint32_t        allocar : 1;
+
+    /* varargs state offsets */
+    jit_int32_t                 vaoff;         /* offset of jit_va_list */
+    jit_int32_t                 vagp;          /* first gp va argument */
+    jit_int32_t                 vafp;          /* first fp va argument */
+};
+
+/* data used only during jit generation */
+struct jit_compiler {  /* reached through the _jitc shorthand (_jit->comp) */
+#if __ia64__
+    struct {
+       jit_uint64_t      i : 41;
+       jit_uint64_t      t :  4;
+    } inst[3];
+    jit_regset_t         regs;         /* changed regs since last stop */
+    jit_int32_t                  pred;         /* changed preds last stop */
+    jit_int32_t                  ioff;         /* offset in inst vector */
+    jit_int32_t                  rout;         /* first output register */
+    jit_int32_t                  breg;         /* base register for prolog/epilog */
+#endif /* __ia64__ */
+#if __mips__ || __ia64__ || __alpha__ || \
+       (__sparc__ && __WORDSIZE == 64) || __riscv
+    jit_int32_t                  carry;        /* only present on the targets listed above */
+#define jit_carry        _jitc->carry
+#endif
+    jit_node_t          *head;         /* first node of the node list */
+    jit_node_t          *tail;         /* last node of the node list */
+    jit_node_t          *prepare;      /* inside prepare/finish* block */
+    jit_uint32_t         realize : 1;  /* jit_realize() called? */
+    jit_uint32_t         dataset : 1;  /* jit_dataset() called? */
+    jit_uint32_t         done  : 1;    /* emit state finished */
+    jit_uint32_t         emit  : 1;    /* emit state entered */
+    jit_uint32_t         again : 1;    /* start over emitting function */
+    jit_uint32_t         synth : 8;    /* emitting synthesized instructions */
+#if DEBUG
+    jit_uint32_t         getreg : 1;
+#endif /* DEBUG */
+    jit_uint32_t         no_data : 1;
+    jit_uint32_t         no_note : 1;
+    jit_int32_t                  reglen;       /* number of registers */
+    jit_regset_t         regarg;       /* cannot allocate */
+    jit_regset_t         regsav;       /* automatic spill only once */
+    jit_regset_t         reglive;      /* known live registers at some point */
+    jit_regset_t         regmask;      /* register mask to update reglive */
+    struct {
+       jit_uint8_t      *end;
+    } code;
+    struct {
+       jit_uint8_t      *ptr;
+       jit_node_t      **table;        /* very simple hash table */
+       jit_word_t        size;         /* number of vectors in table */
+       jit_word_t        count;        /* number of hash table entries */
+       jit_word_t        offset;       /* offset in bytes in ptr */
+    } data;
+    jit_node_t         **spill;
+    jit_int32_t                 *gen;          /* ssa like "register version" */
+    jit_value_t                 *values;       /* temporary jit_value_t vector */
+    struct {
+       jit_block_t      *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } blocks;                          /* basic blocks */
+    struct {
+       jit_patch_t      *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } patches;                         /* forward patch information */
+    jit_function_t      *function;     /* current function */
+    struct {
+       jit_function_t   *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } functions;                       /* prolog/epilogue offsets in code */
+    struct {
+       jit_node_t      **ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } pool;
+    jit_node_t          *list;
+    struct {
+       jit_node_t       *head;         /* first note node */
+       jit_node_t       *tail;         /* linked list insertion */
+       /* fields to store temporary state information */
+       jit_word_t        size;
+       jit_node_t       *name;
+       jit_node_t       *note;
+       jit_uint8_t      *base;
+    } note;
+#if __arm__
+    /* prevent using thumb instructions that set flags? */
+    jit_uint32_t         no_set_flags : 1;
+#  if DISASSEMBLER
+    struct {
+       jit_data_info_t  *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } data_info;                       /* constant pools information */
+#  endif       /* DISASSEMBLER */
+    /* Note that this field is somewhat hackish, but required by most
+     * ways to implement jit, unless implementing a pure one function
+     * per jit, as most times it needs to start the jit buffer with a
+     * jump where the "main" prolog starts, and because the initial
+     * code is in "arm mode", need to make an "arm mode" patch on that
+     * jump. A good example is the test suite assembler, where most
+     * test cases start with a "jmpi main" call. */
+    jit_uword_t                  thumb;
+    struct {
+       jit_uint8_t      *data;         /* pointer to code */
+       jit_word_t        size;         /* size data */
+       jit_word_t        offset;       /* pending patches */
+       jit_word_t        length;       /* number of pending constants */
+       jit_int32_t       values[1024]; /* pending constants */
+       jit_word_t        patches[2048];
+    } consts;
+#elif (__powerpc__ && _CALL_AIXDESC) || __ia64__
+    /* Keep track of prolog addresses, just for the sake of making
+     * jit that starts with a jump to a "main" label work like other
+     * backends. */
+    struct {
+       jit_word_t       *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } prolog;
+    jit_bool_t           jump;
+#endif
+#if GET_JIT_SIZE
+    /* Temporary storage to calculate instructions length */
+    jit_word_t           size;
+    /* Global flag for code buffer heuristic size computation */
+    jit_word_t           mult;
+    /* Pointer to code to prevent miscalculation if reallocating buffer */
+    jit_uint8_t                 *cptr;
+#endif /* GET_JIT_SIZE */
+};
+
+#define _jitc                          _jit->comp      /* shorthand; needs a jit_state_t pointer named _jit in scope */
+struct jit_state {
+    union {
+       jit_uint8_t      *uc;   /* as pointer to 8-bit units */
+       jit_uint16_t     *us;   /* as pointer to 16-bit units */
+       jit_uint32_t     *ui;   /* as pointer to 32-bit units */
+       jit_uint64_t     *ul;   /* as pointer to 64-bit units */
+       jit_word_t        w;    /* as an integer word */
+    } pc;                      /* one location viewed at several widths; presumably the emit position -- confirm */
+    struct {
+       jit_uint8_t     *ptr;
+       jit_word_t       length;
+    } code;                    /* code buffer and its length */
+    struct {
+       jit_uint8_t     *ptr;
+       jit_word_t       length;
+    } data;                    /* data buffer and its length */
+    struct {
+       jit_note_t      *ptr;
+       jit_word_t       length;
+    } note;                    /* vector of jit_note_t records and its length */
+    jit_compiler_t     *comp;  /* generation-only state, accessed via _jitc */
+    /* Flags to know if user did set the code and data buffers */
+    jit_uint32_t        user_code      : 1;
+    jit_uint32_t        user_data      : 1;
+};
+
+struct jit_register {  /* element type of the extern _rvs[] table declared below */
+    jit_reg_t           spec;  /* NOTE(review): register value, possibly with extra flag bits -- confirm */
+    char               *name;  /* NOTE(review): presumably the printable register name -- confirm */
+};
+
+/*
+ * Prototypes
+ */
+extern void jit_get_cpu(void);
+
+#define jit_init()                     _jit_init(_jit)
+extern void _jit_init(jit_state_t*);
+
+#define jit_synth_inc()                        _jit_synth_inc(_jit)
+extern void _jit_synth_inc(jit_state_t*);
+
+#define jit_new_node_no_link(u)                _jit_new_node_no_link(_jit, u)
+extern jit_node_t *_jit_new_node_no_link(jit_state_t*, jit_code_t);
+
+#define jit_link_node(u)               _jit_link_node(_jit, u)
+extern void _jit_link_node(jit_state_t*, jit_node_t*);
+
+#define jit_link_label(l)      _jit_link_label(_jit,l)
+extern void
+_jit_link_label(jit_state_t*,jit_node_t*);
+
+#define jit_synth_dec()                        _jit_synth_dec(_jit)
+extern void _jit_synth_dec(jit_state_t*);
+
+#define jit_reglive(node)      _jit_reglive(_jit, node)
+extern void
+_jit_reglive(jit_state_t*, jit_node_t*);
+
+#define jit_regarg_set(n,v)    _jit_regarg_set(_jit,n,v)
+extern void
+_jit_regarg_set(jit_state_t*, jit_node_t*, jit_int32_t);
+
+#define jit_regarg_clr(n,v)    _jit_regarg_clr(_jit,n,v)
+extern void
+_jit_regarg_clr(jit_state_t*, jit_node_t*, jit_int32_t);
+
+#define jit_get_reg(s)         _jit_get_reg(_jit,s)
+extern jit_int32_t
+_jit_get_reg(jit_state_t*, jit_int32_t);
+
+#define jit_unget_reg(r)       _jit_unget_reg(_jit,r)
+extern void
+_jit_unget_reg(jit_state_t*, jit_int32_t);
+
+#define jit_save(reg)          _jit_save(_jit, reg)
+extern void
+_jit_save(jit_state_t*, jit_int32_t);
+
+#define jit_load(reg)          _jit_load(_jit, reg)
+extern void
+_jit_load(jit_state_t*, jit_int32_t);
+
+#define jit_trampoline(u,v)    _jit_trampoline(_jit, u, v)
+extern void _jit_trampoline(jit_state_t*, jit_int32_t, jit_bool_t);
+
+#define jit_optimize()         _jit_optimize(_jit)
+extern void
+_jit_optimize(jit_state_t*);
+
+#define jit_classify(code)     _jit_classify(_jit, code)
+extern jit_int32_t
+_jit_classify(jit_state_t*, jit_code_t);
+
+#define jit_regarg_p(n, r)     _jit_regarg_p(_jit, n, r)
+extern jit_bool_t
+_jit_regarg_p(jit_state_t*, jit_node_t*, jit_int32_t);
+
+#define emit_code()            _emit_code(_jit)
+extern jit_pointer_t
+_emit_code(jit_state_t*);
+
+extern void
+jit_flush(void *fptr, void *tptr);
+
+#define emit_ldxi(r0, r1, i0)  _emit_ldxi(_jit, r0, r1, i0)
+extern void
+_emit_ldxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+#define emit_stxi(i0, r0, r1)  _emit_stxi(_jit, i0, r0, r1)
+extern void
+_emit_stxi(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+
+#define emit_ldxi_d(r0, r1, i0)        _emit_ldxi_d(_jit, r0, r1, i0)
+extern void
+_emit_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+#define emit_stxi_d(i0, r0, r1)        _emit_stxi_d(_jit, i0, r0, r1)
+extern void
+_emit_stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+
+extern void jit_init_debug(const char*);
+extern void jit_finish_debug(void);
+
+extern void jit_init_note(void);
+extern void jit_finish_note(void);
+#define jit_set_note(n,u,v,w)  _jit_set_note(_jit, n, u, v, w)
+extern void _jit_set_note(jit_state_t*, jit_note_t*, char*, int, jit_int32_t);
+#define jit_annotate()         _jit_annotate(_jit)
+extern void _jit_annotate(jit_state_t*);
+
+#define jit_print_node(u)      _jit_print_node(_jit,u)
+extern void _jit_print_node(jit_state_t*,jit_node_t*);
+
+extern jit_pointer_t jit_memcpy(jit_pointer_t,const void*,jit_word_t);
+extern jit_pointer_t jit_memmove(jit_pointer_t,const void*,jit_word_t);
+extern void jit_alloc(jit_pointer_t*, jit_word_t);
+extern void jit_realloc(jit_pointer_t*, jit_word_t, jit_word_t);
+void jit_free(jit_pointer_t*);
+
+extern void jit_init_size(void);
+extern void jit_finish_size(void);
+
+#if GET_JIT_SIZE       /* size-collecting build: only prepare/collect are declared */
+#  define jit_size_prepare()           _jit_size_prepare(_jit)
+extern void
+_jit_size_prepare(jit_state_t*);
+
+#  define jit_size_collect(node)       _jit_size_collect(_jit, node)
+extern void
+_jit_size_collect(jit_state_t*, jit_node_t*);
+#else  /* !GET_JIT_SIZE: only the size query is declared */
+#  define jit_get_size()               _jit_get_size(_jit)
+extern jit_word_t
+_jit_get_size(jit_state_t*);
+#endif /* GET_JIT_SIZE */
+
+extern jit_word_t
+jit_get_max_instr(void);
+
+/*
+ * Externs
+ */
+extern jit_register_t   _rvs[];
+
+#endif /* _jit_private_h */
diff --git a/deps/lightning/include/lightning/jit_riscv.h b/deps/lightning/include/lightning/jit_riscv.h
new file mode 100644 (file)
index 0000000..1b4f93d
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_riscv_h
+#define _jit_riscv_h
+
+#define JIT_HASH_CONSTS                0
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _FP
+typedef enum {
+#define jit_r(i)               (JIT_R0 + (i))
+#define jit_r_num()            7
+#define jit_v(i)               (JIT_V0 + (i))
+#define jit_v_num()            11
+#define jit_f(i)               (JIT_F0 + (i))
+#define jit_f_num()            12
+    _ZERO,     /*  x0 - Hard-wired zero        ---             */
+    _RA,       /*  x1 - Return address         (CalleR save)   */
+    _SP,       /*  x2 - Stack pointer          (CalleE save)   */
+    _GP,       /*  x3 - Global pointer         ---             */
+
+#if 0          /* Pretend it does not exist, so _NOREG can be used in
+                * a 64 bit bitmask */
+    _TP,       /*  x4 - Thread pointer         ---             */
+#endif
+    
+#define JIT_R0         _T0
+#define JIT_R1         _T1
+#define JIT_R2         _T2
+#define JIT_R3         _T3
+#define JIT_R4         _T4
+#define JIT_R5         _T5
+#define JIT_R6         _T6
+    _T0,       /*  x5 - Temporary/alternate
+                        link register          (CalleR save)   */
+    _T1,       /*  x6 - Temporary              (CalleR save)   */
+    _T2,       /*  x7 - Temporary              (CalleR save)   */
+    _T3,       /* x28 - Temporary              (CalleR save)   */
+    _T4,       /* x29 - Temporary              (CalleR save)   */
+    _T5,       /* x30 - Temporary              (CalleR save)   */
+    _T6,       /* x31 - Temporary              (CalleR save)   */
+    _FP,       /*  x8 - Saved register/frame
+                        pointer                (CalleE save)   */
+    _S0 = _FP, /* alias: same enumerator value as _FP */
+#define JIT_V0         _S1
+#define JIT_V1         _S2
+#define JIT_V2         _S3
+#define JIT_V3         _S4
+#define JIT_V4         _S5
+#define JIT_V5         _S6
+#define JIT_V6         _S7
+#define JIT_V7         _S8
+#define JIT_V8         _S9
+#define JIT_V9         _S10
+#define JIT_V10                _S11
+    _S1,       /*  x9 - Saved register         (CalleE save)   */
+    _S2,       /* x18 - Saved register         (CalleE save)   */
+    _S3,       /* x19 - Saved register         (CalleE save)   */
+    _S4,       /* x20 - Saved register         (CalleE save)   */
+    _S5,       /* x21 - Saved register         (CalleE save)   */
+    _S6,       /* x22 - Saved register         (CalleE save)   */
+    _S7,       /* x23 - Saved register         (CalleE save)   */
+    _S8,       /* x24 - Saved register         (CalleE save)   */
+    _S9,       /* x25 - Saved register         (CalleE save)   */
+    _S10,      /* x26 - Saved register         (CalleE save)   */
+    _S11,      /* x27 - Saved register         (CalleE save)   */
+    _A7,       /* x17 - Function argument      (CalleR save)   */
+    _A6,       /* x16 - Function argument      (CalleR save)   */
+    _A5,       /* x15 - Function argument      (CalleR save)   */
+    _A4,       /* x14 - Function argument      (CalleR save)   */
+    _A3,       /* x13 - Function argument      (CalleR save)   */
+    _A2,       /* x12 - Function argument      (CalleR save)   */
+    _A1,       /* x11 - Function argument/
+                        return value           (CalleR save)   */
+    _A0,       /* x10 - Function argument/
+                        return value           (CalleR save)   */
+    _FT0,      /*  f0 - FP temporary           (CalleR save)   */
+    _FT1,      /*  f1 - FP temporary           (CalleR save)   */
+    _FT2,      /*  f2 - FP temporary           (CalleR save)   */
+    _FT3,      /*  f3 - FP temporary           (CalleR save)   */
+    _FT4,      /*  f4 - FP temporary           (CalleR save)   */
+    _FT5,      /*  f5 - FP temporary           (CalleR save)   */
+    _FT6,      /*  f6 - FP temporary           (CalleR save)   */
+    _FT7,      /*  f7 - FP temporary           (CalleR save)   */
+    _FT8,      /* f28 - FP temporary           (CalleR save)   */
+    _FT9,      /* f29 - FP temporary           (CalleR save)   */
+    _FT10,     /* f30 - FP temporary           (CalleR save)   */
+    _FT11,     /* f31 - FP temporary           (CalleR save)   */
+#define JIT_F0         _FS0
+#define JIT_F1         _FS1
+#define JIT_F2         _FS2
+#define JIT_F3         _FS3
+#define JIT_F4         _FS4
+#define JIT_F5         _FS5
+#define JIT_F6         _FS6
+#define JIT_F7         _FS7
+#define JIT_F8         _FS8
+#define JIT_F9         _FS9
+#define JIT_F10                _FS10
+#define JIT_F11                _FS11
+    _FS0,      /*  f8 - FP saved register      (CalleE save)   */
+    _FS1,      /*  f9 - FP saved register      (CalleE save)   */
+    _FS2,      /* f18 - FP saved register      (CalleE save)   */
+    _FS3,      /* f19 - FP saved register      (CalleE save)   */
+    _FS4,      /* f20 - FP saved register      (CalleE save)   */
+    _FS5,      /* f21 - FP saved register      (CalleE save)   */
+    _FS6,      /* f22 - FP saved register      (CalleE save)   */
+    _FS7,      /* f23 - FP saved register      (CalleE save)   */
+    _FS8,      /* f24 - FP saved register      (CalleE save)   */
+    _FS9,      /* f25 - FP saved register      (CalleE save)   */
+    _FS10,     /* f26 - FP saved register      (CalleE save)   */
+    _FS11,     /* f27 - FP saved register      (CalleE save)   */
+    _FA7,      /* f17 - FP Function argument   (CalleR save)   */
+    _FA6,      /* f16 - FP Function argument   (CalleR save)   */
+    _FA5,      /* f15 - FP Function argument   (CalleR save)   */
+    _FA4,      /* f14 - FP Function argument   (CalleR save)   */
+    _FA3,      /* f13 - FP Function argument   (CalleR save)   */
+    _FA2,      /* f12 - FP Function argument   (CalleR save)   */
+    _FA1,      /* f11 - FP function argument/
+                        return value           (CalleR save)   */
+    _FA0,      /* f10 - FP function argument/
+                        return value           (CalleR save)   */
+    _NOREG,
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+#endif /* _jit_riscv_h */
diff --git a/deps/lightning/include/lightning/jit_s390.h b/deps/lightning/include/lightning/jit_s390.h
new file mode 100644 (file)
index 0000000..6ab196b
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_s390_h
+#define _jit_s390_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       2
+
+/*
+ * Types
+ */
+#define JIT_FP                 _R13
+typedef enum {
+#define jit_r(i)               (_R12 + ((i) << 1))     /* step 2: R and V registers alternate in the enum (_R12, _R11, _R10, ...) */
+#define jit_r_num()            3
+#define jit_v(i)               (_R11 + ((i) << 1))     /* step 2 for the same reason: yields _R11, _R9, _R7 */
+#define jit_v_num()            3
+#define jit_f(i)               (_F8 + (i))             /* _F8 .. _F13 are consecutive in the enum */
+#define jit_f_num()            6
+#define JIT_R0                 _R12
+#define JIT_R1                 _R10
+#define JIT_R2                 _R8
+#define JIT_V0                 _R11
+#define JIT_V1                 _R9
+#define JIT_V2                 _R7
+     _R0,  _R1,                        /* Volatile */
+    _R12,                      /* Saved, GOT */
+    _R11,  _R10,  _R9,  _R8,   /* Saved */
+     _R7,                      /* Saved */
+     _R6,                      /* Saved, parameter */
+     _R5,  _R4,  _R3,          /* Parameter passing */
+     _R2,                      /* Volatile, parameter and return value */
+    _R13,                      /* Saved, literal pool pointer */
+    _R14,                      /* Volatile, return address */
+    _R15,                      /* Saved, stack pointer */
+#define JIT_F0                 _F8
+#define JIT_F1                 _F9
+#define JIT_F2                 _F10
+#define JIT_F3                 _F11
+#define JIT_F4                 _F12
+#define JIT_F5                 _F13
+     _F1,  _F3,  _F5,  _F7,    /* Volatile */
+    _F14, _F15,         _F8,  _F9,     /* Saved */
+    _F10, _F11, _F12, _F13,    /* Saved */
+     _F6,  _F4,  _F2,          /* Volatile, parameter */
+     _F0,                      /* Volatile, parameter and return value */
+    _NOREG,
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+#endif /* _jit_s390_h */
diff --git a/deps/lightning/include/lightning/jit_sparc.h b/deps/lightning/include/lightning/jit_sparc.h
new file mode 100644 (file)
index 0000000..bee440b
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_sparc_h
+#define _jit_sparc_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _FP
+typedef enum {
+#define jit_r(i)               (_G2 + (i))
+#define jit_r_num()            3
+#define jit_v(i)               (_L0 + (i))
+#define jit_v_num()            8
+#if __WORDSIZE == 32
+#  define jit_f(i)             (_F0 + ((i) << 1))      /* even registers only: _F0, _F2, ... _F14 */
+#  define jit_f_num()          8
+#else
+#  define jit_f(i)             (_F32 - (i))    /* the 64-bit enum lists _F62 .. _F32 descending, so this yields _F32, _F34, ... */
+#  define jit_f_num()          16
+#endif
+#define JIT_R0                 _G2
+#define JIT_R1                 _G3
+#define JIT_R2                 _G4
+#define JIT_V0                 _L0
+#define JIT_V1                 _L1
+#define JIT_V2                 _L2
+#define JIT_V3                 _L3
+#define JIT_V4                 _L4
+#define JIT_V5                 _L5
+#define JIT_V6                 _L6
+#define JIT_V7                 _L7
+    _G0, _G1, _G2, _G3, _G4, _G5, _G6, _G7,
+    _O0, _O1, _O2, _O3, _O4, _O5, _SP, _O7,
+    _L0, _L1, _L2, _L3, _L4, _L5, _L6, _L7,
+    _I0, _I1, _I2, _I3, _I4, _I5, _FP, _I7,
+#if __WORDSIZE == 32
+#  define JIT_F0               _F0
+#  define JIT_F1               _F2
+#  define JIT_F2               _F4
+#  define JIT_F3               _F6
+#  define JIT_F4               _F8
+#  define JIT_F5               _F10
+#  define JIT_F6               _F12
+#  define JIT_F7               _F14
+    _F0, _F1,  _F2,  _F3,  _F4,  _F5,  _F6,  _F7,
+    _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15,
+#else
+    /* All single precision operations have a high cost due to being
+     * stored on registers only encodable as double precision.
+     * The cost is due to needing to move values to a register with
+     * value <= 31.
+     * This is a limitation due to using fixed named registers in
+     * lightning. */
+#  define JIT_F0               _F32
+#  define JIT_F1               _F34
+#  define JIT_F2               _F36
+#  define JIT_F3               _F38
+#  define JIT_F4               _F40
+#  define JIT_F5               _F42
+#  define JIT_F6               _F44
+#  define JIT_F7               _F46
+#  define JIT_F8               _F48
+#  define JIT_F9               _F50
+#  define JIT_F10              _F52
+#  define JIT_F11              _F54
+#  define JIT_F12              _F56
+#  define JIT_F13              _F58
+#  define JIT_F14              _F60
+#  define JIT_F15              _F62
+    _F62, _F60, _F58, _F56, _F54, _F52, _F50, _F48,
+    _F46, _F44, _F42, _F40, _F38, _F36, _F34, _F32,
+    _F31, _F30, _F29, _F28, _F27, _F26, _F25, _F24,
+    _F23, _F22, _F21, _F20, _F19, _F18, _F17, _F16,
+    _F15, _F14, _F13, _F12, _F11, _F10,  _F9,  _F8,
+     _F7,  _F6,  _F5,  _F4,  _F3,  _F2,  _F1,  _F0,
+#endif
+#define JIT_NOREG              _NOREG
+    _NOREG,
+} jit_reg_t;
+
+#endif /* _jit_sparc_h */
diff --git a/deps/lightning/include/lightning/jit_x86.h b/deps/lightning/include/lightning/jit_x86.h
new file mode 100644 (file)
index 0000000..a278d06
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_x86_h
+#define _jit_x86_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       2
+
+/*
+ * Types
+ */
+#define jit_sse2_p()           jit_cpu.sse2
+#define jit_x87_reg_p(reg)     ((reg) >= _ST0 && (reg) <= _ST6)
+#if __WORDSIZE == 32
+#  if defined(__x86_64__)
+#    define __X64_32           1
+#    define __X64              1
+#  else
+#    define __X32              1
+#  endif
+#else
+#  define __X64                        1
+#endif
+
+#define JIT_FP                 _RBP
+typedef enum {
+#if __X32
+#  define jit_r(i)             (_RAX + (i))
+#  define jit_r_num()          3
+#  define jit_v(i)             (_RBX + (i))
+#  define jit_v_num()          3
+#  define jit_f(i)             (jit_cpu.sse2 ? _XMM0 + (i) : _ST0 + (i))
+#  define jit_f_num()          (jit_cpu.sse2 ? 8 : 6)
+#  define JIT_R0               _RAX
+#  define JIT_R1               _RCX
+#  define JIT_R2               _RDX
+    _RAX,      _RCX,   _RDX,
+#  define JIT_V0               _RBX
+#  define JIT_V1               _RSI
+#  define JIT_V2               _RDI
+    _RBX,      _RSI,   _RDI,
+    _RSP,      _RBP,
+#  define JIT_F0               (jit_sse2_p() ? _XMM0 : _ST0)
+#  define JIT_F1               (jit_sse2_p() ? _XMM1 : _ST1)
+#  define JIT_F2               (jit_sse2_p() ? _XMM2 : _ST2)
+#  define JIT_F3               (jit_sse2_p() ? _XMM3 : _ST3)
+#  define JIT_F4               (jit_sse2_p() ? _XMM4 : _ST4)
+#  define JIT_F5               (jit_sse2_p() ? _XMM5 : _ST5)
+#  define JIT_F6               (jit_sse2_p() ? _XMM6 : _ST6)
+    _XMM0,     _XMM1,  _XMM2,  _XMM3,  _XMM4,  _XMM5,  _XMM6,   _XMM7,
+#  define jit_sse_reg_p(reg)   ((reg) >= _XMM0 && (reg) <= _XMM7)
+#else
+#  if __CYGWIN__ || _WIN32
+#    define jit_r(i)           (_RAX + (i))
+#    define jit_r_num()                3
+#    define jit_v(i)           (_RBX + (i))
+#    define jit_v_num()                7
+#    define jit_f(index)       (_XMM4 + (index))
+#    define jit_f_num()                12
+#    define JIT_R0             _RAX
+#    define JIT_R1             _R10
+#    define JIT_R2             _R11
+#    define JIT_V0             _RBX
+#    define JIT_V1             _RDI
+#    define JIT_V2             _RSI
+#    define JIT_V3             _R12
+#    define JIT_V4             _R13
+#    define JIT_V5             _R14
+#    define JIT_V6             _R15
+    /* Volatile - Return value register */
+    _RAX,
+    /* Volatile */
+    _R10,      _R11,
+    /* Nonvolatile */
+    _RBX,      _RDI,   _RSI,
+    _R12,      _R13,   _R14,   _R15,
+    /* Volatile - Integer arguments (4 to 1) */
+    _R9,       _R8,    _RDX,   _RCX,
+    /* Nonvolatile */
+    _RSP,      _RBP,
+#    define JIT_F0             _XMM4
+#    define JIT_F1             _XMM5
+#    define JIT_F2             _XMM6
+#    define JIT_F3             _XMM7
+#    define JIT_F4             _XMM8
+#    define JIT_F5             _XMM9
+#    define JIT_F6             _XMM10
+#    define JIT_F7             _XMM11
+#    define JIT_F8             _XMM12
+#    define JIT_F9             _XMM13
+#    define JIT_F10            _XMM14
+#    define JIT_F11            _XMM15
+    /* Volatile */
+    _XMM4,     _XMM5,
+    /* Nonvolatile */
+    _XMM6,     _XMM7,  _XMM8,  _XMM9,  _XMM10,
+    _XMM11,    _XMM12, _XMM13, _XMM14, _XMM15,
+    /* Volatile - FP arguments (4 to 1) */
+    _XMM3,     _XMM2,  _XMM1,  _XMM0,
+#    define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0)      /* not inverted: _XMM4 is first and _XMM0 last in this enum order */
+#  else
+#    define jit_r(i)           (_RAX + (i))
+#    define jit_r_num()                3
+#    define jit_v(i)           (_RBX + (i))
+#    define jit_v_num()                5
+#    define jit_f(index)       (_XMM8 + (index))
+#    define jit_f_num()                8
+#    define JIT_R0             _RAX
+#    define JIT_R1             _R10
+#    define JIT_R2             _R11
+    _RAX,      _R10,   _R11,
+#    define JIT_V0             _RBX
+#    define JIT_V1             _R13
+#    define JIT_V2             _R14
+#    define JIT_V3             _R15
+#    define JIT_V4             _R12
+    _RBX,      _R13,   _R14,   _R15,   _R12,
+    _R9,       _R8,    _RCX,   _RDX,   _RSI,   _RDI,
+    _RSP,      _RBP,
+#    define JIT_F0             _XMM8
+#    define JIT_F1             _XMM9
+#    define JIT_F2             _XMM10
+#    define JIT_F3             _XMM11
+#    define JIT_F4             _XMM12
+#    define JIT_F5             _XMM13
+#    define JIT_F6             _XMM14
+#    define JIT_F7             _XMM15
+    _XMM8,     _XMM9,  _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+    _XMM7,     _XMM6,  _XMM5,  _XMM4,  _XMM3,  _XMM2,  _XMM1,  _XMM0,
+#    define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0)      /* not inverted: _XMM8 is first and _XMM0 last in this enum order */
+#  endif
+#endif
+    _ST0,      _ST1,   _ST2,   _ST3,   _ST4,   _ST5,   _ST6,
+#  define JIT_NOREG            _NOREG
+    _NOREG,
+} jit_reg_t;
+
+typedef struct {       /* one-bit feature flags; the single instance is the extern jit_cpu */
+    /* x87 present */
+    jit_uint32_t fpu           : 1;
+    /* cmpxchg8b instruction */
+    jit_uint32_t cmpxchg8b     : 1;
+    /* cmov and fcmov branchless conditional mov */
+    jit_uint32_t cmov          : 1;
+    /* mmx registers/instructions available */
+    jit_uint32_t mmx           : 1;
+    /* sse registers/instructions available */
+    jit_uint32_t sse           : 1;
+    /* sse2 registers/instructions available */
+    jit_uint32_t sse2          : 1;
+    /* sse3 instructions available */
+    jit_uint32_t sse3          : 1;
+    /* pclmulqdq instruction */
+    jit_uint32_t pclmulqdq     : 1;
+    /* ssse3 supplemental sse3 instructions available */
+    jit_uint32_t ssse3         : 1;
+    /* fused multiply/add using ymm state */
+    jit_uint32_t fma           : 1;
+    /* cmpxchg16b instruction */
+    jit_uint32_t cmpxchg16b    : 1;
+    /* sse4.1 instructions available */
+    jit_uint32_t sse4_1                : 1;
+    /* sse4.2 instructions available */
+    jit_uint32_t sse4_2                : 1;
+    /* movbe instruction available */
+    jit_uint32_t movbe         : 1;
+    /* popcnt instruction available */
+    jit_uint32_t popcnt                : 1;
+    /* aes instructions available */
+    jit_uint32_t aes           : 1;
+    /* avx instructions available */
+    jit_uint32_t avx           : 1;
+    /* lahf/sahf available in 64 bits mode */
+    jit_uint32_t lahf          : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
+#endif /* _jit_x86_h */
diff --git a/deps/lightning/lib/Makefile.am b/deps/lightning/lib/Makefile.am
new file mode 100644 (file)
index 0000000..f2ac2ba
--- /dev/null
@@ -0,0 +1,83 @@
+#
+# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+#
+# This file is part of GNU lightning.
+#
+# GNU lightning is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU lightning is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
+liblightning_LTLIBRARIES = liblightning.la
+liblightning_la_LDFLAGS = -version-info 1:0:0
+
+if get_jit_size
+JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
+AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+endif
+
+liblightningdir = $(libdir)
+liblightning_la_SOURCES =      \
+       jit_disasm.c            \
+       jit_memory.c            \
+       jit_names.c             \
+       jit_note.c              \
+       jit_print.c             \
+       jit_size.c              \
+       lightning.c
+
+EXTRA_DIST =                   \
+       jit_rewind.c            \
+       jit_aarch64.c           \
+       jit_aarch64-cpu.c       \
+       jit_aarch64-fpu.c       \
+       jit_aarch64-sz.c        \
+       jit_alpha.c             \
+       jit_alpha-cpu.c         \
+       jit_alpha-fpu.c         \
+       jit_alpha-sz.c          \
+       jit_arm.c               \
+       jit_arm-cpu.c           \
+       jit_arm-swf.c           \
+       jit_arm-vfp.c           \
+       jit_arm-sz.c            \
+       jit_hppa.c              \
+       jit_hppa-cpu.c          \
+       jit_hppa-fpu.c          \
+       jit_hppa-sz.c           \
+       jit_ia64.c              \
+       jit_ia64-cpu.c          \
+       jit_ia64-fpu.c          \
+       jit_ia64-sz.c           \
+       jit_mips.c              \
+       jit_mips-cpu.c          \
+       jit_mips-fpu.c          \
+       jit_mips-sz.c           \
+       jit_ppc.c               \
+       jit_ppc-cpu.c           \
+       jit_ppc-fpu.c           \
+       jit_ppc-sz.c            \
+       jit_riscv.c             \
+       jit_riscv-cpu.c         \
+       jit_riscv-fpu.c         \
+       jit_riscv-sz.c          \
+       jit_s390.c              \
+       jit_s390-cpu.c          \
+       jit_s390-fpu.c          \
+       jit_s390-sz.c           \
+       jit_sparc.c             \
+       jit_sparc-cpu.c         \
+       jit_sparc-fpu.c         \
+       jit_sparc-sz.c          \
+       jit_x86.c               \
+       jit_x86-cpu.c           \
+       jit_x86-sse.c           \
+       jit_x86-x87.c           \
+       jit_x86-sz.c
diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c
new file mode 100644 (file)
index 0000000..8e8a9a0
--- /dev/null
@@ -0,0 +1,2446 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+typedef union {
+/* Field views of one 32-bit A64 instruction word (w); field list follows binutils aarch64-opc.c */
+#  define ui                   jit_uint32_t
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+    /* cond2: condition in truly conditional-executed inst.  */
+    struct {           ui b:  4; } cond2;
+    /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
+    struct {           ui b:  4; } nzcv;
+    /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
+    struct { ui _:  5; ui b:  5; } defgh;
+    /* abc: a:b:c bits in AdvSIMD modified immediate.  */
+    struct { ui _: 16; ui b:  3; } abc;
+    /* imm19: e.g. in CBZ.  */
+    struct { ui _:  5; ui b: 19; } imm19;
+    /* immhi: e.g. in ADRP.  */
+    struct { ui _:  5; ui b: 19; } immhi;
+    /* immlo: e.g. in ADRP.  */
+    struct { ui _: 29; ui b:  2; } immlo;
+    /* size: in most AdvSIMD and floating-point instructions.  */
+    struct { ui _: 22; ui b:  2; } size;
+    /* vldst_size: size field in the AdvSIMD load/store inst.  */
+    struct { ui _: 10; ui b:  2; } vldst_size;
+    /* op: in AdvSIMD modified immediate instructions.  */
+    struct { ui _: 29; ui b:  1; } op;
+    /* Q: in most AdvSIMD instructions.  */
+    struct { ui _: 30; ui b:  1; } Q;
+    /* Rt: in load/store instructions.  */
+    struct {           ui b:  5; } Rt;
+    /* Rd: in many integer instructions.  */
+    struct {           ui b:  5; } Rd;
+    /* Rn: in many integer instructions.  */
+    struct { ui _:  5; ui b:  5; } Rn;
+    /* Rt2: in load/store pair instructions.  */
+    struct { ui _: 10; ui b:  5; } Rt2;
+    /* Ra: in fp instructions.  */
+    struct { ui _: 10; ui b:  5; } Ra;
+    /* op2: in the system instructions.  */
+    struct { ui _:  5; ui b:  3; } op2;
+    /* CRm: in the system instructions.  */
+    struct { ui _:  8; ui b:  4; } CRm;
+    /* CRn: in the system instructions.  */
+    struct { ui _: 12; ui b:  4; } CRn;
+    /* op1: in the system instructions.  */
+    struct { ui _: 16; ui b:  3; } op1;
+    /* op0: in the system instructions.  */
+    struct { ui _: 19; ui b:  2; } op0;
+    /* imm3: in add/sub extended reg instructions.  */
+    struct { ui _: 10; ui b:  3; } imm3;
+    /* cond: condition flags as a source operand.  */
+    struct { ui _: 12; ui b:  4; } cond;
+    /* opcode: in advsimd load/store instructions.  */
+    struct { ui _: 12; ui b:  4; } opcode;
+    /* cmode: in advsimd modified immediate instructions.  */
+    struct { ui _: 12; ui b:  4; } cmode;
+    /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
+    struct { ui _: 13; ui b:  3; } asisdlso_opcode;
+    /* len: in advsimd tbl/tbx instructions.  */
+    struct { ui _: 13; ui b:  2; } len;
+    /* Rm: in ld/st reg offset and some integer inst.  */
+    struct { ui _: 16; ui b:  5; } Rm;
+    /* Rs: in load/store exclusive instructions.  */
+    struct { ui _: 16; ui b:  5; } Rs;
+    /* option: in ld/st reg offset + add/sub extended reg inst.  */
+    struct { ui _: 13; ui b:  3; } option;
+    /* S: in load/store reg offset instructions.  */
+    struct { ui _: 12; ui b:  1; } S;
+    /* hw: in move wide constant instructions.  */
+    struct { ui _: 21; ui b:  2; } hw;
+    /* opc: in load/store reg offset instructions.  */
+    struct { ui _: 22; ui b:  2; } opc;
+    /* opc1: in load/store reg offset instructions.  */
+    struct { ui _: 23; ui b:  1; } opc1;
+    /* shift: in add/sub reg/imm shifted instructions.  */
+    struct { ui _: 22; ui b:  2; } shift;
+    /* type: floating point type field in fp data inst.  */
+    struct { ui _: 22; ui b:  2; } type;
+    /* ldst_size: size field in ld/st reg offset inst.  */
+    struct { ui _: 30; ui b:  2; } ldst_size;
+    /* imm6: in add/sub reg shifted instructions.  */
+    struct { ui _: 10; ui b:  6; } imm6;
+    /* imm4: in advsimd ext and advsimd ins instructions.  */
+    struct { ui _: 11; ui b:  4; } imm4;
+    /* imm5: in conditional compare (immediate) instructions.  */
+    struct { ui _: 16; ui b:  5; } imm5;
+    /* imm7: in load/store pair pre/post index instructions.  */
+    struct { ui _: 15; ui b:  7; } imm7;
+    /* imm8: in floating-point scalar move immediate inst.  */
+    struct { ui _: 13; ui b:  8; } imm8;
+    /* imm9: in load/store pre/post index instructions.  */
+    struct { ui _: 12; ui b:  9; } imm9;
+    /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
+    struct { ui _: 10; ui b: 12; } imm12;
+    /* imm14: in test bit and branch instructions.  */
+    struct { ui _:  5; ui b: 14; } imm14;
+    /* imm16: in exception instructions.  */
+    struct { ui _:  5; ui b: 16; } imm16;
+    /* imm26: in unconditional branch instructions.  */
+    struct {           ui b: 26; } imm26;
+    /* imms: in bitfield and logical immediate instructions.  */
+    struct { ui _: 10; ui b:  6; } imms;
+    /* immr: in bitfield and logical immediate instructions.  */
+    struct { ui _: 16; ui b:  6; } immr;
+    /* immb: in advsimd shift by immediate instructions.  */
+    struct { ui _: 16; ui b:  3; } immb;
+    /* immh: in advsimd shift by immediate instructions.  */
+    struct { ui _: 19; ui b:  4; } immh;
+    /* N: in logical (immediate) instructions.  */
+    struct { ui _: 22; ui b:  1; } N;
+    /* index: in ld/st inst deciding the pre/post-index.  */
+    struct { ui _: 11; ui b:  1; } index;
+    /* index2: in ld/st pair inst deciding the pre/post-index.  */
+    struct { ui _: 24; ui b:  1; } index2;
+    /* sf: in integer data processing instructions.  */
+    struct { ui _: 31; ui b:  1; } sf;
+    /* H: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 11; ui b:  1; } H;
+    /* L: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 21; ui b:  1; } L;
+    /* M: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 20; ui b:  1; } M;
+    /* b5: in the test bit and branch instructions.  */
+    struct { ui _: 31; ui b:  1; } b5;
+    /* b40: in the test bit and branch instructions.  */
+    struct { ui _: 19; ui b:  5; } b40;
+    /* scale: in the fixed-point scalar to fp converting inst.  */
+    struct { ui _: 10; ui b:  6; } scale;
+#  else /* big-endian mirror of the fields above: pad = 32 - (little-endian pad) - width; presumably relies on MSB-first bit-field allocation */
+    struct { ui _: 28; ui b:  4; } cond2;
+    struct { ui _: 28; ui b:  4; } nzcv;
+    struct { ui _: 22; ui b:  5; } defgh;
+    struct { ui _: 13; ui b:  3; } abc;
+    struct { ui _:  8; ui b: 19; } imm19;
+    struct { ui _:  8; ui b: 19; } immhi;
+    struct { ui _:  1; ui b:  2; } immlo;      /* 2 bits wide, same as the little-endian view */
+    struct { ui _:  8; ui b:  2; } size;
+    struct { ui _: 20; ui b:  2; } vldst_size;
+    struct { ui _:  2; ui b:  1; } op;
+    struct { ui _:  1; ui b:  1; } Q;
+    struct { ui _: 27; ui b:  5; } Rt; /* 5-bit register number, same as the little-endian view */
+    struct { ui _: 27; ui b:  5; } Rd; /* 5-bit register number, same as the little-endian view */
+    struct { ui _: 22; ui b:  5; } Rn;
+    struct { ui _: 17; ui b:  5; } Rt2;
+    struct { ui _: 17; ui b:  5; } Ra;
+    struct { ui _: 24; ui b:  3; } op2;
+    struct { ui _: 20; ui b:  4; } CRm;
+    struct { ui _: 16; ui b:  4; } CRn;
+    struct { ui _: 13; ui b:  3; } op1;
+    struct { ui _: 11; ui b:  2; } op0;
+    struct { ui _: 19; ui b:  3; } imm3;
+    struct { ui _: 16; ui b:  4; } cond;
+    struct { ui _: 16; ui b:  4; } opcode;
+    struct { ui _: 16; ui b:  4; } cmode;
+    struct { ui _: 16; ui b:  3; } asisdlso_opcode;
+    struct { ui _: 17; ui b:  2; } len;
+    struct { ui _: 11; ui b:  5; } Rm;
+    struct { ui _: 11; ui b:  5; } Rs;
+    struct { ui _: 16; ui b:  3; } option;
+    struct { ui _: 19; ui b:  1; } S;
+    struct { ui _:  9; ui b:  2; } hw;
+    struct { ui _:  8; ui b:  2; } opc;
+    struct { ui _:  8; ui b:  1; } opc1;
+    struct { ui _:  8; ui b:  2; } shift;
+    struct { ui _:  8; ui b:  2; } type;
+    struct {           ui b:  2; } ldst_size;
+    struct { ui _: 16; ui b:  6; } imm6;
+    struct { ui _: 17; ui b:  4; } imm4;
+    struct { ui _: 11; ui b:  5; } imm5;
+    struct { ui _: 10; ui b:  7; } imm7;
+    struct { ui _: 11; ui b:  8; } imm8;
+    struct { ui _: 11; ui b:  9; } imm9;
+    struct { ui _: 10; ui b: 12; } imm12;
+    struct { ui _: 13; ui b: 14; } imm14;
+    struct { ui _: 11; ui b: 16; } imm16;
+    struct { ui _:  6; ui b: 26; } imm26;
+    struct { ui _: 16; ui b:  6; } imms;
+    struct { ui _: 10; ui b:  6; } immr;
+    struct { ui _: 13; ui b:  3; } immb;
+    struct { ui _:  9; ui b:  4; } immh;
+    struct { ui _:  9; ui b:  1; } N;
+    struct { ui _: 20; ui b:  1; } index;
+    struct { ui _:  7; ui b:  1; } index2;
+    struct {           ui b:  1; } sf;
+    struct { ui _: 20; ui b:  1; } H;
+    struct { ui _: 10; ui b:  1; } L;
+    struct { ui _: 11; ui b:  1; } M;
+    struct {           ui b:  1; } b5;
+    struct { ui _:  8; ui b:  5; } b40;
+    struct { ui _: 16; ui b:  6; } scale;
+#  endif
+    jit_int32_t                w;
+#  undef ui
+} instr_t;
+#  define stack_framesize              160
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  define ldr(r0,r1)                   ldr_l(r0,r1)
+#  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
+#  define ldxi(r0,r1,i0)               ldxi_l(r0,r1,i0)
+#  define stxi(i0,r0,r1)               stxi_l(i0,r0,r1)
+#  define FP_REGNO                     0x1d
+#  define LR_REGNO                     0x1e
+#  define SP_REGNO                     0x1f
+#  define XZR_REGNO                    0x1f
+#  define WZR_REGNO                    XZR_REGNO
+#  define LSL_12                       0x00400000
+#  define MOVI_LSL_16                  0x00200000
+#  define MOVI_LSL_32                  0x00400000
+#  define MOVI_LSL_48                  0x00600000
+#  define XS                           0x80000000      /* Wn -> Xn */
+#  define DS                           0x00400000      /* Sn -> Dn */
+#  define CC_NE                                0x0
+#  define CC_EQ                                0x1
+#  define CC_CC                                0x2
+#  define CC_LO                                CC_CC
+#  define CC_CS                                0x3
+#  define CC_HS                                CC_CS
+#  define CC_PL                                0x4
+#  define CC_MI                                0x5
+#  define CC_VC                                0x6
+#  define CC_VS                                0x7
+#  define CC_LS                                0x8
+#  define CC_HI                                0x9
+#  define CC_LT                                0xa
+#  define CC_GE                                0xb
+#  define CC_LE                                0xc
+#  define CC_GT                                0xd
+#  define CC_NV                                0xe
+#  define CC_AL                                0xf
+/* Branch condition codes: each BCC_x equals CC_x ^ 1, i.e. the CC_ table above with every condition inverted (presumably CC_ feeds CSINC/CSET and BCC_ feeds B_C -- confirm at the call sites) */
+#  define BCC_EQ                       0x0
+#  define BCC_NE                       0x1
+#  define BCC_CS                       0x2
+#  define BCC_HS                       BCC_CS
+#  define BCC_CC                       0x3
+#  define BCC_LO                       BCC_CC
+#  define BCC_MI                       0x4
+#  define BCC_PL                       0x5
+#  define BCC_VS                       0x6
+#  define BCC_VC                       0x7
+#  define BCC_HI                       0x8
+#  define BCC_LS                       0x9
+#  define BCC_GE                       0xa
+#  define BCC_LT                       0xb
+#  define BCC_GT                       0xc
+#  define BCC_LE                       0xd
+#  define BCC_AL                       0xe
+#  define BCC_NV                       0xf
+/* adapted and cut down to only tested and required by lightning,
+ * from data in binutils/aarch64-tbl.h */
+#  define A64_ADCS                     0x3a000000
+#  define A64_SBCS                     0x7a000000
+#  define A64_ADDI                     0x11000000
+#  define A64_ADDSI                    0xb1000000
+#  define A64_SUBI                     0x51000000
+#  define A64_SUBSI                    0x71000000
+#  define A64_ADD                      0x0b000000
+#  define A64_ADDS                     0x2b000000
+#  define A64_SUB                      0x4b000000
+#  define A64_NEG                      0x4b0003e0
+#  define A64_SUBS                     0x6b000000
+#  define A64_CMP                      0x6b00001f
+#  define A64_SBFM                     0x93400000
+#  define A64_UBFM                     0x53400000
+#  define A64_UBFX                     0x53000000
+#  define A64_B                                0x14000000
+#  define A64_BL                       0x94000000
+#  define A64_BR                       0xd61f0000
+#  define A64_BLR                      0xd63f0000
+#  define A64_RET                      0xd65f0000
+#  define A64_CBZ                      0x34000000
+#  define A64_CBNZ                     0x35000000
+#  define A64_B_C                      0x54000000
+#  define A64_CSINC                    0x1a800400
+#  define A64_REV                      0xdac00c00
+#  define A64_UDIV                     0x1ac00800
+#  define A64_SDIV                     0x1ac00c00
+#  define A64_LSL                      0x1ac02000
+#  define A64_LSR                      0x1ac02400
+#  define A64_ASR                      0x1ac02800
+#  define A64_MUL                      0x1b007c00
+#  define A64_SMULL                    0x9b207c00
+#  define A64_SMULH                    0x9b407c00
+#  define A64_UMULL                    0x9ba07c00
+#  define A64_UMULH                    0x9bc07c00
+#  define A64_STRBI                    0x39000000
+#  define A64_LDRBI                    0x39400000
+#  define A64_LDRSBI                   0x39800000
+#  define A64_STRI                     0xf9000000
+#  define A64_LDRI                     0xf9400000
+#  define A64_STRHI                    0x79000000
+#  define A64_LDRHI                    0x79400000
+#  define A64_LDRSHI                   0x79800000
+#  define A64_STRWI                    0xb9000000
+#  define A64_LDRWI                    0xb9400000
+#  define A64_LDRSWI                   0xb9800000
+#  define A64_STRB                     0x38206800
+#  define A64_LDRB                     0x38606800
+#  define A64_LDRSB                    0x38e06800
+#  define A64_STR                      0xf8206800
+#  define A64_LDR                      0xf8606800
+#  define A64_STRH                     0x78206800
+#  define A64_LDRH                     0x78606800
+#  define A64_LDRSH                    0x78a06800
+#  define A64_STRW                     0xb8206800
+#  define A64_LDRW                     0xb8606800
+#  define A64_LDRSW                    0xb8a06800
+#  define A64_STURB                    0x38000000
+#  define A64_LDURB                    0x38400000
+#  define A64_LDURSB                   0x38800000
+#  define A64_STUR                     0xf8000000
+#  define A64_LDUR                     0xf8400000
+#  define A64_STURH                    0x78000000
+#  define A64_LDURH                    0x78400000
+#  define A64_LDURSH                   0x78800000
+#  define A64_STURW                    0xb8000000
+#  define A64_LDURW                    0xb8400000
+#  define A64_LDURSW                   0xb8800000
+#  define A64_STP                      0x29000000
+#  define A64_LDP                      0x29400000
+#  define A64_STP_POS                  0x29800000      /* NOTE(review): bits 25:23 = 011 looks like the pre-indexed form despite the _POS name -- confirm against the Arm ARM */
+#  define A64_LDP_PRE                  0x28c00000      /* NOTE(review): bits 25:23 = 001 looks like the post-indexed form despite the _PRE name -- confirm against the Arm ARM */
+#  define A64_ANDI                     0x12400000
+#  define A64_ORRI                     0x32400000
+#  define A64_EORI                     0x52400000
+#  define A64_ANDSI                    0x72000000
+#  define A64_AND                      0x0a000000
+#  define A64_ORR                      0x2a000000
+#  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
+#  define A64_MVN                      0x2a2003e0
+#  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
+#  define A64_EOR                      0x4a000000
+#  define A64_ANDS                     0x6a000000
+#  define A64_MOVN                     0x12800000
+#  define A64_MOVZ                     0x52800000
+#  define A64_MOVK                     0x72800000
+#  define SBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS)
+#  define UBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS)
+#  define UBFX(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFX,Rd,Rn,ImmR,ImmS)
+#  define CMP(Rn,Rm)                   oxx_(A64_CMP|XS,Rn,Rm)
+#  define CMPI(Rn,Imm12)               oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12)
+#  define CMPI_12(Rn,Imm12)            oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
+#  define CMNI(Rn,Imm12)               oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12)
+#  define CMNI_12(Rn,Imm12)            oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
+#  define CSINC(Rd,Rn,Rm,Cc)           oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc)
+#  define TST(Rn,Rm)                   oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define TSTI(Rn,Imm12)               oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12)
+#  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
+#  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
+#  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
+#  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
+#  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVN_48(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16)
+#  define MOVZ(Rd,Imm16)               ox_h(A64_MOVZ|XS,Rd,Imm16)
+#  define MOVZ_16(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVZ_32(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVZ_48(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16)
+#  define MOVK(Rd,Imm16)               ox_h(A64_MOVK|XS,Rd,Imm16)
+#  define MOVK_16(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVK_32(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVK_48(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16)
+#  define ADD(Rd,Rn,Rm)                        oxxx(A64_ADD|XS,Rd,Rn,Rm)
+#  define ADDI(Rd,Rn,Imm12)            oxxi(A64_ADDI|XS,Rd,Rn,Imm12)
+#  define ADDI_12(Rd,Rn,Imm12)         oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12)
+#  define MOV_XSP(Rd,Rn)               ADDI(Rd,Rn,0)
+#  define ADDS(Rd,Rn,Rm)               oxxx(A64_ADDS|XS,Rd,Rn,Rm)
+#  define ADDSI(Rd,Rn,Imm12)           oxxi(A64_ADDSI|XS,Rd,Rn,Imm12)
+#  define ADDSI_12(Rd,Rn,Imm12)                oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12)
+#  define ADCS(Rd,Rn,Rm)               oxxx(A64_ADCS|XS,Rd,Rn,Rm)
+#  define SUB(Rd,Rn,Rm)                        oxxx(A64_SUB|XS,Rd,Rn,Rm)
+#  define SUBI(Rd,Rn,Imm12)            oxxi(A64_SUBI|XS,Rd,Rn,Imm12)
+#  define SUBI_12(Rd,Rn,Imm12)         oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12)
+#  define SUBS(Rd,Rn,Rm)               oxxx(A64_SUBS|XS,Rd,Rn,Rm)
+#  define SUBSI(Rd,Rn,Imm12)           oxxi(A64_SUBSI|XS,Rd,Rn,Imm12)
+#  define SUBSI_12(Rd,Rn,Imm12)                oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12)
+#  define SBCS(Rd,Rn,Rm)               oxxx(A64_SBCS|XS,Rd,Rn,Rm)
+#  define MUL(Rd,Rn,Rm)                        oxxx(A64_MUL|XS,Rd,Rn,Rm)
+#  define SMULL(Rd,Rn,Rm)              oxxx(A64_SMULL,Rd,Rn,Rm)
+#  define SMULH(Rd,Rn,Rm)              oxxx(A64_SMULH,Rd,Rn,Rm)
+#  define UMULL(Rd,Rn,Rm)              oxxx(A64_UMULL,Rd,Rn,Rm)
+#  define UMULH(Rd,Rn,Rm)              oxxx(A64_UMULH,Rd,Rn,Rm)
+#  define SDIV(Rd,Rn,Rm)               oxxx(A64_SDIV|XS,Rd,Rn,Rm)
+#  define UDIV(Rd,Rn,Rm)               oxxx(A64_UDIV|XS,Rd,Rn,Rm)
+#  define LSL(Rd,Rn,Rm)                        oxxx(A64_LSL|XS,Rd,Rn,Rm)
+#  define LSLI(r0,r1,i0)               UBFM(r0,r1,(64-(i0))&63,63-(i0))        /* left shift by i0 as UBFM: immr = (64 - i0) mod 64, imms = 63 - i0; i0 parenthesized so compound arguments expand safely */
+#  define ASR(Rd,Rn,Rm)                        oxxx(A64_ASR|XS,Rd,Rn,Rm)
+#  define ASRI(r0,r1,i0)               SBFM(r0,r1,i0,63)
+#  define LSR(Rd,Rn,Rm)                        oxxx(A64_LSR|XS,Rd,Rn,Rm)
+#  define LSRI(r0,r1,i0)               UBFM(r0,r1,i0,63)
+#  define AND(Rd,Rn,Rm)                        oxxx(A64_AND|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define ANDI(Rd,Rn,Imm12)            oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
+#  define ORR(Rd,Rn,Rm)                        oxxx(A64_ORR|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define ORRI(Rd,Rn,Imm12)            oxxi(A64_ORRI|XS,Rd,Rn,Imm12)
+#  define EOR(Rd,Rn,Rm)                        oxxx(A64_EOR|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define EORI(Rd,Rn,Imm12)            oxxi(A64_EORI|XS,Rd,Rn,Imm12)
+#  define SXTB(Rd,Rn)                  SBFM(Rd,Rn,0,7)
+#  define SXTH(Rd,Rn)                  SBFM(Rd,Rn,0,15)
+#  define SXTW(Rd,Rn)                  SBFM(Rd,Rn,0,31)
+#  define UXTB(Rd,Rn)                  UBFX(Rd,Rn,0,7)
+#  define UXTH(Rd,Rn)                  UBFX(Rd,Rn,0,15)
+#  define UXTW(Rd,Rm)                  ox_x(A64_UXTW,Rd,Rm)
+#  define REV(Rd,Rn)                   o_xx(A64_REV,Rd,Rn)
+#  define LDRSB(Rt,Rn,Rm)              oxxx(A64_LDRSB,Rt,Rn,Rm)
+#  define LDRSBI(Rt,Rn,Imm12)          oxxi(A64_LDRSBI,Rt,Rn,Imm12)
+#  define LDURSB(Rt,Rn,Imm9)           oxx9(A64_LDURSB,Rt,Rn,Imm9)
+#  define LDRB(Rt,Rn,Rm)               oxxx(A64_LDRB,Rt,Rn,Rm)
+#  define LDRBI(Rt,Rn,Imm12)           oxxi(A64_LDRBI,Rt,Rn,Imm12)
+#  define LDURB(Rt,Rn,Imm9)            oxx9(A64_LDURB,Rt,Rn,Imm9)
+#  define LDRSH(Rt,Rn,Rm)              oxxx(A64_LDRSH,Rt,Rn,Rm)
+#  define LDRSHI(Rt,Rn,Imm12)          oxxi(A64_LDRSHI,Rt,Rn,Imm12)
+#  define LDURSH(Rt,Rn,Imm9)           oxx9(A64_LDURSH,Rt,Rn,Imm9)
+#  define LDRH(Rt,Rn,Rm)               oxxx(A64_LDRH,Rt,Rn,Rm)
+#  define LDRHI(Rt,Rn,Imm12)           oxxi(A64_LDRHI,Rt,Rn,Imm12)
+#  define LDURH(Rt,Rn,Imm9)            oxx9(A64_LDURH,Rt,Rn,Imm9)
+#  define LDRSW(Rt,Rn,Rm)              oxxx(A64_LDRSW,Rt,Rn,Rm)
+#  define LDRSWI(Rt,Rn,Imm12)          oxxi(A64_LDRSWI,Rt,Rn,Imm12)
+#  define LDURSW(Rt,Rn,Imm9)           oxx9(A64_LDURSW,Rt,Rn,Imm9)
+#  define LDRW(Rt,Rn,Rm)               oxxx(A64_LDRW,Rt,Rn,Rm)
+#  define LDRWI(Rt,Rn,Imm12)           oxxi(A64_LDRWI,Rt,Rn,Imm12)
+#  define LDURW(Rt,Rn,Imm9)            oxx9(A64_LDURW,Rt,Rn,Imm9)
+#  define LDR(Rt,Rn,Rm)                        oxxx(A64_LDR,Rt,Rn,Rm)
+#  define LDRI(Rt,Rn,Imm12)            oxxi(A64_LDRI,Rt,Rn,Imm12)
+#  define LDUR(Rt,Rn,Imm9)             oxx9(A64_LDUR,Rt,Rn,Imm9)
+#  define STRB(Rt,Rn,Rm)               oxxx(A64_STRB,Rt,Rn,Rm)
+#  define STRBI(Rt,Rn,Imm12)           oxxi(A64_STRBI,Rt,Rn,Imm12)
+#  define STURB(Rt,Rn,Imm9)            oxx9(A64_STURB,Rt,Rn,Imm9)
+#  define STRH(Rt,Rn,Rm)               oxxx(A64_STRH,Rt,Rn,Rm)
+#  define STRHI(Rt,Rn,Imm12)           oxxi(A64_STRHI,Rt,Rn,Imm12)
+#  define STURH(Rt,Rn,Imm9)            oxx9(A64_STURH,Rt,Rn,Imm9)
+#  define STRW(Rt,Rn,Rm)               oxxx(A64_STRW,Rt,Rn,Rm)
+#  define STRWI(Rt,Rn,Imm12)           oxxi(A64_STRWI,Rt,Rn,Imm12)
+#  define STURW(Rt,Rn,Imm9)            oxx9(A64_STURW,Rt,Rn,Imm9)
+#  define STR(Rt,Rn,Rm)                        oxxx(A64_STR,Rt,Rn,Rm)
+#  define STRI(Rt,Rn,Imm12)            oxxi(A64_STRI,Rt,Rn,Imm12)
+#  define STUR(Rt,Rn,Imm9)             oxx9(A64_STUR,Rt,Rn,Imm9)
+#  define LDPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
+#  define STPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
+#  define LDPI_PRE(Rt,Rt2,Rn,Simm7)    oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
+#  define STPI_POS(Rt,Rt2,Rn,Simm7)    oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
+#  define CSET(Rd,Cc)                  CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
+#  define B(Simm26)                    o26(A64_B,Simm26)
+#  define BL(Simm26)                   o26(A64_BL,Simm26)
+#  define BR(Rn)                       o_x_(A64_BR,Rn)
+#  define BLR(Rn)                      o_x_(A64_BLR,Rn)
+#  define RET()                                o_x_(A64_RET,LR_REGNO)
+#  define B_C(Cc,Simm19)               oc19(A64_B_C,Cc,Simm19)
+#  define CBZ(Rd,Simm19)               ox19(A64_CBZ|XS,Rd,Simm19)
+#  define CBNZ(Rd,Simm19)              ox19(A64_CBNZ|XS,Rd,Simm19)
+#  define NOP()                                ii(0xd503201f)
+static jit_int32_t logical_immediate(jit_word_t);
+#  define oxxx(Op,Rd,Rn,Rm)            _oxxx(_jit,Op,Rd,Rn,Rm)
+static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxi(Op,Rd,Rn,Imm12)         _oxxi(_jit,Op,Rd,Rn,Imm12)
+static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxx9(Op,Rd,Rn,Imm9)          _oxx9(_jit,Op,Rd,Rn,Imm9)
+static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ox19(Op,Rd,Simm19)           _ox19(_jit,Op,Rd,Simm19)
+static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oc19(Op,Cc,Simm19)           _oc19(_jit,Op,Cc,Simm19)
+static void _oc19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o26(Op,Simm26)               _o26(_jit,Op,Simm26)
+static void _o26(jit_state_t*,jit_int32_t,jit_int32_t);       /* helper behind B()/BL(): opcode plus Simm26 operand; name must match the o26() expansion */
+#  define ox_x(Op,Rd,Rn)               _ox_x(_jit,Op,Rd,Rn)
+static void _ox_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o_xx(Op,Rd,Rn)               _o_xx(_jit,Op,Rd,Rn)
+static void _o_xx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxx_(Op,Rn,Rm)               _oxx_(_jit,Op,Rn,Rm)
+static void _oxx_(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o_x_(Op,Rn)                  _o_x_(_jit,Op,Rn)
+static void _o_x_(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ox_h(Op,Rd,Imm16)            _ox_h(_jit,Op,Rd,Imm16)
+static void _ox_h(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxrs(Op,Rd,Rn,R,S)          _oxxrs(_jit,Op,Rd,Rn,R,S)
+static void _oxxrs(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxxc(Op,Rd,Rn,Rm,Cc)                _oxxxc(_jit,Op,Rd,Rn,Rm,Cc)
+static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxx7(Op,Rt,Rt2,Rn,Simm7)    _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
+static void _oxxx7(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nop(i0)                      _nop(_jit,i0)
+static void _nop(jit_state_t*,jit_int32_t);
+#  define addr(r0,r1,r2)               ADD(r0,r1,r2)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              ADDS(r0,r1,r2)
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              ADCS(r0,r1,r2)
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               SUB(r0,r1,r2)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              SUBS(r0,r1,r2)
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              SBCS(r0,r1,r2)
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               MUL(r0,r1,r2)
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr_u(r0,r1,r2,r3)         _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0,r1,r2)               SDIV(r0,r1,r2)
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             UDIV(r0,r1,r2)
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr(r0,r1,r2,r3)           _iqdivr(_jit,1,r0,r1,r2,r3)
+#  define qdivr_u(r0,r1,r2,r3)         _iqdivr(_jit,0,r0,r1,r2,r3)
+static void _iqdivr(jit_state_t*,jit_bool_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               LSL(r0,r1,r2)
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0,r1,r2)               ASR(r0,r1,r2)
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0,r1,r2)             LSR(r0,r1,r2)
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define negr(r0,r1)                  NEG(r0,r1)
+#  define comr(r0,r1)                  MVN(r0,r1)
+#  define andr(r0,r1,r2)               AND(r0,r1,r2)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        ORR(r0,r1,r2)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               EOR(r0,r1,r2)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_c(r0,r1)                 LDRSBI(r0,r1,0)
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
+static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 LDRSHI(r0,r1,0)
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 LDRSWI(r0,r1,0)
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_ui(r0,r1)                        _ldr_ui(_jit,r0,r1)
+static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_l(r0,r1)                 LDRI(r0,r1,0)
+static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             LDRSH(r0,r1,r2)
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             LDRSW(r0,r1,r2)
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_l(r0,r1,r2)             LDR(r0,r1,r2)
+#  define ldxi_l(r0,r1,i0)             _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_c(r0,r1)                 STRBI(r1,r0,0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_s(r0,r1)                 STRHI(r1,r0,0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0,r1)                 STRWI(r1,r0,0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_l(r0,r1)                 STRI(r1,r0,0)
+#  define sti_l(i0,r0)                 _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             STRB(r2,r1,r0)
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             STRH(r2,r1,r0)
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             STRW(r2,r1,r0)
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_l(r0,r1,r2)             STR(r2,r1,r0)
+#  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define htonr_ul(r0,r1)            REV(r0,r1)
+#  else
+#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
+#  define extr_c(r0,r1)                        SXTB(r0,r1)
+#  define extr_uc(r0,r1)               UXTB(r0,r1)
+#  define extr_s(r0,r1)                        SXTH(r0,r1)
+#  define extr_us(r0,r1)               UXTH(r0,r1)
+#  define extr_i(r0,r1)                        SXTW(r0,r1)
+#  define extr_ui(r0,r1)               UXTW(r0,r1)
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ccr(cc,r0,r1,r2)             _ccr(_jit,cc,r0,r1,r2)
+static void _ccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cci(cc,r0,r1,i0)             _cci(_jit,cc,r0,r1,i0)
+static void _cci(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0,r1,r2)                        ccr(CC_LT,r0,r1,r2)
+#  define lti(r0,r1,i0)                        cci(CC_LT,r0,r1,i0)
+#  define ltr_u(r0,r1,r2)              ccr(CC_CC,r0,r1,r2)
+#  define lti_u(r0,r1,i0)              cci(CC_CC,r0,r1,i0)
+#  define ler(r0,r1,r2)                        ccr(CC_LE,r0,r1,r2)
+#  define lei(r0,r1,i0)                        cci(CC_LE,r0,r1,i0)
+#  define ler_u(r0,r1,r2)              ccr(CC_LS,r0,r1,r2)
+#  define lei_u(r0,r1,i0)              cci(CC_LS,r0,r1,i0)
+#  define eqr(r0,r1,r2)                        ccr(CC_EQ,r0,r1,r2)
+#  define eqi(r0,r1,i0)                        cci(CC_EQ,r0,r1,i0)
+#  define ger(r0,r1,r2)                        ccr(CC_GE,r0,r1,r2)
+#  define gei(r0,r1,i0)                        cci(CC_GE,r0,r1,i0)
+#  define ger_u(r0,r1,r2)              ccr(CC_CS,r0,r1,r2)
+#  define gei_u(r0,r1,i0)              cci(CC_CS,r0,r1,i0)
+#  define gtr(r0,r1,r2)                        ccr(CC_GT,r0,r1,r2)
+#  define gti(r0,r1,i0)                        cci(CC_GT,r0,r1,i0)
+#  define gtr_u(r0,r1,r2)              ccr(CC_HI,r0,r1,r2)
+#  define gti_u(r0,r1,i0)              cci(CC_HI,r0,r1,i0)
+#  define ner(r0,r1,r2)                        ccr(CC_NE,r0,r1,r2)
+#  define nei(r0,r1,i0)                        cci(CC_NE,r0,r1,i0)
+#  define bccr(cc,i0,r0,r1)            _bccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bcci(cc,i0,r0,i1)            _bcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_bcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bltr(i0,r0,r1)               bccr(BCC_LT,i0,r0,r1)
+#  define blti(i0,r0,i1)               bcci(BCC_LT,i0,r0,i1)
+#  define bltr_u(i0,r0,r1)             bccr(BCC_CC,i0,r0,r1)
+#  define blti_u(i0,r0,i1)             bcci(BCC_CC,i0,r0,i1)
+#  define bler(i0,r0,r1)               bccr(BCC_LE,i0,r0,r1)
+#  define blei(i0,r0,i1)               bcci(BCC_LE,i0,r0,i1)
+#  define bler_u(i0,r0,r1)             bccr(BCC_LS,i0,r0,r1)
+#  define blei_u(i0,r0,i1)             bcci(BCC_LS,i0,r0,i1)
+#  define beqr(i0,r0,r1)               bccr(BCC_EQ,i0,r0,r1)
+#  define beqi(i0,r0,i1)               _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger(i0,r0,r1)               bccr(BCC_GE,i0,r0,r1)
+#  define bgei(i0,r0,i1)               bcci(BCC_GE,i0,r0,i1)
+#  define bger_u(i0,r0,r1)             bccr(BCC_CS,i0,r0,r1)
+#  define bgei_u(i0,r0,i1)             bcci(BCC_CS,i0,r0,i1)
+#  define bgtr(i0,r0,r1)               bccr(BCC_GT,i0,r0,r1)
+#  define bgti(i0,r0,i1)               bcci(BCC_GT,i0,r0,i1)
+#  define bgtr_u(i0,r0,r1)             bccr(BCC_HI,i0,r0,r1)
+#  define bgti_u(i0,r0,i1)             bcci(BCC_HI,i0,r0,i1)
+#  define bner(i0,r0,r1)               bccr(BCC_NE,i0,r0,r1)
+#  define bnei(i0,r0,i1)               _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define baddr(cc,i0,r0,r1)           _baddr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_baddr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddi(cc,i0,r0,i1)           _baddi(_jit,cc,i0,r0,i1)
+static jit_word_t
+_baddi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr(i0,r0,r1)             baddr(BCC_VS,i0,r0,r1)
+#  define boaddi(i0,r0,i1)             baddi(BCC_VS,i0,r0,i1)
+#  define boaddr_u(i0,r0,r1)           baddr(BCC_HS,i0,r0,r1)
+#  define boaddi_u(i0,r0,i1)           baddi(BCC_HS,i0,r0,i1)
+#  define bxaddr(i0,r0,r1)             baddr(BCC_VC,i0,r0,r1)
+#  define bxaddi(i0,r0,i1)             baddi(BCC_VC,i0,r0,i1)
+#  define bxaddr_u(i0,r0,r1)           baddr(BCC_LO,i0,r0,r1)
+#  define bxaddi_u(i0,r0,i1)           baddi(BCC_LO,i0,r0,i1)
+#  define bsubr(cc,i0,r0,r1)           _bsubr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bsubr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubi(cc,i0,r0,i1)           _bsubi(_jit,cc,i0,r0,i1)
+static jit_word_t
+_bsubi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0,r0,r1)             bsubr(BCC_VS,i0,r0,r1)
+#  define bosubi(i0,r0,i1)             bsubi(BCC_VS,i0,r0,i1)
+#  define bosubr_u(i0,r0,r1)           bsubr(BCC_LO,i0,r0,r1)
+#  define bosubi_u(i0,r0,i1)           bsubi(BCC_LO,i0,r0,i1)
+#  define bxsubr(i0,r0,r1)             bsubr(BCC_VC,i0,r0,r1)
+#  define bxsubi(i0,r0,i1)             bsubi(BCC_VC,i0,r0,i1)
+#  define bxsubr_u(i0,r0,r1)           bsubr(BCC_HS,i0,r0,r1)
+#  define bxsubi_u(i0,r0,i1)           bsubi(BCC_HS,i0,r0,i1)
+#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bmxr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxi(cc,i0,r0,r1)            _bmxi(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0,r0,r1)               bmxr(BCC_NE,i0,r0,r1)
+#  define bmsi(i0,r0,i1)               bmxi(BCC_NE,i0,r0,i1)
+#  define bmcr(i0,r0,r1)               bmxr(BCC_EQ,i0,r0,r1)
+#  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
+#  define jmpr(r0)                     BR(r0)
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#  define callr(r0)                    BLR(r0)
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define patch_at(jump,label)         _patch_at(_jit,jump,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/*
+ * Map a small constant to the pre-computed value that _andi(), _ori()
+ * and _xori() pass as the immediate operand of ANDI/ORRI/EORI.
+ * Returns -1 when the constant is not in the table, in which case the
+ * callers fall back to loading it in a scratch register.
+ */
+static jit_int32_t
+logical_immediate(jit_word_t imm)
+{
+    /* There are 5334 possible immediate values, but to avoid the
+     * need of either too complex code or large lookup tables,
+     * only check for (simply) encodable common/small values */
+    static const struct {
+       jit_word_t      value;
+       jit_int32_t     encoded;
+    } known[] = {
+       { -16, 0xf3b }, { -15, 0xf3c }, { -13, 0xf3d }, {  -9, 0xf3e },
+       {  -8, 0xf7c }, {  -7, 0xf7d }, {  -5, 0xf7e }, {  -4, 0xfbd },
+       {  -3, 0xfbe }, {  -2, 0xffe }, {   1, 0x000 }, {   2, 0xfc0 },
+       {   3, 0x001 }, {   4, 0xf80 }, {   6, 0xfc1 }, {   7, 0x002 },
+       {   8, 0xf40 }, {  12, 0xf81 }, {  14, 0xfc2 }, {  15, 0x003 },
+       {  16, 0xf00 },
+    };
+    jit_int32_t                slot;
+    jit_int32_t                count = (jit_int32_t)(sizeof(known) / sizeof(known[0]));
+    for (slot = 0; slot < count; slot++) {
+       if (known[slot].value == imm)
+           return (known[slot].encoded);
+    }
+    return (-1);
+}
+
+/*
+ * Build an instruction word from Op with the Rd, Rn and Rm register
+ * fields filled in, and hand it to ii().
+ * Each register number must fit in 5 bits; Op may only have bits set
+ * inside the 0xffe0fc00 mask.
+ */
+static void
+_oxxx(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Op & ~0xffe0fc00));
+
+    insn.w    = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd and Rn register fields
+ * and a 12-bit unsigned immediate filled in, and hand it to ii().
+ * Imm12 must be within 0..0xfff; Op may only have bits set inside the
+ * 0xffe00000 mask.
+ */
+static void
+_oxxi(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm12)
+{
+    instr_t    insn;
+
+    assert(!(Rd    &       ~0x1f));
+    assert(!(Rn    &       ~0x1f));
+    assert(!(Imm12 &      ~0xfff));
+    assert(!(Op    & ~0xffe00000));
+
+    insn.w       = Op;
+    insn.Rd.b    = Rd;
+    insn.Rn.b    = Rn;
+    insn.imm12.b = Imm12;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd and Rn register fields
+ * and a 9-bit immediate filled in, and hand it to ii().
+ * Imm9 must already be reduced to the range 0..0x1ff; Op may only have
+ * bits set inside the 0xffe00000 mask.
+ */
+static void
+_oxx9(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
+{
+    instr_t    insn;
+
+    assert(!(Rd   &       ~0x1f));
+    assert(!(Rn   &       ~0x1f));
+    assert(!(Imm9 &      ~0x1ff));
+    assert(!(Op   & ~0xffe00000));
+
+    insn.w      = Op;
+    insn.Rd.b   = Rd;
+    insn.Rn.b   = Rn;
+    insn.imm9.b = Imm9;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd register field and a
+ * signed 19-bit immediate filled in, and hand it to ii().
+ * Rd must fit in 5 bits and Op may only have bits set inside the
+ * 0xff000000 mask.
+ * Simm19 must be representable in 19 signed bits, that is
+ * -262144 (-2^18) .. 262143 (2^18 - 1).  The lower bound used to be
+ * written as -262148, which let four out-of-range values through to be
+ * silently truncated by the bit-field store.
+ */
+static void
+_ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
+{
+    instr_t    i;
+    assert(!(Rd &         ~0x1f));
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op   & ~0xff000000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/*
+ * Build an instruction word from Op with a 4-bit condition code and a
+ * signed 19-bit immediate filled in, and hand it to ii().
+ * Cc must fit in 4 bits and Op may only have bits set inside the
+ * 0xff000000 mask.
+ * Simm19 must be representable in 19 signed bits, that is
+ * -262144 (-2^18) .. 262143 (2^18 - 1).  The lower bound used to be
+ * written as -262148, which let four out-of-range values through to be
+ * silently truncated by the bit-field store.
+ */
+static void
+_oc19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Cc, jit_int32_t Simm19)
+{
+    instr_t    i;
+    assert(!(Cc &          ~0xf));
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op   & ~0xff000000));
+    i.w = Op;
+    i.cond2.b = Cc;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/*
+ * Build an instruction word from Op with a signed 26-bit immediate
+ * filled in, and hand it to ii().
+ * Simm26 must be within -33554432 .. 33554431; Op may only have bits
+ * set inside the 0xfc000000 mask.
+ */
+static void
+_o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
+{
+    instr_t    insn;
+
+    assert(Simm26 >= -33554432 && Simm26 <= 33554431);
+    assert(!(Op   & ~0xfc000000));
+
+    insn.w       = Op;
+    insn.imm26.b = Simm26;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with only the Rd and Rm register
+ * fields filled in (the Rn field is left as given in Op), and hand it
+ * to ii().
+ * Register numbers must fit in 5 bits; Op may only have bits set inside
+ * the 0xffe0ffe0 mask.
+ */
+static void
+_ox_x(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rm)
+{
+    instr_t    insn;
+
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Op & ~0xffe0ffe0));
+
+    insn.w    = Op;
+    insn.Rd.b = Rd;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with only the Rd and Rn register
+ * fields filled in, and hand it to ii().
+ * Register numbers must fit in 5 bits; Op may only have bits set inside
+ * the 0xfffffc00 mask.
+ */
+static void
+_o_xx(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rn)
+{
+    instr_t    insn;
+
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Op & ~0xfffffc00));
+
+    insn.w    = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with only the Rn and Rm register
+ * fields filled in (the Rd field is left as given in Op), and hand it
+ * to ii().
+ * Register numbers must fit in 5 bits; Op may only have bits set inside
+ * the 0xffc0fc1f mask.
+ */
+static void
+_oxx_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Op & ~0xffc0fc1f));
+
+    insn.w    = Op;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with only the Rn register field
+ * filled in, and hand it to ii().
+ * Rn must fit in 5 bits, and Op must have bits 0x3e0 clear.
+ */
+static void
+_o_x_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn)
+{
+    instr_t    insn;
+
+    assert(!(Rn & ~0x1f));
+    assert(!(Op & 0x3e0));
+
+    insn.w    = Op;
+    insn.Rn.b = Rn;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd register field and a
+ * 16-bit unsigned immediate filled in, and hand it to ii().
+ * Imm16 must be within 0..0xffff; Op may only have bits set inside the
+ * 0xffe00000 mask.
+ */
+static void
+_ox_h(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Imm16)
+{
+    instr_t    insn;
+
+    assert(!(Rd    &       ~0x1f));
+    assert(!(Imm16 &     ~0xffff));
+    assert(!(Op    & ~0xffe00000));
+
+    insn.w       = Op;
+    insn.Rd.b    = Rd;
+    insn.imm16.b = Imm16;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd and Rn register fields
+ * plus the immr (R) and imms (S) fields filled in, and hand it to ii().
+ * R and S must each fit in 6 bits; Op may only have bits set inside the
+ * 0xffc00000 mask.
+ */
+static void
+_oxxrs(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t R, jit_int32_t S)
+{
+    instr_t    insn;
+
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(R  &       ~0x3f));
+    assert(!(S  &       ~0x3f));
+    assert(!(Op & ~0xffc00000));
+
+    insn.w      = Op;
+    insn.Rd.b   = Rd;
+    insn.Rn.b   = Rn;
+    insn.immr.b = R;
+    insn.imms.b = S;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rd, Rn and Rm register
+ * fields and a 4-bit condition code filled in, and hand it to ii().
+ * Op may only have bits set inside the 0xffc00c00 mask.
+ */
+static void
+_oxxxc(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Cc)
+{
+    instr_t    insn;
+
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Cc  &       ~0xf));
+    assert(!(Op & ~0xffc00c00));
+
+    insn.w      = Op;
+    insn.Rd.b   = Rd;
+    insn.Rn.b   = Rn;
+    insn.Rm.b   = Rm;
+    insn.cond.b = Cc;
+    ii(insn.w);
+}
+
+/*
+ * Build an instruction word from Op with the Rt, Rt2 and Rn register
+ * fields and a signed 7-bit immediate filled in, and hand it to ii().
+ * Register numbers must fit in 5 bits; Op may only have bits set inside
+ * the 0xffc003e0 mask.
+ * Simm7 is checked against the signed 7-bit range -64 (-2^6) .. 63
+ * (2^6 - 1), following the same rule the Simm19 and Simm26 emitters use
+ * for their fields.  The check used to accept -128 .. 127 (an 8-bit
+ * range), so values outside 7 bits could be truncated on the store.
+ * NOTE(review): this assumes the imm7 bit-field is 7 bits wide, as its
+ * name says - confirm against the instr_t declaration.
+ */
+static void
+_oxxx7(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rt, jit_int32_t Rt2, jit_int32_t Rn, jit_int32_t Simm7)
+{
+    instr_t    i;
+    assert(!(Rt  &       ~0x1f));
+    assert(!(Rt2 &       ~0x1f));
+    assert(!(Rn  &       ~0x1f));
+    assert(Simm7 >= -64 && Simm7 <= 63);
+    assert(!(Op & ~0xffc003e0));
+    i.w = Op;
+    i.Rt.b = Rt;
+    i.Rt2.b = Rt2;
+    i.Rn.b = Rn;
+    i.imm7.b = Simm7;
+    ii(i.w);
+}
+
+/*
+ * Issue one NOP() per 4 units of i0.  i0 is expected to be a
+ * non-negative multiple of 4; the trailing assert checks that nothing
+ * is left over.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+       NOP();
+       i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/*
+ * r0 = r1 + i0.
+ * Tries, in order:
+ *   - ADDI    when i0 is within 0..0xfff;
+ *   - ADDI_12 when i0 is a multiple of 4096 whose quotient is within
+ *     0..0xfff;
+ *   - SUBI / SUBI_12 when -i0 has one of those two shapes;
+ *   - otherwise i0 is loaded in a scratch register and addr() is used.
+ * The SUBI_12 test used to compare (iS << 12) against is instead of in
+ * (the negated immediate), which made that branch unreachable.  The
+ * range checks are also done before the left shifts so a negative value
+ * is never shifted.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_word_t         is =  i0 >> 12;
+    jit_word_t         in = -i0;
+    jit_word_t         iS =  in >> 12;
+    if (      i0 >= 0 && i0 <= 0xfff)
+       ADDI   (r0, r1, i0);
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+       ADDI_12(r0, r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+       SUBI   (r0, r1, in);
+    else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in)
+       SUBI_12(r0, r1, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       addr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * r0 = r1 + i0 using the flag-setting forms (ADDSI/SUBSI and addcr()).
+ * Tries, in order:
+ *   - ADDSI    when i0 is within 0..0xfff;
+ *   - ADDSI_12 when i0 is a multiple of 4096 whose quotient is within
+ *     0..0xfff;
+ *   - SUBSI / SUBSI_12 when -i0 has one of those two shapes;
+ *   - otherwise i0 is loaded in a scratch register and addcr() is used.
+ * The SUBSI_12 test used to compare (iS << 12) against is instead of in
+ * (the negated immediate), which made that branch unreachable.  The
+ * range checks are also done before the left shifts so a negative value
+ * is never shifted.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_word_t         is =  i0 >> 12;
+    jit_word_t         in = -i0;
+    jit_word_t         iS =  in >> 12;
+    if (      i0 >= 0 && i0 <= 0xfff)
+       ADDSI   (r0, r1, i0);
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+       ADDSI_12(r0, r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+       SUBSI   (r0, r1, in);
+    else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in)
+       SUBSI_12(r0, r1, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       addcr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Immediate form of addxr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - i0.
+ * Uses SUBI when i0 is within 0..0xfff, SUBI_12 when i0 is a multiple
+ * of 4096 whose quotient is within 0..0xfff, and otherwise loads i0 in
+ * a scratch register and uses subr().
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         hi12 = i0 >> 12;
+
+    if (i0 >= 0 && i0 <= 0xfff) {
+       SUBI(r0, r1, i0);
+       return;
+    }
+    if ((hi12 << 12) == i0 && hi12 >= 0 && hi12 <= 0xfff) {
+       SUBI_12(r0, r1, hi12);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - i0 using the flag-setting forms.
+ * Uses SUBSI when i0 is within 0..0xfff, SUBSI_12 when i0 is a multiple
+ * of 4096 whose quotient is within 0..0xfff, and otherwise loads i0 in
+ * a scratch register and uses subcr().
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         hi12 = i0 >> 12;
+
+    if (i0 >= 0 && i0 <= 0xfff) {
+       SUBSI(r0, r1, i0);
+       return;
+    }
+    if ((hi12 << 12) == i0 && hi12 >= 0 && hi12 <= 0xfff) {
+       SUBSI_12(r0, r1, hi12);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of subxr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    subxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Reverse subtract with immediate: r0 = i0 - r1.
+ * Implemented as r0 = r1 - i0 followed by a negation of r0.
+ */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* r0 = r1 - i0 */
+    subi(r0, r1, i0);
+    /* r0 = -r0 */
+    negr(r0, r0);
+}
+
+/*
+ * Immediate form of mulr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Two-result multiply of r2 by r3: r0 receives the mulr() result and r1
+ * the SMULH() result.
+ * When r0 is the same register as one of the operands, the mulr()
+ * result is kept in a scratch register until SMULH() has read its
+ * inputs, and only then copied to r0.
+ */
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != r2 && r0 != r3) {
+       /* No overlap: write the first result straight to r0. */
+       mulr(r0, r2, r3);
+       SMULH(r1, r2, r3);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    mulr(rn(tmp), r2, r3);
+    SMULH(r1, r2, r3);
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of qmulr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    qmulr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Two-result multiply of r2 by r3: r0 receives the mulr() result and r1
+ * the UMULH() result.
+ * When r0 is the same register as one of the operands, the mulr()
+ * result is kept in a scratch register until UMULH() has read its
+ * inputs, and only then copied to r0.
+ */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != r2 && r0 != r3) {
+       /* No overlap: write the first result straight to r0. */
+       mulr(r0, r2, r3);
+       UMULH(r1, r2, r3);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    mulr(rn(tmp), r2, r3);
+    UMULH(r1, r2, r3);
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of qmulr_u(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    qmulr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr_u(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Combined divide: r0 = r2 / r3 and r1 = r2 - r3 * (r2 / r3).
+ * sign selects divr() (non-zero) or divr_u() (zero) for the division.
+ * Whenever a destination is the same register as one of the operands,
+ * its value is built in a scratch register and copied over at the end,
+ * so r2 and r3 stay intact until both results have been computed.
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_bool_t sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                quot_tmp = 0, quot = r0;
+    jit_int32_t                rem_tmp = 0, rem = r1;
+
+    if (r0 == r2 || r0 == r3) {
+       quot_tmp = jit_get_reg(jit_class_gpr);
+       quot = rn(quot_tmp);
+    }
+    if (r1 == r2 || r1 == r3) {
+       rem_tmp = jit_get_reg(jit_class_gpr);
+       rem = rn(rem_tmp);
+    }
+
+    if (sign)
+       divr(quot, r2, r3);
+    else
+       divr_u(quot, r2, r3);
+    /* rem = r2 - r3 * quot */
+    mulr(rem, r3, quot);
+    subr(rem, r2, rem);
+
+    /* Copy back whatever was computed in a scratch register. */
+    if (quot != r0) {
+       movr(r0, quot);
+       jit_unget_reg(quot_tmp);
+    }
+    if (rem != r1) {
+       movr(r1, rem);
+       jit_unget_reg(rem_tmp);
+    }
+}
+
+/*
+ * Immediate form of qdivr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_qdivi(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    qdivr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of qdivr_u(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_qdivi_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    qdivr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Remainder: r0 = r1 - r2 * divr(r1, r2).
+ * The intermediate quotient/product lives in r0 itself unless r0 is the
+ * same register as r1 or r2, in which case a scratch register is used
+ * so the operands are not overwritten before the final subtraction.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != r1 && r0 != r2) {
+       divr(r0, r1, r2);
+       mulr(r0, r2, r0);
+       subr(r0, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    divr(rn(tmp), r1, r2);
+    mulr(rn(tmp), r2, rn(tmp));
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of remr(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Remainder using divr_u(): r0 = r1 - r2 * divr_u(r1, r2).
+ * The intermediate quotient/product lives in r0 itself unless r0 is the
+ * same register as r1 or r2, in which case a scratch register is used
+ * so the operands are not overwritten before the final subtraction.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != r1 && r0 != r2) {
+       divr_u(r0, r1, r2);
+       mulr(r0, r2, r0);
+       subr(r0, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    divr_u(rn(tmp), r1, r2);
+    mulr(rn(tmp), r2, rn(tmp));
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of remr_u(): i0 is first loaded in a scratch register
+ * obtained from jit_get_reg(), which is released afterwards.
+ */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Shift by immediate using LSLI().  A zero shift is emitted as a plain
+ * register move; any other amount must be within 1..63.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    LSLI(r0, r1, i0);
+}
+
+/*
+ * Shift by immediate using ASRI().  A zero shift is emitted as a plain
+ * register move; any other amount must be within 1..63.
+ */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    ASRI(r0, r1, i0);
+}
+
+/*
+ * Shift by immediate using LSRI().  A zero shift is emitted as a plain
+ * register move; any other amount must be within 1..63.
+ */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    LSRI(r0, r1, i0);
+}
+
+/*
+ * r0 = r1 & i0.
+ * Special cases: i0 == 0 loads 0 in r0, i0 == -1 is a register move.
+ * Otherwise ANDI is used when logical_immediate() knows the constant,
+ * else i0 goes through a scratch register and andr().
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                encoded;
+
+    if (i0 == 0) {
+       movi(r0, 0);
+       return;
+    }
+    if (i0 == -1) {
+       movr(r0, r1);
+       return;
+    }
+    encoded = logical_immediate(i0);
+    if (encoded != -1) {
+       ANDI(r0, r1, encoded);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    andr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 | i0.
+ * Special cases: i0 == 0 is a register move, i0 == -1 loads -1 in r0.
+ * Otherwise ORRI is used when logical_immediate() knows the constant,
+ * else i0 goes through a scratch register and orr().
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                encoded;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       movi(r0, -1);
+       return;
+    }
+    encoded = logical_immediate(i0);
+    if (encoded != -1) {
+       ORRI(r0, r1, encoded);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    orr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 ^ i0.
+ * Special cases: i0 == 0 is a register move, i0 == -1 is comr().
+ * Otherwise EORI is used when logical_immediate() knows the constant,
+ * else i0 goes through a scratch register and xorr().
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                encoded;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       comr(r0, r1);
+       return;
+    }
+    encoded = logical_immediate(i0);
+    if (encoded != -1) {
+       EORI(r0, r1, encoded);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+/*
+ * Little-endian htonr_us: apply htonr_ul() to r1 into r0, then shift r0
+ * right by 48 with rshi_u().
+ */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Full-width swap first ... */
+    htonr_ul(r0, r1);
+    /* ... then shift the result down by 48 bits. */
+    rshi_u(r0, r0, 48);
+}
+
+/*
+ * Little-endian htonr_ui: apply htonr_ul() to r1 into r0, then shift r0
+ * right by 32 with rshi_u().
+ */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Full-width swap first ... */
+    htonr_ul(r0, r1);
+    /* ... then shift the result down by 32 bits. */
+    rshi_u(r0, r0, 32);
+}
+#endif
+
+/*
+ * Absolute-address form of ldr_c(): the address i0 is loaded in a
+ * scratch register, which is then used as the base for ldr_c().
+ */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_c(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Register-indirect load through LDRBI() with a zero offset.
+ * No separate extr_uc() is issued after the load.
+ */
+static void
+_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRBI(r0, r1, 0);
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_uc.
+ */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_uc(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_s.
+ */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_s(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load into r0 from the address held in r1, using LDRHI with an
+ * immediate offset of 0.  The extr_us that follows is compiled out
+ * (#if 0); presumably the load already zero extends -- confirm against
+ * the LDRH definition before re-enabling or deleting it.
+ */
+static void
+_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRHI(r0, r1, 0);
+#if 0
+    extr_us(r0, r0);
+#endif
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_us.
+ */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_us(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_i.
+ */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_i(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load into r0 from the address held in r1, using LDRWI with an
+ * immediate offset of 0.  The extr_ui that follows is compiled out
+ * (#if 0); presumably the load already zero extends -- confirm against
+ * the LDRW definition before re-enabling or deleting it.
+ */
+static void
+_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRWI(r0, r1, 0);
+#if 0
+    extr_ui(r0, r0);
+#endif
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_ui.
+ */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_ui(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load into r0 from the absolute address i0: the address is placed in
+ * a scratch gpr with movi and the access itself is done by ldr_l.
+ */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_l(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Register indexed load: LDRSB with base r1 and index r2 into r0,
+ * followed by extr_c on the loaded value.
+ * NOTE(review): if LDRSB already sign extends to the full register
+ * the extr_c is redundant -- confirm before removing it.
+ */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRSB(r0, r1, r2);
+    extr_c(r0, r0);
+}
+
+/*
+ * Load into r0 from r1 + i0, selecting the instruction by offset:
+ *     -255 .. -1      LDURSB, offset masked to 9 bits
+ *     0 .. 4095       LDRSBI, offset used as is
+ *     anything else   offset loaded in a scratch register, LDRSB
+ * extr_c is applied to the result in every case.
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                idx;
+    if (i0 < 0 && i0 > -256)
+       LDURSB(r0, r1, i0 & 0x1ff);
+    else if (i0 >= 0 && i0 <= 4095)
+       LDRSBI(r0, r1, i0);
+    else {
+       idx = jit_get_reg(jit_class_gpr);
+       movi(rn(idx), i0);
+       LDRSB(r0, r1, rn(idx));
+       jit_unget_reg(idx);
+    }
+    extr_c(r0, r0);
+}
+
+/*
+ * Register indexed load: LDRB with base r1 and index r2 into r0.
+ * The extr_uc that follows is compiled out (#if 0); presumably the
+ * load already zero extends -- confirm against the LDRB definition.
+ */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRB(r0, r1, r2);
+#if 0
+    extr_uc(r0, r0);
+#endif
+}
+
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= 0 && i0 <= 4095)
+       LDRBI(r0, r1, i0);
+    else if (i0 > -256 && i0 < 0)
+       LDURB(r0, r1, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_uc(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+#if 0
+    extr_uc(r0, r0);
+#endif
+}
+
+/*
+ * Load into r0 from r1 + i0; i0 is asserted to be even.
+ *     -255 .. -1      LDURSH, offset masked to 9 bits
+ *     0 .. 8191       LDRSHI, offset divided by 2
+ *     anything else   offset loaded in a scratch register, LDRSH
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                idx;
+    assert(!(i0 & 1));
+    if (i0 < 0 && i0 > -256) {
+       LDURSH(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 8191) {
+       LDRSHI(r0, r1, i0 >> 1);
+       return;
+    }
+    idx = jit_get_reg(jit_class_gpr);
+    movi(rn(idx), i0);
+    LDRSH(r0, r1, rn(idx));
+    jit_unget_reg(idx);
+}
+
+/*
+ * Register indexed load: LDRH with base r1 and index r2 into r0.
+ * The extr_us that follows is compiled out (#if 0); presumably the
+ * load already zero extends -- confirm against the LDRH definition.
+ */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRH(r0, r1, r2);
+#if 0
+    extr_us(r0, r0);
+#endif
+}
+
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    assert(!(i0 & 1));
+    if (i0 >= 0 && i0 <= 8191)
+       LDRHI(r0, r1, i0 >> 1);
+    else if (i0 > -256 && i0 < 0)
+       LDURH(r0, r1, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       LDRH(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#if 0
+    extr_us(r0, r0);
+#endif
+}
+
+/*
+ * Load into r0 from r1 + i0; i0 is asserted to be a multiple of 4.
+ *     -255 .. -1      LDURSW, offset masked to 9 bits
+ *     0 .. 16383      LDRSWI, offset divided by 4
+ *     anything else   r1 + i0 computed in a scratch register, ldr_i
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 3));
+    if (i0 < 0 && i0 > -256) {
+       LDURSW(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 16383) {
+       LDRSWI(r0, r1, i0 >> 2);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r1, i0);
+    ldr_i(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Register indexed load: LDRW with base r1 and index r2 into r0.
+ * The extr_ui that follows is compiled out (#if 0); presumably the
+ * load already zero extends -- confirm against the LDRW definition.
+ */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRW(r0, r1, r2);
+#if 0
+    extr_ui(r0, r0);
+#endif
+}
+
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    assert(!(i0 & 3));
+    if (i0 >= 0 && i0 <= 16383)
+       LDRWI(r0, r1, i0 >> 2);
+    else if (i0 > -256 && i0 < 0)
+       LDURW(r0, r1, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       LDRW(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#if 0
+    extr_ui(r0, r0);
+#endif
+}
+
+/*
+ * Load into r0 from r1 + i0; i0 is asserted to be a multiple of 8.
+ *     -255 .. -1      LDUR, offset masked to 9 bits
+ *     0 .. 32767      LDRI, offset divided by 8
+ *     anything else   r1 + i0 computed in a scratch register, ldr_l
+ */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 7));
+    if (i0 < 0 && i0 > -256) {
+       LDUR(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 32767) {
+       LDRI(r0, r1, i0 >> 3);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r1, i0);
+    ldr_l(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r0 at the absolute address i0: the address is placed in a
+ * scratch gpr with movi and the access itself is done by str_c.
+ */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_c(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r0 at the absolute address i0: the address is placed in a
+ * scratch gpr with movi and the access itself is done by str_s.
+ */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_s(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r0 at the absolute address i0: the address is placed in a
+ * scratch gpr with movi and the access itself is done by str_i.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_i(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r0 at the absolute address i0: the address is placed in a
+ * scratch gpr with movi and the access itself is done by str_l.
+ */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_l(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r1 at r0 + i0, selecting the instruction by offset:
+ *     -255 .. -1      STURB, offset masked to 9 bits
+ *     0 .. 4095       STRBI, offset used as is
+ *     anything else   r0 + i0 computed in a scratch register, str_c
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr;
+    if (i0 < 0 && i0 > -256) {
+       STURB(r1, r0, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 4095) {
+       STRBI(r1, r0, i0);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r0, i0);
+    str_c(rn(addr), r1);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r1 at r0 + i0; i0 is asserted to be even.
+ *     -255 .. -1      STURH, offset masked to 9 bits
+ *     0 .. 8191       STRHI, offset divided by 2
+ *     anything else   r0 + i0 computed in a scratch register, str_s
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 1));
+    if (i0 < 0 && i0 > -256) {
+       STURH(r1, r0, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 8191) {
+       STRHI(r1, r0, i0 >> 1);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r0, i0);
+    str_s(rn(addr), r1);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r1 at r0 + i0; i0 is asserted to be a multiple of 4.
+ *     -255 .. -1      STURW, offset masked to 9 bits
+ *     0 .. 16383      STRWI, offset divided by 4
+ *     anything else   r0 + i0 computed in a scratch register, str_i
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 3));
+    if (i0 < 0 && i0 > -256) {
+       STURW(r1, r0, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 16383) {
+       STRWI(r1, r0, i0 >> 2);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r0, i0);
+    str_i(rn(addr), r1);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r1 at r0 + i0; i0 is asserted to be a multiple of 8.
+ *     -255 .. -1      STUR, offset masked to 9 bits
+ *     0 .. 32767      STRI, offset divided by 8
+ *     anything else   r0 + i0 computed in a scratch register, str_l
+ */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 7));
+    if (i0 < 0 && i0 > -256) {
+       STUR(r1, r0, i0 & 0x1ff);
+       return;
+    }
+    if (i0 >= 0 && i0 <= 32767) {
+       STRI(r1, r0, i0 >> 3);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r0, i0);
+    str_l(rn(addr), r1);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Copy r1 to r0.  Nothing is emitted when both name the same register.
+ */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    MOV(r0, r1);
+}
+
+/*
+ * Load the constant i0 in r0 with as few MOVZ/MOVN/MOVK instructions
+ * as the cases below can find.
+ *
+ * i0 is looked at as four 16-bit chunks.  ibit has one bit set per
+ * chunk of i0 that is not zero and nbit one bit set per chunk of ~i0
+ * that is not zero; bit 0 stands for the lowest chunk.
+ *  - Chunks that are zero are skipped by starting with a MOVZ of the
+ *    lowest non zero chunk and adding the others with MOVK.
+ *  - When every chunk but one is 0xffff (nbit has a single bit set) a
+ *    single MOVN of the inverted chunk is enough.
+ * Since the length of the sequence depends on the value, code that
+ * must be patched later has to use movi_p instead.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         n0, ibit, nbit;
+    n0 = ~i0;
+    ibit = nbit = 0;
+    if (i0 & 0x000000000000ffffL)      ibit |= 1;
+    if (i0 & 0x00000000ffff0000L)      ibit |= 2;
+    if (i0 & 0x0000ffff00000000L)      ibit |= 4;
+    if (i0 & 0xffff000000000000L)      ibit |= 8;
+    if (n0 & 0x000000000000ffffL)      nbit |= 1;
+    if (n0 & 0x00000000ffff0000L)      nbit |= 2;
+    if (n0 & 0x0000ffff00000000L)      nbit |= 4;
+    if (n0 & 0xffff000000000000L)      nbit |= 8;
+    switch (ibit) {
+       case 0:
+           MOVZ   (r0,  0);
+           break;
+       case 1:
+           MOVZ   (r0,  i0        & 0xffff);
+           break;
+       case 2:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           break;
+       case 3:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_16(r0, (i0 >> 16) & 0xffff);
+           break;
+       case 4:
+           MOVZ_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       case 5:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       case 6:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           MOVK_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       case 7:
+           /* top chunk is zero; one MOVN if the rest is all ones */
+           if (nbit == 8)
+               MOVN_48(r0, (n0 >> 48) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+           }
+           break;
+       case 8:
+           MOVZ_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       case 9:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       case 10:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       case 11:
+           if (nbit == 4)
+               MOVN_32(r0, (n0 >> 32) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       case 12:
+           MOVZ_32(r0, (i0 >> 32) & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       case 13:
+           if (nbit == 2)
+               MOVN_16(r0, (n0 >> 16) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       case 14:
+           if (nbit == 1)
+               MOVN   (r0, (n0)       & 0xffff);
+           else {
+               MOVZ_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       case 15:
+           /* No zero chunk.  If at most one chunk differs from 0xffff
+            * a single MOVN of that chunk does it, whatever its
+            * position; otherwise all four chunks must be written. */
+           if (nbit == 0)
+               MOVN   (r0,  0);
+           else if (nbit == 1)
+               MOVN   (r0,  n0        & 0xffff);
+           else if (nbit == 2)
+               MOVN_16(r0, (n0 >> 16) & 0xffff);
+           else if (nbit == 4)
+               MOVN_32(r0, (n0 >> 32) & 0xffff);
+           else if (nbit == 8)
+               MOVN_48(r0, (n0 >> 48) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       default:
+           abort();
+    }
+}
+
+/*
+ * Load i0 in r0 with the fixed MOVZ + MOVK_16 + MOVK_32 + MOVK_48
+ * sequence, whatever the value, and return the address of the MOVZ.
+ * This is the sequence patch_at recognizes and rewrites chunk by
+ * chunk.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         first = _jit->pc.w;
+    MOVZ   (r0,  i0        & 0xffff);
+    MOVK_16(r0, (i0 >> 16) & 0xffff);
+    MOVK_32(r0, (i0 >> 32) & 0xffff);
+    MOVK_48(r0, (i0 >> 48) & 0xffff);
+    return (first);
+}
+
+/*
+ * Compare r1 with r2 (CMP) and set r0 from condition cc (CSET).
+ */
+static void
+_ccr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMP(r1, r2);
+    CSET(r0, cc);
+}
+
+/*
+ * Compare r1 with the constant i0 and set r0 from condition cc (CSET).
+ *
+ * The compare is emitted in the first form that can hold the constant:
+ *     CMPI     i0 in 0 .. 0xfff
+ *     CMPI_12  i0 a multiple of 4096 with i0 >> 12 in 0 .. 0xfff
+ *     CMNI     -i0 in 0 .. 0xfff
+ *     CMNI_12  -i0 a multiple of 4096 with -i0 >> 12 in 0 .. 0xfff
+ *     CMP      against a scratch register loaded with i0
+ */
+static void
+_cci(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_word_t         is =  i0 >> 12;         /* i0 in units of 4096 */
+    jit_word_t         in = -i0;               /* negated constant */
+    jit_word_t         iS =  in >> 12;         /* -i0 in units of 4096 */
+    if (      i0 >= 0 && i0 <= 0xfff)
+       CMPI   (r1, i0);
+    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
+       CMPI_12(r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+       CMNI   (r1, in);
+    /* The round trip must be checked against the negated value "in";
+     * checking it against "is" can never succeed. */
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)
+       CMNI_12(r1, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP(r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    CSET(r0, cc);
+}
+
+/*
+ * Compare r0 with r1 and branch to i0 if condition cc holds.
+ * Returns the address of the conditional branch, which is the
+ * instruction address patch_at takes to retarget it.
+ */
+static jit_word_t
+_bccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    CMP(r0, r1);
+    at = _jit->pc.w;
+    /* displacement is counted in 4 byte instructions */
+    B_C(cc, (i0 - at) >> 2);
+    return (at);
+}
+
+/*
+ * Compare r0 with the constant i1 and branch to i0 if condition cc
+ * holds.  Returns the address of the conditional branch, which is the
+ * instruction address patch_at takes to retarget it.
+ *
+ * The compare is emitted in the first form that can hold the constant:
+ *     CMPI     i1 in 0 .. 0xfff
+ *     CMPI_12  i1 a multiple of 4096 with i1 >> 12 in 0 .. 0xfff
+ *     CMNI     -i1 in 0 .. 0xfff
+ *     CMNI_12  -i1 a multiple of 4096 with -i1 >> 12 in 0 .. 0xfff
+ *     CMP      against a scratch register loaded with i1
+ */
+static jit_word_t
+_bcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                reg;
+    jit_word_t         w, d;
+    jit_word_t         is =  i1 >> 12;         /* i1 in units of 4096 */
+    jit_word_t         in = -i1;               /* negated constant */
+    jit_word_t         iS =  in >> 12;         /* -i1 in units of 4096 */
+    if (      i1 >= 0 && i1 <= 0xfff)
+       CMPI   (r0, i1);
+    else if ((is << 12) == i1 && is >= 0 && is <= 0xfff)
+       CMPI_12(r0, is);
+    else if ( in >= 0 && in <= 0xfff)
+       CMNI   (r0, in);
+    /* The round trip must be checked against the negated value "in";
+     * checking it against "is" can never succeed. */
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)
+       CMNI_12(r0, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       CMP(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    w = _jit->pc.w;
+    /* displacement is counted in 4 byte instructions */
+    d = (i0 - w) >> 2;
+    B_C(cc, d);
+    return (w);
+}
+
+/*
+ * Branch to i0 if r0 == i1.  A comparison with zero is a single CBZ;
+ * any other constant goes through bcci with BCC_EQ.  Returns the
+ * address of the branch instruction.
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    if (i1 != 0)
+       return (bcci(BCC_EQ, i0, r0, i1));
+    at = _jit->pc.w;
+    CBZ(r0, (i0 - at) >> 2);
+    return (at);
+}
+
+/*
+ * Branch to i0 if r0 != i1.  A comparison with zero is a single CBNZ;
+ * any other constant goes through bcci with BCC_NE.  Returns the
+ * address of the branch instruction.
+ */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    if (i1 != 0)
+       return (bcci(BCC_NE, i0, r0, i1));
+    at = _jit->pc.w;
+    CBNZ(r0, (i0 - at) >> 2);
+    return (at);
+}
+
+/*
+ * r0 = r0 + r1 through addcr, then branch to i0 on condition cc.
+ * Returns the address of the conditional branch.
+ */
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    addcr(r0, r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * r0 = r0 + i1 through addci, then branch to i0 on condition cc.
+ * Returns the address of the conditional branch.
+ */
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at, disp;
+    addci(r0, r0, i1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * r0 = r0 - r1 through subcr, then branch to i0 on condition cc.
+ * Returns the address of the conditional branch.
+ */
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    subcr(r0, r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * r0 = r0 - i1 through subci, then branch to i0 on condition cc.
+ * Returns the address of the conditional branch.
+ */
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at, disp;
+    subci(r0, r0, i1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * Test r0 against the mask in r1 (TST) and branch to i0 on condition
+ * cc.  Returns the address of the conditional branch.
+ */
+static jit_word_t
+_bmxr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    TST(r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * Test r0 against the constant mask i1 and branch to i0 on condition
+ * cc.  TSTI is used when logical_immediate() can encode the mask
+ * (result other than -1); otherwise the mask is loaded in a scratch
+ * register and TST is used.  Returns the address of the conditional
+ * branch.
+ */
+static jit_word_t
+_bmxi(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at, disp;
+    jit_int32_t                tmp;
+    jit_int32_t                enc = logical_immediate(i1);
+    if (enc == -1) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       TST(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       TSTI(r0, enc);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * Jump to the address i0.  A direct B is used when the distance, in
+ * instructions, is within -33554432 .. 33554431; otherwise the
+ * address is loaded in a scratch register (requested with
+ * jit_class_nospill) and jmpr is used.
+ */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 2;
+    if (disp >= -33554432 && disp <= 33554431) {
+       B(disp);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Patchable jump: the target is always loaded with movi_p in a scratch
+ * register (requested with jit_class_nospill) and reached with jmpr.
+ * Returns what movi_p returned, i.e. the address of the load sequence.
+ */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    at = movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Call the function at address i0.  A direct BL is used when the
+ * distance, in instructions, is within -33554432 .. 33554431;
+ * otherwise the address is loaded in a scratch register and callr is
+ * used.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 2;
+    if (disp >= -33554432 && disp <= 33554431) {
+       BL(disp);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Patchable call: the target is always loaded with movi_p in a scratch
+ * register and reached with callr.  Returns what movi_p returned,
+ * i.e. the address of the load sequence.
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    at = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * The prolog and epilog are not as "optimized" as one would like, but
+ * the problem of overallocating stack space to save callee save
+ * registers exists on all ports. Using a variable
+ *     stack_framesize
+ * value is still a TODO; it would require patching some calls, most
+ * likely the offset of jit_arg* of stack arguments.
+ */
+/*
+ * Emit the function entry sequence.
+ *
+ * Order of operations:
+ *  1. Frame bookkeeping.  With define_frame or assume_frame the local
+ *     area offset is forced to -function->frame; with assume_frame
+ *     nothing at all is emitted.
+ *  2. function->stack is computed as self.alen - self.aoff rounded up
+ *     to a multiple of 16.
+ *  3. FP and LR are stored and FP is set from SP.
+ *  4. The registers x19..x28 and v8..v15 that are set in
+ *     function->regset are stored in their slots relative to SP.
+ *  5. SP is lowered by function->stack.
+ *  6. If the function uses allocar, self.aoff is stored at
+ *     aoffoff(FP).
+ *  7. For varargs functions the argument registers are stored in the
+ *     jit_va_list_t save area at vaoff(FP).
+ * The node argument is not used here.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    /* keep the local area offset 16 byte aligned when allocar is used */
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             /* align stack at 16 bytes */
+                             _jitc->function->self.aoff) + 15) & -16;
+    /* NOTE(review): STPI_POS presumably also updates SP by the given
+     * (scaled) offset, since the slots below are SP relative and the
+     * epilog undoes it with LDPI_PRE -- confirm in the macro. */
+    STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
+    MOV_XSP(FP_REGNO, SP_REGNO);
+    /* Store the pair L, R at slot O if both are in the regset, or
+     * only the one that is; R alone goes to slot O + 1. */
+#define SPILL(L, R, O)                                                 \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
+           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
+               STPI(L, R, SP_REGNO, O);                                \
+           else                                                        \
+               STRI(L, SP_REGNO, O);                                   \
+       }                                                               \
+       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
+           STRI(R, SP_REGNO, O + 1);                                   \
+    } while (0)
+    SPILL(19, 20,  2);
+    SPILL(21, 22,  4);
+    SPILL(23, 24,  6);
+    SPILL(25, 26,  8);
+    SPILL(27, 28, 10);
+#undef SPILL
+    /* Store vector register R at offset O from SP if it is in the
+     * regset. */
+#define SPILL(R, O)                                                    \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
+               stxi_d(O, SP_REGNO, R);                                 \
+    } while (0)
+    SPILL( 8,  96);
+    SPILL( 9, 104);
+    SPILL(10, 112);
+    SPILL(11, 120);
+    SPILL(12, 128);
+    SPILL(13, 136);
+    SPILL(14, 144);
+    SPILL(15, 152);
+#undef SPILL
+    if (_jitc->function->stack)
+       subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       /* record the current local area offset in the frame */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* Save gp registers in the save area, if any is a vararg */
+       for (reg = 8 - _jitc->function->vagp / -8;
+            jit_arg_reg_p(reg); ++reg)
+           stxi(_jitc->function->vaoff + offsetof(jit_va_list_t, x0) +
+                reg * 8, FP_REGNO, rn(JIT_RA0 - reg));
+
+       for (reg = 8 - _jitc->function->vafp / -16;
+            jit_arg_f_reg_p(reg); ++reg)
+           /* Save fp registers in the save area, if any is a vararg */
+           /* Note that the full 16 byte register is not saved, because
+            * lightning only handles float and double, and, while
+            * attempting to provide a va_list compatible pointer as
+            * jit_va_start return, does not guarantee it (on all ports). */
+           stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
+                  reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
+    }
+}
+
+/*
+ * Emit the function exit sequence, the mirror of the prolog.
+ *
+ * Nothing is emitted with assume_frame.  Otherwise SP is reset from FP
+ * (only needed if the prolog lowered it, i.e. function->stack is not
+ * zero), the registers x19..x28 and v8..v15 that are set in
+ * function->regset are reloaded from the same slots the prolog used,
+ * FP and LR are reloaded and RET is emitted.
+ * The node argument is not used here.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    if (_jitc->function->stack)
+       MOV_XSP(SP_REGNO, FP_REGNO);
+    /* Reload the pair L, R from slot O if both are in the regset, or
+     * only the one that is; R alone comes from slot O + 1. */
+#define LOAD(L, R, O)                                                  \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
+           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
+               LDPI(L, R, SP_REGNO, O);                                \
+           else                                                        \
+               LDRI(L, SP_REGNO, O);                                   \
+       }                                                               \
+       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
+           LDRI(R, SP_REGNO, O + 1);                                   \
+    } while (0)
+    LOAD(19, 20,  2);
+    LOAD(21, 22,  4);
+    LOAD(23, 24,  6);
+    LOAD(25, 26,  8);
+    LOAD(27, 28, 10);
+#undef LOAD
+    /* Reload vector register R from offset O from SP if it is in the
+     * regset. */
+#define LOAD(R, O)                                                     \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
+               ldxi_d(R, SP_REGNO, O);                                 \
+    } while (0)
+    LOAD( 8,  96);
+    LOAD( 9, 104);
+    LOAD(10, 112);
+    LOAD(11, 120);
+    LOAD(12, 128);
+    LOAD(13, 136);
+    LOAD(14, 144);
+    LOAD(15, 152);
+#undef LOAD
+    /* NOTE(review): LDPI_PRE presumably also adds the (scaled) frame
+     * size back to SP, undoing the prolog's STPI_POS -- confirm in the
+     * macro. */
+    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+    RET();
+}
+
+/*
+ * Initialize the jit_va_list_t of the current (varargs) function and
+ * leave its address, FP + function->vaoff, in r0.  The fields stack,
+ * gptop, fptop, gpoff and fpoff are filled in, using one scratch
+ * register for the values.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Return jit_va_list_t in the register argument */
+    addi(r0, FP_REGNO, _jitc->function->vaoff);
+
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* Initialize stack pointer to the first stack argument. */
+    addi(rn(reg), FP_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
+
+    /* Initialize gp top pointer: va_gp_top_offset bytes past the start
+     * of the jit_va_list_t itself. */
+    addi(rn(reg), r0, va_gp_top_offset);
+    stxi(offsetof(jit_va_list_t, gptop), r0, rn(reg));
+
+    /* Initialize fp top pointer: va_fp_top_offset bytes past the start
+     * of the jit_va_list_t itself. */
+    addi(rn(reg), r0, va_fp_top_offset);
+    stxi(offsetof(jit_va_list_t, fptop), r0, rn(reg));
+
+    /* Initialize gp offset in the save area. */
+    movi(rn(reg), _jitc->function->vagp);
+    stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+
+    /* Initialize fp offset in the save area. */
+    movi(rn(reg), _jitc->function->vafp);
+    stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/*
+ * Fetch the next word sized variable argument in r0; r1 holds the
+ * address of the jit_va_list_t.
+ *
+ * The generated code has two paths, selected at run time on gpoff:
+ *  - gpoff < 0: the argument is loaded from gptop + gpoff and gpoff
+ *    is advanced by 8;
+ *  - gpoff >= 0: the argument is loaded from the stack pointer kept
+ *    in the va_list, which is then advanced by 8.
+ * Both forward branches are emitted with a dummy target (the current
+ * pc) and fixed with patch_at once the landing address is known.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+    jit_int32_t                rg0, rg1;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the gp offset in save area in the first temporary. */
+    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei(_jit->pc.w, rn(rg0), 0);
+
+    /* Load the gp save pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, gptop));
+
+    /* Load the vararg argument in the first argument. */
+    ldxr(r0, rn(rg1), rn(rg0));
+
+    /* Update the gp offset. */
+    addi(rn(rg0), rn(rg0), 8);
+    stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load stack pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
+
+    /* Load argument. */
+    ldr(r0, rn(rg0));
+
+    /* Update stack pointer. */
+    addi(rn(rg0), rn(rg0), 8);
+    stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
+
+    /* Where to land if argument is in gp save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+}
+
+/*
+ * Retarget the already emitted instruction at address instr so that
+ * it refers to label.
+ *
+ * Three shapes are recognized from the opcode bits:
+ *  - B / BL: the 26-bit displacement, in instructions, is rewritten;
+ *  - B.cond / CBZ / CBNZ: the 19-bit displacement, in instructions,
+ *    is rewritten;
+ *  - MOVZ followed by MOVK_16, MOVK_32 and MOVK_48 (the sequence
+ *    movi_p emits): the four 16-bit immediates are rewritten with the
+ *    chunks of label.
+ * Anything else aborts.  Displacements that do not fit their field
+ * trigger an assertion.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    instr_t             i;
+    jit_word_t          d;
+    jit_int32_t                 fc, ff, ffc;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    i.w = u.i[0];
+    /* opcode masks of decreasing width, one per instruction class */
+    fc  = i.w & 0xfc000000;
+    ff  = i.w & 0xff000000;
+    ffc = i.w & 0xffc00000;
+    if (fc == A64_B || fc == A64_BL) {
+       d = (label - instr) >> 2;
+       /* signed 26-bit field: -2^25 .. 2^25 - 1 */
+       assert(d >= -33554432 && d <= 33554431);
+       i.imm26.b = d;
+       u.i[0] = i.w;
+    }
+    else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
+       d = (label - instr) >> 2;
+       /* signed 19-bit field: -2^18 .. 2^18 - 1 */
+       assert(d >= -262144 && d <= 262143);
+       i.imm19.b = d;
+       u.i[0] = i.w;
+    }
+    else if (ffc == (A64_MOVZ|XS)) {
+       i.imm16.b = label;
+       u.i[0] = i.w;
+       i.w = u.i[1];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
+       i.imm16.b = label >> 16;
+       u.i[1] = i.w;
+       i.w = u.i[2];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
+       i.imm16.b = label >> 32;
+       u.i[2] = i.w;
+       i.w = u.i[3];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
+       i.imm16.b = label >> 48;
+       u.i[3] = i.w;
+    }
+    else
+       abort();
+}
+#endif
diff --git a/deps/lightning/lib/jit_aarch64-fpu.c b/deps/lightning/lib/jit_aarch64-fpu.c
new file mode 100644 (file)
index 0000000..871ba7e
--- /dev/null
@@ -0,0 +1,914 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/*
+ * Opcode words for the floating-point instructions emitted by this file.
+ * They are passed as the Op argument of _osvvv/_osvv_/_os_vv, which store
+ * the Sz and register arguments into the instr_t fields before emitting.
+ */
+#  define A64_SCVTF                    0x1e220000
+#  define A64_FMOVWV                   0x1e260000
+#  define A64_FMOVVW                   0x1e270000
+#  define A64_FMOVXV                   0x9e260000
+#  define A64_FMOVVX                   0x9e270000
+#  define A64_FCVTZS                   0x1e380000
+#  define A64_FCMPE                    0x1e202010
+#  define A64_FMOV                     0x1e204000
+#  define A64_FABS                     0x1e20c000
+#  define A64_FNEG                     0x1e214000
+#  define A64_FSQRT                    0x1e21c000
+#  define A64_FCVTS                    0x1e224000
+#  define A64_FCVTD                    0x1e22c000
+#  define A64_FMUL                     0x1e200800
+#  define A64_FDIV                     0x1e201800
+#  define A64_FADD                     0x1e202800
+#  define A64_FSUB                     0x1e203800
+/*
+ * One macro per emitted instruction form.  Each forwards an opcode word
+ * and an Sz value to one of three encoders:
+ *   os_vv(Op,Sz,Rn,Rm)     two source registers, no destination (FCMPE)
+ *   osvv_(Op,Sz,Rd,Rn)     destination and one source
+ *   osvvv(Op,Sz,Rd,Rn,Rm)  destination and two sources
+ * In the arithmetic forms below the ...S macros pass Sz=0 and the ...D
+ * macros pass Sz=1.
+ */
+#  define FCMPES(Rn,Rm)                        os_vv(A64_FCMPE,0,Rn,Rm)
+#  define FCMPED(Rn,Rm)                        os_vv(A64_FCMPE,1,Rn,Rm)
+#  define FMOVS(Rd,Rn)                 osvv_(A64_FMOV,0,Rd,Rn)
+#  define FMOVD(Rd,Rn)                 osvv_(A64_FMOV,1,Rd,Rn)
+#  define FMOVWS(Rd,Rn)                        osvv_(A64_FMOVWV,0,Rd,Rn)
+#  define FMOVSW(Rd,Rn)                        osvv_(A64_FMOVVW,0,Rd,Rn)
+#  define FMOVXD(Rd,Rn)                        osvv_(A64_FMOVXV,1,Rd,Rn)
+#  define FMOVDX(Rd,Rn)                        osvv_(A64_FMOVVX,1,Rd,Rn)
+#  define FCVT_SD(Rd,Rn)               osvv_(A64_FCVTS,1,Rd,Rn)
+#  define FCVT_DS(Rd,Rn)               osvv_(A64_FCVTD,0,Rd,Rn)
+#  define SCVTFS(Rd,Rn)                        osvv_(A64_SCVTF|XS,0,Rd,Rn)
+#  define SCVTFD(Rd,Rn)                        osvv_(A64_SCVTF|XS,1,Rd,Rn)
+#  define FCVTSZ_WS(Rd,Rn)             osvv_(A64_FCVTZS,0,Rd,Rn)
+#  define FCVTSZ_WD(Rd,Rn)             osvv_(A64_FCVTZS,1,Rd,Rn)
+#  define FCVTSZ_XS(Rd,Rn)             osvv_(A64_FCVTZS|XS,0,Rd,Rn)
+#  define FCVTSZ_XD(Rd,Rn)             osvv_(A64_FCVTZS|XS,1,Rd,Rn)
+#  define FABSS(Rd,Rn)                 osvv_(A64_FABS,0,Rd,Rn)
+#  define FABSD(Rd,Rn)                 osvv_(A64_FABS,1,Rd,Rn)
+#  define FNEGS(Rd,Rn)                 osvv_(A64_FNEG,0,Rd,Rn)
+#  define FNEGD(Rd,Rn)                 osvv_(A64_FNEG,1,Rd,Rn)
+#  define FSQRTS(Rd,Rn)                        osvv_(A64_FSQRT,0,Rd,Rn)
+#  define FSQRTD(Rd,Rn)                        osvv_(A64_FSQRT,1,Rd,Rn)
+#  define FADDS(Rd,Rn,Rm)              osvvv(A64_FADD,0,Rd,Rn,Rm)
+#  define FADDD(Rd,Rn,Rm)              osvvv(A64_FADD,1,Rd,Rn,Rm)
+#  define FSUBS(Rd,Rn,Rm)              osvvv(A64_FSUB,0,Rd,Rn,Rm)
+#  define FSUBD(Rd,Rn,Rm)              osvvv(A64_FSUB,1,Rd,Rn,Rm)
+#  define FMULS(Rd,Rn,Rm)              osvvv(A64_FMUL,0,Rd,Rn,Rm)
+#  define FMULD(Rd,Rn,Rm)              osvvv(A64_FMUL,1,Rd,Rn,Rm)
+#  define FDIVS(Rd,Rn,Rm)              osvvv(A64_FDIV,0,Rd,Rn,Rm)
+#  define FDIVD(Rd,Rn,Rm)              osvvv(A64_FDIV,1,Rd,Rn,Rm)
+/* Encoder entry points; the macros supply the implicit _jit argument. */
+#  define osvvv(Op,Sz,Rd,Rn,Rm)                _osvvv(_jit,Op,Sz,Rd,Rn,Rm)
+static void _osvvv(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define osvv_(Op,Sz,Rd,Rn)           _osvv_(_jit,Op,Sz,Rd,Rn)
+static void _osvv_(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define os_vv(Op,Sz,Rn,Rm)           _os_vv(_jit,Op,Sz,Rn,Rm)
+static void _os_vv(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1)
+static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_f_l(r0,r1)            FCVTSZ_XS(r0,r1)
+#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_d_l(r0,r1)            FCVTSZ_XD(r0,r1)
+#  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2)
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define subr_f(r0,r1,r2)             FSUBS(r0,r1,r2)
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define rsbr_f(r0, r1, r2)           subr_f(r0, r2, r1)
+#  define rsbi_f(r0, r1, i0)           _rsbi_f(_jit, r0, r1, i0)
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define mulr_f(r0,r1,r2)             FMULS(r0,r1,r2)
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define divr_f(r0,r1,r2)             FDIVS(r0,r1,r2)
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define absr_f(r0,r1)                        FABSS(r0,r1)
+#  define negr_f(r0,r1)                        FNEGS(r0,r1)
+#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1)
+#  define extr_f(r0,r1)                        SCVTFS(r0,r1)
+#  define ldr_f(r0,r1)                 _ldr_f(_jit,r0,r1)
+static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 _str_f(_jit,r0,r1)
+static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1)
+static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define extr_d_f(r0,r1)              FCVT_SD(r0,r1)
+#  define fccr(cc,r0,r1,r2)            _fccr(_jit,cc,r0,r1,r2)
+static void _fccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define fcci(cc,r0,r1,i0)            _fcci(_jit,cc,r0,r1,i0)
+static void _fcci(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define ltr_f(r0,r1,r2)              fccr(CC_MI,r0,r1,r2)
+#  define lti_f(r0,r1,i0)              fcci(CC_MI,r0,r1,i0)
+#  define ler_f(r0,r1,r2)              fccr(CC_LS,r0,r1,r2)
+#  define lei_f(r0,r1,i0)              fcci(CC_LS,r0,r1,i0)
+#  define eqr_f(r0,r1,r2)              fccr(CC_EQ,r0,r1,r2)
+#  define eqi_f(r0,r1,i0)              fcci(CC_EQ,r0,r1,i0)
+#  define ger_f(r0,r1,r2)              fccr(CC_GE,r0,r1,r2)
+#  define gei_f(r0,r1,i0)              fcci(CC_GE,r0,r1,i0)
+#  define gtr_f(r0,r1,r2)              fccr(CC_GT,r0,r1,r2)
+#  define gti_f(r0,r1,i0)              fcci(CC_GT,r0,r1,i0)
+#  define ner_f(r0,r1,r2)              fccr(CC_NE,r0,r1,r2)
+#  define nei_f(r0,r1,i0)              fcci(CC_NE,r0,r1,i0)
+#  define unltr_f(r0,r1,r2)            fccr(CC_LT,r0,r1,r2)
+#  define unlti_f(r0,r1,i0)            fcci(CC_LT,r0,r1,i0)
+#  define unler_f(r0,r1,r2)            fccr(CC_LE,r0,r1,r2)
+#  define unlei_f(r0,r1,i0)            fcci(CC_LE,r0,r1,i0)
+#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2)
+static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define unger_f(r0,r1,r2)            fccr(CC_PL,r0,r1,r2)
+#  define ungei_f(r0,r1,i0)            fcci(CC_PL,r0,r1,i0)
+#  define ungtr_f(r0,r1,r2)            fccr(CC_HI,r0,r1,r2)
+#  define ungti_f(r0,r1,i0)            fcci(CC_HI,r0,r1,i0)
+#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2)
+static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define ordr_f(r0,r1,r2)             fccr(CC_VC,r0,r1,r2)
+#  define ordi_f(r0,r1,i0)             fcci(CC_VC,r0,r1,i0)
+#  define unordr_f(r0,r1,r2)           fccr(CC_VS,r0,r1,r2)
+#  define unordi_f(r0,r1,i0)           fcci(CC_VS,r0,r1,i0)
+#define fbccr(cc,i0,r0,r1)             _fbccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_fbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#define fbcci(cc,i0,r0,i1)             _fbcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_fbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bltr_f(i0,r0,r1)             fbccr(BCC_MI,i0,r0,r1)
+#  define blti_f(i0,r0,i1)             fbcci(BCC_MI,i0,r0,i1)
+#  define bler_f(i0,r0,r1)             fbccr(BCC_LS,i0,r0,r1)
+#  define blei_f(i0,r0,i1)             fbcci(BCC_LS,i0,r0,i1)
+#  define beqr_f(i0,r0,r1)             fbccr(BCC_EQ,i0,r0,r1)
+#  define beqi_f(i0,r0,i1)             fbcci(BCC_EQ,i0,r0,i1)
+#  define bger_f(i0,r0,r1)             fbccr(BCC_GE,i0,r0,r1)
+#  define bgei_f(i0,r0,i1)             fbcci(BCC_GE,i0,r0,i1)
+#  define bgtr_f(i0,r0,r1)             fbccr(BCC_GT,i0,r0,r1)
+#  define bgti_f(i0,r0,i1)             fbcci(BCC_GT,i0,r0,i1)
+#  define bner_f(i0,r0,r1)             fbccr(BCC_NE,i0,r0,r1)
+#  define bnei_f(i0,r0,i1)             fbcci(BCC_NE,i0,r0,i1)
+#  define bunltr_f(i0,r0,r1)           fbccr(BCC_LT,i0,r0,r1)
+#  define bunlti_f(i0,r0,i1)           fbcci(BCC_LT,i0,r0,i1)
+#  define bunler_f(i0,r0,r1)           fbccr(BCC_LE,i0,r0,r1)
+#  define bunlei_f(i0,r0,i1)           fbcci(BCC_LE,i0,r0,i1)
+#  define buneqr_f(i0,r0,r1)           _buneqr_f(_jit,i0,r0,r1)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunger_f(i0,r0,r1)           fbccr(BCC_PL,i0,r0,r1)
+#  define bungei_f(i0,r0,i1)           fbcci(BCC_PL,i0,r0,i1)
+#  define bungtr_f(i0,r0,r1)           fbccr(BCC_HI,i0,r0,r1)
+#  define bungti_f(i0,r0,i1)           fbcci(BCC_HI,i0,r0,i1)
+#  define bltgtr_f(i0,r0,r1)           _bltgtr_f(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bordr_f(i0,r0,r1)            fbccr(BCC_VC,i0,r0,r1)
+#  define bordi_f(i0,r0,i1)            fbcci(BCC_VC,i0,r0,i1)
+#  define bunordr_f(i0,r0,r1)          fbccr(BCC_VS,i0,r0,r1)
+#  define bunordi_f(i0,r0,i1)          fbcci(BCC_VS,i0,r0,i1)
+#  define addr_d(r0,r1,r2)             FADDD(r0,r1,r2)
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define subr_d(r0,r1,r2)             FSUBD(r0,r1,r2)
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define rsbr_d(r0, r1, r2)           subr_d(r0, r2, r1)
+#  define rsbi_d(r0, r1, i0)           _rsbi_d(_jit, r0, r1, i0)
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define mulr_d(r0,r1,r2)             FMULD(r0,r1,r2)
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define divr_d(r0,r1,r2)             FDIVD(r0,r1,r2)
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define absr_d(r0,r1)                        FABSD(r0,r1)
+#  define negr_d(r0,r1)                        FNEGD(r0,r1)
+#  define sqrtr_d(r0,r1)               FSQRTD(r0,r1)
+#  define extr_d(r0,r1)                        SCVTFD(r0,r1)
+#  define ldr_d(r0,r1)                 _ldr_d(_jit,r0,r1)
+static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
+static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define extr_f_d(r0,r1)              FCVT_DS(r0,r1)
+#  define dccr(cc,r0,r1,r2)            _dccr(_jit,cc,r0,r1,r2)
+static void _dccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define dcci(cc,r0,r1,i0)            _dcci(_jit,cc,r0,r1,i0)
+static void _dcci(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define ltr_d(r0,r1,r2)              dccr(CC_MI,r0,r1,r2)
+#  define lti_d(r0,r1,i0)              dcci(CC_MI,r0,r1,i0)
+#  define ler_d(r0,r1,r2)              dccr(CC_LS,r0,r1,r2)
+#  define lei_d(r0,r1,i0)              dcci(CC_LS,r0,r1,i0)
+#  define eqr_d(r0,r1,r2)              dccr(CC_EQ,r0,r1,r2)
+#  define eqi_d(r0,r1,i0)              dcci(CC_EQ,r0,r1,i0)
+#  define ger_d(r0,r1,r2)              dccr(CC_GE,r0,r1,r2)
+#  define gei_d(r0,r1,i0)              dcci(CC_GE,r0,r1,i0)
+#  define gtr_d(r0,r1,r2)              dccr(CC_GT,r0,r1,r2)
+#  define gti_d(r0,r1,i0)              dcci(CC_GT,r0,r1,i0)
+#  define ner_d(r0,r1,r2)              dccr(CC_NE,r0,r1,r2)
+#  define nei_d(r0,r1,i0)              dcci(CC_NE,r0,r1,i0)
+#  define unltr_d(r0,r1,r2)            dccr(CC_LT,r0,r1,r2)
+#  define unlti_d(r0,r1,i0)            dcci(CC_LT,r0,r1,i0)
+#  define unler_d(r0,r1,r2)            dccr(CC_LE,r0,r1,r2)
+#  define unlei_d(r0,r1,i0)            dcci(CC_LE,r0,r1,i0)
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define unger_d(r0,r1,r2)            dccr(CC_PL,r0,r1,r2)
+#  define ungei_d(r0,r1,i0)            dcci(CC_PL,r0,r1,i0)
+#  define ungtr_d(r0,r1,r2)            dccr(CC_HI,r0,r1,r2)
+#  define ungti_d(r0,r1,i0)            dcci(CC_HI,r0,r1,i0)
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define ordr_d(r0,r1,r2)             dccr(CC_VC,r0,r1,r2)
+#  define ordi_d(r0,r1,i0)             dcci(CC_VC,r0,r1,i0)
+#  define unordr_d(r0,r1,r2)           dccr(CC_VS,r0,r1,r2)
+#  define unordi_d(r0,r1,i0)           dcci(CC_VS,r0,r1,i0)
+#define dbccr(cc,i0,r0,r1)             _dbccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_dbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#define dbcci(cc,i0,r0,i1)             _dbcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_dbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bltr_d(i0,r0,r1)             dbccr(BCC_MI,i0,r0,r1)
+#  define blti_d(i0,r0,i1)             dbcci(BCC_MI,i0,r0,i1)
+#  define bler_d(i0,r0,r1)             dbccr(BCC_LS,i0,r0,r1)
+#  define blei_d(i0,r0,i1)             dbcci(BCC_LS,i0,r0,i1)
+#  define beqr_d(i0,r0,r1)             dbccr(BCC_EQ,i0,r0,r1)
+#  define beqi_d(i0,r0,i1)             dbcci(BCC_EQ,i0,r0,i1)
+#  define bger_d(i0,r0,r1)             dbccr(BCC_GE,i0,r0,r1)
+#  define bgei_d(i0,r0,i1)             dbcci(BCC_GE,i0,r0,i1)
+#  define bgtr_d(i0,r0,r1)             dbccr(BCC_GT,i0,r0,r1)
+#  define bgti_d(i0,r0,i1)             dbcci(BCC_GT,i0,r0,i1)
+#  define bner_d(i0,r0,r1)             dbccr(BCC_NE,i0,r0,r1)
+#  define bnei_d(i0,r0,i1)             dbcci(BCC_NE,i0,r0,i1)
+#  define bunltr_d(i0,r0,r1)           dbccr(BCC_LT,i0,r0,r1)
+#  define bunlti_d(i0,r0,i1)           dbcci(BCC_LT,i0,r0,i1)
+#  define bunler_d(i0,r0,r1)           dbccr(BCC_LE,i0,r0,r1)
+#  define bunlei_d(i0,r0,i1)           dbcci(BCC_LE,i0,r0,i1)
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunger_d(i0,r0,r1)           dbccr(BCC_PL,i0,r0,r1)
+#  define bungei_d(i0,r0,i1)           dbcci(BCC_PL,i0,r0,i1)
+#  define bungtr_d(i0,r0,r1)           dbccr(BCC_HI,i0,r0,r1)
+#  define bungti_d(i0,r0,i1)           dbcci(BCC_HI,i0,r0,i1)
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bordr_d(i0,r0,r1)            dbccr(BCC_VC,i0,r0,r1)
+#  define bordi_d(i0,r0,i1)            dbcci(BCC_VC,i0,r0,i1)
+#  define bunordr_d(i0,r0,r1)          dbccr(BCC_VS,i0,r0,r1)
+#  define bunordi_d(i0,r0,i1)          dbcci(BCC_VS,i0,r0,i1)
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * Emit one instruction word: start from Op, store Sz, Rd, Rn and Rm into
+ * the matching instr_t fields and write the result with ii().
+ * All arguments are range-checked with assert() beforehand.
+ */
+static void
+_osvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+
+    /* register numbers must fit in 5 bits, Sz in 2 bits */
+    assert(!(Rd & ~0x1f));
+    assert(!(Rn & ~0x1f));
+    assert(!(Rm & ~0x1f));
+    assert(!(Sz & ~0x3));
+    /* Op may only have bits set inside the 0xffe0fc00 mask */
+    assert(!(Op & ~0xffe0fc00));
+
+    insn.w = Op;
+    insn.size.b = Sz;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * Emit one instruction word: start from Op, store Sz, Rd and Rn into the
+ * matching instr_t fields and write the result with ii().
+ * All arguments are range-checked with assert() beforehand.
+ */
+static void
+_osvv_(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Sz, jit_int32_t Rd, jit_int32_t Rn)
+{
+    instr_t    insn;
+
+    /* register numbers must fit in 5 bits, Sz in 2 bits */
+    assert(!(Rd & ~0x1f));
+    assert(!(Rn & ~0x1f));
+    assert(!(Sz & ~0x3));
+    /* Op may only have bits set inside the 0xfffffc00 mask */
+    assert(!(Op & ~0xfffffc00));
+
+    insn.w = Op;
+    insn.size.b = Sz;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    ii(insn.w);
+}
+
+/*
+ * Emit one instruction word that has two register operands (Rn, Rm) and
+ * no destination: start from Op, store Sz, Rn and Rm into the matching
+ * instr_t fields and write the result with ii().
+ * All arguments are range-checked with assert() beforehand.
+ */
+static void
+_os_vv(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Sz, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+
+    /* register numbers must fit in 5 bits, Sz in 2 bits */
+    assert(!(Rn & ~0x1f));
+    assert(!(Rm & ~0x1f));
+    assert(!(Sz & ~0x3));
+    /* Op may only have bits set inside the 0xff20fc1f mask */
+    assert(!(Op & ~0xff20fc1f));
+
+    insn.w = Op;
+    insn.size.b = Sz;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * fopi(name) defines _<name>i_f(): put the float32 immediate i0 into a
+ * scratch FPR with movi_f, run <name>r_f(r0, r1, scratch), then release
+ * the scratch register.
+ */
+#define fopi(name) \
+static void \
+_##name##i_f(jit_state_t *_jit, \
+             jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
+{ \
+    jit_int32_t tmp; \
+    tmp = jit_get_reg(jit_class_fpr); \
+    movi_f(rn(tmp), i0); \
+    name##r_f(r0, r1, rn(tmp)); \
+    jit_unget_reg(tmp); \
+}
+/*
+ * dopi(name) defines _<name>i_d(): put the float64 immediate i0 into a
+ * scratch FPR with movi_d, run <name>r_d(r0, r1, scratch), then release
+ * the scratch register.
+ */
+#define dopi(name) \
+static void \
+_##name##i_d(jit_state_t *_jit, \
+             jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
+{ \
+    jit_int32_t tmp; \
+    tmp = jit_get_reg(jit_class_fpr); \
+    movi_d(rn(tmp), i0); \
+    name##r_d(r0, r1, rn(tmp)); \
+    jit_unget_reg(tmp); \
+}
+/*
+ * fbopi(name) defines _b<name>i_f(): put the float32 immediate i1 into a
+ * scratch FPR (requested with jit_class_nospill), run
+ * b<name>r_f(i0, r0, scratch) and return the value that call returned.
+ */
+#define fbopi(name) \
+static jit_word_t \
+_b##name##i_f(jit_state_t *_jit, \
+              jit_word_t i0, jit_int32_t r0, jit_float32_t i1) \
+{ \
+    jit_word_t at; \
+    jit_int32_t tmp; \
+    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill); \
+    movi_f(rn(tmp), i1); \
+    at = b##name##r_f(i0, r0, rn(tmp)); \
+    jit_unget_reg(tmp); \
+    return (at); \
+}
+/*
+ * dbopi(name) defines _b<name>i_d(): put the float64 immediate i1 into a
+ * scratch FPR (requested with jit_class_nospill), run
+ * b<name>r_d(i0, r0, scratch) and return the value that call returned.
+ */
+#define dbopi(name) \
+static jit_word_t \
+_b##name##i_d(jit_state_t *_jit, \
+              jit_word_t i0, jit_int32_t r0, jit_float64_t i1) \
+{ \
+    jit_word_t at; \
+    jit_int32_t tmp; \
+    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill); \
+    movi_d(rn(tmp), i1); \
+    at = b##name##r_d(i0, r0, rn(tmp)); \
+    jit_unget_reg(tmp); \
+    return (at); \
+}
+
+/*
+ * Emit FCVTSZ_WS (A64_FCVTZS with Sz=0) from r1 into r0, then pass r0
+ * through extr_i.
+ */
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    FCVTSZ_WS(r0, r1);
+    /* NOTE(review): extr_i is defined elsewhere; presumably it sign-extends
+     * the 32-bit result to the full register -- confirm. */
+    extr_i(r0, r0);
+}
+
+/*
+ * Emit FCVTSZ_WD (A64_FCVTZS with Sz=1) from r1 into r0, then pass r0
+ * through extr_i.
+ */
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    FCVTSZ_WD(r0, r1);
+    /* NOTE(review): extr_i is defined elsewhere; presumably it sign-extends
+     * the 32-bit result to the full register -- confirm. */
+    extr_i(r0, r0);
+}
+
+/* Define _addi_f, _subi_f, _rsbi_f, _muli_f and _divi_f via fopi(). */
+fopi(add)
+fopi(sub)
+fopi(rsb)
+fopi(mul)
+fopi(div)
+
+/*
+ * Load into FPR r0 from the address held in r1: the value is fetched
+ * into a scratch GPR with ldr_i and then moved across with FMOVSW.
+ */
+static void
+_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldr_i(rn(tmp), r1);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load into FPR r0 from the immediate address i0: the value is fetched
+ * into a scratch GPR with ldi_i and then moved across with FMOVSW.
+ */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldi_i(rn(tmp), i0);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed load into FPR r0 using registers r1 and r2: the value is
+ * fetched into a scratch GPR with ldxr_i and then moved across with
+ * FMOVSW.
+ */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldxr_i(rn(tmp), r1, r2);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed load into FPR r0 using register r1 and immediate i0: the
+ * value is fetched into a scratch GPR with ldxi_i and then moved across
+ * with FMOVSW.
+ */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldxi_i(rn(tmp), r1, i0);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store FPR r1 to the address held in r0: the value is moved into a
+ * scratch GPR with FMOVWS and written out with str_i.
+ */
+static void
+_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r1);
+    str_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store FPR r0 to the immediate address i0: the value is moved into a
+ * scratch GPR with FMOVWS and written out with sti_i.
+ */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r0);
+    sti_i(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed store of FPR r2 using registers r0 and r1: the value is moved
+ * into a scratch GPR with FMOVWS and written out with stxr_i.
+ */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r2);
+    stxr_i(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed store of FPR r1 using immediate i0 and register r0: the value
+ * is moved into a scratch GPR with FMOVWS and written out with stxi_i.
+ */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r1);
+    stxi_i(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Copy FPR r1 to FPR r0 with FMOVS; nothing is emitted when r0 == r1. */
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMOVS(r0, r1);
+}
+
+/*
+ * Load the float32 constant i0 into FPR r0.
+ * An all-zero bit pattern is moved straight from WZR_REGNO; any other
+ * pattern is first built in a scratch GPR with movi and then moved
+ * across with FMOVSW.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+    jit_int32_t tmp;
+
+    bits.f = i0;
+    if (bits.i == 0) {
+       FMOVSW(r0, WZR_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    /* keep only the low 32 bits so movi is not asked to generate the
+     * unused top half */
+    movi(rn(tmp), ((jit_word_t)bits.i) & 0xffffffff);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision compare-and-set: emit FCMPES on (r1, r2), then CSET
+ * r0 on condition cc.
+ */
+static void
+_fccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPES(r1, r2);
+    CSET(r0, cc);
+}
+
+/*
+ * Immediate form of fccr: the float32 constant i0 is placed in a scratch
+ * FPR and used as the second compare operand.
+ */
+static void
+_fcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(tmp), i0);
+    fccr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = (r1 unordered-or-equal r2), single precision.
+ * r0 is first set from CC_VS; a B.VS then skips the second CSET, which
+ * otherwise overwrites r0 from CC_EQ.
+ */
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t skip;
+
+    FCMPES(r1, r2);
+    CSET(r0, CC_VS);
+    skip = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered: keep the 1 already in r0 */
+    CSET(r0, CC_EQ);           /* ordered: result is the equality test */
+    patch_at(skip, _jit->pc.w);        /* branch lands after the CSET */
+}
+fopi(uneq)
+
+/*
+ * r0 = (r1 ordered-and-not-equal r2), single precision.
+ * r0 is first set from CC_VC; a B.VS then skips the second CSET, which
+ * otherwise overwrites r0 from CC_NE.
+ */
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t skip;
+
+    FCMPES(r1, r2);
+    CSET(r0, CC_VC);           /* 1 when ordered */
+    skip = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered: keep the 0 already in r0 */
+    CSET(r0, CC_NE);           /* ordered: 1 when not equal */
+    patch_at(skip, _jit->pc.w);        /* branch lands after the CSET */
+}
+fopi(ltgt)
+
+/*
+ * Single-precision compare-and-branch: emit FCMPES on (r0, r1) followed
+ * by a B.cc whose displacement is (i0 - address of the branch) in words.
+ * Returns the address of the branch instruction.
+ */
+static jit_word_t
+_fbccr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t here;
+
+    FCMPES(r0, r1);
+    here = _jit->pc.w;
+    B_C(cc, (i0 - here) >> 2);
+    return (here);
+}
+
+/*
+ * Immediate form of fbccr: the float32 constant i1 is placed in a
+ * scratch FPR (requested with jit_class_nospill) and used as the second
+ * compare operand.  Returns what fbccr returned.
+ */
+static jit_word_t
+_fbcci(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_float32_t i1)
+{
+    jit_word_t  at;
+    jit_int32_t tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_f(rn(tmp), i1);
+    at = fbccr(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Branch to i0 when r0 and r1 are unordered or equal (single precision).
+ * Emitted sequence: FCMPES; B.VS -> the B; B.NE -> past the B; B i0.
+ * Returns the address of the unconditional B.
+ */
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t on_unord, on_ne, jump;
+
+    FCMPES(r0, r1);
+    on_unord = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    on_ne = _jit->pc.w;
+    B_C(BCC_NE, 1);            /* not equal does not satisfy condition */
+    patch_at(on_unord, _jit->pc.w);
+    jump = _jit->pc.w;
+    B((i0 - jump) >> 2);
+    patch_at(on_ne, _jit->pc.w);
+    return (jump);
+}
+fbopi(uneq)
+
+/*
+ * Branch to i0 when r0 and r1 are ordered and not equal (single
+ * precision).
+ * Emitted sequence: FCMPES; B.VS -> past the B; B.EQ -> past the B; B i0.
+ * Returns the address of the unconditional B.
+ */
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t on_unord, on_eq, jump;
+
+    FCMPES(r0, r1);
+    on_unord = _jit->pc.w;
+    B_C(BCC_VS, 2);            /* jump over if unordered */
+    on_eq = _jit->pc.w;
+    B_C(BCC_EQ, 1);            /* jump over if equal */
+    jump = _jit->pc.w;
+    B((i0 - jump) >> 2);
+    /* both conditional branches land right after the B */
+    patch_at(on_unord, _jit->pc.w);
+    patch_at(on_eq, _jit->pc.w);
+    return (jump);
+}
+fbopi(ltgt)
+
+/* Define _addi_d, _subi_d, _rsbi_d, _muli_d and _divi_d via dopi(). */
+dopi(add)
+dopi(sub)
+dopi(rsb)
+dopi(mul)
+dopi(div)
+
+/*
+ * Load into FPR r0 from the address held in r1: the value is fetched
+ * into a scratch GPR with ldr_l and then moved across with FMOVDX.
+ */
+static void
+_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldr_l(rn(tmp), r1);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load into FPR r0 from the immediate address i0: the value is fetched
+ * into a scratch GPR with ldi_l and then moved across with FMOVDX.
+ */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldi_l(rn(tmp), i0);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed load into FPR r0 using registers r1 and r2: the value is
+ * fetched into a scratch GPR with ldxr_l and then moved across with
+ * FMOVDX.
+ */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldxr_l(rn(tmp), r1, r2);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed load into FPR r0 using register r1 and immediate i0: the
+ * value is fetched into a scratch GPR with ldxi_l and then moved across
+ * with FMOVDX.
+ */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    ldxi_l(rn(tmp), r1, i0);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store FPR r1 to the address held in r0: the value is moved into a
+ * scratch GPR with FMOVXD and written out with str_l.
+ */
+static void
+_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r1);
+    str_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store FPR r0 to the immediate address i0: the value is moved into a
+ * scratch GPR with FMOVXD and written out with sti_l.
+ */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r0);
+    sti_l(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed store of FPR r2 using registers r0 and r1: the value is moved
+ * into a scratch GPR with FMOVXD and written out with stxr_l.
+ */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r2);
+    stxr_l(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Indexed store of FPR r1 using immediate i0 and register r0: the value
+ * is moved into a scratch GPR with FMOVXD and written out with stxi_l.
+ */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r1);
+    stxi_l(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Copy FPR r1 to FPR r0 with FMOVD; nothing is emitted when r0 == r1. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMOVD(r0, r1);
+}
+
+/*
+ * Load the float64 constant i0 into FPR r0.
+ * An all-zero bit pattern is moved straight from XZR_REGNO; any other
+ * pattern is first built in a scratch GPR with movi and then moved
+ * across with FMOVDX.
+ */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } bits;
+    jit_int32_t tmp;
+
+    bits.d = i0;
+    if (bits.l == 0) {
+       FMOVDX(r0, XZR_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.l);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision compare-and-set: emit FCMPED on (r1, r2), then CSET
+ * r0 on condition cc.
+ */
+static void
+_dccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPED(r1, r2);
+    CSET(r0, cc);
+}
+
+/*
+ * Immediate form of dccr: the float64 constant i0 is placed in a scratch
+ * FPR and used as the second compare operand.
+ */
+static void
+_dcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(tmp), i0);
+    dccr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit code setting r0 when r1 and r2 compare unordered or equal. */
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip;
+
+    FCMPED(r1, r2);
+    CSET(r0, CC_VS);           /* provisional result: set if unordered */
+    skip = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    CSET(r0, CC_EQ);           /* equal satisfies condition */
+    /* the branch above is retargeted to land after the second CSET */
+    patch_at(skip, _jit->pc.w);
+}
+dopi(uneq)
+
+/* Emit code setting r0 when r1 and r2 compare ordered and different. */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip;
+
+    FCMPED(r1, r2);
+    CSET(r0, CC_VC);           /* set to 1 if ordered */
+    skip = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered does not satisfy condition */
+    CSET(r0, CC_NE);           /* set to 1 if not equal */
+    /* the branch above is retargeted to land after the second CSET */
+    patch_at(skip, _jit->pc.w);
+}
+dopi(ltgt)
+
+/* Emit a double compare of r0 against r1 and a conditional branch on
+ * cc to address i0.  Returns the address of the branch instruction. */
+static jit_word_t
+_dbccr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch;
+
+    FCMPED(r0, r1);
+    branch = _jit->pc.w;
+    /* displacement is the byte distance shifted right by 2 */
+    B_C(cc, (i0 - branch) >> 2);
+    return (branch);
+}
+
+/* Immediate form of dbccr(): the constant i1 is loaded into a scratch
+ * float register (requested with jit_class_nospill) that is used as
+ * the second compare operand.  Returns what dbccr() returns. */
+static jit_word_t
+_dbcci(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_float64_t i1)
+{
+    jit_word_t         branch;
+    jit_int32_t                ftmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_d(rn(ftmp), i1);
+    branch = dbccr(cc, i0, r0, rn(ftmp));
+    jit_unget_reg(ftmp);
+    return (branch);
+}
+
+/* Emit a branch to i0 taken when r0 and r1 compare unordered or equal.
+ * Returns the address of the unconditional branch to i0. */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         unord, differ, jump;
+
+    FCMPED(r0, r1);
+    unord = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    differ = _jit->pc.w;
+    B_C(BCC_NE, 1);            /* not equal (or unordered) does not satisfy */
+    /* unordered case lands on the jump to the target */
+    patch_at(unord, _jit->pc.w);
+    jump = _jit->pc.w;
+    B((i0 - jump) >> 2);
+    /* not-equal case lands after the jump */
+    patch_at(differ, _jit->pc.w);
+    return (jump);
+}
+dbopi(uneq)
+
+/* Emit a branch to i0 taken when r0 and r1 compare ordered and
+ * different.  Returns the address of the unconditional branch to i0. */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         unord, equal, jump;
+
+    FCMPED(r0, r1);
+    unord = _jit->pc.w;
+    B_C(BCC_VS, 2);            /* jump over if unordered */
+    equal = _jit->pc.w;
+    B_C(BCC_EQ, 1);            /* jump over if equal */
+    jump = _jit->pc.w;
+    B((i0 - jump) >> 2);
+    /* both skipping branches land right after the jump */
+    patch_at(unord, _jit->pc.w);
+    patch_at(equal, _jit->pc.w);
+    return (jump);
+}
+dbopi(ltgt)
+
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Emit code fetching the next double vararg into r0 from the
+     * jit_va_list_t whose address is in r1: read from fptop + fpoff
+     * while fpoff is negative, otherwise from the stack pointer field. */
+    jit_word_t         ge_code;        /* branch taken when fpoff >= 0 */
+    jit_word_t         lt_code;        /* jump over the stack path */
+    jit_int32_t                rg0, rg1;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the fp offset in save area in the first temporary. */
+    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei(_jit->pc.w, rn(rg0), 0);
+
+    /* Load the fp save area top pointer (fptop) in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, fptop));
+
+    /* Load the vararg argument in the result register (r0). */
+    ldxr_d(r0, rn(rg1), rn(rg0));
+
+    /* Update the fp offset: each save slot is 16 bytes wide. */
+    addi(rn(rg0), rn(rg0), 16);
+    stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load the stack argument pointer kept in the va_list. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
+
+    /* Load argument. */
+    ldr_d(r0, rn(rg0));
+
+    /* Update the stack argument pointer: advance by 8 bytes. */
+    addi(rn(rg0), rn(rg0), 8);
+    stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
+
+    /* Where to land if argument was in the fp save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+}
+#endif
diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c
new file mode 100644 (file)
index 0000000..7e22e0e
--- /dev/null
@@ -0,0 +1,402 @@
+
+#if __WORDSIZE == 64
+#define JIT_INSTR_MAX 120
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    120,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    44,        /* va_start */
+    64,        /* va_arg */
+    72,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    20,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    20,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    24,        /* rsbi */
+    4, /* mulr */
+    20,        /* muli */
+    12,        /* qmulr */
+    20,        /* qmuli */
+    12,        /* qmulr_u */
+    20,        /* qmuli_u */
+    4, /* divr */
+    20,        /* divi */
+    4, /* divr_u */
+    12,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    28,        /* remi */
+    12,        /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    20,        /* andi */
+    4, /* orr */
+    20,        /* ori */
+    4, /* xorr */
+    20,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    8, /* ltr */
+    8, /* lti */
+    8, /* ltr_u */
+    8, /* lti_u */
+    8, /* ler */
+    8, /* lei */
+    8, /* ler_u */
+    8, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
+    8, /* ger */
+    8, /* gei */
+    8, /* ger_u */
+    8, /* gei_u */
+    8, /* gtr */
+    8, /* gti */
+    8, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    16,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
+    8, /* htonr_us */
+    8, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    4, /* ldr_ui */
+    12,        /* ldi_ui */
+    4, /* ldr_l */
+    12,        /* ldi_l */
+    8, /* ldxr_c */
+    20,        /* ldxi_c */
+    4, /* ldxr_uc */
+    20,        /* ldxi_uc */
+    4, /* ldxr_s */
+    16,        /* ldxi_s */
+    4, /* ldxr_us */
+    16,        /* ldxi_us */
+    4, /* ldxr_i */
+    20,        /* ldxi_i */
+    4, /* ldxr_ui */
+    16,        /* ldxi_ui */
+    4, /* ldxr_l */
+    20,        /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    4, /* str_l */
+    12,        /* sti_l */
+    4, /* stxr_c */
+    20,        /* stxi_c */
+    4, /* stxr_s */
+    20,        /* stxi_s */
+    4, /* stxr_i */
+    20,        /* stxi_i */
+    4, /* stxr_l */
+    20,        /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    24,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    24,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    8, /* bosubr */
+    8, /* bosubi */
+    8, /* bosubr_u */
+    8, /* bosubi_u */
+    8, /* bxsubr */
+    8, /* bxsubi */
+    8, /* bxsubr_u */
+    8, /* bxsubi_u */
+    4, /* jmpr */
+    20,        /* jmpi */
+    4, /* callr */
+    20,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    96,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    12,        /* addi_f */
+    4, /* subr_f */
+    12,        /* subi_f */
+    12,        /* rsbi_f */
+    4, /* mulr_f */
+    12,        /* muli_f */
+    4, /* divr_f */
+    12,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    8, /* ltr_f */
+    16,        /* lti_f */
+    8, /* ler_f */
+    16,        /* lei_f */
+    8, /* eqr_f */
+    16,        /* eqi_f */
+    8, /* ger_f */
+    16,        /* gei_f */
+    8, /* gtr_f */
+    16,        /* gti_f */
+    8, /* ner_f */
+    16,        /* nei_f */
+    8, /* unltr_f */
+    16,        /* unlti_f */
+    8, /* unler_f */
+    16,        /* unlei_f */
+    16,        /* uneqr_f */
+    24,        /* uneqi_f */
+    8, /* unger_f */
+    16,        /* ungei_f */
+    8, /* ungtr_f */
+    16,        /* ungti_f */
+    16,        /* ltgtr_f */
+    24,        /* ltgti_f */
+    8, /* ordr_f */
+    16,        /* ordi_f */
+    8, /* unordr_f */
+    16,        /* unordi_f */
+    8, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    8, /* movi_f */
+    8, /* ldr_f */
+    16,        /* ldi_f */
+    8, /* ldxr_f */
+    24,        /* ldxi_f */
+    8, /* str_f */
+    16,        /* sti_f */
+    8, /* stxr_f */
+    24,        /* stxi_f */
+    8, /* bltr_f */
+    16,        /* blti_f */
+    8, /* bler_f */
+    16,        /* blei_f */
+    8, /* beqr_f */
+    16,        /* beqi_f */
+    8, /* bger_f */
+    16,        /* bgei_f */
+    8, /* bgtr_f */
+    16,        /* bgti_f */
+    8, /* bner_f */
+    16,        /* bnei_f */
+    8, /* bunltr_f */
+    16,        /* bunlti_f */
+    8, /* bunler_f */
+    16,        /* bunlei_f */
+    16,        /* buneqr_f */
+    24,        /* buneqi_f */
+    8, /* bunger_f */
+    16,        /* bungei_f */
+    8, /* bungtr_f */
+    16,        /* bungti_f */
+    16,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    8, /* bordr_f */
+    16,        /* bordi_f */
+    8, /* bunordr_f */
+    16,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    12,        /* addi_d */
+    4, /* subr_d */
+    12,        /* subi_d */
+    12,        /* rsbi_d */
+    4, /* mulr_d */
+    12,        /* muli_d */
+    4, /* divr_d */
+    12,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    8, /* ltr_d */
+    16,        /* lti_d */
+    8, /* ler_d */
+    16,        /* lei_d */
+    8, /* eqr_d */
+    16,        /* eqi_d */
+    8, /* ger_d */
+    16,        /* gei_d */
+    8, /* gtr_d */
+    16,        /* gti_d */
+    8, /* ner_d */
+    16,        /* nei_d */
+    8, /* unltr_d */
+    16,        /* unlti_d */
+    8, /* unler_d */
+    16,        /* unlei_d */
+    16,        /* uneqr_d */
+    24,        /* uneqi_d */
+    8, /* unger_d */
+    16,        /* ungei_d */
+    8, /* ungtr_d */
+    16,        /* ungti_d */
+    16,        /* ltgtr_d */
+    24,        /* ltgti_d */
+    8, /* ordr_d */
+    16,        /* ordi_d */
+    8, /* unordr_d */
+    16,        /* unordi_d */
+    8, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    12,        /* movi_d */
+    8, /* ldr_d */
+    16,        /* ldi_d */
+    8, /* ldxr_d */
+    24,        /* ldxi_d */
+    8, /* str_d */
+    16,        /* sti_d */
+    8, /* stxr_d */
+    24,        /* stxi_d */
+    8, /* bltr_d */
+    16,        /* blti_d */
+    8, /* bler_d */
+    16,        /* blei_d */
+    8, /* beqr_d */
+    20,        /* beqi_d */
+    8, /* bger_d */
+    16,        /* bgei_d */
+    8, /* bgtr_d */
+    16,        /* bgti_d */
+    8, /* bner_d */
+    16,        /* bnei_d */
+    8, /* bunltr_d */
+    16,        /* bunlti_d */
+    8, /* bunler_d */
+    16,        /* bunlei_d */
+    16,        /* buneqr_d */
+    24,        /* buneqi_d */
+    8, /* bunger_d */
+    16,        /* bungei_d */
+    8, /* bungtr_d */
+    16,        /* bungti_d */
+    16,        /* bltgtr_d */
+    24,        /* bltgti_d */
+    8, /* bordr_d */
+    16,        /* bordi_d */
+    8, /* bunordr_d */
+    16,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c
new file mode 100644 (file)
index 0000000..5b2ff49
--- /dev/null
@@ -0,0 +1,1582 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)  /* true for integer argument indexes 0..7 */
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)  /* true for float argument indexes 0..7 */
+
+typedef struct jit_qreg {      /* 16-byte slot type used for q0..q7 in jit_va_list */
+    jit_float64_t      l;      /* presumably the low 64 bits -- TODO confirm */
+    jit_float64_t      h;      /* presumably the high 64 bits -- TODO confirm */
+} jit_qreg_t;
+
+#define va_gp_top_offset               offsetof(jit_va_list_t, q0)  /* offset just past x7 */
+#define va_fp_top_offset               sizeof(jit_va_list_t)  /* offset just past q7 */
+typedef struct jit_va_list {   /* _jit_ellipsis() reserves one with jit_allocai() */
+    jit_pointer_t      stack;  /* _vaarg_d reads stack arguments here, then adds 8 */
+    jit_pointer_t      gptop;  /* presumably the gpr counterpart of fptop -- use not visible here */
+    jit_pointer_t      fptop;  /* base to which _vaarg_d adds fpoff */
+    jit_int32_t                gpoff;  /* presumably the gpr counterpart of fpoff -- use not visible here */
+    jit_int32_t                fpoff;  /* _vaarg_d uses the fp save area while < 0; stepped by 16 */
+
+    jit_int64_t                x0;     /* x0..x7: eight 8-byte slots */
+    jit_int64_t                x1;
+    jit_int64_t                x2;
+    jit_int64_t                x3;
+    jit_int64_t                x4;
+    jit_int64_t                x5;
+    jit_int64_t                x6;
+    jit_int64_t                x7;
+
+    jit_qreg_t         q0;     /* q0..q7: eight 16-byte slots */
+    jit_qreg_t         q1;
+    jit_qreg_t         q2;
+    jit_qreg_t         q3;
+    jit_qreg_t         q4;
+    jit_qreg_t         q5;
+    jit_qreg_t         q6;
+    jit_qreg_t         q7;
+} jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#define PROTO                          1
+#  include "jit_aarch64-cpu.c"
+#  include "jit_aarch64-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+jit_register_t         _rvs[] = {      /* each entry: class flags | register number, then its name */
+    { rc(gpr) | 0x08,                  "x8" },
+    { rc(gpr) | 0x12,                  "x18" },
+    { rc(gpr) | 0x11,                  "x17" },
+    { rc(gpr) | 0x10,                  "x16" },
+    { rc(gpr) | 0x09,                  "x9" },
+    { rc(gpr) | 0x0a,                  "x10" },
+    { rc(gpr) | 0x0b,                  "x11" },
+    { rc(gpr) | 0x0c,                  "x12" },
+    { rc(gpr) | 0x0d,                  "x13" },
+    { rc(gpr) | 0x0e,                  "x14" },
+    { rc(gpr) | 0x0f,                  "x15" },
+    { rc(sav) | rc(gpr) | 0x13,                "x19" },
+    { rc(sav) | rc(gpr) | 0x14,                "x20" },
+    { rc(sav) | rc(gpr) | 0x15,                "x21" },
+    { rc(sav) | rc(gpr) | 0x16,                "x22" },
+    { rc(sav) | rc(gpr) | 0x17,                "x23" },
+    { rc(sav) | rc(gpr) | 0x18,                "x24" },
+    { rc(sav) | rc(gpr) | 0x19,                "x25" },
+    { rc(sav) | rc(gpr) | 0x1a,                "x26" },
+    { rc(sav) | rc(gpr) | 0x1b,                "x27" },
+    { rc(sav) | rc(gpr) | 0x1c,                "x28" },
+    { 0x1f,                            "sp" },  /* sp, lr and fp carry no class flags */
+    { 0x1e,                            "lr" },
+    { 0x1d,                            "fp" },
+    { rc(arg) | rc(gpr) | 0x07,                "x7" },  /* integer argument registers, x7 down to x0 */
+    { rc(arg) | rc(gpr) | 0x06,                "x6" },
+    { rc(arg) | rc(gpr) | 0x05,                "x5" },
+    { rc(arg) | rc(gpr) | 0x04,                "x4" },
+    { rc(arg) | rc(gpr) | 0x03,                "x3" },
+    { rc(arg) | rc(gpr) | 0x02,                "x2" },
+    { rc(arg) | rc(gpr) | 0x01,                "x1" },
+    { rc(arg) | rc(gpr) | 0x00,                "x0" },
+    { rc(fpr) | 0x1f,                  "v31" },
+    { rc(fpr) | 0x1e,                  "v30" },
+    { rc(fpr) | 0x1d,                  "v29" },
+    { rc(fpr) | 0x1c,                  "v28" },
+    { rc(fpr) | 0x1b,                  "v27" },
+    { rc(fpr) | 0x1a,                  "v26" },
+    { rc(fpr) | 0x19,                  "v25" },
+    { rc(fpr) | 0x18,                  "v24" },
+    { rc(fpr) | 0x17,                  "v23" },
+    { rc(fpr) | 0x16,                  "v22" },
+    { rc(fpr) | 0x15,                  "v21" },
+    { rc(fpr) | 0x14,                  "v20" },
+    { rc(fpr) | 0x13,                  "v19" },
+    { rc(fpr) | 0x12,                  "v18" },
+    { rc(fpr) | 0x11,                  "v17" },
+    { rc(fpr) | 0x10,                  "v16" },
+    { rc(sav) | rc(fpr) | 0x08,                "v8" },
+    { rc(sav) | rc(fpr) | 0x09,                "v9" },
+    { rc(sav) | rc(fpr) | 0x0a,                "v10" },
+    { rc(sav) | rc(fpr) | 0x0b,                "v11" },
+    { rc(sav) | rc(fpr) | 0x0c,                "v12" },
+    { rc(sav) | rc(fpr) | 0x0d,                "v13" },
+    { rc(sav) | rc(fpr) | 0x0e,                "v14" },
+    { rc(sav) | rc(fpr) | 0x0f,                "v15" },
+    { rc(arg) | rc(fpr) | 0x07,                "v7" },  /* float argument registers, v7 down to v0 */
+    { rc(arg) | rc(fpr) | 0x06,                "v6" },
+    { rc(arg) | rc(fpr) | 0x05,                "v5" },
+    { rc(arg) | rc(fpr) | 0x04,                "v4" },
+    { rc(arg) | rc(fpr) | 0x03,                "v3" },
+    { rc(arg) | rc(fpr) | 0x02,                "v2" },
+    { rc(arg) | rc(fpr) | 0x01,                "v1" },
+    { rc(arg) | rc(fpr) | 0x00,                "v0" },
+    { _NOREG,                          "<none>" },  /* terminator; _jit_init() leaves it out of reglen */
+};
+
+/*
+ * Implementation
+ */
+/* CPU probing hook; this implementation performs no work. */
+void
+jit_get_cpu(void)
+{
+    /* intentionally empty */
+}
+
+/* Record the number of usable entries of _rvs[] in the state; the
+ * final _NOREG entry of the table is not counted. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+void
+_jit_prolog(jit_state_t *_jit)
+{   /* Open a new function record: close any function still open, take
+     * the next slot of the functions vector (growing it if needed),
+     * reset its bookkeeping and create its prolog and epilog nodes. */
+    jit_int32_t                 offset;        /* index of the new entry in the functions vector */
+
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {   /* vector full: grow by 16 entries */
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = stack_framesize;
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.alen = 0;
+    _jitc->function->self.aoff = 0;
+    _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* The prolog node is created unlinked and linked by the explicit
+     * jit_link() call below.  Using _no_link here does not mean that
+     * call can be removed by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* Epilog node fields:
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve length bytes in the current function's automatic area and
+ * return the resulting (downward growing) offset.  Before reserving,
+ * the running offset is aligned: not at all for lengths 0 and 1, to 2
+ * for length 2, to 4 for lengths 3 and 4, and to 8 for anything else.
+ * Unless the state is already being realized, an allocai synthesized
+ * note recording the offset and length is emitted. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+
+    if (length == 2)
+        _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+        _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+        _jitc->function->self.aoff &= -8;
+
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {
+        jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+        jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/* Synthesize a run-time stack allocation.  The size in register v is
+ * negated and masked with -16; that amount is added both to the value
+ * kept in the function's aoffoff frame slot (loaded into and stored
+ * back from register u) and to the stack pointer.  The aoffoff slot
+ * is created on first use. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 delta, sp_copy;
+
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+        _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+        _jitc->function->allocar = 1;
+    }
+
+    /* delta = (-v) & -16 */
+    delta = jit_get_reg(jit_class_gpr);
+    jit_negr(delta, v);
+    jit_andi(delta, delta, -16);
+
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, delta);
+
+    /* Cannot "addr sp, sp, reg" because in this context "sp" is
+     * "[w|x]zr", the zero register, so the addition is done in a
+     * temporary.  Note that "mov r1, sp" does not work either, but the
+     * proper encoding can be triggered before actually emitting with
+     * "add r1, sp, 0". */
+    sp_copy = jit_get_reg(jit_class_gpr);
+    jit_addi(sp_copy, JIT_SP, 0);
+    jit_addr(sp_copy, sp_copy, delta);
+    jit_addi(JIT_SP, sp_copy, 0);
+    jit_unget_reg(sp_copy);
+
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(delta);
+    jit_dec_synth();
+}
+
+/* Synthesize a return: an unconditional jump patched to the current
+ * function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Synthesize returning the integer register u: copy it to JIT_RET
+ * when it is a different register, mark JIT_RET live, then return. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET) {
+        jit_movr(JIT_RET, u);
+    }
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Synthesize returning the integer constant u through JIT_RET. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+
+    jit_movi(JIT_RET, u);      /* load the constant in the return register */
+    jit_ret();                 /* then go to the epilog */
+
+    jit_dec_synth();
+}
+
+/* Synthesize returning the float register u: when it already is
+ * JIT_FRET only mark it live, otherwise copy it there; then return. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (u == JIT_FRET)
+        jit_live(JIT_FRET);
+    else
+        jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Synthesize returning the float constant u through JIT_FRET. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+
+    jit_movi_f(JIT_FRET, u);   /* load the constant in the return register */
+    jit_ret();                 /* then go to the epilog */
+
+    jit_dec_synth();
+}
+
+/* Synthesize returning the double register u: when it already is
+ * JIT_FRET only mark it live, otherwise copy it there; then return. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET)
+        jit_live(JIT_FRET);
+    else
+        jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Synthesize returning the double constant u through JIT_FRET. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+
+    jit_movi_d(JIT_FRET, u);   /* load the constant in the return register */
+    jit_ret();                 /* then go to the epilog */
+
+    jit_dec_synth();
+}
+
+/* Close the function being defined: link its (so far unlinked) epilog
+ * node and clear the current function pointer. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* the epilog node must not have been linked yet */
+    assert(_jitc->function->epilog->next == NULL);
+
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether the argument node u is within the register range:
+ * integer arguments are checked with jit_arg_reg_p(), float and double
+ * ones with jit_arg_f_reg_p(). */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+        assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+        return (jit_arg_f_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Mark a varargs boundary.  Inside a prepare sequence the call being
+ * built is flagged jit_call_varargs.  Otherwise the function being
+ * defined is flagged, a jit_va_list_t is reserved in its frame, and
+ * the initial gp/fp save area offsets are computed from the number of
+ * register arguments already declared. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare) {
+        jit_link_prolog();
+        assert(!(_jitc->function->self.call & jit_call_varargs));
+        _jitc->function->self.call |= jit_call_varargs;
+
+        /* Allocate va_list like object in the stack,
+         * with enough space to save all argument
+         * registers, and use fixed offsets for them. */
+        _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+
+        /* Initialize gp offset in save area: minus 8 bytes per
+         * remaining register, or 0 when none remains. */
+        _jitc->function->vagp =
+            jit_arg_reg_p(_jitc->function->self.argi) ?
+            (8 - _jitc->function->self.argi) * -8 : 0;
+
+        /* Initialize fp offset in save area: minus 16 bytes per
+         * remaining register, or 0 when none remains. */
+        _jitc->function->vafp =
+            jit_arg_f_reg_p(_jitc->function->self.argf) ?
+            (8 - _jitc->function->self.argf) * -16 : 0;
+    }
+    else {
+        jit_link_prepare();
+        assert(!(_jitc->function->call.call & jit_call_varargs));
+        _jitc->function->call.call |= jit_call_varargs;
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize passing the register u as a call argument; it is handed
+ * to jit_pushargr() like any other integer register argument. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+
+    jit_pushargr(u);
+
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the function being defined.
+ * The node's first word is the register index while one is available,
+ * otherwise the argument's offset in the frame (self.size grows by one
+ * word); its second word is the incremented argument count. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 where;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+    if (!jit_arg_reg_p(_jitc->function->self.argi)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argi++;
+    arg = jit_new_node_ww(jit_code_arg, where,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next float argument of the function being defined.
+ * The node's first word is the float register index while one is
+ * available, otherwise the argument's offset in the frame (self.size
+ * grows by one word); its second word is the incremented argument
+ * count. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 where;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+    if (!jit_arg_f_reg_p(_jitc->function->self.argf)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    arg = jit_new_node_ww(jit_code_arg_f, where,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next double argument of the function being defined.
+ * The node's first word is the float register index while one is
+ * available, otherwise the argument's offset in the frame (self.size
+ * grows by one word); its second word is the incremented argument
+ * count. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 where;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+    if (!jit_arg_f_reg_p(_jitc->function->self.argf)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    arg = jit_new_node_ww(jit_code_arg_d, where,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Synthesize fetching the integer argument v into u as a signed char:
+ * loaded from JIT_FP + offset for a stack argument, otherwise
+ * sign-extended from its argument register. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_c(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_c(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as an unsigned
+ * char: loaded from JIT_FP + offset for a stack argument, otherwise
+ * zero-extended from its argument register. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_uc(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_uc(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as a signed
+ * short: loaded from JIT_FP + offset for a stack argument, otherwise
+ * sign-extended from its argument register. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_s(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_s(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as an unsigned
+ * short: loaded from JIT_FP + offset for a stack argument, otherwise
+ * zero-extended from its argument register. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_us(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_us(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as a signed int:
+ * loaded from JIT_FP + offset for a stack argument, otherwise
+ * sign-extended from its argument register. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_i(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_i(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as an unsigned
+ * int: loaded from JIT_FP + offset for a stack argument, otherwise
+ * zero-extended from its argument register. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_ui(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_extr_ui(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the integer argument v into u as a full word:
+ * loaded from JIT_FP + offset for a stack argument, otherwise copied
+ * from its argument register. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_ldxi_l(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_movr(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize overwriting the integer argument v with register u:
+ * stored at JIT_FP + offset for a stack argument, otherwise copied to
+ * its argument register. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        jit_stxi(v->u.w, JIT_FP, u);
+    }
+    else {
+        jit_movr(JIT_RA0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize overwriting the integer argument v with the constant u.
+ * A register argument is loaded directly; a stack argument is written
+ * at JIT_FP + offset through a temporary register. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        jit_movi(tmp, u);
+        jit_stxi(v->u.w, JIT_FP, tmp);
+        jit_unget_reg(tmp);
+    }
+    else
+        jit_movi(JIT_RA0 - v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the float argument v into register u: copied
+ * from its float argument register when it has one, otherwise loaded
+ * from JIT_FP + offset. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    /* Float arguments are classified with the float predicate, the
+     * same one used when the argument was declared, so this stays
+     * correct if the integer and float register counts ever differ. */
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(u, JIT_FA0 - v->u.w);
+    else
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+
+/* Synthesize overwriting the float argument v with register u:
+ * stored at JIT_FP + offset for a stack argument, otherwise copied to
+ * its float argument register. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+        jit_stxi_f(v->u.w, JIT_FP, u);
+    }
+    else {
+        jit_movr_f(JIT_FA0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Synthesize overwriting the float argument v with the constant u.
+ * A register argument is loaded directly; a stack argument is written
+ * at JIT_FP + offset through a temporary float register. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                ftmp;
+
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+        ftmp = jit_get_reg(jit_class_fpr);
+        jit_movi_f(ftmp, u);
+        jit_stxi_f(v->u.w, JIT_FP, ftmp);
+        jit_unget_reg(ftmp);
+    }
+    else
+        jit_movi_f(JIT_FA0 - v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Synthesize fetching the double argument v into register u: loaded
+ * from JIT_FP + offset for a stack argument, otherwise copied from
+ * its float argument register. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+        jit_ldxi_d(u, JIT_FP, v->u.w);
+    }
+    else {
+        jit_movr_d(u, JIT_FA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite the incoming double argument described by v with the value
+ * of register u.  When jit_arg_f_reg_p() accepts v->u.w the value is
+ * moved to the register JIT_FA0 - v->u.w, otherwise it is stored at
+ * offset v->u.w from JIT_FP.
+ */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    /* Use the floating point predicate, matching _jit_getarg_d() and
+     * the float variants, since the target is a JIT_FA0 based
+     * register. */
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(JIT_FA0 - v->u.w, u);
+    else
+       jit_stxi_d(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite the incoming double argument described by v with the
+ * immediate u.  Register arguments receive the constant directly; the
+ * others go through a temporary floating point register that is stored
+ * at offset v->u.w from JIT_FP.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    /* Use the floating point predicate, matching _jit_getarg_d() and
+     * the float variants, since the target is a JIT_FA0 based
+     * register. */
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_d(JIT_FA0 - v->u.w, u);
+    else {
+       /* no register: build the constant in a temporary and spill it */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass register u as the next integer argument of the call being
+ * prepared.  While jit_arg_reg_p() accepts the running call.argi count
+ * the value goes to the register JIT_RA0 - call.argi; afterwards it is
+ * stored at offset call.size from JIT_SP and call.size advances by one
+ * word.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_word_t          argno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argi;
+    if (jit_arg_reg_p(argno)) {
+        jit_movr(JIT_RA0 - argno, u);
+        _jitc->function->call.argi++;
+    }
+    else {
+        jit_stxi(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass the immediate u as the next integer argument of the call being
+ * prepared.  Register arguments receive the constant directly; stack
+ * arguments are built in a temporary register and stored at offset
+ * call.size from JIT_SP, after which call.size advances by one word.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_word_t          argno;
+    jit_int32_t         scratch;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argi;
+    if (jit_arg_reg_p(argno)) {
+        jit_movi(JIT_RA0 - argno, u);
+        _jitc->function->call.argi++;
+    }
+    else {
+        /* the temporary is released before the stack size is updated */
+        scratch = jit_get_reg(jit_class_gpr);
+        jit_movi(scratch, u);
+        jit_stxi(_jitc->function->call.size, JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass float register u as the next floating point argument of the call
+ * being prepared.  While jit_arg_f_reg_p() accepts the running call.argf
+ * count the value goes to the register JIT_FA0 - call.argf; afterwards
+ * it is stored at offset call.size from JIT_SP and call.size advances by
+ * one word.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_word_t          argno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argf;
+    if (jit_arg_f_reg_p(argno)) {
+        jit_movr_f(JIT_FA0 - argno, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass the float immediate u as the next floating point argument of the
+ * call being prepared.  Register arguments receive the constant
+ * directly; stack arguments are built in a temporary floating point
+ * register and stored at offset call.size from JIT_SP, after which
+ * call.size advances by one word.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_word_t          argno;
+    jit_int32_t         scratch;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argf;
+    if (jit_arg_f_reg_p(argno)) {
+        jit_movi_f(JIT_FA0 - argno, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        /* the temporary is released before the stack size is updated */
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_f(scratch, u);
+        jit_stxi_f(_jitc->function->call.size, JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass double register u as the next floating point argument of the
+ * call being prepared.  While jit_arg_f_reg_p() accepts the running
+ * call.argf count the value goes to the register JIT_FA0 - call.argf;
+ * afterwards it is stored at offset call.size from JIT_SP and call.size
+ * advances by one word.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_word_t          argno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argf;
+    if (jit_arg_f_reg_p(argno)) {
+        jit_movr_d(JIT_FA0 - argno, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass the double immediate u as the next floating point argument of
+ * the call being prepared.  Register arguments receive the constant
+ * directly; stack arguments are built in a temporary floating point
+ * register and stored at offset call.size from JIT_SP, after which
+ * call.size advances by one word.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_word_t          argno;
+    jit_int32_t         scratch;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    argno = _jitc->function->call.argf;
+    if (jit_arg_f_reg_p(argno)) {
+        jit_movi_d(JIT_FA0 - argno, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        /* the temporary is released before the stack size is updated */
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_d(scratch, u);
+        jit_stxi_d(_jitc->function->call.size, JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Tell whether register regno carries an argument of the call described
+ * by node.  node->v.w is read as the number of integer argument
+ * registers in use and node->w.w as the number of floating point ones
+ * (both are filled in by _jit_finishr()/_jit_finishi()).
+ *
+ * Returns 1 when regno is one of those argument registers, 0 otherwise.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    jit_int32_t                argno;
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+       /* Keep regno untouched: each index below must be computed from
+        * the original register number.  Reusing regno for the integer
+        * index made the floating point test evaluate
+        * JIT_FA0 - (JIT_RA0 - regno) instead of JIT_FA0 - regno. */
+       argno = JIT_RA0 - regno;
+       if (argno >= 0 && argno < node->v.w)
+           return (1);
+       if (spec & jit_class_fpr) {
+           argno = JIT_FA0 - regno;
+           if (argno >= 0 && argno < node->w.w)
+               return (1);
+       }
+    }
+
+    return (0);
+}
+
+/*
+ * Finish the call being prepared by calling the address held in
+ * register r0.  The call node records how many integer (v.w) and
+ * floating point (w.w) argument registers the call uses, then the
+ * per-call counters and the pending prepare pointer are reset.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    /* keep self.alen at least as large as the stack space of any call */
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_callr(r0);
+    /* Record the counters of the call (call.argi), as _jit_finishi()
+     * does; self.argi counts the arguments of the enclosing function
+     * and says nothing about the registers used by this call. */
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Finish the call being prepared by calling the immediate address i0.
+ * The call node records how many integer (v.w) and floating point (w.w)
+ * argument registers the call uses, then the per-call counters and the
+ * pending prepare pointer are reset.
+ *
+ * Returns the call node.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *insn;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    /* keep self.alen at least as large as the stack space of any call */
+    if (_jitc->function->call.size > _jitc->function->self.alen)
+        _jitc->function->self.alen = _jitc->function->call.size;
+    insn = jit_calli(i0);
+    insn->v.w = _jitc->function->call.argi;
+    insn->w.w = _jitc->function->call.argf;
+    /* start the next call from a clean state */
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (insn);
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_c(),
+ * bracketed as a retval_c synthesized instruction.
+ */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_uc(),
+ * bracketed as a retval_uc synthesized instruction.
+ */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_s(),
+ * bracketed as a retval_s synthesized instruction.
+ */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_us(),
+ * bracketed as a retval_us synthesized instruction.
+ */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_i(),
+ * bracketed as a retval_i synthesized instruction.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    jit_extr_i(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the result of a call from JIT_RET into r0 through jit_extr_ui(),
+ * bracketed as a retval_ui synthesized instruction.
+ */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the word sized result of a call from JIT_RET into r0, bracketed
+ * as a retval_l synthesized instruction.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    /* no move is needed when the caller asked for JIT_RET itself */
+    if (r0 != JIT_RET)
+       jit_movr(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the float result of a call from JIT_FRET into r0, bracketed as
+ * a retval_f synthesized instruction.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    /* no move is needed when the caller asked for JIT_FRET itself */
+    if (r0 != JIT_FRET)
+       jit_movr_f(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch the double result of a call from JIT_FRET into r0, bracketed as
+ * a retval_d synthesized instruction.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    /* no move is needed when the caller asked for JIT_FRET itself */
+    if (r0 != JIT_FRET)
+       jit_movr_d(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+/*
+ * Walk the node list starting at _jitc->head and emit the machine code
+ * of every node at _jit->pc.
+ *
+ * Branches, jumps, calls and movi nodes that reference a label not yet
+ * emitted are recorded through patch() and resolved with patch_at()
+ * once the whole list was processed.  A function whose emission set
+ * _jitc->again is rewound to its prolog and emitted again.
+ *
+ * Returns NULL when _jit->pc reaches _jitc->code.end before a node is
+ * emitted, otherwise _jit->code.ptr after the generated range was
+ * passed to jit_flush().
+ */
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_word_t          value;
+    jit_int32_t                 offset;
+    /* state saved at a prolog, used to restart emission of a function */
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.const_offset = undo.patch_offset = 0;
+    /*
+     * Helper macros expanding to one switch case each.  The suffix names
+     * the operand kinds taken from node->u, node->v and node->w: r is a
+     * register translated with rn(), w a word, f/d a float/double
+     * immediate.  The case_b* variants handle branches: the target
+     * address is used directly when the label already has
+     * jit_flag_patch set, otherwise the current pc is used as a
+     * placeholder and the instruction is recorded with patch().
+     */
+#  define assert_data(node)            /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
+               break
+#define case_rrd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_brf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
+               else {                                                  \
+                   word = name##i_f(_jit->pc.w, rn(node->v.w),         \
+                                    node->w.f);                        \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
+               else {                                                  \
+                   word = name##i_d(_jit->pc.w, rn(node->v.w),         \
+                                    node->w.d);                        \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       /* give up when the code buffer is exhausted */
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               /* only word alignment emits padding; smaller power of
+                * two values are accepted and emit nothing */
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(hton, _ul);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   /* the operand is another node: data, or a label
+                    * whose address may not be known yet */
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add);
+               case_rrr(sub, _f);
+               case_rrf(sub);
+               case_rrf(rsb);
+               case_rrr(mul, _f);
+               case_rrf(mul);
+               case_rrr(div, _f);
+               case_rrf(div);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt);
+               case_rrr(le, _f);
+               case_rrf(le);
+               case_rrr(eq, _f);
+               case_rrf(eq);
+               case_rrr(ge, _f);
+               case_rrf(ge);
+               case_rrr(gt, _f);
+               case_rrf(gt);
+               case_rrr(ne, _f);
+               case_rrf(ne);
+               case_rrr(unlt, _f);
+               case_rrf(unlt);
+               case_rrr(unle, _f);
+               case_rrf(unle);
+               case_rrr(uneq, _f);
+               case_rrf(uneq);
+               case_rrr(unge, _f);
+               case_rrf(unge);
+               case_rrr(ungt, _f);
+               case_rrf(ungt);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt);
+               case_rrr(ord, _f);
+               case_rrf(ord);
+               case_rrr(unord, _f);
+               case_rrf(unord);
+               case_brr(blt, _f);
+               case_brf(blt);
+               case_brr(ble, _f);
+               case_brf(ble);
+               case_brr(beq, _f);
+               case_brf(beq);
+               case_brr(bge, _f);
+               case_brf(bge);
+               case_brr(bgt, _f);
+               case_brf(bgt);
+               case_brr(bne, _f);
+               case_brf(bne);
+               case_brr(bunlt, _f);
+               case_brf(bunlt);
+               case_brr(bunle, _f);
+               case_brf(bunle);
+               case_brr(buneq, _f);
+               case_brf(buneq);
+               case_brr(bunge, _f);
+               case_brf(bunge);
+               case_brr(bungt, _f);
+               case_brf(bungt);
+               case_brr(bltgt, _f);
+               case_brf(bltgt);
+               case_brr(bord, _f);
+               case_brf(bord);
+               case_brr(bunord, _f);
+               case_brf(bunord);
+               case_rrr(add, _d);
+               case_rrd(add);
+               case_rrr(sub, _d);
+               case_rrd(sub);
+               case_rrd(rsb);
+               case_rrr(mul, _d);
+               case_rrd(mul);
+               case_rrr(div, _d);
+               case_rrd(div);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrd(lt);
+               case_rrr(le, _d);
+               case_rrd(le);
+               case_rrr(eq, _d);
+               case_rrd(eq);
+               case_rrr(ge, _d);
+               case_rrd(ge);
+               case_rrr(gt, _d);
+               case_rrd(gt);
+               case_rrr(ne, _d);
+               case_rrd(ne);
+               case_rrr(unlt, _d);
+               case_rrd(unlt);
+               case_rrr(unle, _d);
+               case_rrd(unle);
+               case_rrr(uneq, _d);
+               case_rrd(uneq);
+               case_rrr(unge, _d);
+               case_rrd(unge);
+               case_rrr(ungt, _d);
+               case_rrd(ungt);
+               case_rrr(ltgt, _d);
+               case_rrd(ltgt);
+               case_rrr(ord, _d);
+               case_rrd(ord);
+               case_rrr(unord, _d);
+               case_rrd(unord);
+               case_brr(blt, _d);
+               case_brd(blt);
+               case_brr(ble, _d);
+               case_brd(ble);
+               case_brr(beq, _d);
+               case_brd(beq);
+               case_brr(bge, _d);
+               case_brd(bge);
+               case_brr(bgt, _d);
+               case_brd(bgt);
+               case_brr(bne, _d);
+               case_brd(bne);
+               case_brr(bunlt, _d);
+               case_brd(bunlt);
+               case_brr(bunle, _d);
+               case_brd(bunle);
+               case_brr(buneq, _d);
+               case_brd(buneq);
+               case_brr(bunge, _d);
+               case_brd(bunge);
+               case_brr(bungt, _d);
+               case_brd(bungt);
+               case_brr(bltgt, _d);
+               case_brd(bltgt);
+               case_brr(bord, _d);
+               case_brd(bord);
+               case_brr(bunord, _d);
+               case_brd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   /* jump to a node: direct when the label is already
+                    * emitted, recorded for later patching otherwise */
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   /* call to a node: same handling as jmpi above */
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               /* save what is needed to emit this function again */
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   /* something set _jitc->again while the body was
+                    * emitted: forget the addresses of the labels of
+                    * this function, rewind pc and the patch list to the
+                    * state saved at the prolog and start over */
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           /* the codes below emit nothing here; presumably their effect
+            * is carried by the nodes synthesized when they were created
+            * (see the jit_inc_synth users) -- TODO confirm */
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:             case jit_code_getarg_ui:
+           case jit_code_getarg_l:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_ui:            case jit_code_retval_l:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    /* resolve the forward references recorded by patch(): a movi keeps
+     * its target node in v.n, every other instruction in u.n */
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = _jitc->patches.ptr[offset].inst;
+       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(word, value);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_aarch64-cpu.c"
+#  include "jit_aarch64-fpu.c"
+#undef CODE
+
+/*
+ * Call __clear_cache() on the range [fptr, tptr) widened to whole
+ * pages: fptr is rounded down and tptr rounded up to a multiple of the
+ * page size reported by sysconf().  Does nothing unless built with a
+ * compiler defining __GNUC__.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t         page = sysconf(_SC_PAGE_SIZE);
+    jit_word_t         mask = -page;
+    jit_word_t         from = (jit_word_t)fptr & mask;
+    jit_word_t         to = (((jit_word_t)tptr) + page - 1) & mask;
+
+    __clear_cache((void *)from, (void *)to);
+#endif
+}
+
+/*
+ * Emit ldxi() with r0 and r1 translated through rn() and the offset i0
+ * passed unchanged.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit stxi() with the offset i0 passed unchanged and r0 and r1
+ * translated through rn().
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Emit ldxi_d() with r0 and r1 translated through rn() and the offset
+ * i0 passed unchanged.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit stxi_d() with the offset i0 passed unchanged and r0 and r1
+ * translated through rn().
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Record that the instruction at address instr, emitted for node, must
+ * be fixed up once its target is known.  The target node (v.n for a
+ * movi, u.n otherwise) must not have jit_flag_patch set yet.  The patch
+ * table grows by 1024 entries whenever it is full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_word_t          slot;
+
+    assert(node->flag & jit_flag_node);
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+        jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                    _jitc->patches.length * sizeof(jit_patch_t),
+                    (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+        _jitc->patches.length += 1024;
+    }
+    /* claim the next free entry only after a possible reallocation */
+    slot = _jitc->patches.offset++;
+    _jitc->patches.ptr[slot].inst = instr;
+    _jitc->patches.ptr[slot].node = node;
+}
diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c
new file mode 100644 (file)
index 0000000..8bfef9c
--- /dev/null
@@ -0,0 +1,2792 @@
+/*
+ * Copyright (C) 2014-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define _u2(v)                       ((v) & 0x3) /* _uN(v): keep only the low N bits of v */
+#  define _s2_p(v)                     ((v) >= -0x2 && (v) <= 0x1) /* _sN_p(v): true when v lies in the N-bit signed range; v is evaluated twice */
+#  define _u2_p(v)                     ((v) >= 0 && (v) <= 0x3) /* _uN_p(v): true when v lies in the N-bit unsigned range; v is evaluated twice */
+#  define _u5(v)                       ((v) & 0x1f)
+#  define _s5_p(v)                     ((v) >= -0x10 && (v) <= 0xf)
+#  define _u5_p(v)                     ((v) >= 0 && (v) <= 0x1f)
+#  define _u6(v)                       ((v) & 0x3f)
+#  define _s6_p(v)                     ((v) >= -0x20 && (v) <= 0x1f)
+#  define _u6_p(v)                     ((v) >= 0 && (v) <= 0x3f)
+#  define _u7(v)                       ((v) & 0x7f)
+#  define _s7_p(v)                     ((v) >= -0x40 && (v) <= 0x3f)
+#  define _u7_p(v)                     ((v) >= 0 && (v) <= 0x7f)
+#  define _u8(v)                       ((v) & 0xff)
+#  define _s8_p(v)                     ((v) >= -0x80 && (v) <= 0x7f)
+#  define _u8_p(v)                     ((v) >= 0 && (v) <= 0xff)
+#  define _u11(v)                      ((v) & 0x7ff)
+#  define _s11_p(v)                    ((v) >= -0x400 && (v) <= 0x3ff)
+#  define _u11_p(v)                    ((v) >= 0 && (v) <= 0x7ff)
+#  define _u14(v)                      ((v) & 0x3fff)
+#  define _s14_p(v)                    ((v) >= -0x2000 && (v) <= 0x1fff)
+#  define _u14_p(v)                    ((v) >= 0 && (v) <= 0x3fff)
+#  define _u16(v)                      ((v) & 0xffff)
+#  define _s16_p(v)                    ((v) >= -0x8000 && (v) <= 0x7fff)
+#  define _u16_p(v)                    ((v) >= 0 && (v) <= 0xffff)
+#  define _u21(v)                      ((v) & 0x1fffff)
+#  define _s21_p(v)                    ((v) >= -0x100000 && (v) <= 0xfffff)
+#  define _u21_p(v)                    ((v) >= 0 && (v) <= 0x1fffff)
+#  define _u26(v)                      ((v) & 0x3ffffff)
+#  define _s26_p(v)                    ((v) >= -0x2000000 && (v) <= 0x1ffffff)
+#  define _u26_p(v)                    ((v) >= 0 && (v) <= 0x3ffffff)
+#  define _u32(v)                      ((v) & 0xffffffff)
+#  define _s32_p(v)                    ((v) >= -0x80000000 && (v) <= 0x7fffffff) /* NOTE(review): with a 32-bit int, 0x80000000 has an unsigned type, so -0x80000000 is a positive value and the lower bound looks wrong for negative v - confirm, and audit callers before changing */
+#  define _u32_p(v)                    ((v) >= 0 && (v) <= 0xffffffff)
+#  define ii(i)                                *_jit->pc.ui++ = i /* store i at _jit->pc.ui and advance that pointer by one element */
+#  define stack_framesize              224 /* presumably the fixed frame size in bytes used by the prolog/epilog - confirm */
+#  define _S0_REGNO                    0x09 /* _x_REGNO: integer register numbers; the names presumably follow the Alpha calling-standard roles - confirm */
+#  define _S1_REGNO                    0x0a
+#  define _S2_REGNO                    0x0b
+#  define _S3_REGNO                    0x0c
+#  define _S4_REGNO                    0x0d
+#  define _S5_REGNO                    0x0e
+#  define _FP_REGNO                    0x0f
+#  define _A0_REGNO                    0x10
+#  define _A1_REGNO                    0x11
+#  define _A2_REGNO                    0x12
+#  define _V0_REGNO                    0x00
+#  define _T0_REGNO                    0x01
+#  define _T1_REGNO                    0x02
+#  define _RA_REGNO                    0x1a
+#  define _PV_REGNO                    0x1b
+#  define _GP_REGNO                    0x1d
+#  define _SP_REGNO                    0x1e
+#  define _R31_REGNO                   0x1f /* used as the fixed operand of NOP, MOVi, NEGL/NEGQ and NOT; presumably the always-zero register - confirm */
+#  define Pcd(o,n)                     _Pcd(_jit,o,n) /* used only by CALL_PAL in this file */
+static void _Pcd(jit_state_t*,int,unsigned int) maybe_unused;
+#  define Bra(o,ra,d)                  _Bra(_jit,o,ra,d) /* used by the B* branch mnemonics: opcode, register, displacement */
+static void _Bra(jit_state_t*,int,int,int);
+#  define Mem(o,ra,rb,d)                       _Mem(_jit,o,ra,rb,d) /* used by loads, stores and the opcode 0x18 miscellaneous group */
+static void _Mem(jit_state_t*,int,int,int,unsigned int);
+#  define Mbr(o,ra,rb,h,d)             _Mbr(_jit,o,ra,rb,h,d) /* used by JMP/JSR/RET/JSR_COROUTINE; h takes the values 0..3 there */
+static void _Mbr(jit_state_t*,int,int,int,int,int);
+#  define Opr(o,ra,rb,f,rc)            _Opr(_jit,o,ra,rb,f,rc) /* three-register form: opcode, ra, rb, function code, rc */
+static void _Opr(jit_state_t*,int,int,int,unsigned int,int);
+#  define Opi(o,ra,i,f,rc)             _Opi(_jit,o,ra,i,f,rc) /* same as Opr with an immediate i in place of rb */
+static void _Opi(jit_state_t*,int,int,unsigned int,unsigned int,int);
+#  define ADDL(ra,rb,rc)               Opr(0x10,ra,rb,0x00,rc) /* pattern used below: MNEMONIC -> Opr (register rb), MNEMONICi -> Opi (immediate) with the same opcode and function code */
+#  define ADDLi(ra,im,rc)              Opi(0x10,ra,im,0x00,rc)
+#  define ADDL_V(ra,rb,rc)             Opr(0x10,ra,rb,0x40,rc)
+#  define ADDL_Vi(ra,im,rc)            Opi(0x10,ra,im,0x40,rc)
+#  define ADDQ(ra,rb,rc)               Opr(0x10,ra,rb,0x20,rc)
+#  define ADDQi(ra,im,rc)              Opi(0x10,ra,im,0x20,rc)
+#  define ADDQ_V(ra,rb,rc)             Opr(0x10,ra,rb,0x60,rc)
+#  define ADDQ_Vi(ra,im,rc)            Opi(0x10,ra,im,0x60,rc)
+#  define AMASK(rb,rc)                 Opr(0x11,_R31_REGNO,rb,0x61,rc) /* ra field fixed to _R31_REGNO */
+#  define AND(ra,rb,rc)                        Opr(0x11,ra,rb,0x00,rc)
+#  define ANDi(ra,im,rc)               Opi(0x11,ra,im,0x00,rc)
+#  define BEQ(ra,d)                    Bra(0x39,ra,d)
+#  define BGE(ra,d)                    Bra(0x3e,ra,d)
+#  define BGT(ra,d)                    Bra(0x3f,ra,d)
+#  define BIC(ra,rb,rc)                        Opr(0x11,ra,rb,0x08,rc)
+#  define ANDNOT(ra,rb,rc)             BIC(ra,rb,rc) /* alias of BIC */
+#  define BICi(ra,im,rc)               Opi(0x11,ra,im,0x08,rc)
+#  define ANDNOTi(ra,im,rc)            BICi(ra,im,rc) /* alias of BICi */
+#  define BIS(ra,rb,rc)                        Opr(0x11,ra,rb,0x20,rc)
+#  define BISi(ra,im,rc)               Opi(0x11,ra,im,0x20,rc)
+#  define OR(ra,rb,rc)                 BIS(ra,rb,rc) /* alias of BIS */
+#  define ORi(ra,im,rc)                        BISi(ra,im,rc) /* alias of BISi */
+#  define BLBC(ra,d)                   Bra(0x38,ra,d)
+#  define BLBS(ra,d)                   Bra(0x3c,ra,d)
+#  define BLE(ra,d)                    Bra(0x3b,ra,d)
+#  define BLT(ra,d)                    Bra(0x3a,ra,d)
+#  define BNE(ra,d)                    Bra(0x3d,ra,d)
+#  define BR(ra,d)                     Bra(0x30,ra,d)
+#  define BSR(ra,d)                    Bra(0x34,ra,d)
+#  define CALL_PAL(c)                  Pcd(0x00,c) /* the only user of Pcd in this file */
+#  define CMOVEQ(ra,rb,rc)             Opr(0x11,ra,rb,0x24,rc)
+#  define CMOVEQi(ra,im,rc)            Opi(0x11,ra,im,0x24,rc)
+#  define CMOVGE(ra,rb,rc)             Opr(0x11,ra,rb,0x46,rc)
+#  define CMOVGEi(ra,im,rc)            Opi(0x11,ra,im,0x46,rc)
+#  define CMOVGT(ra,rb,rc)             Opr(0x11,ra,rb,0x66,rc)
+#  define CMOVGTi(ra,im,rc)            Opi(0x11,ra,im,0x66,rc)
+#  define CMOVLBC(ra,rb,rc)            Opr(0x11,ra,rb,0x16,rc)
+#  define CMOVLBCi(ra,im,rc)           Opi(0x11,ra,im,0x16,rc)
+#  define CMOVLBS(ra,rb,rc)            Opr(0x11,ra,rb,0x14,rc)
+#  define CMOVLBSi(ra,im,rc)           Opi(0x11,ra,im,0x14,rc)
+#  define CMOVLE(ra,rb,rc)             Opr(0x11,ra,rb,0x64,rc)
+#  define CMOVLEi(ra,im,rc)            Opi(0x11,ra,im,0x64,rc)
+#  define CMOVLT(ra,rb,rc)             Opr(0x11,ra,rb,0x44,rc)
+#  define CMOVLTi(ra,im,rc)            Opi(0x11,ra,im,0x44,rc)
+#  define CMOVNE(ra,rb,rc)             Opr(0x11,ra,rb,0x26,rc)
+#  define CMOVNEi(ra,im,rc)            Opi(0x11,ra,im,0x26,rc)
+#  define CMPBGE(ra,rb,rc)             Opr(0x10,ra,rb,0x0f,rc)
+#  define CMPBGEi(ra,im,rc)            Opi(0x10,ra,im,0x0f,rc)
+#  define CMPEQ(ra,rb,rc)              Opr(0x10,ra,rb,0x2d,rc)
+#  define CMPEQi(ra,im,rc)             Opi(0x10,ra,im,0x2d,rc)
+#  define CMPLE(ra,rb,rc)              Opr(0x10,ra,rb,0x6d,rc)
+#  define CMPLEi(ra,im,rc)             Opi(0x10,ra,im,0x6d,rc)
+#  define CMPLT(ra,rb,rc)              Opr(0x10,ra,rb,0x4d,rc)
+#  define CMPLTi(ra,im,rc)             Opi(0x10,ra,im,0x4d,rc)
+#  define CMPULE(ra,rb,rc)             Opr(0x10,ra,rb,0x3d,rc)
+#  define CMPULEi(ra,im,rc)            Opi(0x10,ra,im,0x3d,rc)
+#  define CMPULT(ra,rb,rc)             Opr(0x10,ra,rb,0x1d,rc)
+#  define CMPULTi(ra,im,rc)            Opi(0x10,ra,im,0x1d,rc)
+#  define CTLZ(rb,rc)                  Opr(0x1c,_R31_REGNO,rb,0x32,rc) /* CTLZ/CTPOP/CTTZ take one source: ra field fixed to _R31_REGNO */
+#  define CTPOP(rb,rc)                 Opr(0x1c,_R31_REGNO,rb,0x30,rc)
+#  define CTTZ(rb,rc)                  Opr(0x1c,_R31_REGNO,rb,0x33,rc)
+#  define ECB(rb,d)                    Mem(0x18,_R31_REGNO,rb,0xe800) /* d is accepted but not used in the expansion */
+#  define EQV(ra,rb,rc)                        Opr(0x11,ra,rb,0x48,rc)
+#  define XORNOT(ra,rb,rc)             EQV(ra,rb,rc) /* alias of EQV */
+#  define EQVi(ra,im,rc)               Opi(0x11,ra,im,0x48,rc)
+#  define XORNOTi(ra,im,rc)            EQVi(ra,im,rc) /* alias of EQVi */
+#  define EXCB()                       Mem(0x18,0,0,0x0400) /* both register fields are literal 0 here, not _R31_REGNO */
+#  define EXTBL(ra,rb,rc)              Opr(0x12,ra,rb,0x06,rc)
+#  define EXTBLi(ra,im,rc)             Opi(0x12,ra,im,0x06,rc)
+#  define EXTLH(ra,rb,rc)              Opr(0x12,ra,rb,0x6a,rc)
+#  define EXTLHi(ra,im,rc)             Opi(0x12,ra,im,0x6a,rc)
+#  define EXTLL(ra,rb,rc)              Opr(0x12,ra,rb,0x26,rc)
+#  define EXTLLi(ra,im,rc)             Opi(0x12,ra,im,0x26,rc)
+#  define EXTQH(ra,rb,rc)              Opr(0x12,ra,rb,0x7a,rc)
+#  define EXTQHi(ra,im,rc)             Opi(0x12,ra,im,0x7a,rc)
+#  define EXTQL(ra,rb,rc)              Opr(0x12,ra,rb,0x36,rc)
+#  define EXTQLi(ra,im,rc)             Opi(0x12,ra,im,0x36,rc)
+#  define EXTWH(ra,rb,rc)              Opr(0x12,ra,rb,0x5a,rc)
+#  define EXTWHi(ra,im,rc)             Opi(0x12,ra,im,0x5a,rc)
+#  define EXTWL(ra,rb,rc)              Opr(0x12,ra,rb,0x16,rc)
+#  define EXTWLi(ra,im,rc)             Opi(0x12,ra,im,0x16,rc)
+#  define FETCH(rb,d)                  Mem(0x18,_R31_REGNO,rb,0x8000) /* d is accepted but not used in the expansion */
+#  define FETCH_Mem(rb,d)              Mem(0x18,_R31_REGNO,rb,0xa000) /* d is accepted but not used in the expansion */
+/* FIXME IMPLVER not disassembled */
+#  define IMPLVER(rc)                  Opr(0x11,_R31_REGNO,1,0x6c,rc) /* rb field is the literal 1 */
+#  define INSBL(ra,rb,rc)              Opr(0x12,ra,rb,0x0b,rc)
+#  define INSBLi(ra,im,rc)             Opi(0x12,ra,im,0x0b,rc)
+#  define INSLH(ra,rb,rc)              Opr(0x12,ra,rb,0x67,rc)
+#  define INSLHi(ra,im,rc)             Opi(0x12,ra,im,0x67,rc)
+#  define INSLL(ra,rb,rc)              Opr(0x12,ra,rb,0x2b,rc)
+#  define INSLLi(ra,im,rc)             Opi(0x12,ra,im,0x2b,rc)
+#  define INSQH(ra,rb,rc)              Opr(0x12,ra,rb,0x77,rc)
+#  define INSQHi(ra,im,rc)             Opi(0x12,ra,im,0x77,rc)
+#  define INSQL(ra,rb,rc)              Opr(0x12,ra,rb,0x3b,rc)
+#  define INSQLi(ra,im,rc)             Opi(0x12,ra,im,0x3b,rc)
+#  define INSWH(ra,rb,rc)              Opr(0x12,ra,rb,0x57,rc)
+#  define INSWHi(ra,im,rc)             Opi(0x12,ra,im,0x57,rc)
+#  define INSWL(ra,rb,rc)              Opr(0x12,ra,rb,0x1b,rc)
+#  define INSWLi(ra,im,rc)             Opi(0x12,ra,im,0x1b,rc)
+/* Jump group: every entry uses opcode 0x1a and differs only in the fourth
+ * Mbr() argument (0 for JMP, 1 for JSR, 3 for JSR_COROUTINE; RET uses 2). */
+#  define JMP(ra,rb,d)                 Mbr(0x1a,ra,rb,0,d)
+#  define JSR(ra,rb,d)                 Mbr(0x1a,ra,rb,1,d)
+#  define JSR_COROUTINE(ra,rb,d)       Mbr(0x1a,ra,rb,3,d)
+/* Short alias of JSR_COROUTINE.  The third parameter has to be named d:
+ * with any other name the d in the expansion is not substituted and the
+ * caller's argument is silently dropped. */
+#  define JCR(ra,rb,d)                 JSR_COROUTINE(ra,rb,d)
+#  define LDA(ra,rb,d)                 Mem(0x08,ra,rb,d) /* loads and stores below all use Mem(opcode,ra,rb,d) */
+#  define LDAH(ra,rb,d)                        Mem(0x09,ra,rb,d)
+#  define LDBU(ra,rb,d)                        Mem(0x0a,ra,rb,d)
+#  define LDWU(ra,rb,d)                        Mem(0x0c,ra,rb,d)
+#  define LDL(ra,rb,d)                 Mem(0x28,ra,rb,d)
+#  define LDL_L(ra,rb,d)               Mem(0x2a,ra,rb,d)
+#  define LDQ(ra,rb,d)                 Mem(0x29,ra,rb,d)
+#  define LDQ_L(ra,rb,d)               Mem(0x2b,ra,rb,d)
+#  define LDQ_U(ra,rb,d)               Mem(0x0b,ra,rb,d)
+#  define MAXSB8(ra,rb,rc)             Opr(0x1c,ra,rb,0x3e,rc)
+#  define MAXSW4(ra,rb,rc)             Opr(0x1c,ra,rb,0x3f,rc)
+#  define MAXSUB8(ra,rb,rc)            Opr(0x1c,ra,rb,0x3c,rc)
+#  define MAXSUW4(ra,rb,rc)            Opr(0x1c,ra,rb,0x3d,rc)
+#  define MB()                         Mem(0x18,_R31_REGNO,_R31_REGNO,0x4000)
+#  define MINSB8(ra,rb,rc)             Opr(0x1c,ra,rb,0x38,rc)
+#  define MINSW4(ra,rb,rc)             Opr(0x1c,ra,rb,0x39,rc)
+#  define MINSUB8(ra,rb,rc)            Opr(0x1c,ra,rb,0x3a,rc)
+#  define MINSUW4(ra,rb,rc)            Opr(0x1c,ra,rb,0x3b,rc)
+#  define MSKBL(ra,rb,rc)              Opr(0x12,ra,rb,0x02,rc)
+#  define MSKBLi(ra,im,rc)             Opi(0x12,ra,im,0x02,rc)
+#  define MSKLH(ra,rb,rc)              Opr(0x12,ra,rb,0x62,rc)
+#  define MSKLHi(ra,im,rc)             Opi(0x12,ra,im,0x62,rc)
+#  define MSKLL(ra,rb,rc)              Opr(0x12,ra,rb,0x22,rc)
+#  define MSKLLi(ra,im,rc)             Opi(0x12,ra,im,0x22,rc)
+#  define MSKQH(ra,rb,rc)              Opr(0x12,ra,rb,0x72,rc)
+#  define MSKQHi(ra,im,rc)             Opi(0x12,ra,im,0x72,rc)
+#  define MSKQL(ra,rb,rc)              Opr(0x12,ra,rb,0x32,rc)
+#  define MSKQLi(ra,im,rc)             Opi(0x12,ra,im,0x32,rc)
+#  define MSKWH(ra,rb,rc)              Opr(0x12,ra,rb,0x52,rc)
+#  define MSKWHi(ra,im,rc)             Opi(0x12,ra,im,0x52,rc)
+#  define MSKWL(ra,rb,rc)              Opr(0x12,ra,rb,0x12,rc)
+#  define MSKWLi(ra,im,rc)             Opi(0x12,ra,im,0x12,rc)
+#  define MULL(ra,rb,rc)               Opr(0x13,ra,rb,0x00,rc)
+#  define MULLi(ra,im,rc)              Opi(0x13,ra,im,0x00,rc)
+#  define MULL_V(ra,rb,rc)             Opr(0x13,ra,rb,0x40,rc)
+#  define MULL_Vi(ra,im,rc)            Opi(0x13,ra,im,0x40,rc)
+#  define MULQ(ra,rb,rc)               Opr(0x13,ra,rb,0x20,rc)
+#  define MULQi(ra,im,rc)              Opi(0x13,ra,im,0x20,rc)
+#  define MULQ_V(ra,rb,rc)             Opr(0x13,ra,rb,0x60,rc)
+#  define MULQ_Vi(ra,im,rc)            Opi(0x13,ra,im,0x60,rc)
+#  define ORNOT(ra,rb,rc)              Opr(0x11,ra,rb,0x28,rc)
+#  define ORNOTi(ra,im,rc)             Opi(0x11,ra,im,0x28,rc)
+#  define PERR(ra,rb,rc)               Opr(0x1c,ra,rb,0x31,rc)
+#  define PKLB(rb,rc)                  Opr(0x1c,_R31_REGNO,rb,0x37,rc)
+#  define PKWB(rb,rc)                  Opr(0x1c,_R31_REGNO,rb,0x36,rc)
+/* FIXME PREFETCH* not disassembled */
+#  define PREFETCH(rb,d)               Mem(0x28,_R31_REGNO,rb,d) /* same opcode as LDL (0x28) with ra fixed to _R31_REGNO */
+#  define PREFETCH_EN(rb,d)            Mem(0x29,_R31_REGNO,rb,d) /* same opcode as LDQ (0x29) with ra fixed to _R31_REGNO */
+#  define PREFETCH_M(rb,d)             Mem(0x22,_R31_REGNO,rb,d)
+#  define PREFETCH_MEN(rb,d)           Mem(0x23,_R31_REGNO,rb,d)
+#  define RC(ra)                       Mem(0x18,ra,_R31_REGNO,0xe000)
+#  define RET(ra,rb,d)                 Mbr(0x1a,ra,rb,2,d) /* opcode 0x1a like JMP/JSR/JSR_COROUTINE, with 2 as the fourth Mbr argument */
+#  define RPCC(ra)                     Mem(0x18,ra,_R31_REGNO,0xc000)
+#  define RS(ra)                       Mem(0x18,ra,_R31_REGNO,0xf000)
+/* Scaled add/subtract group (opcode 0x10).  Each mnemonic has a register
+ * form (Opr) and an immediate form with an i suffix (Opi) sharing the
+ * same function code. */
+#  define S4ADDL(ra,rb,rc)             Opr(0x10,ra,rb,0x02,rc)
+#  define S4ADDLi(ra,im,rc)            Opi(0x10,ra,im,0x02,rc)
+/* Historical spelling without the L; kept so existing users still build. */
+#  define S4ADDi(ra,im,rc)             S4ADDLi(ra,im,rc)
+#  define S4ADDQ(ra,rb,rc)             Opr(0x10,ra,rb,0x22,rc)
+#  define S4ADDQi(ra,im,rc)            Opi(0x10,ra,im,0x22,rc)
+#  define S4SUBL(ra,rb,rc)             Opr(0x10,ra,rb,0x0b,rc)
+#  define S4SUBLi(ra,im,rc)            Opi(0x10,ra,im,0x0b,rc)
+#  define S4SUBQ(ra,rb,rc)             Opr(0x10,ra,rb,0x2b,rc)
+#  define S4SUBQi(ra,im,rc)            Opi(0x10,ra,im,0x2b,rc)
+#  define S8ADDL(ra,rb,rc)             Opr(0x10,ra,rb,0x12,rc)
+#  define S8ADDLi(ra,im,rc)            Opi(0x10,ra,im,0x12,rc)
+#  define S8ADDQ(ra,rb,rc)             Opr(0x10,ra,rb,0x32,rc)
+#  define S8ADDQi(ra,im,rc)            Opi(0x10,ra,im,0x32,rc)
+#  define S8SUBL(ra,rb,rc)             Opr(0x10,ra,rb,0x1b,rc)
+#  define S8SUBLi(ra,im,rc)            Opi(0x10,ra,im,0x1b,rc)
+#  define S8SUBQ(ra,rb,rc)             Opr(0x10,ra,rb,0x3b,rc)
+#  define S8SUBQi(ra,im,rc)            Opi(0x10,ra,im,0x3b,rc)
+#  define SEXTB(rb,rc)                 Opr(0x1c,_R31_REGNO,rb,0x00,rc) /* single-source form: ra field fixed to _R31_REGNO */
+/* FIXME not disassembled */
+#  define SEXTBi(im,rc)                        Opi(0x1c,_R31_REGNO,im,0x00,rc)
+#  define SEXTW(rb,rc)                 Opr(0x1c,_R31_REGNO,rb,0x01,rc)
+/* FIXME not disassembled */
+#  define SEXTWi(im,rc)                        Opi(0x1c,_R31_REGNO,im,0x01,rc)
+#  define SLL(ra,rb,rc)                        Opr(0x12,ra,rb,0x39,rc)
+#  define SLLi(ra,im,rc)               Opi(0x12,ra,im,0x39,rc)
+#  define SRA(ra,rb,rc)                        Opr(0x12,ra,rb,0x3c,rc)
+#  define SRAi(ra,im,rc)               Opi(0x12,ra,im,0x3c,rc)
+#  define SRL(ra,rb,rc)                        Opr(0x12,ra,rb,0x34,rc)
+#  define SRLi(ra,im,rc)               Opi(0x12,ra,im,0x34,rc)
+#  define STB(ra,rb,d)                 Mem(0x0e,ra,rb,d)
+#  define STL(ra,rb,d)                 Mem(0x2c,ra,rb,d)
+#  define STL_C(ra,rb,d)               Mem(0x2e,ra,rb,d)
+#  define STQ(ra,rb,d)                 Mem(0x2d,ra,rb,d)
+#  define STQ_C(ra,rb,d)               Mem(0x2f,ra,rb,d)
+#  define STQ_U(ra,rb,d)               Mem(0x0f,ra,rb,d)
+#  define STW(ra,rb,d)                 Mem(0x0d,ra,rb,d)
+#  define SUBL(ra,rb,rc)               Opr(0x10,ra,rb,0x09,rc)
+#  define SUBLi(ra,im,rc)              Opi(0x10,ra,im,0x09,rc)
+#  define SUBL_V(ra,rb,rc)             Opr(0x10,ra,rb,0x49,rc)
+#  define SUBL_Vi(ra,im,rc)            Opi(0x10,ra,im,0x49,rc)
+#  define SUBQ(ra,rb,rc)               Opr(0x10,ra,rb,0x29,rc)
+#  define SUBQi(ra,im,rc)              Opi(0x10,ra,im,0x29,rc)
+#  define SUBQ_V(ra,rb,rc)             Opr(0x10,ra,rb,0x69,rc)
+#  define SUBQ_Vi(ra,im,rc)            Opi(0x10,ra,im,0x69,rc)
+#  define TRAPB()                      Mem(0x18,_R31_REGNO,_R31_REGNO,0x0000)
+#  define UMULH(ra,rb,rc)              Opr(0x13,ra,rb,0x30,rc)
+#  define UMULHi(ra,im,rc)             Opi(0x13,ra,im,0x30,rc)
+#  define UNPKBL(rb,rc)                        Opr(0x1c,_R31_REGNO,rb,0x35,rc)
+#  define UNPKBW(rb,rc)                        Opr(0x1c,_R31_REGNO,rb,0x34,rc)
+#  define WH64(ra)                     Mem(0x18,ra,_R31_REGNO,0xf800) /* NOTE(review): operand goes in the ra field here, whereas ECB/FETCH place theirs in rb - confirm against the architecture manual */
+#  define WH64EN(ra)                   Mem(0x18,ra,_R31_REGNO,0xfc00) /* NOTE(review): same operand placement as WH64 - confirm */
+#  define WMB()                                Mem(0x18,_R31_REGNO,_R31_REGNO,0x4400)
+#  define XOR(ra,rb,rc)                        Opr(0x11,ra,rb,0x40,rc)
+#  define XORi(ra,im,rc)               Opi(0x11,ra,im,0x40,rc)
+#  define ZAP(ra,rb,rc)                        Opr(0x12,ra,rb,0x30,rc)
+#  define ZAPi(ra,im,rc)               Opi(0x12,ra,im,0x30,rc)
+#  define ZAPNOT(ra,rb,rc)             Opr(0x12,ra,rb,0x31,rc)
+#  define ZAPNOTi(ra,im,rc)            Opi(0x12,ra,im,0x31,rc)
+#  define NOP()                                BIS(_R31_REGNO,_R31_REGNO,_R31_REGNO) /* BIS with all three register fields set to _R31_REGNO */
+#  define MOV(ra,rc)                   BIS(ra,ra,rc) /* BIS of ra with itself into rc */
+#  define MOVi(im,rc)                  BISi(_R31_REGNO,im,rc) /* BISi of _R31_REGNO with im into rc */
+#  define NEGL(ra,rc)                  SUBL(_R31_REGNO,ra,rc) /* SUBL with _R31_REGNO as the first operand */
+#  define NEGQ(ra,rc)                  SUBQ(_R31_REGNO,ra,rc) /* SUBQ with _R31_REGNO as the first operand */
+#  define NOT(ra,rc)                   ORNOT(_R31_REGNO,ra,rc) /* ORNOT with _R31_REGNO as the first operand */
+#  define nop(i0)                      _nop(_jit,i0) /* pattern used below: each lowercase macro forwards to the _-prefixed static function, passing _jit first */
+static void _nop(jit_state_t*,jit_int32_t);
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); /* unlike _movi this returns a jit_word_t - presumably the code address to patch later; confirm */
+#  define negr(r0,r1)                  NEGQ(r1,r0) /* lightning order is (result, source); the opcode macros take the result last, hence the swap */
+#  define comr(r0,r1)                  NOT(r1,r0)
+#  define addr(r0,r1,r2)               ADDQ(r1,r2,r0)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,i0)              _addcr(_jit,r0,r1,i0) /* the third macro parameter is named i0 but the prototype takes a jit_int32_t there, like the other register forms */
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,i0)              _addxr(_jit,r0,r1,i0) /* third parameter: jit_int32_t in the prototype despite the i0 name */
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               SUBQ(r1,r2,r0)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,i0)              _subcr(_jit,r0,r1,i0) /* third parameter: jit_int32_t in the prototype despite the i0 name */
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,i0)              _subxr(_jit,r0,r1,i0) /* third parameter: jit_int32_t in the prototype despite the i0 name */
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t);
+#  define mulr(r0,r1,r2)               MULQ(r1,r2,r0)
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           _qmulr(_jit,r0,r1,r2,r3) /* q* forms take four operands; presumably two result registers followed by two sources - confirm */
+static void _qmulr(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr_u(r0,r1,r2,r3)         _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+static jit_word_t __idiv(jit_word_t, jit_word_t); /* the __-prefixed helpers take and return plain word values, not register numbers; presumably called from generated code - confirm */
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static jit_uword_t __udiv(jit_uword_t, jit_uword_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static jit_word_t __irem(jit_word_t, jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static jit_uword_t __urem(jit_uword_t, jit_uword_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static jit_word_t __idivrem(jit_word_t, jit_word_t, jit_word_t*);
+#  define qdivr(r0,r1,r2,r3)           _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+static jit_word_t __udivrem(jit_uword_t, jit_uword_t, jit_uword_t*); /* NOTE(review): returns jit_word_t although __udiv and __urem return jit_uword_t - confirm against the definition */
+#  define qdivr_u(r0,r1,r2,r3)         _qdivr_u(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               SLL(r1,r2,r0) /* register forms expand directly to one opcode macro, with the result register moved to the last position */
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0,r1,r2)               SRA(r1,r2,r0) /* rshr uses SRA, rshr_u uses SRL */
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0,r1,r2)             SRL(r1,r2,r0)
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define andr(r0,r1,r2)               AND(r1,r2,r0)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        OR(r1,r2,r0)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               XOR(r1,r2,r0)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0,r1,r2)                        CMPLT(r1,r2,r0) /* lt/le/eq register forms map directly onto CMPLT/CMPLE/CMPEQ (CMPULT/CMPULE for _u) */
+#  define lti(r0,r1,i0)                        _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0,r1,r2)              CMPULT(r1,r2,r0)
+#  define lti_u(r0,r1,i0)              _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler(r0,r1,r2)                        CMPLE(r1,r2,r0)
+#  define lei(r0,r1,i0)                        _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler_u(r0,r1,r2)              CMPULE(r1,r2,r0)
+#  define lei_u(r0,r1,i0)              _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define eqr(r0,r1,r2)                        CMPEQ(r1,r2,r0)
+#  define eqi(r0,r1,i0)                        _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger(r0,r1,r2)                        CMPLE(r2,r1,r0) /* r1 >= r2 is emitted as r2 <= r1: same opcode as ler with the sources swapped */
+#  define gei(r0,r1,i0)                        _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger_u(r0,r1,r2)              CMPULE(r2,r1,r0)
+#  define gei_u(r0,r1,i0)              _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0,r1,r2)                        CMPLT(r2,r1,r0) /* r1 > r2 is emitted as r2 < r1: same opcode as ltr with the sources swapped */
+#  define gti(r0,r1,i0)                        _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0,r1,r2)              CMPULT(r2,r1,r0)
+#  define gti_u(r0,r1,i0)              _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ner(r0,r1,r2)                        _ner(_jit,r0,r1,r2) /* unlike the other register compares, ner goes through a helper function rather than a single opcode macro */
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define bltr(i0,r0,r1)               _bltr(_jit,i0,r0,r1) /* branch emitters: i0 comes first and every helper returns a jit_word_t - presumably the branch target in, and the address of the emitted branch out; confirm */
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti(i0,r0,i1)               _blti(_jit,i0,r0,i1) /* immediate forms take the compared constant as i1 (jit_word_t) */
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bltr_u(i0,r0,r1)             _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_u(i0,r0,i1)             _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bler(i0,r0,r1)               _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei(i0,r0,i1)               _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bler_u(i0,r0,r1)             _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_u(i0,r0,i1)             _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define beqr(i0,r0,r1)               _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi(i0,r0,i1)               _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger(i0,r0,r1)               _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei(i0,r0,i1)                       _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger_u(i0,r0,r1)             _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_u(i0,r0,i1)             _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bgtr(i0,r0,r1)               _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti(i0,r0,i1)               _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bgtr_u(i0,r0,r1)             _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_u(i0,r0,i1)             _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bner(i0,r0,r1)               _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei(i0,r0,i1)               _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define baddr(i0,r0,r1,cc)           _baddr(_jit,i0,r0,r1,cc) /* shared helper behind boaddr (cc = 1) and bxaddr (cc = 0) */
+static jit_word_t _baddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                        jit_bool_t);
+#  define baddi(i0,r0,i1,cc)           _baddi(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                        jit_bool_t);
+#  define baddr_u(i0,r0,r1,cc)         _baddr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _baddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                          jit_bool_t);
+#  define baddi_u(i0,r0,i1,cc)         _baddi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                          jit_bool_t);
+#  define boaddr(i0,r0,r1)             baddr(i0,r0,r1,1) /* bo* pass cc = 1, bx* pass cc = 0; presumably "branch on overflow" versus "branch on no overflow" - confirm */
+#  define boaddi(i0,r0,i1)             baddi(i0,r0,i1,1)
+#  define boaddr_u(i0,r0,r1)           baddr_u(i0,r0,r1,1)
+#  define boaddi_u(i0,r0,i1)           baddi_u(i0,r0,i1,1)
+#  define bxaddr(i0,r0,r1)             baddr(i0,r0,r1,0)
+#  define bxaddi(i0,r0,i1)             baddi(i0,r0,i1,0)
+#  define bxaddr_u(i0,r0,r1)           baddr_u(i0,r0,r1,0)
+#  define bxaddi_u(i0,r0,i1)           baddi_u(i0,r0,i1,0)
+#  define bsubr(i0,r0,r1,cc)           _bsubr(_jit,i0,r0,r1,cc) /* shared helper behind bosubr (cc = 1) and bxsubr (cc = 0) */
+static jit_word_t _bsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                        jit_bool_t);
+#  define bsubi(i0,r0,i1,cc)           _bsubi(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                        jit_bool_t);
+#  define bsubr_u(i0,r0,r1,cc)         _bsubr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _bsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                          jit_bool_t);
+#  define bsubi_u(i0,r0,i1,cc)         _bsubi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                          jit_bool_t);
+#  define bosubr(i0,r0,r1)             bsubr(i0,r0,r1,1)
+#  define bosubi(i0,r0,i1)             bsubi(i0,r0,i1,1)
+#  define bosubr_u(i0,r0,r1)           bsubr_u(i0,r0,r1,1)
+#  define bosubi_u(i0,r0,i1)           bsubi_u(i0,r0,i1,1)
+#  define bxsubr(i0,r0,r1)             bsubr(i0,r0,r1,0)
+#  define bxsubi(i0,r0,i1)             bsubi(i0,r0,i1,0)
+#  define bxsubr_u(i0,r0,r1)           bsubr_u(i0,r0,r1,0)
+#  define bxsubi_u(i0,r0,i1)           bsubi_u(i0,r0,i1,0)
+#  define bmxr(i0,r0,r1,cc)            _bmxr(_jit,i0,r0,r1,cc) /* shared helper behind bmsr (cc = 1) and bmcr (cc = 0) */
+static jit_word_t _bmxr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                       jit_bool_t);
+#  define bmxi(i0,r0,i1,cc)            _bmxi(_jit,i0,r0,i1,cc)
+static jit_word_t _bmxi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t);
+#  define bmsr(i0,r0,r1)               bmxr(i0,r0,r1,1) /* bms* pass cc = 1, bmc* pass cc = 0; presumably "mask set" versus "mask clear" - confirm */
+#  define bmsi(i0,r0,i1)               bmxi(i0,r0,i1,1)
+#  define bmcr(i0,r0,r1)               bmxr(i0,r0,r1,0)
+#  define bmci(i0,r0,i1)               bmxi(i0,r0,i1,0)
+#  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        LDBU(r0,r1,0)
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 _ldr_s(_jit,r0,r1)
+static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        LDWU(r0,r1,0)
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 LDL(r0,r1,0)
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_ui(r0,r1)                        _ldr_ui(_jit,r0,r1)
+static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr(r0,r1)                   ldr_l(r0,r1)
+#  define ldr_l(r0,r1)                 LDQ(r0,r1,0)
+#  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
+#  define ldxr_l(r0,r1,r2)             _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi(r0,r1,i0)               ldxi_l(r0,r1,i0)
+#  define ldxi_l(r0,r1,i0)             _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_c(r0,r1)                 STB(r1,r0,0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_s(r0,r1)                 STW(r1,r0,0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0,r1)                 STL(r1,r0,0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str(r0,r1)                   str_l(r0,r1)
+#  define str_l(r0,r1)                 STQ(r1,r0,0)
+#  define sti_l(i0,r0)                 _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_l(r0,r1,r2)             _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi(i0,r0,r1)               stxi_l(i0,r0,r1)
+#  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0,r1)               _extr_uc(_jit,r0,r1)
+static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_s(r0,r1)                        _extr_s(_jit,r0,r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
+static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_i(r0,r1)                        _extr_i(_jit,r0,r1)
+static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_ui(r0,r1)               _extr_ui(_jit,r0,r1)
+static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+/* Host-to-network conversions.  On a little-endian host each width has
+ * its own static implementation; otherwise the 16- and 32-bit forms map
+ * to the unsigned extensions and the 64-bit form to a register move. */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define htonr_ul(r0,r1)            _htonr_ul(_jit,r0,r1)
+static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#  else
+#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
+#  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*, jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
+#define callr(r0)                      _callr(_jit,r0)
+static void _callr(jit_state_t*, jit_int32_t);
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*, jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*, jit_word_t);
+#  define prolog(node)                 _prolog(_jit,node)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(node)                 _epilog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define patch_at(jump,label)         _patch_at(_jit,jump,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Assemble a word with the 6-bit opcode `o` in bits 31..26 and the
+ * 26-bit field `n` in the remaining low bits, then hand it to ii().
+ * Both fields are range-checked with assert(). */
+static void
+_Pcd(jit_state_t *_jit, int o, unsigned int n)
+{
+    assert(_u6_p(o));
+    assert(_u26_p(n));
+    ii((o<<26)|_u26(n));
+}
+
+/* Assemble a branch-format word: opcode `o` in bits 31..26, register
+ * `ra` in bits 25..21 and the signed 21-bit displacement `d` (stored via
+ * _u21()) in the low bits, then hand it to ii(). */
+static void
+_Bra(jit_state_t *_jit, int o, int ra, int d)
+{
+    assert(_u6_p(o));
+    assert(_u5_p(ra));
+    assert(_s21_p(d));
+    ii((o<<26)|(ra<<21)|_u21(d));
+}
+
+/* Assemble a memory-format word: opcode `o` in bits 31..26, `ra` in
+ * bits 25..21, `rb` in bits 20..16 and the 16-bit displacement `d` in
+ * the low bits, then hand it to ii().  `d` must already be the unsigned
+ * 16-bit encoding (callers pass it through _u16()). */
+static void
+_Mem(jit_state_t *_jit, int o, int ra, int rb, unsigned int d)
+{
+    assert(_u6_p(o));
+    assert(_u5_p(ra));
+    assert(_u5_p(rb));
+    assert(_u16_p(d));
+    ii((o<<26)|(ra<<21)|(rb<<16)|_u16(d));
+}
+
+/* Assemble a memory-branch word: opcode `o` in bits 31..26, `ra` in
+ * bits 25..21, `rb` in bits 20..16, the 2-bit field `h` in bits 15..14
+ * and the signed 14-bit displacement `d` in the low bits, then hand it
+ * to ii(). */
+static void
+_Mbr(jit_state_t *_jit, int o, int ra, int rb, int h, int d)
+{
+    assert(_u6_p(o));
+    assert(_u5_p(ra));
+    assert(_u5_p(rb));
+    assert(_u2_p(h));
+    assert(_s14_p(d));
+    ii((o<<26)|(ra<<21)|(rb<<16)|(h<<14)|_u14(d));
+}
+
+/* Assemble a register-operate word: opcode `o` in bits 31..26, `ra` in
+ * bits 25..21, `rb` in bits 20..16, the 11-bit function code `f` in
+ * bits 15..5 and `rc` in bits 4..0, then hand it to ii(). */
+static void
+_Opr(jit_state_t *_jit, int o, int ra, int rb, unsigned int f, int rc)
+{
+    assert(_u6_p(o));
+    assert(_u5_p(ra));
+    assert(_u5_p(rb));
+    assert(_u5_p(rc));
+    assert(_u11_p(f));
+    ii((o<<26)|(ra<<21)|(rb<<16)|(_u11(f)<<5)|rc);
+}
+
+/* Assemble an operate word with an 8-bit immediate: opcode `o` in bits
+ * 31..26, `ra` in bits 25..21, the immediate `i` in bits 20..13, bit 12
+ * always set (presumably the flag selecting the literal form), the
+ * 7-bit function code `f` in bits 11..5 and `rc` in bits 4..0. */
+static void
+_Opi(jit_state_t *_jit, int o, int ra, unsigned int i, unsigned int f, int rc)
+{
+    assert(_u6_p(o));
+    assert(_u5_p(ra));
+    assert(_u8_p(i));
+    assert(_u5_p(rc));
+    assert(_u7_p(f));
+    ii((o<<26)|(ra<<21)|(_u8(i)<<13)|(1<<12)|(_u7(f)<<5)|rc);
+}
+
+/* Emit one NOP() per 4 units of i0.  The trailing assert fires when a
+ * positive i0 is not a multiple of 4 (or when i0 was negative). */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+       NOP();
+       i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/* Copy r1 into r0; nothing is emitted when both name the same register. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    MOV(r1, r0);
+}
+
+/*
+ * Load the constant i0 into r0, choosing the shortest sequence:
+ *   - MOVi when i0 passes _u8_p();
+ *   - a single LDA when it passes _s16_p();
+ *   - LDA + LDAH when it passes _s32_p() and the pair reproduces it;
+ *   - LDA [+ LDAH] followed by a 32-bit shift left/right pair that
+ *     zero-extends, for the remaining unsigned 32-bit values;
+ *   - LDA [+ LDAH] of the upper half followed by a 32-bit shift left
+ *     when the low 32 bits are zero;
+ *   - otherwise both halves are built separately (the upper one in a
+ *     scratch register) and OR-ed together.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_int16_t                s0, s1, s2, s3;
+    /* Split i0 in four signed 16-bit pieces.  LDA and LDAH add a
+     * sign-extended displacement, so a negative lower piece borrows
+     * one from the piece above it; compensate by incrementing it. */
+    s0 = i0;
+    s1 = i0 >> 16;
+    s2 = i0 >> 32;
+    s3 = i0 >> 48;
+    if (s0 < 0)
+       ++s1;
+    if (s2 < 0)
+       ++s3;
+    if (_u8_p(i0))
+       MOVi(_u8(i0), r0);
+    else if (_s16_p(i0))
+       LDA(r0, _R31_REGNO, _u16(s0));
+    /* For 0x7fff8000..0x7fffffff the increment above wraps s1 to a
+     * negative value and LDA+LDAH would produce a negative 64-bit
+     * result; let those values fall through to the zero-extending
+     * sequence below instead. */
+    else if (_s32_p(i0) && !(i0 > 0 && s1 < 0)) {
+       LDA(r0, _R31_REGNO, _u16(s0));
+       LDAH(r0, r0, _u16(s1));
+    }
+    else if (_u32_p(i0)) {
+       LDA(r0, _R31_REGNO, _u16(s0));
+       if (s1)
+           LDAH(r0, r0, _u16(s1));
+       /* Clear the sign-extended upper 32 bits. */
+       lshi(r0, r0, 32);
+       rshi_u(r0, r0, 32);
+    }
+    else if (_u32(i0) == 0) {
+       LDA(r0, _R31_REGNO, _u16(s2));
+       if (s3)
+           LDAH(r0, r0, _u16(s3));
+       lshi(r0, r0, 32);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDA(r0, _R31_REGNO, _u16(s0));
+       LDA(rn(reg), _R31_REGNO, _u16(s2));
+       if (s1)
+           LDAH(r0, r0, _u16(s1));
+       if (s3)
+           LDAH(rn(reg), rn(reg), _u16(s3));
+       /* Zero-extend the low half, move the high half in place. */
+       lshi(r0, r0, 32);
+       rshi_u(r0, r0, 32);
+       lshi(rn(reg), rn(reg), 32);
+       orr(r0, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* Load i0 into r0 with the full, unconditional instruction sequence
+ * (both halves built with LDA/LDAH, then combined) regardless of the
+ * value, and return the code address sampled before the first
+ * instruction.  Presumably the fixed shape is what lets the constant be
+ * patched later -- confirm against _patch_at(). */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    jit_int16_t                s0, s1, s2, s3;
+    w = _jit->pc.w;
+    reg = jit_get_reg(jit_class_gpr);
+    /* Same 16-bit split and borrow compensation as in _movi(). */
+    s0 = i0;
+    s1 = i0 >> 16;
+    s2 = i0 >> 32;
+    s3 = i0 >> 48;
+    if (s0 < 0)
+       ++s1;
+    if (s2 < 0)
+       ++s3;
+    LDA(r0, _R31_REGNO, _u16(s0));
+    LDA(rn(reg), _R31_REGNO, _u16(s2));
+    LDAH(r0, r0, _u16(s1));
+    LDAH(rn(reg), rn(reg), _u16(s3));
+    /* Zero-extend the low half, shift the high half up and merge. */
+    lshi(r0, r0, 32);
+    rshi_u(r0, r0, 32);
+    lshi(rn(reg), rn(reg), 32);
+    orr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Add the immediate i0 to r1, result in r0.  Immediates accepted by
+ * _u8_p() use ADDQi, those accepted by _s16_p() use LDA with r1 as
+ * base, and anything else is loaded in a scratch register first. */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       ADDQi(r1, i0, r0);
+       return;
+    }
+    if (_s16_p(i0)) {
+       LDA(r0, r1, _u16(i0));
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2, recording the carry in the jit_carry register (which
+ * is allocated on first use).  The carry is computed as the unsigned
+ * comparison "sum < r1", so when r0 aliases r1 the sum is built in a
+ * scratch register to keep r1 intact for the comparison. */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t sum;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r0, r1);
+       return;
+    }
+    sum = jit_get_reg(jit_class_gpr);
+    addr(rn(sum), r1, r2);
+    ltr_u(rn(jit_carry), rn(sum), r1);
+    movr(r0, rn(sum));
+    jit_unget_reg(sum);
+}
+
+/* Immediate form of the carry-producing add: r0 = r1 + i0 with the
+ * carry ("sum < r1", unsigned) left in the jit_carry register.  A
+ * scratch register holds the sum when r0 aliases r1. */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t sum;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, i0);
+       ltr_u(rn(jit_carry), r0, r1);
+       return;
+    }
+    sum = jit_get_reg(jit_class_gpr);
+    addi(rn(sum), r1, i0);
+    ltr_u(rn(jit_carry), rn(sum), r1);
+    movr(r0, rn(sum));
+    jit_unget_reg(sum);
+}
+
+/* Add with carry-in: r0 = r1 + r2 + carry.  The incoming carry is
+ * saved in a scratch register because the first addcr() rewrites
+ * jit_carry.
+ * NOTE(review): the second addcr() rewrites jit_carry again, so a
+ * carry produced by the first addition does not appear to survive --
+ * confirm whether callers chain more than two words. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Immediate add with carry-in: r0 = r1 + i0 + carry.  The incoming
+ * carry is saved in a scratch register because addci() rewrites
+ * jit_carry.
+ * NOTE(review): the final addcr() rewrites jit_carry again, so a carry
+ * produced by the first addition does not appear to survive -- confirm. */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Subtract the immediate i0 from r1, result in r0.  Immediates
+ * accepted by _u8_p() use SUBQi; when the negated value passes
+ * _s16_p() an LDA with displacement -i0 is used; otherwise i0 is
+ * loaded in a scratch register. */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       SUBQi(r1, i0, r0);
+       return;
+    }
+    if (_s16_p(-i0)) {
+       LDA(r0, r1, _u16(-i0));
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2, recording the borrow in the jit_carry register
+ * (allocated on first use).  The borrow is the unsigned comparison
+ * "r1 < difference"; a scratch register holds the difference when r0
+ * aliases r1 so that r1 is still available for the comparison. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t diff;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    diff = jit_get_reg(jit_class_gpr);
+    subr(rn(diff), r1, r2);
+    ltr_u(rn(jit_carry), r1, rn(diff));
+    movr(r0, rn(diff));
+    jit_unget_reg(diff);
+}
+
+/* Immediate form of the borrow-producing subtract, implemented as an
+ * addi() of -i0.  The borrow ("r1 < difference", unsigned) is left in
+ * the jit_carry register; a scratch register is used when r0 aliases
+ * r1. */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t diff;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, -i0);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    diff = jit_get_reg(jit_class_gpr);
+    addi(rn(diff), r1, -i0);
+    ltr_u(rn(jit_carry), r1, rn(diff));
+    movr(r0, rn(diff));
+    jit_unget_reg(diff);
+}
+
+/* Subtract with borrow-in: r0 = r1 - r2 - borrow.  The incoming
+ * borrow is saved in a scratch register because the first subcr()
+ * rewrites jit_carry.
+ * NOTE(review): the second subcr() rewrites jit_carry again, so a
+ * borrow produced by the first subtraction does not appear to
+ * survive -- confirm. */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Immediate subtract with borrow-in: r0 = r1 - i0 - borrow.  The
+ * incoming borrow is saved in a scratch register because subci()
+ * rewrites jit_carry.
+ * NOTE(review): the final subcr() rewrites jit_carry again, so a
+ * borrow produced by the first subtraction does not appear to
+ * survive -- confirm. */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Reverse subtract: computes r1 - i0 into r0 and then negates r0,
+ * leaving i0 - r1. */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);
+    negr(r0, r0);
+}
+
+/* Multiply r1 by the immediate i0, result in r0.  MULQi is used when
+ * i0 passes _u8_p(); otherwise i0 goes through a scratch register. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       MULQi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Signed widening multiply of r2 by r3: low word in r0, high word in
+ * r1.  The unsigned product is computed first with qmulr_u() and the
+ * high word is then corrected: for each operand, its sign (arithmetic
+ * shift right by 63, i.e. 0 or -1) times the other operand is added to
+ * r1.  Operands that alias r0 or r1 are copied to scratch registers
+ * beforehand so they are still intact for the correction step. */
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                reg;
+    /* The only invalid condition is r0 == r1 */
+    jit_int32_t                t2, t3, s2, s3;
+    /* s2/s3 are only assigned (and later released) when a copy of the
+     * corresponding operand is needed. */
+    if (r2 == r0 || r2 == r1) {
+       s2 = jit_get_reg(jit_class_gpr);
+       t2 = rn(s2);
+       movr(t2, r2);
+    }
+    else
+       t2 = r2;
+    if (r3 == r0 || r3 == r1) {
+       s3 = jit_get_reg(jit_class_gpr);
+       t3 = rn(s3);
+       movr(t3, r3);
+    }
+    else
+       t3 = r3;
+    qmulr_u(r0, r1, r2, r3);
+    reg = jit_get_reg(jit_class_gpr);
+    /**/
+    rshi(rn(reg), t2, 63);
+    mulr(rn(reg), rn(reg), t3);
+    addr(r1, r1, rn(reg));
+    /**/
+    rshi(rn(reg), t3, 63);
+    mulr(rn(reg), rn(reg), t2);
+    addr(r1, r1, rn(reg));
+    jit_unget_reg(reg);
+    if (t2 != r2)
+       jit_unget_reg(s2);
+    if (t3 != r3)
+       jit_unget_reg(s3);
+}
+
+/* Immediate form of qmulr(): i0 is loaded in a scratch register and
+ * the register variant does the work. */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t imm;
+
+    imm = jit_get_reg(jit_class_gpr);
+    movi(rn(imm), i0);
+    qmulr(r0, r1, r2, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* Unsigned widening multiply of r2 by r3: low word (mulr) in r0, high
+ * word (UMULH) in r1.  When r0 aliases one of the operands the low
+ * word is parked in a scratch register until UMULH has read them. */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t low;
+    int         clash;
+
+    clash = (r0 == r2 || r0 == r3);
+    if (clash) {
+       low = jit_get_reg(jit_class_gpr);
+       mulr(rn(low), r2, r3);
+    }
+    else
+       mulr(r0, r2, r3);
+    UMULH(r2, r3, r1);
+    if (clash) {
+       movr(r0, rn(low));
+       jit_unget_reg(low);
+    }
+}
+
+/* Unsigned widening multiply of r2 by the immediate i0: low word in
+ * r0, high word in r1.  Immediates that do not pass _u8_p() are loaded
+ * in a scratch register and handed to qmulr_u(); otherwise muli() and
+ * UMULHi are used directly, with the low word parked in a scratch
+ * register while r0 aliases r2. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (!_u8_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       qmulr_u(r0, r1, r2, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r0 != r2) {
+       muli(r0, r2, i0);
+       UMULHi(r2, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    muli(rn(tmp), r2, i0);
+    UMULHi(r2, i0, r1);
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Signed word division helper; its address is called from the code
+ * emitted by _divr()/_divi(). */
+static jit_word_t
+__idiv(jit_word_t u, jit_word_t v)
+{
+    return (u / v);
+}
+
+/* Signed division r0 = r1 / r2, done by calling __idiv(): operands go
+ * in _A0_REGNO/_A1_REGNO and the result is taken from _V0_REGNO. */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_A0_REGNO, r1);
+    movr(_A1_REGNO, r2);
+    calli((jit_word_t)__idiv);
+    movr(r0, _V0_REGNO);
+}
+
+/* Signed division by an immediate, r0 = r1 / i0, done by calling
+ * __idiv() with r1 in _A0_REGNO and i0 loaded in _A1_REGNO. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_A0_REGNO, r1);
+    movi(_A1_REGNO, i0);
+    calli((jit_word_t)__idiv);
+    movr(r0, _V0_REGNO);
+}
+
+/* Unsigned word division helper; its address is called from the code
+ * emitted by _divr_u()/_divi_u(). */
+static jit_uword_t
+__udiv(jit_uword_t u, jit_uword_t v)
+{
+    return (u / v);
+}
+
+/* Unsigned division r0 = r1 / r2, done by calling __udiv(): operands
+ * go in _A0_REGNO/_A1_REGNO and the result is taken from _V0_REGNO. */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_A0_REGNO, r1);
+    movr(_A1_REGNO, r2);
+    calli((jit_word_t)__udiv);
+    movr(r0, _V0_REGNO);
+}
+
+/* Unsigned division by an immediate, r0 = r1 / i0, done by calling
+ * __udiv() with r1 in _A0_REGNO and i0 loaded in _A1_REGNO. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_A0_REGNO, r1);
+    movi(_A1_REGNO, i0);
+    calli((jit_word_t)__udiv);
+    movr(r0, _V0_REGNO);
+}
+
+/* Signed word remainder helper; its address is called from the code
+ * emitted by _remr()/_remi(). */
+static jit_word_t
+__irem(jit_word_t u, jit_word_t v)
+{
+    return (u % v);
+}
+
+/* Signed remainder r0 = r1 % r2, done by calling __irem(): operands go
+ * in _A0_REGNO/_A1_REGNO and the result is taken from _V0_REGNO. */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_A0_REGNO, r1);
+    movr(_A1_REGNO, r2);
+    calli((jit_word_t)__irem);
+    movr(r0, _V0_REGNO);
+}
+
+/* Signed remainder by an immediate, r0 = r1 % i0, done by calling
+ * __irem() with r1 in _A0_REGNO and i0 loaded in _A1_REGNO. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_A0_REGNO, r1);
+    movi(_A1_REGNO, i0);
+    calli((jit_word_t)__irem);
+    movr(r0, _V0_REGNO);
+}
+
+/* Unsigned word remainder helper; its address is called from the code
+ * emitted by _remr_u()/_remi_u(). */
+static jit_uword_t
+__urem(jit_uword_t u, jit_uword_t v)
+{
+    return (u % v);
+}
+
+/* Unsigned remainder r0 = r1 % r2, done by calling __urem(): operands
+ * go in _A0_REGNO/_A1_REGNO and the result is taken from _V0_REGNO. */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_A0_REGNO, r1);
+    movr(_A1_REGNO, r2);
+    calli((jit_word_t)__urem);
+    movr(r0, _V0_REGNO);
+}
+
+/* Unsigned remainder by an immediate, r0 = r1 % i0, done by calling
+ * __urem() with r1 in _A0_REGNO and i0 loaded in _A1_REGNO. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_A0_REGNO, r1);
+    movi(_A1_REGNO, i0);
+    calli((jit_word_t)__urem);
+    movr(r0, _V0_REGNO);
+}
+
+/* Signed combined division helper: stores u % v through `rem` and
+ * returns u / v.  Called from the code emitted by _qdivr()/_qdivi(). */
+static jit_word_t
+__idivrem(jit_word_t u, jit_word_t v, jit_word_t *rem)
+{
+    *rem = u % v;
+    return (u / v);
+}
+
+/* Signed quotient and remainder of r2 / r3: quotient in r0, remainder
+ * in r1.  __idivrem() is called with the address _FP_REGNO - 8 as its
+ * out-parameter, and the remainder is reloaded from that slot after
+ * the quotient has been copied from _V0_REGNO. */
+static void
+_qdivr(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    movr(_A0_REGNO, r2);
+    movr(_A1_REGNO, r3);
+    subi(_A2_REGNO, _FP_REGNO, 8);
+    calli((jit_word_t)__idivrem);
+    movr(r0, _V0_REGNO);
+    ldxi(r1, _FP_REGNO, -8);
+}
+
+/* Signed quotient and remainder of r2 / i0: quotient in r0, remainder
+ * in r1.  Same scheme as the register form, with i0 loaded directly
+ * in _A1_REGNO and the slot at _FP_REGNO - 8 receiving the remainder. */
+static void
+_qdivi(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    movr(_A0_REGNO, r2);
+    movi(_A1_REGNO, i0);
+    subi(_A2_REGNO, _FP_REGNO, 8);
+    calli((jit_word_t)__idivrem);
+    movr(r0, _V0_REGNO);
+    ldxi(r1, _FP_REGNO, -8);
+}
+
+/* Unsigned combined division helper: stores u % v through `rem` and
+ * returns u / v (converted to the signed word return type).  Called
+ * from the code emitted by _qdivr_u()/_qdivi_u(). */
+static jit_word_t
+__udivrem(jit_uword_t u, jit_uword_t v, jit_uword_t *rem)
+{
+    *rem = u % v;
+    return (u / v);
+}
+
+/* Unsigned quotient and remainder of r2 / r3: quotient in r0,
+ * remainder in r1.  __udivrem() is called with the address
+ * _FP_REGNO - 8 as its out-parameter, and the remainder is reloaded
+ * from that slot after the quotient has been copied from _V0_REGNO. */
+static void
+_qdivr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    movr(_A0_REGNO, r2);
+    movr(_A1_REGNO, r3);
+    subi(_A2_REGNO, _FP_REGNO, 8);
+    calli((jit_word_t)__udivrem);
+    movr(r0, _V0_REGNO);
+    ldxi(r1, _FP_REGNO, -8);
+}
+
+/* Unsigned quotient and remainder of r2 / i0: quotient in r0,
+ * remainder in r1.  Same scheme as the register form, with i0 loaded
+ * directly in _A1_REGNO and the slot at _FP_REGNO - 8 receiving the
+ * remainder. */
+static void
+_qdivi_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    movr(_A0_REGNO, r2);
+    movi(_A1_REGNO, i0);
+    subi(_A2_REGNO, _FP_REGNO, 8);
+    calli((jit_word_t)__udivrem);
+    movr(r0, _V0_REGNO);
+    ldxi(r1, _FP_REGNO, -8);
+}
+
+/* Shift r1 left by the constant i0 (SLLi), result in r0.  The shift
+ * count must be in 0..63. */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 < 64);
+    SLLi(r1, i0, r0);
+}
+
+/* Shift r1 right by the constant i0 using SRAi (the form used for
+ * signed shifts), result in r0.  The shift count must be in 0..63. */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 < 64);
+    SRAi(r1, i0, r0);
+}
+
+/* Shift r1 right by the constant i0 using SRLi (the form used for
+ * unsigned shifts), result in r0.  The shift count must be in 0..63. */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 < 64);
+    SRLi(r1, i0, r0);
+}
+
+/* Bitwise AND of r1 with the immediate i0, result in r0.  ANDi is
+ * used when i0 passes _u8_p(); otherwise i0 goes through a scratch
+ * register and andr(). */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       ANDi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    andr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Bitwise OR of r1 with the immediate i0, result in r0.  ORi is used
+ * when i0 passes _u8_p(); otherwise i0 goes through a scratch register
+ * and orr(). */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       ORi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    orr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Bitwise XOR of r1 with the immediate i0, result in r0.  XORi is
+ * used when i0 passes _u8_p(); otherwise i0 goes through a scratch
+ * register and xorr(). */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       XORi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Set r0 to the result of comparing r1 "less than" the immediate i0.
+ * CMPLTi is used when i0 passes _u8_p(); otherwise i0 goes through a
+ * scratch register and ltr(). */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       CMPLTi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Unsigned "less than" of r1 against the immediate i0, result in r0.
+ * CMPULTi is used when i0 passes _u8_p(); otherwise i0 goes through a
+ * scratch register and ltr_u(). */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       CMPULTi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Set r0 to the result of comparing r1 "less or equal" to the
+ * immediate i0.  CMPLEi is used when i0 passes _u8_p(); otherwise i0
+ * goes through a scratch register and ler(). */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       CMPLEi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ler(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Unsigned "less or equal" of r1 against the immediate i0, result in
+ * r0.  CMPULEi is used when i0 passes _u8_p(); otherwise i0 goes
+ * through a scratch register and ler_u(). */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (_u8_p(i0))
+       CMPULEi(r1, i0, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ler_u(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* Set r0 to the result of comparing r1 for equality with the
+ * immediate i0.  CMPEQi is used when i0 passes _u8_p(); otherwise i0
+ * goes through a scratch register and eqr(). */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       CMPEQi(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    eqr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* "Greater or equal" of r1 against the immediate i0, result in r0.
+ * There is no immediate shortcut here: i0 always goes through a
+ * scratch register and ger(). */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t imm;
+
+    imm = jit_get_reg(jit_class_gpr);
+    movi(rn(imm), i0);
+    ger(r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* Unsigned "greater or equal" of r1 against the immediate i0, result
+ * in r0.  i0 always goes through a scratch register and ger_u(). */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t imm;
+
+    imm = jit_get_reg(jit_class_gpr);
+    movi(rn(imm), i0);
+    ger_u(r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* "Greater than" of r1 against the immediate i0, result in r0.  i0
+ * always goes through a scratch register and gtr(). */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t imm;
+
+    imm = jit_get_reg(jit_class_gpr);
+    movi(rn(imm), i0);
+    gtr(r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* Unsigned "greater than" of r1 against the immediate i0, result in
+ * r0.  i0 always goes through a scratch register and gtr_u(). */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t imm;
+
+    imm = jit_get_reg(jit_class_gpr);
+    movi(rn(imm), i0);
+    gtr_u(r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* "Not equal" of r1 and r2, result in r0: the equality result is
+ * computed first and then inverted by comparing it against 0. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPEQ(r1, r2, r0);
+    CMPEQi(r0, 0, r0);
+}
+
+/* "Not equal" of r1 and the immediate i0, result in r0.  When i0
+ * passes _u8_p() the equality result is computed with CMPEQi and then
+ * inverted by comparing it against 0; otherwise i0 goes through a
+ * scratch register and ner(). */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (_u8_p(i0)) {
+       CMPEQi(r1, i0, r0);
+       CMPEQi(r0, 0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ner(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Branch to i0 when ltr(r0, r1) holds.  The comparison result is put
+ * in a scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ltr(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    /* byte distance converted to 4-byte units, minus one */
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when lti(r0, i1) holds.  A comparison against zero is
+ * a single BLT on r0; otherwise the comparison result is put in a
+ * scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    if (i1 == 0) {
+       here = _jit->pc.w;
+       BLT(r0, ((i0 - here) >> 2) - 1);
+       return (here);
+    }
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lti(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when ltr_u(r0, r1) holds.  The comparison result is
+ * put in a scratch register and tested with BNE.  Returns the code
+ * address sampled just before the branch instruction. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ltr_u(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when lti_u(r0, i1) holds.  The comparison result is
+ * put in a scratch register and tested with BNE.  Returns the code
+ * address sampled just before the branch instruction. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    /* FIXME cannot optimize zero because need to return a patcheable address */
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lti_u(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when ler(r0, r1) holds.  The comparison result is put
+ * in a scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ler(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when lei(r0, i1) holds.  A comparison against zero is
+ * a single BLE on r0; otherwise the comparison result is put in a
+ * scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    if (i1 == 0) {
+       here = _jit->pc.w;
+       BLE(r0, ((i0 - here) >> 2) - 1);
+       return (here);
+    }
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lei(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when ler_u(r0, r1) holds.  The comparison result is
+ * put in a scratch register and tested with BNE.  Returns the code
+ * address sampled just before the branch instruction. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ler_u(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when lei_u(r0, i1) holds.  Against zero the unsigned
+ * test reduces to "r0 is zero", emitted as a single BEQ; otherwise the
+ * comparison result is put in a scratch register and tested with BNE.
+ * Returns the code address sampled just before the branch. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    if (i1 == 0) {
+       here = _jit->pc.w;
+       BEQ(r0, ((i0 - here) >> 2) - 1);
+       return (here);
+    }
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lei_u(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when eqr(r0, r1) holds.  The comparison result is put
+ * in a scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    eqr(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when eqi(r0, i1) holds.  A comparison against zero is
+ * a single BEQ on r0; otherwise the comparison result is put in a
+ * scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    if (i1 == 0) {
+       here = _jit->pc.w;
+       BEQ(r0, ((i0 - here) >> 2) - 1);
+       return (here);
+    }
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    eqi(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when ger(r0, r1) holds.  The comparison result is put
+ * in a scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ger(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when gei(r0, i1) holds.  A comparison against zero is
+ * a single BGE on r0; otherwise the comparison result is put in a
+ * scratch register and tested with BNE.  Returns the code address
+ * sampled just before the branch instruction. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    if (i1 == 0) {
+       here = _jit->pc.w;
+       BGE(r0, ((i0 - here) >> 2) - 1);
+       return (here);
+    }
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gei(rn(flag), r0, i1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when ger_u(r0, r1) holds.  The comparison result is
+ * put in a scratch register and tested with BNE.  Returns the code
+ * address sampled just before the branch instruction. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  here;
+    jit_int32_t flag;
+
+    flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    ger_u(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), ((i0 - here) >> 2) - 1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/* Branch to i0 when gei_u(r0, i1) holds.  Any unsigned value is >= 0,
+ * so a comparison against zero becomes an unconditional BR; otherwise
+ * the comparison result is put in a scratch register and tested with
+ * BNE.  Returns the code address sampled just before the branch
+ * instruction. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    /* always true if i1 == 0 */
+    if (i1 == 0) {
+       w = _jit->pc.w;
+       /* BR stores the return address in its register operand; use
+        * the zero register so that r0 is left untouched. */
+       BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       gei_u(rn(reg), r0, i1);
+       w = _jit->pc.w;
+       BNE(rn(reg), ((i0 - w) >> 2) - 1);
+       jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Emit a branch to i0 taken when r0 > r1 (signed): gtr into a scratch
+ * register, then BNE on that register.  Returns the address of the branch
+ * instruction. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gtr(rn(reg), r0, r1);
+    w = _jit->pc.w;
+    /* word displacement relative to the insn following the branch */
+    BNE(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Emit a branch to i0 taken when r0 > i1 (signed).
+ * Returns the address of the emitted branch instruction. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    if (i1 != 0) {
+       /* general case: gti into a scratch register, branch if it is set */
+       tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       gti(rn(tmp), r0, i1);
+       here = _jit->pc.w;
+       BNE(rn(tmp), ((i0 - here) >> 2) - 1);
+       jit_unget_reg(tmp);
+       return (here);
+    }
+    /* comparing against zero: BGT tests r0 directly */
+    here = _jit->pc.w;
+    BGT(r0, ((i0 - here) >> 2) - 1);
+    return (here);
+}
+
+/* Emit a branch to i0 taken when r0 > r1 (unsigned): gtr_u into a scratch
+ * register, then BNE on that register.  Returns the address of the branch
+ * instruction. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gtr_u(rn(reg), r0, r1);
+    w = _jit->pc.w;
+    /* word displacement relative to the insn following the branch */
+    BNE(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Emit a branch to i0 taken when r0 > i1 (unsigned): gti_u into a scratch
+ * register, then BNE on that register.  Unlike the signed variants there is
+ * no special case for i1 == 0.  Returns the address of the branch
+ * instruction. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gti_u(rn(reg), r0, i1);
+    w = _jit->pc.w;
+    /* word displacement relative to the insn following the branch */
+    BNE(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Emit a branch to i0 taken when r0 != r1: eqr into a scratch register,
+ * then BEQ on that register (i.e. branch when the equality result is
+ * zero).  Returns the address of the branch instruction. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    eqr(rn(reg), r0, r1);
+    w = _jit->pc.w;
+    /* word displacement relative to the insn following the branch */
+    BEQ(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Emit a branch to i0 taken when r0 differs from the immediate i1.
+ * Returns the address of the emitted branch instruction. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    if (i1 != 0) {
+       /* general case: eqi into a scratch register, branch if it is zero */
+       tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       eqi(rn(tmp), r0, i1);
+       here = _jit->pc.w;
+       BEQ(rn(tmp), ((i0 - here) >> 2) - 1);
+       jit_unget_reg(tmp);
+       return (here);
+    }
+    /* comparing against zero: BNE tests r0 directly */
+    here = _jit->pc.w;
+    BNE(r0, ((i0 - here) >> 2) - 1);
+    return (here);
+}
+
+/* Signed add with overflow test: r0 += r1, then branch to i0.
+ * With carry non-zero the branch is taken when the overflow flag computed
+ * below is set (BNE); with carry zero it is taken when the flag is clear
+ * (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    jit_int32_t                t3;
+    /* t0 = r0 + r1;   overflow = r1 < 0 ? r0 < t0 : t0 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    t3 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addr(rn(t0), r0, r1);              /* t0 = r0 + r1 */
+    ltr(rn(t1), r1, _R31_REGNO);       /* t1 = r1 < 0 */
+    ltr(rn(t2), r0, rn(t0));           /* t2 = r0 < t0 */
+    ltr(rn(t3), rn(t0), r0);           /* t3 = t0 < r0 */
+    movr(r0, rn(t0));                  /* r0 += r1 */
+    CMOVNE(rn(t1), rn(t2), rn(t3));    /* if (t1 != 0) t3 = t2; */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    /* t3 now holds the overflow flag and stays live until the branch */
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t3), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t3), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t3);
+    return (w);
+}
+
+/* Immediate form of _baddr: loads i1 into a scratch register and delegates
+ * to baddr with the same target and carry flag.  Returns baddr's result
+ * (the address of the branch instruction). */
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(reg), i1);
+    w = baddr(i0, r0, rn(reg), carry);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Unsigned add with carry test: r0 += r1, then branch to i0.
+ * The carry flag is (r0 + r1) < r0 compared unsigned.  With carry non-zero
+ * the branch is taken when the flag is set (BNE), otherwise when it is
+ * clear (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_baddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addr(rn(t0), r0, r1);
+    /* t1 = sum < original r0; must be computed before r0 is overwritten */
+    ltr_u(rn(t1), rn(t0), r0);
+    movr(r0, rn(t0));
+    jit_unget_reg(t0);
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t1), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t1), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t1);
+    return (w);
+}
+
+/* Unsigned add-immediate with carry test: r0 += i1, then branch to i0.
+ * The carry flag is (r0 + i1) < r0 compared unsigned.  With carry non-zero
+ * the branch is taken when the flag is set (BNE), otherwise when it is
+ * clear (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_baddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addi(rn(t0), r0, i1);
+    /* t1 = sum < original r0; must be computed before r0 is overwritten */
+    ltr_u(rn(t1), rn(t0), r0);
+    movr(r0, rn(t0));
+    jit_unget_reg(t0);
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t1), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t1), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t1);
+    return (w);
+}
+
+/* Signed subtract with overflow test: r0 -= r1, then branch to i0.
+ * With carry non-zero the branch is taken when the overflow flag computed
+ * below is set (BNE); with carry zero it is taken when the flag is clear
+ * (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    jit_int32_t                t3;
+    /* t0 = r0 - r1;   overflow = 0 < r1 ? r0 < t0 : t0 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    t3 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subr(rn(t0), r0, r1);              /* t0 = r0 - r1 */
+    ltr(rn(t1), _R31_REGNO, r1);       /* t1 = 0 < r1 */
+    ltr(rn(t2), r0, rn(t0));           /* t2 = r0 < t0 */
+    ltr(rn(t3), rn(t0), r0);           /* t3 = t0 < r0 */
+    movr(r0, rn(t0));                  /* r0 -= r1 */
+    CMOVNE(rn(t1), rn(t2), rn(t3));    /* if (t1 != 0) t3 = t2; */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    /* t3 now holds the overflow flag and stays live until the branch */
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t3), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t3), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t3);
+    return (w);
+}
+
+/* Immediate form of _bsubr: loads i1 into a scratch register and delegates
+ * to bsubr with the same target and carry flag.  Returns bsubr's result
+ * (the address of the branch instruction). */
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(reg), i1);
+    w = bsubr(i0, r0, rn(reg), carry);
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Unsigned subtract with borrow test: r0 -= r1, then branch to i0.
+ * The borrow flag is r0 < (r0 - r1) compared unsigned.  With carry non-zero
+ * the branch is taken when the flag is set (BNE), otherwise when it is
+ * clear (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_bsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subr(rn(t0), r0, r1);
+    /* t1 = original r0 < difference; computed before r0 is overwritten */
+    ltr_u(rn(t1), r0, rn(t0));
+    movr(r0, rn(t0));
+    jit_unget_reg(t0);
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t1), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t1), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t1);
+    return (w);
+}
+
+/* Unsigned subtract-immediate with borrow test: r0 -= i1, then branch to
+ * i0.  The borrow flag is r0 < (r0 - i1) compared unsigned.  With carry
+ * non-zero the branch is taken when the flag is set (BNE), otherwise when
+ * it is clear (BEQ).  Returns the address of the branch instruction. */
+static jit_word_t
+_bsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subi(rn(t0), r0, i1);
+    /* t1 = original r0 < difference; computed before r0 is overwritten */
+    ltr_u(rn(t1), r0, rn(t0));
+    movr(r0, rn(t0));
+    jit_unget_reg(t0);
+    w = _jit->pc.w;
+    if (carry)
+       BNE(rn(t1), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t1), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t1);
+    return (w);
+}
+
+/* Bit-mask branch: computes r0 & r1 into a scratch register and branches to
+ * i0 when the result is non-zero (set != 0, BNE) or zero (set == 0, BEQ).
+ * r0 and r1 are not modified.  Returns the address of the branch
+ * instruction. */
+static jit_word_t
+_bmxr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t set)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    andr(rn(t0), r0, r1);
+    w = _jit->pc.w;
+    if (set)
+       BNE(rn(t0), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t0), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Bit-mask branch with immediate mask: computes r0 & i1 into a scratch
+ * register and branches to i0 when the result is non-zero (set != 0, BNE)
+ * or zero (set == 0, BEQ).  Returns the address of the branch
+ * instruction. */
+static jit_word_t
+_bmxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t set)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    andi(rn(t0), r0, i1);
+    w = _jit->pc.w;
+    if (set)
+       BNE(rn(t0), ((i0 - w) >> 2) - 1);
+    else
+       BEQ(rn(t0), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Load a signed byte from the address in r1 into r0: unsigned byte load
+ * followed by extr_c on the result. */
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    ldr_uc(r0, r1);
+    extr_c(r0, r0);
+}
+
+/* Load a signed byte from the absolute address i0 into r0. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDBU(r0, _R31_REGNO, _u16(i0));
+       extr_c(r0, r0);
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned byte from the absolute address i0 into r0. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDBU(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a signed 16-bit value from the address in r1 into r0: unsigned
+ * 16-bit load followed by extr_s on the result. */
+static void
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    ldr_us(r0, r1);
+    extr_s(r0, r0);
+}
+
+/* Load a signed 16-bit value from the absolute address i0 into r0. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDWU(r0, _R31_REGNO, _u16(i0));
+       extr_s(r0, r0);
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned 16-bit value from the absolute address i0 into r0. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDWU(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a 32-bit value (LDL) from the absolute address i0 into r0. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDL(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned 32-bit value from the address in r1 into r0: 32-bit
+ * load (ldr_i) followed by extr_ui on the result. */
+static void
+_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    ldr_i(r0, r1);
+    extr_ui(r0, r0);
+}
+
+/* Load an unsigned 32-bit value from the absolute address i0 into r0. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: LDL off $31, then extr_ui on the result */
+       LDL(r0, _R31_REGNO, _u16(i0));
+       extr_ui(r0, r0);
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a 64-bit value (LDQ) from the absolute address i0 into r0. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       LDQ(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed signed byte load: r0 = *(r1 + r2).  The effective address is
+ * formed in a scratch register and handed to ldr_c. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_c(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Signed byte load from r1 + i0 into r0. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDBU(r0, r1, _u16(i0));
+       extr_c(r0, r0);
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_c */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed unsigned byte load: r0 = *(r1 + r2).  The effective address is
+ * formed in a scratch register and handed to ldr_uc. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_uc(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Unsigned byte load from r1 + i0 into r0. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDBU(r0, r1, _u16(i0));
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_uc */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed signed 16-bit load: r0 = *(r1 + r2).  The effective address is
+ * formed in a scratch register and handed to ldr_s. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_s(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Signed 16-bit load from r1 + i0 into r0. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDWU(r0, r1, _u16(i0));
+       extr_s(r0, r0);
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_s */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed unsigned 16-bit load: r0 = *(r1 + r2).  The effective address is
+ * formed in a scratch register and handed to ldr_us. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_us(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Unsigned 16-bit load from r1 + i0 into r0. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDWU(r0, r1, _u16(i0));
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_us */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed 32-bit load: r0 = *(r1 + r2).  The effective address is formed
+ * in a scratch register and handed to ldr_i. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_i(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* 32-bit load (LDL) from r1 + i0 into r0. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDL(r0, r1, _u16(i0));
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_i */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed unsigned 32-bit load: r0 = *(r1 + r2).  The effective address is
+ * formed in a scratch register and handed to ldr_ui. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_ui(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Unsigned 32-bit load from r1 + i0 into r0. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: LDL off r1, then extr_ui on the result */
+       LDL(r0, r1, _u16(i0));
+       extr_ui(r0, r0);
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_ui */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed 64-bit load: r0 = *(r1 + r2).  The effective address is formed
+ * in a scratch register and handed to ldr_l. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_l(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* 64-bit load (LDQ) from r1 + i0 into r0. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r1 */
+       LDQ(r0, r1, _u16(i0));
+       return;
+    }
+    /* otherwise form r1 + i0 in a scratch register and reuse ldr_l */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Store the low byte of r0 (STB) at the absolute address i0. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       STB(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_c(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the low 16 bits of r0 (STW) at the absolute address i0. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       STW(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_s(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the low 32 bits of r0 (STL) at the absolute address i0. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       STL(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_i(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the 64-bit value of r0 (STQ) at the absolute address i0. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: use it as the displacement off $31 */
+       STQ(r0, _R31_REGNO, _u16(i0));
+       return;
+    }
+    /* otherwise materialize the address in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_l(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed byte store: *(r0 + r1) = r2.  The effective address is formed in
+ * a scratch register and handed to str_c. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_c(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/* Byte store of r1 at address r0 + i0. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r0 */
+       STB(r1, r0, _u16(i0));
+       return;
+    }
+    /* otherwise form r0 + i0 in a scratch register and reuse str_c */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_c(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed 16-bit store: *(r0 + r1) = r2.  The effective address is formed
+ * in a scratch register and handed to str_s. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_s(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/* 16-bit store of r1 at address r0 + i0. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r0 */
+       STW(r1, r0, _u16(i0));
+       return;
+    }
+    /* otherwise form r0 + i0 in a scratch register and reuse str_s */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_s(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed 32-bit store: *(r0 + r1) = r2.  The effective address is formed
+ * in a scratch register and handed to str_i. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_i(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/* 32-bit store of r1 at address r0 + i0. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r0 */
+       STL(r1, r0, _u16(i0));
+       return;
+    }
+    /* otherwise form r0 + i0 in a scratch register and reuse str_i */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_i(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed 64-bit store: *(r0 + r1) = r2.  The effective address is formed
+ * in a scratch register and handed to str_l. */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_l(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/* 64-bit store of r1 at address r0 + i0. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (_s16_p(i0)) {
+       /* i0 passes _s16_p: encode it as the displacement off r0 */
+       STQ(r1, r0, _u16(i0));
+       return;
+    }
+    /* otherwise form r0 + i0 in a scratch register and reuse str_l */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_l(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Sign-extend the low 8 bits of r1 into r0: shift left by 56, then shift
+ * right by 56 with rshi (the signed right shift). */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 56);
+    rshi(r0, r0, 56);
+}
+
+/* Zero-extend the low 8 bits of r1 into r0: shift left by 56, then shift
+ * right by 56 with rshi_u (the unsigned right shift). */
+static void
+_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 56);
+    rshi_u(r0, r0, 56);
+}
+
+/* Sign-extend the low 16 bits of r1 into r0: shift left by 48, then shift
+ * right by 48 with rshi (the signed right shift). */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 48);
+    rshi(r0, r0, 48);
+}
+
+/* Zero-extend the low 16 bits of r1 into r0: shift left by 48, then shift
+ * right by 48 with rshi_u (the unsigned right shift). */
+static void
+_extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 48);
+    rshi_u(r0, r0, 48);
+}
+
+/* Sign-extend the low 32 bits of r1 into r0: shift left by 32, then shift
+ * right by 32 with rshi (the signed right shift). */
+static void
+_extr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 32);
+    rshi(r0, r0, 32);
+}
+
+/* Zero-extend the low 32 bits of r1 into r0: shift left by 32, then shift
+ * right by 32 with rshi_u (the unsigned right shift). */
+static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 32);
+    rshi_u(r0, r0, 32);
+}
+
+/* 16-bit byte swap of r1 into r0.  Bytes 0 and 1 of r1 are extracted
+ * separately and recombined in the opposite order; all higher bits of the
+ * result are zero.  r1 is read for the last time by the second EXTBLi,
+ * which is also the first write to r0, so r0 may be the same register as
+ * r1. */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    EXTBLi(r1, 0, rn(t0));             /* t0 = byte 0 of r1 */
+    EXTBLi(r1, 1, r0);                 /* r0 = byte 1 of r1 */
+    SLLi(rn(t0), 8, rn(t0));           /* move byte 0 up to byte position 1 */
+    OR(r0, rn(t0), r0);
+    jit_unget_reg(t0);
+}
+
+/* 32-bit byte swap of r1 into r0 using four scratch registers, one per
+ * destination byte.  All reads of r1 happen before the first write to r0,
+ * so r0 may be the same register as r1. */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    jit_int32_t                t3;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    t3 = jit_get_reg(jit_class_gpr);
+    EXTBLi(r1, 3, rn(t0));             /* t0 = byte 3 of r1, at position 0 */
+    INSBLi(r1, 3, rn(t1));             /* t1 = byte 0 of r1, at position 3 */
+    SLLi(r1, 8, rn(t2));
+    ZAPNOTi(rn(t2), 4, rn(t2));                /* t2 = byte 1 of r1, at position 2 */
+    SRLi(r1, 8, rn(t3));
+    OR(rn(t0), rn(t1), r0);
+    OR(rn(t2), r0, r0);
+    ZAPNOTi(rn(t3), 2, rn(t3));                /* t3 = byte 2 of r1, at position 1 */
+    OR(rn(t3), r0, r0);
+    jit_unget_reg(t3);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/* 64-bit byte swap of r1 into r0.
+ * The value is bounced through the 8 bytes at -8(FP): it is stored from the
+ * integer register, reloaded with LDG into a floating-point register and
+ * stored back with STT, which (per the inline byte diagrams) reverses the
+ * order of the 16-bit words.  The two bytes inside each word are then
+ * exchanged with shifts and byte masks.  Requires an active function
+ * (asserted) because it uses the frame pointer. */
+static void
+_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    assert(_jitc->function != NULL);
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    stxi(-8, _FP_REGNO, r1);                   /* r1 = ABCD EFGH */
+    LDG(rn(t0), _FP_REGNO, _u16(-8));          /* t0 = GHEF CDAB */
+    STT(rn(t0), _FP_REGNO, _u16(-8));
+    ldxi(rn(t1), _FP_REGNO, -8);               /* t1 = GHEF CDAB */
+    lshi(rn(t2), rn(t1), 8);                   /* t2 = HEFC DAB. */
+    rshi_u(rn(t1), rn(t1), 8);                 /* t1 = .GHE FCDA */
+    ZAPi(rn(t2), 0x55, rn(t2));                        /* t2 = H.F. D.B. */
+    ZAPi(rn(t1), 0xaa, rn(t1));                        /* t1 = .G.E .C.A */
+    orr(r0, rn(t1), rn(t2));                   /* r0 = HGFE DCBA */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/* Unconditional jump to the absolute address i0. */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         here;
+    jit_word_t         disp;
+    here = _jit->pc.w;
+    /* word displacement relative to the insn following the branch */
+    disp = ((i0 - here) >> 2) - 1;
+    if (_s21_p(disp)) {
+       /* displacement passes _s21_p: a single BR, link discarded in $31 */
+       BR(_R31_REGNO, disp);
+       return;
+    }
+    /* out of range: load the address and jump through a register */
+    (void)jmpi_p(i0);
+}
+
+/* Jump to i0 through a scratch register loaded with movi_p.  Returns the
+ * value returned by movi_p (presumably the address of the load sequence,
+ * for later patching -- confirm against movi_p). */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    w = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Call the function whose address is in r0.  The address is first copied
+ * into the PV register (unless it is already there), then JSR is emitted
+ * through PV with the return address written to RA. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 != _PV_REGNO)
+       MOV(r0, _PV_REGNO);
+    JSR(_RA_REGNO, _PV_REGNO, 0);
+}
+
+/* Call the function at the absolute address i0.  The active path always
+ * loads i0 into PV and calls through callr; the PC-relative BSR variant is
+ * compiled out with #if 0 (see the FIXME below). */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    /* FIXME use a small buffer to load constants - using gp */
+#if 0
+    jit_word_t         w;
+    jit_word_t         d;
+    w = _jit->pc.w;
+    d = ((i0 - w) >> 2) - 1;
+    if (_s21_p(d))
+       BSR(_RA_REGNO, d);
+    else
+       (void)calli_p(i0);
+#else
+    movi(_PV_REGNO, i0);
+    callr(_PV_REGNO);
+#endif
+}
+
+/* Call i0 through PV, loading the address with movi_p.  Returns the value
+ * returned by movi_p (presumably the address of the load sequence, for
+ * later patching -- confirm against movi_p). */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    w = movi_p(_PV_REGNO, i0);
+    callr(_PV_REGNO);
+    return (w);
+}
+
+/* Emit the prolog of the current function (_jitc->function).
+ * Computes the local stack size, allocates the fixed save area of
+ * stack_framesize bytes, saves RA, FP and every callee-save register marked
+ * in the function's regset, sets FP to SP, allocates the locals and, for
+ * varargs functions, dumps the remaining argument registers to the frame.
+ * Emits nothing when the function has assume_frame set.  The node argument
+ * is not used here. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    /* round the (negative) local offset down to a multiple of 8 */
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
+    /* local area size, rounded up to a multiple of 8 */
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                              _jitc->function->self.aoff) + 7) & -8;
+    /* ldgp gp, 0(pv) */
+    /* NOTE(review): the load/store macros used elsewhere in this file take
+     * (ra, rb, disp).  If LDAH follows the same order, the next line encodes
+     * "ldah pv, 0(gp)" rather than the "ldah gp, 0(pv)" that ldgp expands
+     * to -- confirm against the LDAH macro definition. */
+    LDAH(_PV_REGNO, _GP_REGNO, 0);
+    LDA(_GP_REGNO, _GP_REGNO, 0);
+    /* callee save registers */
+    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    stxi(0, _SP_REGNO, _RA_REGNO);
+    stxi(8, _SP_REGNO, _FP_REGNO);
+    /* SPILL/SPILLD store register N at offset O only if the function's
+     * regset has N marked; offsets must match LOAD/LOADD in _epilog */
+#  define SPILL(N, O)                                                  \
+    if (jit_regset_tstbit(&_jitc->function->regset, N))                        \
+       stxi(O, _SP_REGNO, N##_REGNO)
+#  define SPILLD(N, O)                                                 \
+    if (jit_regset_tstbit(&_jitc->function->regset, N))                        \
+       stxi_d(O, _SP_REGNO, N##_REGNO)
+    SPILL(_S0, 16);
+    SPILL(_S1, 24);
+    SPILL(_S2, 32);
+    SPILL(_S3, 40);
+    SPILL(_S4, 48);
+    SPILL(_S5, 56);
+    SPILLD(_F2, 64);
+    SPILLD(_F3, 72);
+    SPILLD(_F4, 80);
+    SPILLD(_F5, 88);
+    SPILLD(_F6, 96);
+    SPILLD(_F7, 104);
+    SPILLD(_F8, 112);
+    SPILLD(_F9, 120);
+#  undef SPILLD
+#  undef SPILL
+    movr(_FP_REGNO, _SP_REGNO);
+    /* alloca */
+    if (_jitc->function->stack)
+       subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       /* record the current local offset in the frame slot at aoffoff */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* save the argument registers not consumed by named arguments:
+        * integer ones at stack_framesize - 48, float ones at
+        * stack_framesize - 96, each indexed by argument number */
+       for (reg = _jitc->function->self.argi; jit_arg_reg_p(reg); ++reg)
+           stxi(stack_framesize - 48 + reg * 8, _FP_REGNO, rn(_A0 - reg));
+       for (reg = _jitc->function->self.argi; jit_arg_reg_p(reg); ++reg)
+           stxi_d(stack_framesize - 96 + reg * 8, _FP_REGNO, rn(_F16 - reg));
+    }
+}
+
+/* Emit the epilog of the current function: restore SP from FP, reload RA,
+ * FP and every callee-save register marked in the function's regset from
+ * the same offsets _prolog stored them at, release the stack_framesize
+ * save area and return through RA.  Emits nothing when the function has
+ * assume_frame set.  The node argument is not used here. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    movr(_SP_REGNO, _FP_REGNO);
+    ldxi(_RA_REGNO, _SP_REGNO, 0);
+    ldxi(_FP_REGNO, _SP_REGNO, 8);
+    /* LOAD/LOADD reload register N from offset O only if the function's
+     * regset has N marked */
+#  define LOAD(N, O)                                                   \
+    if (jit_regset_tstbit(&_jitc->function->regset, N))                        \
+       ldxi(N##_REGNO, _SP_REGNO,  O)
+#  define LOADD(N, O)                                                  \
+    if (jit_regset_tstbit(&_jitc->function->regset, N))                        \
+       ldxi_d(N##_REGNO, _SP_REGNO,  O)
+    LOAD(_S0, 16);
+    LOAD(_S1, 24);
+    LOAD(_S2, 32);
+    LOAD(_S3, 40);
+    LOAD(_S4, 48);
+    LOAD(_S5, 56);
+    LOADD(_F2, 64);
+    LOADD(_F3, 72);
+    LOADD(_F4, 80);
+    LOADD(_F5, 88);
+    LOADD(_F6, 96);
+    LOADD(_F7, 104);
+    LOADD(_F8, 112);
+    LOADD(_F9, 120);
+#  undef LOADD
+#  undef LOAD
+    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    RET(_R31_REGNO, _RA_REGNO, 1);     /* 1 means procedure return
+                                        * 0 means no procedure return
+                                        * other values are reserved */
+}
+
+/* Initialize the function's jit_va_list_t and leave its address in r0.
+ * The structure lives at FP + vaoff.  Its base field is set to
+ * FP + stack_framesize - 48 (where _prolog saves the integer argument
+ * registers) and its offset field to the byte offset of the first
+ * variadic argument relative to base. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+
+    /* Return jit_va_list_t in the register argument */
+    addi(r0, _FP_REGNO, _jitc->function->vaoff);
+
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* The base field is constant. */
+    addi(rn(reg), _FP_REGNO, stack_framesize - 48);
+    stxi(offsetof(jit_va_list_t, base), r0, rn(reg));
+
+    /* Initialize the offset field */
+    /* fewer than 6 named arguments: the next one is in the register save
+     * area; otherwise the offset is derived from self.size */
+    if (_jitc->function->vagp < 6)
+       movi(rn(reg), _jitc->function->vagp * 8);
+    else
+       movi(rn(reg), _jitc->function->self.size - (stack_framesize - 48));
+    stxi(offsetof(jit_va_list_t, offset), r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+/* Fetch the next variadic argument into r0.  r1 holds the address of the
+ * jit_va_list_t; the word at base + offset is loaded and the stored offset
+ * is advanced by 8.  Only valid in a function declared with varargs
+ * (asserted). */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0, rg1;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the base in first temporary. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, base));
+
+    /* Load the offset in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, offset));
+
+    /* Load the argument */
+    ldxr(r0, rn(rg0), rn(rg1));
+
+    /* No longer needed. */
+    jit_unget_reg(rg0);
+
+    /* Update offset. */
+    addi(rn(rg1), rn(rg1), 8);
+    stxi(offsetof(jit_va_list_t, offset), r1, rn(rg1));
+    jit_unget_reg(rg1);
+}
+
+/* Patch previously emitted code at address instr so that it refers to
+ * label.  The major opcode (top 6 bits of the first instruction word)
+ * selects the patch:
+ *   - branch-format opcodes: rewrite the 21-bit word displacement;
+ *   - LDA (0x08): treated as the start of a movi_p sequence; the four
+ *     16-bit pieces of label are written into the displacement fields of
+ *     the first four instructions and the shape of the remaining four is
+ *     asserted.
+ * Any other opcode aborts. */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    jit_word_t          d;
+    jit_int16_t                s0, s1, s2, s3;
+    u.w = instr;
+    switch (_u6(u.i[0] >> 26)) {
+       /*   BLT             BLE             BEQ             BGE */
+       case 0x3a:      case 0x3b:      case 0x39:      case 0x3e:
+       /*   BGT             BNE             BLBC            BLBS */
+       case 0x3f:      case 0x3d:      case 0x38:      case 0x3c:
+       /*   BR              BSR */
+       case 0x30:      case 0x34:
+       /*   FBLT            FBLE            FBEQ            FBGE */
+       case 0x32:      case 0x33:      case 0x31:      case 0x36:
+       /*   FBGT            FBNE */
+       case 0x37:      case 0x35:
+           /* word displacement relative to the insn following the branch */
+           d = ((label - instr) >> 2) - 1;
+           assert(_s21_p(d));
+           u.i[0] &= ~0x1fffff;
+           u.i[0] |= _u21(d);
+           break;
+       /*   LDA */
+       case 0x08:              /* movi_p */
+           /* split label into four signed 16-bit pieces, low to high */
+           s0 = label;
+           s1 = label >> 16;
+           s2 = label >> 32;
+           s3 = label >> 48;
+           /* presumably compensates for the sign extension of the lower
+            * piece when it is added by LDA -- confirm against movi_p */
+           if (s0 < 0)
+               ++s1;
+           if (s2 < 0)
+               ++s3;
+           u.i[0] &= ~0xffff;
+           u.i[0] |= _u16(s0);
+           /*                          LDA */
+           assert(_u6(u.i[1] >> 26) == 0x08);
+           u.i[1] &= ~0xffff;
+           u.i[1] |= _u16(s2);
+           /*                          LDAH */
+           assert(_u6(u.i[2] >> 26) == 0x09);
+           u.i[2] &= ~0xffff;
+           u.i[2] |= _u16(s1);
+           /*                          LDAH */
+           assert(_u6(u.i[3] >> 26) == 0x09);
+           u.i[3] &= ~0xffff;
+           u.i[3] |= _u16(s3);
+           /* instructions 4..7 carry no immediate; only their opcode and
+            * function code are checked */
+           /*                          SLL */
+           assert(_u6(u.i[4] >> 26) == 0x12 && _u7(u.i[4] >> 5) == 0x39);
+           /*                          SRL */
+           assert(_u6(u.i[5] >> 26) == 0x12 && _u7(u.i[5] >> 5) == 0x34);
+           /*                          SLL */
+           assert(_u6(u.i[6] >> 26) == 0x12 && _u7(u.i[6] >> 5) == 0x39);
+           /*                          BIS */
+           assert(_u6(u.i[7] >> 26) == 0x11 && _u7(u.i[7] >> 5) == 0x20);
+           break;
+       default:
+           abort();
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_alpha-fpu.c b/deps/lightning/lib/jit_alpha-fpu.c
new file mode 100644 (file)
index 0000000..ea5c746
--- /dev/null
@@ -0,0 +1,1588 @@
+/*
+ * Copyright (C) 2014-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define _F2_REGNO                    0x02    /* _F2.._F9: register numbers 2..9; presumably $f2-$f9 -- confirm */
+#  define _F3_REGNO                    0x03
+#  define _F4_REGNO                    0x04
+#  define _F5_REGNO                    0x05
+#  define _F6_REGNO                    0x06
+#  define _F7_REGNO                    0x07
+#  define _F8_REGNO                    0x08
+#  define _F9_REGNO                    0x09
+#  define F_P(o,ra,rb,f,rc)            _Opr(_jit,o,ra,rb,f,rc) /* shorthand: forwards o,ra,rb,f,rc to _Opr() using the in-scope _jit */
+static void _Opr(jit_state_t*,int,int,int,unsigned int,int); /* prototype only; arguments after the state follow F_P order */
+#  define ADDF(ra,rb,rc)               F_P(0x15,ra,rb,0x080,rc)
+#  define ADDG(ra,rb,rc)               F_P(0x15,ra,rb,0x0a0,rc)
+#  define ADDS(ra,rb,rc)               F_P(0x16,ra,rb,0x080,rc)
+#  define ADDS_C(ra,rb,rc)             F_P(0x16,ra,rb,0x000,rc)
+#  define ADDS_M(ra,rb,rc)             F_P(0x16,ra,rb,0x040,rc)
+#  define ADDS_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0c0,rc)
+#  define ADDS_U(ra,rb,rc)             F_P(0x16,ra,rb,0x180,rc)
+#  define ADDS_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x100,rc)
+#  define ADDS_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x140,rc)
+#  define ADDS_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1c0,rc)
+#  define ADDS_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x580,rc)
+#  define ADDS_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x500,rc)
+#  define ADDS_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x540,rc)
+#  define ADDS_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5c0,rc)
+#  define ADDS_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x780,rc)
+#  define ADDS_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x700,rc)
+#  define ADDS_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x740,rc)
+#  define ADDS_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7c0,rc)
+#  define ADDT(ra,rb,rc)               F_P(0x16,ra,rb,0x0a0,rc)
+#  define ADDT_C(ra,rb,rc)             F_P(0x16,ra,rb,0x020,rc)
+#  define ADDT_M(ra,rb,rc)             F_P(0x16,ra,rb,0x060,rc)
+#  define ADDT_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0e0,rc)
+#  define ADDT_U(ra,rb,rc)             F_P(0x16,ra,rb,0x1a0,rc)
+#  define ADDT_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x120,rc)
+#  define ADDT_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x160,rc)
+#  define ADDT_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1e0,rc)
+#  define ADDT_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x5a0,rc)
+#  define ADDT_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x520,rc)
+#  define ADDT_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x560,rc)
+#  define ADDT_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5e0,rc)
+#  define ADDT_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x7a0,rc)
+#  define ADDT_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x720,rc)
+#  define ADDT_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x760,rc)
+#  define ADDT_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7e0,rc)
+#  define CMPGEQ(ra,rb,rc)             F_P(0x15,ra,rb,0x0a5,rc)
+#  define CMPGLE(ra,rb,rc)             F_P(0x15,ra,rb,0x0a7,rc)
+#  define CMPTEQ(ra,rb,rc)             F_P(0x16,ra,rb,0x0a5,rc)
+#  define CMPTEQ_SU(ra,rb,rc)          F_P(0x16,ra,rb,0x5a5,rc)
+#  define CMPTLE(ra,rb,rc)             F_P(0x16,ra,rb,0x0a7,rc)
+#  define CMPTLE_SU(ra,rb,rc)          F_P(0x16,ra,rb,0x5a7,rc)
+#  define CMPTLT(ra,rb,rc)             F_P(0x16,ra,rb,0x0a6,rc)
+#  define CMPTLT_SU(ra,rb,rc)          F_P(0x16,ra,rb,0x5a6,rc)
+#  define CMPTUN(ra,rb,rc)             F_P(0x16,ra,rb,0x0a4,rc)
+#  define CMPTUN_SU(ra,rb,rc)          F_P(0x16,ra,rb,0x5a4,rc)
+#  define CPYS(ra,rb,rc)               F_P(0x17,ra,rb,0x020,rc)
+#  define CPYSE(ra,rb,rc)              F_P(0x17,ra,rb,0x022,rc)
+#  define CPYSN(ra,rb,rc)              F_P(0x17,ra,rb,0x021,rc)
+#  define DIVF(ra,rb,rc)               F_P(0x15,ra,rb,0x083,rc)
+#  define DIVG(ra,rb,rc)               F_P(0x15,ra,rb,0x0a3,rc)
+#  define DIVS(ra,rb,rc)               F_P(0x16,ra,rb,0x083,rc)
+#  define DIVS_C(ra,rb,rc)             F_P(0x16,ra,rb,0x003,rc)
+#  define DIVS_M(ra,rb,rc)             F_P(0x16,ra,rb,0x043,rc)
+#  define DIVS_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0c3,rc)
+#  define DIVS_U(ra,rb,rc)             F_P(0x16,ra,rb,0x183,rc)
+#  define DIVS_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x103,rc)
+#  define DIVS_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x143,rc)
+#  define DIVS_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1c3,rc)
+#  define DIVS_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x583,rc)
+#  define DIVS_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x503,rc)
+#  define DIVS_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x543,rc)
+#  define DIVS_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5c3,rc)
+#  define DIVS_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x783,rc)
+#  define DIVS_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x703,rc)
+#  define DIVS_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x743,rc)
+#  define DIVS_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7c3,rc)
+#  define DIVT(ra,rb,rc)               F_P(0x16,ra,rb,0x0a3,rc)
+#  define DIVT_C(ra,rb,rc)             F_P(0x16,ra,rb,0x023,rc)
+#  define DIVT_M(ra,rb,rc)             F_P(0x16,ra,rb,0x063,rc)
+#  define DIVT_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0e3,rc)
+#  define DIVT_U(ra,rb,rc)             F_P(0x16,ra,rb,0x1a3,rc)
+#  define DIVT_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x123,rc)
+#  define DIVT_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x163,rc)
+#  define DIVT_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1e3,rc)
+#  define DIVT_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x5a3,rc)
+#  define DIVT_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x523,rc)
+#  define DIVT_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x563,rc)
+#  define DIVT_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5e3,rc)
+#  define DIVT_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x7a3,rc)
+#  define DIVT_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x723,rc)
+#  define DIVT_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x763,rc)        /* 0x700 (/SUI) | 0x063 (DIVT_M) */
+#  define DIVT_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7e3,rc)
+#  define CVTDG(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x09e,rc)
+#  define CVTGD(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x0ad,rc)
+#  define CVTGF(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x0ac,rc)
+#  define CVTGQ(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x0af,rc)
+#  define CVTLQ(rb,rc)                 F_P(0x17,_R31_REGNO,rb,0x010,rc)
+#  define CVTQF(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x0bc,rc)
+#  define CVTQG(rb,rc)                 F_P(0x15,_R31_REGNO,rb,0x0be,rc)
+#  define CVTQL(rb,rc)                 F_P(0x17,_R31_REGNO,rb,0x030,rc)
+#  define CVTQS(rb,rc)                 F_P(0x16,_R31_REGNO,rb,0x0bc,rc)
+#  define CVTQS_C(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x03c,rc)
+#  define CVTQS_M(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x07c,rc)
+#  define CVTQS_D(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x0fc,rc)
+#  define CVTQS_SUI(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x7bc,rc)
+#  define CVTQS_SUIC(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x73c,rc)
+#  define CVTQS_SUIM(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x77c,rc)
+#  define CVTQS_SUID(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x7fc,rc)
+#  define CVTQT(rb,rc)                 F_P(0x16,_R31_REGNO,rb,0x0be,rc)
+#  define CVTQT_C(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x03e,rc)
+#  define CVTQT_M(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x07e,rc)        /* /M form of CVTQT; cf. CVTQS_M 0x07c, CVTQT_SUIM 0x77e */
+#  define CVTQT_D(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x0fe,rc)
+#  define CVTQT_SUI(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x7be,rc)
+#  define CVTQT_SUIC(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x73e,rc)
+#  define CVTQT_SUIM(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x77e,rc)
+#  define CVTQT_SUID(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x7fe,rc)
+#  define CVTST(rb,rc)                 F_P(0x16,_R31_REGNO,rb,0x2ac,rc)
+#  define CVTST_S(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x6ac,rc)
+#  define CVTTQ(rb,rc)                 F_P(0x16,_R31_REGNO,rb,0x0af,rc)
+#  define CVTTQ_C(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x02f,rc)
+#  define CVTTQ_V(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x1af,rc)
+#  define CVTTQ_VC(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x12f,rc)
+#  define CVTTQ_SV(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x5af,rc)
+#  define CVTTQ_SVC(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x52f,rc)
+#  define CVTTQ_SVI(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x7af,rc)
+#  define CVTTQ_SVIC(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x72f,rc)
+#  define CVTTQ_D(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x0ef,rc)
+#  define CVTTQ_VD(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x1ef,rc)
+#  define CVTTQ_SVD(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x5ef,rc)
+#  define CVTTQ_SVID(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x7ef,rc)
+#  define CVTTQ_M(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x06f,rc)
+#  define CVTTQ_VM(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x16f,rc)
+#  define CVTTQ_SVM(rb,rc)             F_P(0x16,_R31_REGNO,rb,0x56f,rc)
+#  define CVTTQ_SVIM(rb,rc)            F_P(0x16,_R31_REGNO,rb,0x76f,rc)
+#  define CVTTS(rb,rc)                 F_P(0x16,_R31_REGNO,rb,0x0ac,rc)
+#  define CVTTS_C(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x02c,rc)
+#  define CVTTS_M(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x06c,rc)
+#  define CVTTS_D(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x0ec,rc)
+#  define CVTTS_U(rb,rc)               F_P(0x16,_R31_REGNO,rb,0x1ac,rc)
+#  define CVTTS_UC(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x12c,rc)
+#  define CVTTS_UM(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x16c,rc)
+#  define CVTTS_UD(rb,rc)              F_P(0x16,_R31_REGNO,rb,0x1ec,rc)
+#  define FBEQ(ra,d)                   Bra(0x31,ra,d)  /* FB* family: Bra(opcode, ra, d); d is presumably the branch displacement -- confirm against Bra */
+#  define FBGE(ra,d)                   Bra(0x36,ra,d)
+#  define FBGT(ra,d)                   Bra(0x37,ra,d)
+#  define FBLE(ra,d)                   Bra(0x33,ra,d)
+#  define FBLT(ra,d)                   Bra(0x32,ra,d)
+#  define FBNE(ra,d)                   Bra(0x35,ra,d)
+#  define FCMOVEQ(ra,rb,rc)            F_P(0x17,ra,rb,0x02a,rc)
+#  define FCMOVGE(ra,rb,rc)            F_P(0x17,ra,rb,0x02d,rc)
+#  define FCMOVGT(ra,rb,rc)            F_P(0x17,ra,rb,0x02f,rc)
+#  define FCMOVLE(ra,rb,rc)            F_P(0x17,ra,rb,0x02e,rc)
+#  define FCMOVLT(ra,rb,rc)            F_P(0x17,ra,rb,0x02c,rc)
+#  define FCMOVNE(ra,rb,rc)            F_P(0x17,ra,rb,0x02b,rc)
+#  define FTOIS(ra,rc)                 F_P(0x1c,ra,_R31_REGNO,0x078,rc)
+#  define FTOIT(ra,rc)                 F_P(0x1c,ra,_R31_REGNO,0x070,rc)
+#  define ITOFF(ra,rc)                 F_P(0x14,ra,_R31_REGNO,0x014,rc)
+#  define ITOFS(ra,rc)                 F_P(0x14,ra,_R31_REGNO,0x004,rc)
+#  define ITOFT(ra,rc)                 F_P(0x14,ra,_R31_REGNO,0x024,rc)
+#  define LDF(ra,rb,d)                 Mem(0x20,ra,rb,d)
+#  define LDG(ra,rb,d)                 Mem(0x21,ra,rb,d)
+#  define LDS(ra,rb,d)                 Mem(0x22,ra,rb,d)
+#  define LDT(ra,rb,d)                 Mem(0x23,ra,rb,d)
+#  define MF_FPCR(ra)                  F_P(0x17,ra,ra,0x025,ra)
+#  define MT_FPCR(ra)                  F_P(0x17,ra,ra,0x024,ra)
+#  define MULF(ra,rb,rc)               F_P(0x15,ra,rb,0x082,rc)
+#  define MULG(ra,rb,rc)               F_P(0x15,ra,rb,0x0a2,rc)
+#  define MULS(ra,rb,rc)               F_P(0x16,ra,rb,0x082,rc)
+#  define MULS_C(ra,rb,rc)             F_P(0x16,ra,rb,0x002,rc)
+#  define MULS_M(ra,rb,rc)             F_P(0x16,ra,rb,0x042,rc)
+#  define MULS_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0c2,rc)
+#  define MULS_U(ra,rb,rc)             F_P(0x16,ra,rb,0x182,rc)
+#  define MULS_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x102,rc)
+#  define MULS_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x142,rc)
+#  define MULS_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1c2,rc)
+#  define MULS_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x582,rc)
+#  define MULS_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x502,rc)
+#  define MULS_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x542,rc)        /* 0x500 (/SU) | 0x042 (MULS_M) */
+#  define MULS_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5c2,rc)
+#  define MULS_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x782,rc)
+#  define MULS_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x702,rc)
+#  define MULS_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x742,rc)
+#  define MULS_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7c2,rc)
+#  define MULT(ra,rb,rc)               F_P(0x16,ra,rb,0x0a2,rc)
+#  define MULT_C(ra,rb,rc)             F_P(0x16,ra,rb,0x022,rc)
+#  define MULT_M(ra,rb,rc)             F_P(0x16,ra,rb,0x062,rc)
+#  define MULT_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0e2,rc)
+#  define MULT_U(ra,rb,rc)             F_P(0x16,ra,rb,0x1a2,rc)
+#  define MULT_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x122,rc)
+#  define MULT_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x162,rc)
+#  define MULT_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1e2,rc)
+#  define MULT_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x5a2,rc)
+#  define MULT_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x522,rc)
+#  define MULT_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x562,rc)
+#  define MULT_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5e2,rc)
+#  define MULT_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x7a2,rc)
+#  define MULT_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x722,rc)
+#  define MULT_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x762,rc)
+#  define MULT_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7e2,rc)
+#  define SQRTF(rb,rc)                 F_P(0x14,_R31_REGNO,rb,0x08a,rc)
+#  define SQRTG(rb,rc)                 F_P(0x14,_R31_REGNO,rb,0x0aa,rc)
+#  define SQRTS(rb,rc)                 F_P(0x14,_R31_REGNO,rb,0x08b,rc)
+#  define SQRTS_C(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x00b,rc)
+#  define SQRTS_M(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x04b,rc)
+#  define SQRTS_D(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x0cb,rc)
+#  define SQRTS_U(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x18b,rc)
+#  define SQRTS_UC(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x10b,rc)
+#  define SQRTS_UM(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x14b,rc)
+#  define SQRTS_UD(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x1cb,rc)
+#  define SQRTS_SU(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x58b,rc)
+#  define SQRTS_SUC(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x50b,rc)
+#  define SQRTS_SUM(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x54b,rc)
+#  define SQRTS_SUD(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x5cb,rc)
+#  define SQRTS_SUI(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x78b,rc)
+#  define SQRTS_SUIC(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x70b,rc)
+#  define SQRTS_SUIM(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x74b,rc)
+#  define SQRTS_SUID(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x7cb,rc)
+#  define SQRTT(rb,rc)                 F_P(0x14,_R31_REGNO,rb,0x0ab,rc)
+#  define SQRTT_C(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x02b,rc)
+#  define SQRTT_M(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x06b,rc)
+#  define SQRTT_D(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x0eb,rc)
+#  define SQRTT_U(rb,rc)               F_P(0x14,_R31_REGNO,rb,0x1ab,rc)
+#  define SQRTT_UC(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x12b,rc)
+#  define SQRTT_UM(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x16b,rc)
+#  define SQRTT_UD(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x1eb,rc)
+#  define SQRTT_SU(rb,rc)              F_P(0x14,_R31_REGNO,rb,0x5ab,rc)
+#  define SQRTT_SUC(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x52b,rc)
+#  define SQRTT_SUM(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x56b,rc)
+#  define SQRTT_SUD(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x5eb,rc)
+#  define SQRTT_SUI(rb,rc)             F_P(0x14,_R31_REGNO,rb,0x7ab,rc)
+#  define SQRTT_SUIC(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x72b,rc)
+#  define SQRTT_SUIM(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x76b,rc)
+#  define SQRTT_SUID(rb,rc)            F_P(0x14,_R31_REGNO,rb,0x7eb,rc)
+#  define STF(ra,rb,d)                 Mem(0x24,ra,rb,d)
+#  define STG(ra,rb,d)                 Mem(0x25,ra,rb,d)
+#  define STS(ra,rb,d)                 Mem(0x26,ra,rb,d)
+#  define STT(ra,rb,d)                 Mem(0x27,ra,rb,d)
+#  define SUBF(ra,rb,rc)               F_P(0x15,ra,rb,0x081,rc)
+#  define SUBG(ra,rb,rc)               F_P(0x15,ra,rb,0x0a1,rc)
+#  define SUBS(ra,rb,rc)               F_P(0x16,ra,rb,0x081,rc)
+#  define SUBS_C(ra,rb,rc)             F_P(0x16,ra,rb,0x001,rc)
+#  define SUBS_M(ra,rb,rc)             F_P(0x16,ra,rb,0x041,rc)
+#  define SUBS_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0c1,rc)
+#  define SUBS_U(ra,rb,rc)             F_P(0x16,ra,rb,0x181,rc)
+#  define SUBS_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x101,rc)
+#  define SUBS_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x141,rc)
+#  define SUBS_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1c1,rc)
+#  define SUBS_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x581,rc)
+#  define SUBS_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x501,rc)
+#  define SUBS_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x541,rc)
+#  define SUBS_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5c1,rc)
+#  define SUBS_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x781,rc)
+#  define SUBS_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x701,rc)
+#  define SUBS_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x741,rc)
+#  define SUBS_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7c1,rc)
+#  define SUBT(ra,rb,rc)               F_P(0x16,ra,rb,0x0a1,rc)
+#  define SUBT_C(ra,rb,rc)             F_P(0x16,ra,rb,0x021,rc)
+#  define SUBT_M(ra,rb,rc)             F_P(0x16,ra,rb,0x061,rc)
+#  define SUBT_D(ra,rb,rc)             F_P(0x16,ra,rb,0x0e1,rc)
+#  define SUBT_U(ra,rb,rc)             F_P(0x16,ra,rb,0x1a1,rc)
+#  define SUBT_UC(ra,rb,rc)            F_P(0x16,ra,rb,0x121,rc)
+#  define SUBT_UM(ra,rb,rc)            F_P(0x16,ra,rb,0x161,rc)
+#  define SUBT_UD(ra,rb,rc)            F_P(0x16,ra,rb,0x1e1,rc)
+#  define SUBT_SU(ra,rb,rc)            F_P(0x16,ra,rb,0x5a1,rc)
+#  define SUBT_SUC(ra,rb,rc)           F_P(0x16,ra,rb,0x521,rc)
+#  define SUBT_SUM(ra,rb,rc)           F_P(0x16,ra,rb,0x561,rc)
+#  define SUBT_SUD(ra,rb,rc)           F_P(0x16,ra,rb,0x5e1,rc)
+#  define SUBT_SUI(ra,rb,rc)           F_P(0x16,ra,rb,0x7a1,rc)
+#  define SUBT_SUIC(ra,rb,rc)          F_P(0x16,ra,rb,0x721,rc)
+#  define SUBT_SUIM(ra,rb,rc)          F_P(0x16,ra,rb,0x761,rc)
+#  define SUBT_SUID(ra,rb,rc)          F_P(0x16,ra,rb,0x7e1,rc)
+#  define FABS(ra,rc)                  CPYS(_R31_REGNO,ra,rc)  /* CPYS with register 31 as first operand; presumably $f31 reads as +0.0 -- confirm */
+#  define FMOV(ra,rc)                  CPYS(ra,ra,rc)  /* CPYS of ra with itself into rc */
+#  define NEGF(ra,rc)                  SUBF(_R31_REGNO,ra,rc)  /* NEG*: subtract ra from register 31, one macro per SUB* format */
+#  define NEGG(ra,rc)                  SUBG(_R31_REGNO,ra,rc)
+#  define NEGS(ra,rc)                  SUBS(_R31_REGNO,ra,rc)
+#  define NEGT(ra,rc)                  SUBT(_R31_REGNO,ra,rc)
+#  define FNEGF(ra,rc)                 CPYSN(ra,ra,rc) /* FNEG*: all four expand to the same CPYSN(ra,ra,rc) */
+#  define FNEGG(ra,rc)                 CPYSN(ra,ra,rc)
+#  define FNEGS(ra,rc)                 CPYSN(ra,ra,rc)
+#  define FNEGT(ra,rc)                 CPYSN(ra,ra,rc)
+#  define movr_f(r0,r1)                        movr_d(r0,r1)   /* single precision reuses the double-precision register move */
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)     /* i0 is a pointer to the float constant, not the value */
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)     /* i0 is a pointer to the double constant, not the value */
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  define absr_f(r0,r1)                        FABS(r1,r0)     /* arguments are swapped when handed to FABS */
+#  define absr_d(r0,r1)                        FABS(r1,r0)
+#  define negr_f(r0,r1)                        FNEGS(r1,r0)    /* arguments are swapped when handed to FNEGS/FNEGT */
+#  define negr_d(r0,r1)                        FNEGT(r1,r0)
+#  define sqrtr_f(r0,r1)               _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sqrtr_d(r0,r1)               _sqrtr_d(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_f_d(r0,r1)              movr_d(r0,r1)   /* float<->double conversions expand to a plain register move */
+#  define extr_d_f(r0,r1)              movr_f(r0,r1)
+#  define truncr_f_i(r0,r1)            truncr_d_i(r0,r1)       /* every truncr_* variant funnels into _truncr_d_l */
+#  define truncr_f_l(r0,r1)            truncr_d_l(r0,r1)
+#  define truncr_d_i(r0,r1)            truncr_d_l(r0,r1)
+#  define truncr_d_l(r0,r1)            _truncr_d_l(_jit,r0,r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_f(r0,r1)                        _extr_f(_jit,r0,r1)
+static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_d(r0,r1)                        _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define addr_f(r0,r1,r2)             _addr_f(_jit,r0,r1,r2)
+static void _addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define addr_d(r0,r1,r2)             _addr_d(_jit,r0,r1,r2)
+static void _addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define subr_f(r0,r1,r2)             _subr_f(_jit,r0,r1,r2)
+static void _subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define subr_d(r0,r1,r2)             _subr_d(_jit,r0,r1,r2)
+static void _subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define rsbr_f(r0, r1, r2)           subr_f(r0, r2, r1)      /* reverse subtract: subr_f with the two source operands swapped */
+#  define rsbi_f(r0, r1, i0)           _rsbi_f(_jit, r0, r1, i0)       /* i0 points to the float constant */
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define rsbr_d(r0, r1, r2)           subr_d(r0, r2, r1)      /* reverse subtract: subr_d with the two source operands swapped */
+#  define rsbi_d(r0, r1, i0)           _rsbi_d(_jit, r0, r1, i0)       /* i0 points to the double constant */
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define mulr_f(r0,r1,r2)             _mulr_f(_jit,r0,r1,r2)
+static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define mulr_d(r0,r1,r2)             _mulr_d(_jit,r0,r1,r2)
+static void _mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define divr_f(r0,r1,r2)             _divr_f(_jit,r0,r1,r2)
+static void _divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define divr_d(r0,r1,r2)             _divr_d(_jit,r0,r1,r2)
+static void _divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltr_f(r0,r1,r2)              ltr_d(r0,r1,r2)
+#  define ltr_d(r0,r1,r2)              _ltr_d(_jit,r0,r1,r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_f(r0,r1,i0)              _lti_f(_jit,r0,r1,i0)
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lti_d(r0,r1,i0)              _lti_d(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ler_f(r0,r1,r2)              ler_d(r0,r1,r2)
+#  define ler_d(r0,r1,r2)              _ler_d(_jit,r0,r1,r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_f(r0,r1,i0)              _lei_f(_jit,r0,r1,i0)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lei_d(r0,r1,i0)              _lei_d(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define eqr_f(r0,r1,r2)              eqr_d(r0,r1,r2)
+#  define eqr_d(r0,r1,r2)              _eqr_d(_jit,r0,r1,r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi_f(r0,r1,i0)              _eqi_f(_jit,r0,r1,i0)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define eqi_d(r0,r1,i0)              _eqi_d(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ger_f(r0,r1,r2)              ger_d(r0,r1,r2)
+#  define ger_d(r0,r1,r2)              _ger_d(_jit,r0,r1,r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_f(r0,r1,i0)              _gei_f(_jit,r0,r1,i0)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gei_d(r0,r1,i0)              _gei_d(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define gtr_f(r0,r1,r2)              gtr_d(r0,r1,r2)
+#  define gtr_d(r0,r1,r2)              _gtr_d(_jit,r0,r1,r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_f(r0,r1,i0)              _gti_f(_jit,r0,r1,i0)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gti_d(r0,r1,i0)              _gti_d(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ner_f(r0,r1,r2)              ner_d(r0,r1,r2)
+#  define ner_d(r0,r1,r2)              _ner_d(_jit,r0,r1,r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei_f(r0,r1,i0)              _nei_f(_jit,r0,r1,i0)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define nei_d(r0,r1,i0)              _nei_d(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unltr_f(r0,r1,r2)            unltr_d(r0,r1,r2)
+#  define unltr_d(r0,r1,r2)            _unltr_d(_jit,r0,r1,r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlti_f(r0,r1,i0)            _unlti_f(_jit,r0,r1,i0)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlti_d(r0,r1,i0)            _unlti_d(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unler_f(r0,r1,r2)            unler_d(r0,r1,r2)
+#  define unler_d(r0,r1,r2)            _unler_d(_jit,r0,r1,r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlei_f(r0,r1,i0)            _unlei_f(_jit,r0,r1,i0)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlei_d(r0,r1,i0)            _unlei_d(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define uneqr_f(r0,r1,r2)            uneqr_d(r0,r1,r2)
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unger_f(r0,r1,r2)            unger_d(r0,r1,r2)
+#  define unger_d(r0,r1,r2)            _unger_d(_jit,r0,r1,r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungei_f(r0,r1,i0)            _ungei_f(_jit,r0,r1,i0)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungei_d(r0,r1,i0)            _ungei_d(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ungtr_f(r0,r1,r2)            ungtr_d(r0,r1,r2)
+#  define ungtr_d(r0,r1,r2)            _ungtr_d(_jit,r0,r1,r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungti_f(r0,r1,i0)            _ungti_f(_jit,r0,r1,i0)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungti_d(r0,r1,i0)            _ungti_d(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltgtr_f(r0,r1,r2)            ltgtr_d(r0,r1,r2)
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ordr_f(r0,r1,r2)             ordr_d(r0,r1,r2)
+#  define ordr_d(r0,r1,r2)             _ordr_d(_jit,r0,r1,r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ordi_f(r0,r1,i0)             _ordi_f(_jit,r0,r1,i0)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ordi_d(r0,r1,i0)             _ordi_d(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unordr_f(r0,r1,r2)           unordr_d(r0,r1,r2)
+#  define unordr_d(r0,r1,r2)           _unordr_d(_jit,r0,r1,r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unordi_f(r0,r1,i0)           _unordi_f(_jit,r0,r1,i0)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unordi_d(r0,r1,i0)           _unordi_d(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define bltr_f(i0,r0,r1)             bltr_d(i0,r0,r1)
+#  define bltr_d(i0,r0,r1)             _bltr_d(_jit,i0,r0,r1)
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_f(i0,r0,i1)             _blti_f(_jit,i0,r0,i1)
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blti_d(i0,r0,i1)             _blti_d(_jit,i0,r0,i1)
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bler_f(i0,r0,r1)             bler_d(i0,r0,r1)
+#  define bler_d(i0,r0,r1)             _bler_d(_jit,i0,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_f(i0,r0,i1)             _blei_f(_jit,i0,r0,i1)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blei_d(i0,r0,i1)             _blei_d(_jit,i0,r0,i1)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define beqr_f(i0,r0,r1)             beqr_d(i0,r0,r1)
+#  define beqr_d(i0,r0,r1)             _beqr_d(_jit,i0,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_f(i0,r0,i1)             _beqi_f(_jit,i0,r0,i1)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define beqi_d(i0,r0,i1)             _beqi_d(_jit,i0,r0,i1)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bger_f(i0,r0,r1)             bger_d(i0,r0,r1)
+#  define bger_d(i0,r0,r1)             _bger_d(_jit,i0,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_f(i0,r0,i1)             _bgei_f(_jit,i0,r0,i1)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgei_d(i0,r0,i1)             _bgei_d(_jit,i0,r0,i1)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bgtr_f(i0,r0,r1)             bgtr_d(i0,r0,r1)
+#  define bgtr_d(i0,r0,r1)             _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_f(i0,r0,i1)             _bgti_f(_jit,i0,r0,i1)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgti_d(i0,r0,i1)             _bgti_d(_jit,i0,r0,i1)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bner_f(i0,r0,r1)             bner_d(i0,r0,r1)
+#  define bner_d(i0,r0,r1)             _bner_d(_jit,i0,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_f(i0,r0,i1)             _bnei_f(_jit,i0,r0,i1)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bnei_d(i0,r0,i1)             _bnei_d(_jit,i0,r0,i1)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunltr_f(i0,r0,r1)           bunltr_d(i0,r0,r1)
+#  define bunltr_d(i0,r0,r1)           _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_f(i0,r0,i1)           _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlti_d(i0,r0,i1)           _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunler_f(i0,r0,r1)           bunler_d(i0,r0,r1)
+#  define bunler_d(i0,r0,r1)           _bunler_d(_jit,i0,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_f(i0,r0,i1)           _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlei_d(i0,r0,i1)           _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define buneqr_f(i0,r0,r1)           buneqr_d(i0,r0,r1)
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunger_f(i0,r0,r1)           bunger_d(i0,r0,r1)
+#  define bunger_d(i0,r0,r1)           _bunger_d(_jit,i0,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_f(i0,r0,i1)           _bungei_f(_jit,i0,r0,i1)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungei_d(i0,r0,i1)           _bungei_d(_jit,i0,r0,i1)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bungtr_f(i0,r0,r1)           bungtr_d(i0,r0,r1)
+#  define bungtr_d(i0,r0,r1)           _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_f(i0,r0,i1)           _bungti_f(_jit,i0,r0,i1)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungti_d(i0,r0,i1)           _bungti_d(_jit,i0,r0,i1)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bltgtr_f(i0,r0,r1)           bltgtr_d(i0,r0,r1)
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bordr_f(i0,r0,r1)            bordr_d(i0,r0,r1)
+#  define bordr_d(i0,r0,r1)            _bordr_d(_jit,i0,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_f(i0,r0,i1)            _bordi_f(_jit,i0,r0,i1)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bordi_d(i0,r0,i1)            _bordi_d(_jit,i0,r0,i1)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunordr_f(i0,r0,r1)          bunordr_d(i0,r0,r1)
+#  define bunordr_d(i0,r0,r1)          _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_f(i0,r0,i1)          _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunordi_d(i0,r0,i1)          _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define ldr_f(r0,r1)                 LDS(r0,r1,0)
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_d(r0,r1)                 LDT(r0,r1,0)
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 STS(r1,r0,0)
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_d(r0,r1)                 STT(r1,r0,0)
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * fpr_opi(name, type, size): define _<name>i_<type>(), the immediate form
+ * of a three-operand float operation.  The generated function loads the
+ * constant pointed to by i0 into a scratch FPR with movi_<type>(), applies
+ * <name>r_<type>(r0, r1, scratch) and then releases the scratch register.
+ */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                        \
+}
+/*
+ * fpr_bopi(name, type, size): define _b<name>i_<type>(), the immediate form
+ * of a float compare-and-branch.  The generated function loads the constant
+ * pointed to by i1 into a scratch FPR (requested with jit_class_nospill),
+ * emits b<name>r_<type>(i0, r0, scratch), releases the scratch register and
+ * returns whatever the register form returned.
+ */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|jit_class_nospill);\
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+/*
+ * Shorthands instantiating the generators above: the f* forms use the "f"
+ * suffix with jit_float32_t, the d* forms the "d" suffix with jit_float64_t.
+ */
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+
+/*
+ * Copy FPR r1 into FPR r0 with FMOV.  Nothing is emitted when source and
+ * destination are the same register.
+ */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMOV(r1, r0);
+}
+
+/*
+ * Load the single-precision constant *i0 into FPR r0.
+ *
+ * Normally the constant is loaded straight from its address with ldi_f().
+ * When _jitc->no_data is set, the bit pattern is instead materialized in a
+ * scratch GPR, stored to the slot at _FP_REGNO - 8 and reloaded into r0.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_f(r0, (jit_word_t)i0);
+       return;
+    }
+
+    /* Reinterpret the float as an integer through the union. */
+    bits.f = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i & 0xffffffff);
+    stxi_i(-8, _FP_REGNO, rn(tmp));
+    jit_unget_reg(tmp);
+    ldxi_f(r0, _FP_REGNO, -8);
+}
+
+/*
+ * Load the double-precision constant *i0 into FPR r0.
+ *
+ * Normally the constant is loaded straight from its address with ldi_d().
+ * When _jitc->no_data is set, the bit pattern is instead materialized in a
+ * scratch GPR, stored to the slot at _FP_REGNO - 8 and reloaded into r0.
+ */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_word_t       w;
+       jit_float64_t    d;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_d(r0, (jit_word_t)i0);
+       return;
+    }
+
+    /* Reinterpret the double as a word through the union. */
+    bits.d = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.w);
+    stxi_l(-8, _FP_REGNO, rn(tmp));
+    jit_unget_reg(tmp);
+    ldxi_d(r0, _FP_REGNO, -8);
+}
+
+/*
+ * Convert the double in FPR r1 to an integer in GPR r0.
+ *
+ * CVTTQ_SVC writes the converted value to a scratch FPR; it is then moved
+ * to the GPR through the stack slot at _FP_REGNO - 8 (stxi_d, then ldxi).
+ */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    CVTTQ_SVC(r1, rn(reg));
+    TRAPB();
+    stxi_d(-8, _FP_REGNO, rn(reg));
+    ldxi(r0, _FP_REGNO, -8);
+    jit_unget_reg(reg);
+}
+
+/* Single-precision square root: emits SQRTS_SU from r1 into r0, then TRAPB. */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    SQRTS_SU(r1, r0);
+    TRAPB();
+}
+
+/* Double-precision square root: emits SQRTT_SU from r1 into r0, then TRAPB. */
+static void
+_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    SQRTT_SU(r1, r0);
+    TRAPB();
+}
+
+/*
+ * Convert the integer in GPR r1 to a single-precision float in FPR r0.
+ *
+ * The GPR is stored to the stack slot at _FP_REGNO - 8, reloaded into FPR r0
+ * with ldxi_d and converted in place with CVTQS.
+ */
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_l(-8, _FP_REGNO, r1);
+    ldxi_d(r0, _FP_REGNO, -8);
+    CVTQS(r0, r0);
+}
+
+/*
+ * Convert the integer in GPR r1 to a double-precision float in FPR r0.
+ *
+ * The GPR is stored to the stack slot at _FP_REGNO - 8, reloaded into FPR r0
+ * with ldxi_d and converted in place with CVTQT.
+ */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_l(-8, _FP_REGNO, r1);
+    ldxi_d(r0, _FP_REGNO, -8);
+    CVTQT(r0, r0);
+}
+
+/*
+ * Single-precision r0 = r1 + r2: emits ADDS_SU followed by TRAPB.
+ * fopi(add) generates the immediate form _addi_f().
+ */
+static void
+_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    ADDS_SU(r1, r2, r0);
+    TRAPB();
+}
+fopi(add)
+
+/*
+ * Double-precision r0 = r1 + r2: emits ADDT_SU followed by TRAPB.
+ * dopi(add) generates the immediate form _addi_d().
+ */
+static void
+_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    ADDT_SU(r1, r2, r0);
+    TRAPB();
+}
+dopi(add)
+
+/*
+ * Single-precision r0 = r1 - r2: emits SUBS_SU followed by TRAPB.
+ * fopi(sub) and fopi(rsb) generate the immediate forms _subi_f() and
+ * _rsbi_f(); the latter relies on rsbr_f, which is defined outside this
+ * section.
+ */
+static void
+_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SUBS_SU(r1, r2, r0);
+    TRAPB();
+}
+fopi(sub)
+fopi(rsb)
+
+/*
+ * Double-precision r0 = r1 - r2: emits SUBT_SU followed by TRAPB.
+ * dopi(sub) and dopi(rsb) generate the immediate forms _subi_d() and
+ * _rsbi_d(); the latter relies on rsbr_d, which is defined outside this
+ * section.
+ */
+static void
+_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SUBT_SU(r1, r2, r0);
+    TRAPB();
+}
+dopi(sub)
+dopi(rsb)
+
+/*
+ * Single-precision r0 = r1 * r2: emits MULS_SU followed by TRAPB.
+ * fopi(mul) generates the immediate form _muli_f().
+ */
+static void
+_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MULS_SU(r1, r2, r0);
+    TRAPB();
+}
+fopi(mul)
+
+/*
+ * Double-precision r0 = r1 * r2: emits MULT_SU followed by TRAPB.
+ * dopi(mul) generates the immediate form _muli_d().
+ */
+static void
+_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MULT_SU(r1, r2, r0);
+    TRAPB();
+}
+dopi(mul)
+
+/*
+ * Single-precision r0 = r1 / r2: emits DIVS_SU followed by TRAPB.
+ * fopi(div) generates the immediate form _divi_f().
+ */
+static void
+_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIVS_SU(r1, r2, r0);
+    TRAPB();
+}
+fopi(div)
+
+/*
+ * Double-precision r0 = r1 / r2: emits DIVT_SU followed by TRAPB.
+ * dopi(div) generates the immediate form _divi_d().
+ */
+static void
+_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIVT_SU(r1, r2, r0);
+    TRAPB();
+}
+dopi(div)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 < FPR r2, and to 0 otherwise
+ * (including when CMPTUN_SU reports the operands as unordered).
+ *
+ * r0 is preloaded with 0; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(lt)/dopi(lt) generate the immediate forms _lti_f() and _lti_d().
+ */
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not less than: keep r0 == 0. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(lt)
+dopi(lt)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 <= FPR r2, and to 0 otherwise
+ * (including when CMPTUN_SU reports the operands as unordered).
+ *
+ * r0 is preloaded with 0; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(le)/dopi(le) generate the immediate forms _lei_f() and _lei_d().
+ */
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not less or equal: keep r0 == 0. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(le)
+dopi(le)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 == FPR r2, and to 0 otherwise
+ * (including when CMPTUN_SU reports the operands as unordered).
+ *
+ * r0 is preloaded with 0; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(eq)/dopi(eq) generate the immediate forms _eqi_f() and _eqi_d().
+ */
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not equal: keep r0 == 0. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(eq)
+dopi(eq)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 >= FPR r2, and to 0 otherwise
+ * (including when CMPTUN_SU reports the operands as unordered).
+ *
+ * "Greater or equal" is computed as "ordered and not less than".  r0 is
+ * preloaded with 0; both FP branches are re-targeted by patch_at() to the
+ * end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(ge)/dopi(ge) generate the immediate forms _gei_f() and _gei_d().
+ */
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Less than: keep r0 == 0. */
+    FBNE(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(ge)
+dopi(ge)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 > FPR r2, and to 0 otherwise
+ * (including when CMPTUN_SU reports the operands as unordered).
+ *
+ * "Greater than" is computed as "ordered and not less or equal".  r0 is
+ * preloaded with 0; both FP branches are re-targeted by patch_at() to the
+ * end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(gt)/dopi(gt) generate the immediate forms _gti_f() and _gti_d().
+ */
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Less or equal: keep r0 == 0. */
+    FBNE(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(gt)
+dopi(gt)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 != FPR r2 or the operands are
+ * unordered, and to 0 when they compare equal.
+ *
+ * r0 is preloaded with 1; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(ne)/dopi(ne) generate the immediate forms _nei_f() and _nei_d().
+ */
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 3);
+    CMPTEQ_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not equal: keep r0 == 1. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(ne)
+dopi(ne)
+
+/*
+ * Emit code setting GPR r0 to 1 when the operands are unordered or
+ * FPR r1 < FPR r2, and to 0 otherwise.
+ *
+ * r0 is preloaded with 1; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(unlt)/dopi(unlt) generate the immediate forms _unlti_f() and
+ * _unlti_d().
+ */
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Less than: keep r0 == 1. */
+    FBNE(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(unlt)
+dopi(unlt)
+
+/*
+ * Emit code setting GPR r0 to 1 when the operands are unordered or
+ * FPR r1 <= FPR r2, and to 0 otherwise.
+ *
+ * r0 is preloaded with 1; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(unle)/dopi(unle) generate the immediate forms _unlei_f() and
+ * _unlei_d().
+ */
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Less or equal: keep r0 == 1. */
+    FBNE(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(unle)
+dopi(unle)
+
+/*
+ * Emit code setting GPR r0 to 1 when the operands are unordered or
+ * FPR r1 == FPR r2, and to 0 otherwise.
+ *
+ * r0 is preloaded with 1; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(uneq)/dopi(uneq) generate the immediate forms _uneqi_f() and
+ * _uneqi_d().
+ */
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Equal: keep r0 == 1. */
+    FBNE(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(uneq)
+dopi(uneq)
+
+/*
+ * Emit code setting GPR r0 to 1 when the operands are unordered or
+ * FPR r1 >= FPR r2, and to 0 otherwise.
+ *
+ * "Greater or equal" is tested as "not less than".  r0 is preloaded with 1;
+ * both FP branches are re-targeted by patch_at() to the end of the
+ * sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(unge)/dopi(unge) generate the immediate forms _ungei_f() and
+ * _ungei_d().
+ */
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not less than: keep r0 == 1. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(unge)
+dopi(unge)
+
+/*
+ * Emit code setting GPR r0 to 1 when the operands are unordered or
+ * FPR r1 > FPR r2, and to 0 otherwise.
+ *
+ * "Greater than" is tested as "not less or equal".  r0 is preloaded with 1;
+ * both FP branches are re-targeted by patch_at() to the end of the
+ * sequence, so taking either one skips the final movi(r0, 0).
+ * fopi(ungt)/dopi(ungt) generate the immediate forms _ungti_f() and
+ * _ungti_d().
+ */
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Not less or equal: keep r0 == 1. */
+    FBEQ(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(ungt)
+dopi(ungt)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 and FPR r2 are ordered and not
+ * equal (less than or greater than), and to 0 otherwise.
+ *
+ * r0 is preloaded with 0; both FP branches are re-targeted by patch_at() to
+ * the end of the sequence, so taking either one skips the final movi(r0, 1).
+ * fopi(ltgt)/dopi(ltgt) generate the immediate forms _ltgti_f() and
+ * _ltgti_d().
+ */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Equal: keep r0 == 0. */
+    FBNE(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(v, _jit->pc.w);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(ltgt)
+dopi(ltgt)
+
+/*
+ * Emit code setting GPR r0 to 1 when FPR r1 and FPR r2 are ordered, and to
+ * 0 when CMPTUN_SU reports them as unordered.
+ *
+ * r0 is preloaded with 0; the FP branch is re-targeted by patch_at() to the
+ * end of the sequence, so taking it skips the final movi(r0, 1).
+ * fopi(ord)/dopi(ord) generate the immediate forms _ordi_f() and _ordi_d().
+ */
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 0);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Unordered: keep r0 == 0. */
+    FBNE(rn(reg), 1);
+    movi(r0, 1);
+    patch_at(w, _jit->pc.w);
+    jit_unget_reg(reg);
+}
+fopi(ord)
+dopi(ord)
+
+/*
+ * Emit code setting GPR r0 to 1 when CMPTUN_SU reports FPR r1 and FPR r2 as
+ * unordered, and to 0 otherwise.
+ *
+ * r0 is preloaded with 1; the FP branch is re-targeted by patch_at() to the
+ * end of the sequence, so taking it skips the final movi(r0, 0).
+ * fopi(unord)/dopi(unord) generate the immediate forms _unordi_f() and
+ * _unordi_d().
+ */
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi(r0, 1);
+    CMPTUN_SU(r1, r2, rn(reg));
+    TRAPB();
+    w = _jit->pc.w;
+    /* Unordered: keep r0 == 1. */
+    FBNE(rn(reg), 1);
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+    /* Release the scratch FPR only once the whole sequence is emitted. */
+    jit_unget_reg(reg);
+}
+fopi(unord)
+dopi(unord)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 < FPR r1; unordered operands fall
+ * through.  Returns the code address of the FBNE that jumps to i0.
+ * fbopi(lt)/dbopi(lt) generate the immediate forms _blti_f() and _blti_d().
+ */
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Skips the conditional branch; re-targeted by patch_at() below. */
+    FBNE(rn(reg), 3);
+    CMPTLT_SU(r0, r1, rn(reg));                /* lt satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBNE(rn(reg), ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(lt)
+dbopi(lt)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 <= FPR r1; unordered operands fall
+ * through.  Returns the code address of the FBNE that jumps to i0.
+ * fbopi(le)/dbopi(le) generate the immediate forms _blei_f() and _blei_d().
+ */
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Skips the conditional branch; re-targeted by patch_at() below. */
+    FBNE(rn(reg), 3);
+    CMPTLE_SU(r0, r1, rn(reg));                /* le satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBNE(rn(reg), ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(le)
+dbopi(le)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 == FPR r1; unordered operands fall
+ * through.  Returns the code address of the FBNE that jumps to i0.
+ * fbopi(eq)/dbopi(eq) generate the immediate forms _beqi_f() and _beqi_d().
+ */
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Skips the conditional branch; re-targeted by patch_at() below. */
+    FBNE(rn(reg), 3);
+    CMPTEQ_SU(r0, r1, rn(reg));                /* eq satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBNE(rn(reg), ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(eq)
+dbopi(eq)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 >= FPR r1; unordered operands fall
+ * through.  The condition is tested as "ordered and not less than".
+ * Returns the code address of the FBEQ that jumps to i0.
+ * fbopi(ge)/dbopi(ge) generate the immediate forms _bgei_f() and _bgei_d().
+ */
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Skips the conditional branch; re-targeted by patch_at() below. */
+    FBNE(rn(reg), 3);
+    CMPTLT_SU(r0, r1, rn(reg));                /* ge satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBEQ(rn(reg), ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(ge)
+dbopi(ge)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 > FPR r1; unordered operands fall
+ * through.  The condition is tested as "ordered and not less or equal".
+ * Returns the code address of the FBEQ that jumps to i0.
+ * fbopi(gt)/dbopi(gt) generate the immediate forms _bgti_f() and _bgti_d().
+ */
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Skips the conditional branch; re-targeted by patch_at() below. */
+    FBNE(rn(reg), 3);
+    CMPTLE_SU(r0, r1, rn(reg));                /* gt satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBEQ(rn(reg), ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(gt)
+dbopi(gt)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 != FPR r1 or the operands are
+ * unordered.  Returns the code address of the BR that jumps to i0.
+ *
+ * Layout: the first FBNE (u) is patched to land on the BR, the second FBNE
+ * (v) is patched to land just past it.
+ * fbopi(ne)/dbopi(ne) generate the immediate forms _bnei_f() and _bnei_d().
+ */
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r0, r1, rn(reg));                /* ne satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Equal: skip the BR. */
+    FBNE(rn(reg), 1);
+    patch_at(u, _jit->pc.w);
+    w = _jit->pc.w;
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(ne)
+dbopi(ne)
+
+/*
+ * Emit a branch to i0 taken when the operands are unordered or
+ * FPR r0 < FPR r1.  Returns the code address of the BR that jumps to i0.
+ *
+ * Layout: the FBNE (u) is patched to land on the BR, the FBEQ (v) is
+ * patched to land just past it.
+ * fbopi(unlt)/dbopi(unlt) generate the immediate forms _bunlti_f() and
+ * _bunlti_d().
+ */
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r0, r1, rn(reg));                /* lt satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Not less than: skip the BR. */
+    FBEQ(rn(reg), 1);
+    w = _jit->pc.w;
+    patch_at(u, _jit->pc.w);
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(unlt)
+dbopi(unlt)
+
+/*
+ * Emit a branch to i0 taken when the operands are unordered or
+ * FPR r0 <= FPR r1.  Returns the code address of the BR that jumps to i0.
+ *
+ * Layout: the FBNE (u) is patched to land on the BR, the FBEQ (v) is
+ * patched to land just past it.
+ * fbopi(unle)/dbopi(unle) generate the immediate forms _bunlei_f() and
+ * _bunlei_d().
+ */
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r0, r1, rn(reg));                /* le satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Not less or equal: skip the BR. */
+    FBEQ(rn(reg), 1);
+    w = _jit->pc.w;
+    patch_at(u, _jit->pc.w);
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(unle)
+dbopi(unle)
+
+/*
+ * Emit a branch to i0 taken when the operands are unordered or
+ * FPR r0 == FPR r1.  Returns the code address of the BR that jumps to i0.
+ *
+ * Layout: the FBNE (u) is patched to land on the BR, the FBEQ (v) is
+ * patched to land just past it.
+ * fbopi(uneq)/dbopi(uneq) generate the immediate forms _buneqi_f() and
+ * _buneqi_d().
+ */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r0, r1, rn(reg));                /* eq satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Not equal: skip the BR. */
+    FBEQ(rn(reg), 1);
+    w = _jit->pc.w;
+    patch_at(u, _jit->pc.w);
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(uneq)
+dbopi(uneq)
+
+/*
+ * Emit a branch to i0 taken when the operands are unordered or
+ * FPR r0 >= FPR r1 (tested as "not less than").  Returns the code address
+ * of the BR that jumps to i0.
+ *
+ * Layout: the first FBNE (u) is patched to land on the BR, the second FBNE
+ * (v) is patched to land just past it.
+ * fbopi(unge)/dbopi(unge) generate the immediate forms _bungei_f() and
+ * _bungei_d().
+ */
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTLT_SU(r0, r1, rn(reg));                /* ge satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Less than: skip the BR. */
+    FBNE(rn(reg), 1);
+    w = _jit->pc.w;
+    patch_at(u, _jit->pc.w);
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(unge)
+dbopi(unge)
+
+/*
+ * Emit a branch to i0 taken when the operands are unordered or
+ * FPR r0 > FPR r1 (tested as "not less or equal").  Returns the code
+ * address of the BR that jumps to i0.
+ *
+ * Layout: the first FBNE (u) is patched to land on the BR, the second FBNE
+ * (v) is patched to land just past it.
+ * fbopi(ungt)/dbopi(ungt) generate the immediate forms _bungti_f() and
+ * _bungti_d().
+ */
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    jit_word_t         u, v, w;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTLE_SU(r0, r1, rn(reg));                /* gt does satisfy condition */
+    TRAPB();
+    v = _jit->pc.w;
+    /* Less or equal: skip the BR. */
+    FBNE(rn(reg), 1);
+    w = _jit->pc.w;
+    patch_at(u, _jit->pc.w);
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(ungt)
+dbopi(ungt)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 and FPR r1 are ordered and not
+ * equal.  Returns the code address of the BR that jumps to i0.
+ *
+ * Layout: both FBNE instructions (u and v) are patched to land just past
+ * the BR, so unordered or equal operands fall through.
+ * fbopi(ltgt)/dbopi(ltgt) generate the immediate forms _bltgti_f() and
+ * _bltgti_d().
+ */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         u, v, w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    u = _jit->pc.w;
+    FBNE(rn(reg), 4);
+    CMPTEQ_SU(r1, r0, rn(reg));
+    TRAPB();
+    v = _jit->pc.w;                    /* eq does not satisfy condition */
+    FBNE(rn(reg), 1);
+    w = _jit->pc.w;
+    BR(_R31_REGNO, ((i0 - w) >> 2) - 1);
+    patch_at(u, _jit->pc.w);
+    patch_at(v, _jit->pc.w);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(ltgt)
+dbopi(ltgt)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 and FPR r1 are ordered, i.e. when
+ * the CMPTUN_SU result is zero.  Returns the code address of the FBEQ that
+ * jumps to i0.
+ * fbopi(ord)/dbopi(ord) generate the immediate forms _bordi_f() and
+ * _bordi_d().
+ */
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord does not satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBEQ(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(ord)
+dbopi(ord)
+
+/*
+ * Emit a branch to i0 taken when FPR r0 and FPR r1 are unordered, i.e. when
+ * the CMPTUN_SU result is non-zero.  Returns the code address of the FBNE
+ * that jumps to i0.
+ * fbopi(unord)/dbopi(unord) generate the immediate forms _bunordi_f() and
+ * _bunordi_d().
+ */
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    CMPTUN_SU(r0, r1, rn(reg));                /* unord satisfy condition */
+    TRAPB();
+    w = _jit->pc.w;
+    FBNE(rn(reg), ((i0 - w) >> 2) - 1);
+    jit_unget_reg(reg);
+    return (w);
+}
+fbopi(unord)
+dbopi(unord)
+
+/*
+ * Load a single-precision float from absolute address i0 into FPR r0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the LDS displacement with
+ * _R31_REGNO as base; otherwise the address is first materialized in a
+ * scratch GPR.
+ */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       LDS(r0, _R31_REGNO, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Load a double-precision float from absolute address i0 into FPR r0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the LDT displacement with
+ * _R31_REGNO as base; otherwise the address is first materialized in a
+ * scratch GPR.
+ */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       LDT(r0, _R31_REGNO, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Load a single-precision float from address r1 + r2 into FPR r0.  The sum
+ * is computed into a scratch GPR, which then serves as the load address.
+ */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_f(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Load a single-precision float from address r1 + i0 into FPR r0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the LDS displacement off
+ * r1; otherwise r1 + i0 is first computed into a scratch GPR.
+ */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       LDS(r0, r1, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Load a double-precision float from address r1 + r2 into FPR r0.  The sum
+ * is computed into a scratch GPR, which then serves as the load address.
+ */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    ldr_d(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Load a double-precision float from address r1 + i0 into FPR r0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the LDT displacement off
+ * r1; otherwise r1 + i0 is first computed into a scratch GPR.
+ */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       LDT(r0, r1, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the single-precision float in FPR r0 to absolute address i0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the STS displacement with
+ * _R31_REGNO as base; otherwise the address is first materialized in a
+ * scratch GPR.
+ */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       STS(r0, _R31_REGNO, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_f(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the double-precision float in FPR r0 to absolute address i0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the STT displacement with
+ * _R31_REGNO as base; otherwise the address is first materialized in a
+ * scratch GPR.
+ */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       STT(r0, _R31_REGNO, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_d(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the single-precision float in FPR r2 to address r0 + r1.  The sum
+ * is computed into a scratch GPR, which then serves as the store address.
+ */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_f(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/*
+ * Store the single-precision float in FPR r1 to address r0 + i0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the STS displacement off
+ * r0; otherwise r0 + i0 is first computed into a scratch GPR.
+ */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       STS(r1, r0, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_f(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the double-precision float in FPR r2 to address r0 + r1.  The sum
+ * is computed into a scratch GPR, which then serves as the store address.
+ */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_d(rn(reg), r2);
+    jit_unget_reg(reg);
+}
+
+/*
+ * Store the double-precision float in FPR r1 to address r0 + i0.
+ *
+ * When _s16_p(i0) holds, i0 is used directly as the STT displacement off
+ * r0; otherwise r0 + i0 is first computed into a scratch GPR.
+ */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Register ids are jit_int32_t, as returned by jit_get_reg(). */
+    jit_int32_t                reg;
+    if (_s16_p(i0))
+       STT(r1, r0, _u16(i0));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_d(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Emit code fetching the next double variadic argument into FPR r0.
+ *
+ * r1 is a GPR holding the address of a jit_va_list_t.  The generated code
+ * reads its base and offset fields, subtracts 48 from the offset used for
+ * the load when that offset is below 48, loads the double from
+ * base + offset, and finally stores the original offset plus 8 back into
+ * the va_list.  Asserts that the current function was declared varargs.
+ */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         ge_code;
+    jit_int32_t                rg0, rg1, rg2;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+    rg2 = jit_get_reg(jit_class_gpr);
+
+    /* Load the base in first temporary. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, base));
+
+    /* Load the offset in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, offset));
+
+    /* Remember absolute offset */
+    movr(rn(rg2), rn(rg1));
+
+    /* Jump if overflowed register saved area. */
+    ge_code = bgei(_jit->pc.w, rn(rg1), 48);
+    /* Otherwise load from the float registers save area. */
+    subi(rn(rg1), rn(rg1), 48);
+    /* The forward branch above lands here, skipping only the subi. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load the argument */
+    ldxr_d(r0, rn(rg0), rn(rg1));
+
+    /* No longer needed. */
+    jit_unget_reg(rg1);
+    jit_unget_reg(rg0);
+
+    /* Update offset. */
+    addi(rn(rg2), rn(rg2), 8);
+    stxi(offsetof(jit_va_list_t, offset), r1, rn(rg2));
+    jit_unget_reg(rg2);
+}
+#endif
diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c
new file mode 100644 (file)
index 0000000..e1a572a
--- /dev/null
@@ -0,0 +1,402 @@
+
+#if __WORDSIZE == 64
+/* JIT_INSTR_MAX equals the largest entry in the table below (76, for
+ * prolog).  Each following line is one size value, labelled with the
+ * name of the jit code it belongs to.
+ * NOTE(review): the unit is presumably bytes of emitted code -- confirm
+ * against the file that includes this table. */
+#define JIT_INSTR_MAX 76
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    76,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    0, /* va_start */
+    0, /* va_arg */
+    0, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    32,        /* addi */
+    12,        /* addcr */
+    40,        /* addci */
+    28,        /* addxr */
+    28,        /* addxi */
+    4, /* subr */
+    32,        /* subi */
+    12,        /* subcr */
+    40,        /* subci */
+    28,        /* subxr */
+    28,        /* subxi */
+    36,        /* rsbi */
+    4, /* mulr */
+    32,        /* muli */
+    44,        /* qmulr */
+    56,        /* qmuli */
+    12,        /* qmulr_u */
+    32,        /* qmuli_u */
+    48,        /* divr */
+    72,        /* divi */
+    48,        /* divr_u */
+    72,        /* divi_u */
+    56,        /* qdivr */
+    56,        /* qdivi */
+    56,        /* qdivr_u */
+    56,        /* qdivi_u */
+    48,        /* remr */
+    72,        /* remi */
+    48,        /* remr_u */
+    72,        /* remi_u */
+    4, /* andr */
+    32,        /* andi */
+    4, /* orr */
+    32,        /* ori */
+    4, /* xorr */
+    32,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    8, /* lti_u */
+    4, /* ler */
+    8, /* lei */
+    4, /* ler_u */
+    4, /* lei_u */
+    4, /* eqr */
+    4, /* eqi */
+    4, /* ger */
+    8, /* gei */
+    4, /* ger_u */
+    8, /* gei_u */
+    4, /* gtr */
+    8, /* gti */
+    4, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    12,        /* nei */
+    4, /* movr */
+    32,        /* movi */
+    8, /* extr_c */
+    8, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    8, /* extr_i */
+    8, /* extr_ui */
+    16,        /* htonr_us */
+    36,        /* htonr_ui */
+    36,        /* htonr_ul */
+    12,        /* ldr_c */
+    40,        /* ldi_c */
+    4, /* ldr_uc */
+    32,        /* ldi_uc */
+    12,        /* ldr_s */
+    40,        /* ldi_s */
+    4, /* ldr_us */
+    32,        /* ldi_us */
+    4, /* ldr_i */
+    32,        /* ldi_i */
+    12,        /* ldr_ui */
+    40,        /* ldi_ui */
+    4, /* ldr_l */
+    32,        /* ldi_l */
+    16,        /* ldxr_c */
+    12,        /* ldxi_c */
+    8, /* ldxr_uc */
+    4, /* ldxi_uc */
+    16,        /* ldxr_s */
+    12,        /* ldxi_s */
+    8, /* ldxr_us */
+    4, /* ldxi_us */
+    8, /* ldxr_i */
+    4, /* ldxi_i */
+    16,        /* ldxr_ui */
+    12,        /* ldxi_ui */
+    8, /* ldxr_l */
+    4, /* ldxi_l */
+    4, /* str_c */
+    32,        /* sti_c */
+    4, /* str_s */
+    32,        /* sti_s */
+    4, /* str_i */
+    32,        /* sti_i */
+    4, /* str_l */
+    32,        /* sti_l */
+    8, /* stxr_c */
+    4, /* stxi_c */
+    8, /* stxr_s */
+    4, /* stxi_s */
+    8, /* stxr_i */
+    4, /* stxi_i */
+    8, /* stxr_l */
+    4, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    12,        /* blti_u */
+    8, /* bler */
+    12,        /* blei */
+    8, /* bler_u */
+    12,        /* blei_u */
+    8, /* beqr */
+    40,        /* beqi */
+    8, /* bger */
+    12,        /* bgei */
+    8, /* bger_u */
+    12,        /* bgei_u */
+    8, /* bgtr */
+    12,        /* bgti */
+    8, /* bgtr_u */
+    12,        /* bgti_u */
+    8, /* bner */
+    36,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    28,        /* boaddr */
+    32,        /* boaddi */
+    16,        /* boaddr_u */
+    16,        /* boaddi_u */
+    28,        /* bxaddr */
+    32,        /* bxaddi */
+    16,        /* bxaddr_u */
+    16,        /* bxaddi_u */
+    28,        /* bosubr */
+    32,        /* bosubi */
+    16,        /* bosubr_u */
+    16,        /* bosubi_u */
+    28,        /* bxsubr */
+    32,        /* bxsubi */
+    16,        /* bxsubr_u */
+    16,        /* bxsubi_u */
+    0, /* jmpr */
+    36,        /* jmpi */
+    8, /* callr */
+    36,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    68,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    8, /* addr_f */
+    32,        /* addi_f */
+    8, /* subr_f */
+    32,        /* subi_f */
+    32,        /* rsbi_f */
+    8, /* mulr_f */
+    32,        /* muli_f */
+    8, /* divr_f */
+    32,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    8, /* sqrtr_f */
+    32,        /* ltr_f */
+    56,        /* lti_f */
+    32,        /* ler_f */
+    56,        /* lei_f */
+    32,        /* eqr_f */
+    56,        /* eqi_f */
+    32,        /* ger_f */
+    56,        /* gei_f */
+    32,        /* gtr_f */
+    56,        /* gti_f */
+    32,        /* ner_f */
+    56,        /* nei_f */
+    32,        /* unltr_f */
+    56,        /* unlti_f */
+    32,        /* unler_f */
+    56,        /* unlei_f */
+    32,        /* uneqr_f */
+    56,        /* uneqi_f */
+    32,        /* unger_f */
+    56,        /* ungei_f */
+    32,        /* ungtr_f */
+    56,        /* ungti_f */
+    32,        /* ltgtr_f */
+    56,        /* ltgti_f */
+    20,        /* ordr_f */
+    44,        /* ordi_f */
+    20,        /* unordr_f */
+    44,        /* unordi_f */
+    16,        /* truncr_f_i */
+    16,        /* truncr_f_l */
+    12,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    24,        /* movi_f */
+    4, /* ldr_f */
+    32,        /* ldi_f */
+    8, /* ldxr_f */
+    4, /* ldxi_f */
+    4, /* str_f */
+    32,        /* sti_f */
+    8, /* stxr_f */
+    4, /* stxi_f */
+    24,        /* bltr_f */
+    48,        /* blti_f */
+    24,        /* bler_f */
+    48,        /* blei_f */
+    24,        /* beqr_f */
+    48,        /* beqi_f */
+    24,        /* bger_f */
+    48,        /* bgei_f */
+    24,        /* bgtr_f */
+    48,        /* bgti_f */
+    28,        /* bner_f */
+    52,        /* bnei_f */
+    28,        /* bunltr_f */
+    52,        /* bunlti_f */
+    28,        /* bunler_f */
+    52,        /* bunlei_f */
+    28,        /* buneqr_f */
+    52,        /* buneqi_f */
+    28,        /* bunger_f */
+    52,        /* bungei_f */
+    28,        /* bungtr_f */
+    52,        /* bungti_f */
+    28,        /* bltgtr_f */
+    52,        /* bltgti_f */
+    12,        /* bordr_f */
+    36,        /* bordi_f */
+    12,        /* bunordr_f */
+    36,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    8, /* addr_d */
+    28,        /* addi_d */
+    8, /* subr_d */
+    28,        /* subi_d */
+    28,        /* rsbi_d */
+    8, /* mulr_d */
+    28,        /* muli_d */
+    8, /* divr_d */
+    28,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    8, /* sqrtr_d */
+    32,        /* ltr_d */
+    52,        /* lti_d */
+    32,        /* ler_d */
+    52,        /* lei_d */
+    32,        /* eqr_d */
+    52,        /* eqi_d */
+    32,        /* ger_d */
+    52,        /* gei_d */
+    32,        /* gtr_d */
+    52,        /* gti_d */
+    32,        /* ner_d */
+    52,        /* nei_d */
+    32,        /* unltr_d */
+    52,        /* unlti_d */
+    32,        /* unler_d */
+    52,        /* unlei_d */
+    32,        /* uneqr_d */
+    52,        /* uneqi_d */
+    32,        /* unger_d */
+    52,        /* ungei_d */
+    32,        /* ungtr_d */
+    52,        /* ungti_d */
+    32,        /* ltgtr_d */
+    52,        /* ltgti_d */
+    20,        /* ordr_d */
+    40,        /* ordi_d */
+    20,        /* unordr_d */
+    40,        /* unordi_d */
+    16,        /* truncr_d_i */
+    16,        /* truncr_d_l */
+    12,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    20,        /* movi_d */
+    4, /* ldr_d */
+    32,        /* ldi_d */
+    8, /* ldxr_d */
+    4, /* ldxi_d */
+    4, /* str_d */
+    32,        /* sti_d */
+    8, /* stxr_d */
+    4, /* stxi_d */
+    24,        /* bltr_d */
+    44,        /* blti_d */
+    24,        /* bler_d */
+    44,        /* blei_d */
+    24,        /* beqr_d */
+    44,        /* beqi_d */
+    24,        /* bger_d */
+    44,        /* bgei_d */
+    24,        /* bgtr_d */
+    44,        /* bgti_d */
+    28,        /* bner_d */
+    48,        /* bnei_d */
+    28,        /* bunltr_d */
+    48,        /* bunlti_d */
+    28,        /* bunler_d */
+    48,        /* bunlei_d */
+    28,        /* buneqr_d */
+    48,        /* buneqi_d */
+    28,        /* bunger_d */
+    48,        /* bungei_d */
+    28,        /* bungtr_d */
+    48,        /* bungti_d */
+    28,        /* bltgtr_d */
+    48,        /* bltgti_d */
+    12,        /* bordr_d */
+    32,        /* bordi_d */
+    12,        /* bunordr_d */
+    32,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c
new file mode 100644 (file)
index 0000000..9a067aa
--- /dev/null
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (C) 2014-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* True when argument index i is one of the first six, i.e. one that
+ * this file passes in a register rather than in a stack slot. */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 6)
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 6)
+/* Displacement of a value narrower than 8 bytes inside its 8 byte
+ * argument slot: zero on little-endian, the slot size minus the value
+ * size otherwise.  The expansions are fully parenthesized so they can
+ * be combined with any surrounding operator. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define C_DISP                       0
+#  define S_DISP                       0
+#  define I_DISP                       0
+#  define F_DISP                       0
+#else
+#  define C_DISP                       (8 - sizeof(jit_int8_t))
+#  define S_DISP                       (8 - sizeof(jit_int16_t))
+#  define I_DISP                       (8 - sizeof(jit_int32_t))
+#  define F_DISP                       (8 - sizeof(jit_float32_t))
+#endif
+
+/*
+ * Types
+ */
+/*
+ * What I could understand from gcc/config/alpha/alpha.c:alpha_build_builtin_va_list()
+ * and other helpers, as well as from objdump of simple test programs; I could
+ * not get gdb working on the test system I had access to.
+ *
+ * base-48 to base is where up to 6 float registers are saved.
+ * base to base+48 is where up to 6 integer registers are saved.
+ * base+48... is where varargs arguments are stored.
+ *
+ *     if (offset < 48) {
+ *             if (type == double)
+ *                     offset -= 48;
+ *     }
+ *     load(reg, base, offset);
+ *     offset += 8;
+ */
+/* Variadic-argument cursor; see the layout notes in the comment above. */
+typedef struct jit_va_list {
+    jit_pointer_t      base;   /* address the argument offsets are relative to */
+    jit_word_t         offset; /* offset of the next argument; grows by 8 per fetch */
+} jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* Shorthand that passes the implicit _jit state to _patch(). */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* The cpu and fpu sources are included here with PROTO defined.
+ * NOTE(review): presumably PROTO selects only their declaration
+ * sections -- confirm in jit_alpha-cpu.c / jit_alpha-fpu.c. */
+#define PROTO                          1
+#  include "jit_alpha-cpu.c"
+#  include "jit_alpha-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register table.  Each entry pairs a spec -- rc() class flags OR'ed
+ * with a register number -- with the register's printable name.
+ * Entries written without any rc() flag (fp, ra, pv, gp, sp, zero,
+ * $f31) carry only a number and a name.  The argument registers are
+ * listed from a5 down to a0 and from $f21 down to $f16.  The closing
+ * _NOREG entry is a terminator; _jit_init() leaves it out of
+ * _jitc->reglen. */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0x1c,                  "at" },
+    { rc(gpr) | 0x00,                  "v0" },
+    { rc(gpr) | 0x01,                  "t0" },
+    { rc(gpr) | 0x02,                  "t1" },
+    { rc(gpr) | 0x03,                  "t2" },
+    { rc(gpr) | 0x04,                  "t3" },
+    { rc(gpr) | 0x05,                  "t4" },
+    { rc(gpr) | 0x06,                  "t5" },
+    { rc(gpr) | 0x07,                  "t6" },
+    { rc(gpr) | 0x08,                  "t7" },
+    { rc(gpr) | 0x16,                  "t8" },
+    { rc(gpr) | 0x17,                  "t9" },
+    { rc(gpr) | 0x18,                  "t10" },
+    { rc(gpr) | 0x19,                  "t11" },
+    { rc(sav) | rc(gpr) | 0x09,                "s0" },
+    { rc(sav) | rc(gpr) | 0x0a,                "s1" },
+    { rc(sav) | rc(gpr) | 0x0b,                "s2" },
+    { rc(sav) | rc(gpr) | 0x0c,                "s3" },
+    { rc(sav) | rc(gpr) | 0x0d,                "s4" },
+    { rc(sav) | rc(gpr) | 0x0e,                "s5" },
+    { 0x0f,                            "fp" },
+    { rc(arg) | rc(gpr) | 0x15,                "a5" },
+    { rc(arg) | rc(gpr) | 0x14,                "a4" },
+    { rc(arg) | rc(gpr) | 0x13,                "a3" },
+    { rc(arg) | rc(gpr) | 0x12,                "a2" },
+    { rc(arg) | rc(gpr) | 0x11,                "a1" },
+    { rc(arg) | rc(gpr) | 0x10,                "a0" },
+    { 0x1a,                            "ra" },
+    { 0x1b,                            "pv" },
+    { 0x1d,                            "gp" },
+    { 0x1e,                            "sp" },
+    { 0x1f,                            "zero" },
+    { rc(fpr) | 0x00,                  "$f0" },
+    { rc(fpr) | 0x01,                  "$f1" },
+    { rc(sav) | rc(fpr) | 0x02,                "$f2" },
+    { rc(sav) | rc(fpr) | 0x03,                "$f3" },
+    { rc(sav) | rc(fpr) | 0x04,                "$f4" },
+    { rc(sav) | rc(fpr) | 0x05,                "$f5" },
+    { rc(sav) | rc(fpr) | 0x06,                "$f6" },
+    { rc(sav) | rc(fpr) | 0x07,                "$f7" },
+    { rc(sav) | rc(fpr) | 0x08,                "$f8" },
+    { rc(sav) | rc(fpr) | 0x09,                "$f9" },
+    { rc(fpr) | 0x0a,                  "$f10" },
+    { rc(fpr) | 0x0b,                  "$f11" },
+    { rc(fpr) | 0x0c,                  "$f12" },
+    { rc(fpr) | 0x0d,                  "$f13" },
+    { rc(fpr) | 0x0e,                  "$f14" },
+    { rc(fpr) | 0x0f,                  "$f15" },
+    { rc(arg) | rc(fpr) | 0x15,                "$f21" },
+    { rc(arg) | rc(fpr) | 0x14,                "$f20" },
+    { rc(arg) | rc(fpr) | 0x13,                "$f19" },
+    { rc(arg) | rc(fpr) | 0x12,                "$f18" },
+    { rc(arg) | rc(fpr) | 0x11,                "$f17" },
+    { rc(arg) | rc(fpr) | 0x10,                "$f16" },
+    { rc(fpr) | 0x16,                  "$f22" },
+    { rc(fpr) | 0x17,                  "$f23" },
+    { rc(fpr) | 0x18,                  "$f24" },
+    { rc(fpr) | 0x19,                  "$f25" },
+    { rc(fpr) | 0x1a,                  "$f26" },
+    { rc(fpr) | 0x1b,                  "$f27" },
+    { rc(fpr) | 0x1c,                  "$f28" },
+    { rc(fpr) | 0x1d,                  "$f29" },
+    { rc(fpr) | 0x1e,                  "$f30" },
+    { 0x1f,                            "$f31" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* Empty on this port: nothing is probed or recorded here. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Per-state initialization: start with no carry register and record
+ * how many usable entries the register table has. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    /* No register holds the carry yet. */
+    jit_carry = _NOREG;
+    /* The last _rvs entry is the _NOREG terminator and is not counted. */
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+/* Open a new function.  Any function still in progress is closed first;
+ * then a slot is taken from the functions vector (grown 16 entries at a
+ * time), its frame and argument bookkeeping is reset, and its prolog
+ * and epilog nodes are created.  Only the prolog node is linked here. */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+    jit_function_t     *fn;
+
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+
+    /* Make room for one more function record when the vector is full. */
+    index = _jitc->functions.offset;
+    if (index >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    fn = _jitc->function;
+
+    fn->self.size = stack_framesize;
+    fn->self.alen = 0;
+    fn->self.argi = 0;
+    /* The first 8 bytes below the frame are kept for float conversion. */
+    fn->self.aoff = -8;
+    fn->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&fn->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    fn->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(fn->prolog);
+    fn->prolog->w.w = index;
+
+    /* Epilog node fields:
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    fn->epilog = jit_new_node_no_link(jit_code_epilog);
+    fn->epilog->w.w = index;
+
+    jit_regset_new(&fn->regset);
+}
+
+/* Reserve length bytes in the current function's frame and return the
+ * (negative-growing) offset of the reservation.  The running offset is
+ * first aligned according to the requested size: none for 0 or 1, 2 for
+ * 2, 4 for 3 or 4, and 8 for everything else. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function != NULL);
+
+    if (length == 2)
+       _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       _jitc->function->self.aoff &= -8;
+
+    _jitc->function->self.aoff -= length;
+
+    /* The synthetic allocai note is only recorded before realize. */
+    if (_jitc->realize)
+       return (_jitc->function->self.aoff);
+    jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+    jit_dec_synth();
+    return (_jitc->function->self.aoff);
+}
+
+/* Dynamic stack allocation: v holds the requested size, u receives the
+ * resulting offset.  The running dynamic offset lives in a frame slot
+ * (aoffoff) that is created on the first use in a function. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 tmp;
+
+    assert(_jitc->function != NULL);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       /* First allocar in this function: create the bookkeeping slot. */
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    /* tmp <- -v, with the low three bits cleared */
+    jit_negr(tmp, v);
+    jit_andi(tmp, tmp, -8);
+    /* u <- saved offset + tmp; the stack pointer moves by the same amount */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, tmp);
+    jit_addr(JIT_SP, JIT_SP, tmp);
+    /* Save the new offset for the next allocar. */
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(tmp);
+    jit_dec_synth();
+}
+
+/* Return from the current function by jumping to its epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function != NULL);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    /* The jump target is the function's (not yet linked) epilog. */
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the integer value held in register u: copy it to JIT_RET when
+ * it is not already there, mark JIT_RET live, then return. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET) {
+       jit_movr(JIT_RET, u);
+    }
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate integer u: load it into JIT_RET, then return. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the float held in register u.  When u already is JIT_FRET it
+ * is only marked live; otherwise it is copied to JIT_FRET first. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_f(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate float u: load it into JIT_FRET, then return. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double held in register u.  When u already is JIT_FRET it
+ * is only marked live; otherwise it is copied to JIT_FRET first. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_d(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate double u: load it into JIT_FRET, then return. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Close the current function: link its epilog node (created, but left
+ * unlinked, by _jit_prolog) and clear the current-function pointer. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function != NULL);
+    /* The epilog node must not have been linked before. */
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether the argument described by node u was assigned to a
+ * register.  u must be an arg, arg_f or arg_d node. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+       return (jit_arg_f_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Mark either the call being prepared or the function being defined as
+ * variadic, depending on whether a prepare is in progress.  Each may be
+ * marked only once. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare) {
+       /* Function definition. */
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+
+       /* Reserve a va_list like object in the frame and remember how
+        * many argument registers were named before the ellipsis. */
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+       _jitc->function->vagp = _jitc->function->self.argi;
+    }
+    else {
+       /* Call being prepared. */
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the va_list whose address is in register u to the call being
+ * prepared: its base and its offset are pushed, in that order, as two
+ * separate integer arguments. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_int32_t                tmp;
+
+    jit_inc_synth_w(va_push, u);
+    tmp = jit_get_reg(jit_class_gpr);
+    /* first argument: va_list.base */
+    jit_ldxi(tmp, u, offsetof(jit_va_list_t, base));
+    jit_pushargr(tmp);
+    /* second argument: va_list.offset */
+    jit_ldxi(tmp, u, offsetof(jit_va_list_t, offset));
+    jit_pushargr(tmp);
+    jit_unget_reg(tmp);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the current function.  The node
+ * records either the argument register index (first six arguments) or
+ * the frame offset of an 8 byte stack slot, plus the argument ordinal. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t                 where;
+    jit_node_t         *arg;
+
+    assert(_jitc->function != NULL);
+    if (!jit_arg_reg_p(_jitc->function->self.argi)) {
+       /* Out of registers: take the next stack slot. */
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += 8;
+    }
+    else
+       where = _jitc->function->self.argi++;
+    arg = jit_new_node_ww(jit_code_arg, where,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next float argument of the current function.  The same
+ * argi counter as for integer arguments decides between a register
+ * index and an 8 byte stack slot. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_int32_t                 where;
+    jit_node_t         *arg;
+
+    assert(_jitc->function != NULL);
+    if (!jit_arg_f_reg_p(_jitc->function->self.argi)) {
+       /* Out of registers: take the next stack slot. */
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += 8;
+    }
+    else
+       where = _jitc->function->self.argi++;
+    arg = jit_new_node_ww(jit_code_arg_f, where,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next double argument of the current function.  The same
+ * argi counter as for integer arguments decides between a register
+ * index and an 8 byte stack slot. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t                 where;
+    jit_node_t         *arg;
+
+    assert(_jitc->function != NULL);
+    if (!jit_arg_f_reg_p(_jitc->function->self.argi)) {
+       /* Out of registers: take the next stack slot. */
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += 8;
+    }
+    else
+       where = _jitc->function->self.argi++;
+    arg = jit_new_node_ww(jit_code_arg_d, where,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Read integer argument v into register u with jit_extr_c (register
+ * argument) or jit_ldxi_c from the frame at offset + C_DISP (stack). */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+    }
+    else {
+       jit_extr_c(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u with jit_extr_uc (register
+ * argument) or jit_ldxi_uc from the frame at offset + C_DISP (stack). */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+    }
+    else {
+       jit_extr_uc(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u with jit_extr_s (register
+ * argument) or jit_ldxi_s from the frame at offset + S_DISP (stack). */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+    }
+    else {
+       jit_extr_s(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u with jit_extr_us (register
+ * argument) or jit_ldxi_us from the frame at offset + S_DISP (stack). */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+    }
+    else {
+       jit_extr_us(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u with jit_extr_i (register
+ * argument) or jit_ldxi_i from the frame at offset + I_DISP (stack). */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+    }
+    else {
+       jit_extr_i(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u with jit_extr_ui (register
+ * argument) or jit_ldxi_ui from the frame at offset + I_DISP (stack). */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+    }
+    else {
+       jit_extr_ui(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Read integer argument v into register u: a plain register move for
+ * a register argument, or jit_ldxi_l from the frame for a stack one. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_l(u, _FP, v->u.w);
+    }
+    else {
+       jit_movr(u, _A0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the value of register u: a move
+ * into the argument register, or a store into its frame slot. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_stxi(v->u.w, _FP, u);
+    }
+    else {
+       jit_movr(_A0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the immediate u.  A stack argument
+ * needs the immediate materialized in a scratch register first. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(v->u.w, _FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    else {
+       jit_movi(_A0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Read float argument v into register u: a move from the argument
+ * register, or a load from the frame at offset + F_DISP. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+       jit_ldxi_f(u, _FP, v->u.w + F_DISP);
+    }
+    else {
+       jit_movr_f(u, _F16 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite float argument v with the value of register u: a move into
+ * the argument register, or a store into its frame slot. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(_F16 - v->u.w, u);
+    else
+       /* F_DISP adjusts the slot offset, never the source register;
+        * this matches the load in _jit_getarg_f(). */
+       jit_stxi_f(v->u.w + F_DISP, _FP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite float argument v with the immediate u.  A stack argument
+ * needs the immediate materialized in a scratch float register first. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_f(_F16 - v->u.w, u);
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       /* F_DISP adjusts the slot offset, never the source register;
+        * this matches the load in _jit_getarg_f(). */
+       jit_stxi_f(v->u.w + F_DISP, _FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Read double argument v into register u: a move from the argument
+ * register, or a load from its frame slot. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+       jit_ldxi_d(u, _FP, v->u.w);
+    }
+    else {
+       jit_movr_d(u, _F16 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite double argument v with the value of register u: a move
+ * into the argument register, or a store into its frame slot. */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+       jit_stxi_d(v->u.w, _FP, u);
+    }
+    else {
+       jit_movr_d(_F16 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite double argument v with the immediate u.  A stack argument
+ * needs the immediate materialized in a scratch float register first. */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    if (!jit_arg_f_reg_p(v->u.w)) {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(v->u.w, _FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    else {
+       jit_movi_d(_F16 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register u as the next integer argument of the call being
+ * prepared: in the next argument register while one is free, otherwise
+ * in the next 8 byte slot of the outgoing stack area. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_stxi(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += 8;
+    }
+    else {
+       jit_movr(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next integer argument of the call being
+ * prepared.  For a stack argument the value goes through a scratch
+ * register before being stored in the outgoing area. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_int64_t u)
+{
+    jit_int32_t                tmp;
+
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(_jitc->function->call.size, JIT_SP, tmp);
+       _jitc->function->call.size += 8;
+       jit_unget_reg(tmp);
+    }
+    else {
+       jit_movi(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass float register u as the next argument of the call being
+ * prepared: in the next float argument register while one is free,
+ * otherwise in the next 8 byte outgoing stack slot (at + F_DISP). */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    if (!jit_arg_f_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_f(_jitc->function->call.size + F_DISP, JIT_SP, u);
+       _jitc->function->call.size += 8;
+    }
+    else {
+       jit_movr_f(_F16 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate float u as the next argument of the call being
+ * prepared.  For a stack argument the value goes through a scratch
+ * float register before being stored in the outgoing area. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                tmp;
+
+    assert(_jitc->function != NULL);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    if (!jit_arg_f_reg_p(_jitc->function->call.argi)) {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_stxi_f(_jitc->function->call.size + F_DISP, JIT_SP, tmp);
+       _jitc->function->call.size += 8;
+       jit_unget_reg(tmp);
+    }
+    else {
+       jit_movi_f(_F16 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass double register u as the next argument of the call being
+ * prepared: in the next float argument register while one is free,
+ * otherwise in the next 8 byte outgoing stack slot. */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (!jit_arg_f_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += 8;
+    }
+    else {
+       jit_movr_d(_F16 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate double u as the next argument of the call being
+ * prepared.  For a stack argument the value goes through a scratch
+ * float register before being stored in the outgoing area. */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                tmp;
+
+    assert(_jitc->function != NULL);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (!jit_arg_f_reg_p(_jitc->function->call.argi)) {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, tmp);
+       _jitc->function->call.size += 8;
+       jit_unget_reg(tmp);
+    }
+    else {
+       jit_movi_d(_F16 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Tell whether register regno (an index into _rvs) is an argument
+ * register whose argument index is below the count stored in the call
+ * node: node->v.w for integer registers, node->w.w for float ones. */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                cls;
+    jit_int32_t                index;
+
+    cls = jit_class(_rvs[regno].spec);
+    if (!(cls & jit_class_arg))
+       return (0);
+
+    if (cls & jit_class_gpr) {
+       index = _A0 - regno;
+       return (index >= 0 && index < node->v.w);
+    }
+    if (cls & jit_class_fpr) {
+       index = _F16 - regno;
+       return (index >= 0 && index < node->w.w);
+    }
+
+    return (0);
+}
+
+/*
+ * Finish the call sequence being prepared by emitting an indirect call
+ * through register r0.
+ *
+ * The enclosing function's outgoing argument area (self.alen) is grown if
+ * this call needs more stack than any previous one.  The call node then
+ * records, in v.w (integer registers) and w.w (float registers), how many
+ * argument slots are in use so that jit_regarg_p() can identify the
+ * argument registers of this call.  Finally the per call counters are
+ * reset for the next prepare/finish sequence.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_callr(r0);
+    /* The pusharg functions count outgoing arguments, integer and float
+     * alike, in call.argi; self.argi describes the arguments received by
+     * the enclosing function and says nothing about this call. */
+    call->v.w = call->w.w = _jitc->function->call.argi;
+    _jitc->function->call.argi = _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Finish the call sequence being prepared by emitting a call to the
+ * address i0, and return the call node.
+ *
+ * The enclosing function's outgoing argument area (self.alen) is grown if
+ * this call needs more stack than any previous one.  The call node then
+ * records, in v.w (integer registers) and w.w (float registers), how many
+ * argument slots are in use so that jit_regarg_p() can identify the
+ * argument registers of this call.  Finally the per call counters are
+ * reset for the next prepare/finish sequence.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function != NULL);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_calli(i0);
+    /* The pusharg functions count outgoing arguments, integer and float
+     * alike, in call.argi (no separate float counter is maintained), so
+     * that is the count to record; self.argf describes the enclosing
+     * function, not this call. */
+    call->v.w = call->w.w = _jitc->function->call.argi;
+    _jitc->function->call.argi = _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (call);
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_c to the return
+ * register JIT_RET, bracketed as a synthesized retval_c instruction.
+ */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_uc to the return
+ * register JIT_RET, bracketed as a synthesized retval_uc instruction.
+ */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_s to the return
+ * register JIT_RET, bracketed as a synthesized retval_s instruction.
+ */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_us to the return
+ * register JIT_RET, bracketed as a synthesized retval_us instruction.
+ */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_i to the return
+ * register JIT_RET, bracketed as a synthesized retval_i instruction.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    jit_extr_i(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's integer result into r0: applies jit_extr_ui to the return
+ * register JIT_RET, bracketed as a synthesized retval_ui instruction.
+ */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's word sized integer result into r0.  The value is copied
+ * from JIT_RET unchanged; nothing is emitted when r0 already is JIT_RET.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    if (r0 != JIT_RET) {
+       /* result lives in another register: plain move */
+       jit_movr(r0, JIT_RET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's single precision result into r0.  The value is copied
+ * from JIT_FRET; nothing is emitted when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    if (r0 != JIT_FRET) {
+       /* result lives in another register: plain move */
+       jit_movr_f(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's double precision result into r0.  The value is copied
+ * from JIT_FRET; nothing is emitted when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    if (r0 != JIT_FRET) {
+       /* result lives in another register: plain move */
+       jit_movr_d(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Translate the recorded node list into machine code.
+ *
+ * Walks the list starting at _jitc->head and dispatches on node->code to
+ * the matching instruction emitter.  Branches, calls and movi's that refer
+ * to a label whose address is not yet known are recorded with patch() and
+ * resolved with patch_at() once every node has been processed.
+ *
+ * Returns NULL as soon as the write position reaches _jitc->code.end;
+ * otherwise returns _jit->code.ptr after calling jit_flush() on the
+ * generated range.
+ */
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_word_t          value;
+    jit_int32_t                 offset;
+    /* State captured at a prolog so the function can be generated again
+     * from its start if _jitc->again is found set at its epilog.  Within
+     * this function the data and const_offset members are never read. */
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.const_offset = undo.patch_offset = 0;
+    /* Dispatch helpers: each expands to one "case" of the switch below.
+     * The suffix letters describe the operands in order: r is a register
+     * (mapped through rn()), w an immediate word, f a pointer to a float
+     * constant taken from a data node. */
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+    /* Branch helpers: when the target label already carries
+     * jit_flag_patch its address (temp->u.w) is used directly; otherwise
+     * the branch is emitted with the current pc as a placeholder target
+     * and the returned location is recorded with patch(). */
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       /* give up once the write position reaches the end of the buffer */
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               /* only power of two alignments up to the word size are
+                * accepted, and padding is emitted only for word alignment */
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(hton, _ul);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+               case_rr(mov,);
+           case jit_code_movi:
+               /* a movi may load the address of a data or label node; an
+                * address not known yet is emitted with movi_p and patched */
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (!(temp->flag & jit_flag_patch)) {
+                       word = calli_p(temp->u.w);
+                       patch(word, node);
+                   }
+                   else
+                       calli(temp->u.w);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               /* entering a function: save what is needed to rewind to
+                * this point should the function have to be emitted again */
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   /* the function must be generated again: mark every
+                    * label of the function (and this epilog) undefined,
+                    * rewind the node cursor, pc and patch list to the
+                    * state saved at the prolog, then emit it again */
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           /* the codes below are accepted but emit nothing here */
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:             case jit_code_getarg_ui:
+           case jit_code_getarg_l:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_ui:            case jit_code_retval_l:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       /* the carry register is kept only across notes and the
+        * addc/addx/subc/subx instructions; any other node releases it */
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 ||
+              (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
+       assert(_jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrrw
+#undef case_rrrr
+#undef case_rrf
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+    /* resolve the recorded forward references: the target address is read
+     * from the label node, which a movi keeps in v.n and every other
+     * patched instruction in u.n */
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+/*
+ * Include the cpu and fpu emitter sources with CODE defined.
+ * NOTE(review): presumably CODE selects the implementation section of
+ * those files (as opposed to a prototype section) -- confirm in
+ * jit_alpha-cpu.c and jit_alpha-fpu.c.
+ */
+#define CODE                           1
+#  include "jit_alpha-cpu.c"
+#  include "jit_alpha-fpu.c"
+#undef CODE
+
+/*
+ * Called by _emit_code() on the generated code range [fptr, tptr).
+ * This implementation is empty and performs no work.
+ * NOTE(review): presumably no explicit instruction cache maintenance is
+ * done for this port -- confirm that this is intentional.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+}
+
+/*
+ * Emit an ldxi with displacement i0: the register arguments r0 and r1
+ * are mapped through rn() before being handed to the emitter.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit an stxi with displacement i0: the register arguments r0 and r1
+ * are mapped through rn() before being handed to the emitter.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Emit an ldxi_d (double) with displacement i0: the register arguments r0
+ * and r1 are mapped through rn() before being handed to the emitter.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit an stxi_d (double) with displacement i0: the register arguments r0
+ * and r1 are mapped through rn() before being handed to the emitter.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Record that the instruction at instr must be fixed up once the label
+ * referenced by node has an address.  The label is found in v.n for a
+ * movi and in u.n for any other node, and must not be defined yet.
+ * The patch table grows in steps of 1024 entries when it is full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 target_flag;
+    jit_patch_t                *entry;
+
+    assert(node->flag & jit_flag_node);
+    target_flag = node->code == jit_code_movi ?
+       node->v.n->flag : node->u.n->flag;
+    assert(!(target_flag & jit_flag_patch));
+
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       /* table is full: make room for another 1024 entries */
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    entry = _jitc->patches.ptr + _jitc->patches.offset;
+    entry->inst = instr;
+    entry->node = node;
+    _jitc->patches.offset += 1;
+}
diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c
new file mode 100644 (file)
index 0000000..b6ee260
--- /dev/null
@@ -0,0 +1,3955 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define stxi(i0,r0,r1)               stxi_i(i0,r0,r1)
+#  define ldxi(r0,r1,i0)               ldxi_i(r0,r1,i0)
+#  define ldr(r0,r1)                   ldr_i(r0,r1)
+/* True when d fits in a signed 20-bit / 24-bit field.  Every use of the
+ * argument is parenthesized so that expressions such as `x & mask` are
+ * compared as a whole. */
+#  define _s20P(d)                     ((d) >= -(int)0x80000 && (d) <= 0x7ffff)
+#  define _s24P(d)                     ((d) >= -(int)0x800000 && (d) <= 0x7fffff)
+#  define _u3(v)                       ((v) & 0x7)
+#  define _u4(v)                       ((v) & 0xf)
+#  define _u5(v)                       ((v) & 0x1f)
+#  define _u8(v)                       ((v) & 0xff)
+#  define _u12(v)                      ((v) & 0xfff)
+#  define _u13(v)                      ((v) & 0x1fff)
+#  define _u16(v)                      ((v) & 0xffff)
+#  define _u24(v)                      ((v) & 0xffffff)
+#  define jit_thumb_p()                        jit_cpu.thumb
+#  define jit_no_set_flags()           _jitc->no_set_flags
+#  define jit_armv5_p()                        (jit_cpu.version >= 5)
+#  define jit_armv5e_p()               (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
+#  define jit_armv6_p()                        (jit_cpu.version >= 6)
+#  define jit_armv7r_p()               0
+#  define stack_framesize              48
+extern int     __aeabi_idivmod(int, int);
+extern unsigned        __aeabi_uidivmod(unsigned, unsigned);
+#  define _R0_REGNO                    0x00
+#  define _R1_REGNO                    0x01
+#  define _R2_REGNO                    0x02
+#  define _R3_REGNO                    0x03
+#  define _R4_REGNO                    0x04
+#  define _R5_REGNO                    0x05
+#  define _R6_REGNO                    0x06
+#  define _R7_REGNO                    0x07
+#  define _R8_REGNO                    0x08
+#  define _R9_REGNO                    0x09
+#  define _R10_REGNO                   0x0a
+#  define _R11_REGNO                   0x0b
+#  define _R12_REGNO                   0x0c
+#  define _R13_REGNO                   0x0d
+#  define _R14_REGNO                   0x0e
+#  define _R15_REGNO                   0x0f
+#  define _FP_REGNO                    _R11_REGNO
+#  define _SP_REGNO                    _R13_REGNO
+#  define _LR_REGNO                    _R14_REGNO
+#  define _PC_REGNO                    _R15_REGNO
+#  define ARM_CC_EQ                    0x00000000      /* Z=1 */
+#  define ARM_CC_NE                    0x10000000      /* Z=0 */
+#  define ARM_CC_HS                    0x20000000      /* C=1 */
+#    define ARM_CC_CS                  ARM_CC_HS
+#  define ARM_CC_LO                    0x30000000      /* C=0 */
+#    define ARM_CC_CC                  ARM_CC_LO
+#  define ARM_CC_MI                    0x40000000      /* N=1 */
+#  define ARM_CC_PL                    0x50000000      /* N=0 */
+#  define ARM_CC_VS                    0x60000000      /* V=1 */
+#  define ARM_CC_VC                    0x70000000      /* V=0 */
+#  define ARM_CC_HI                    0x80000000      /* C=1 && Z=0 */
+#  define ARM_CC_LS                    0x90000000      /* C=0 || Z=1 */
+#  define ARM_CC_GE                    0xa0000000      /* N=V */
+#  define ARM_CC_LT                    0xb0000000      /* N!=V */
+#  define ARM_CC_GT                    0xc0000000      /* Z=0 && N=V */
+#  define ARM_CC_LE                    0xd0000000      /* Z=1 || N!=V */
+#  define ARM_CC_AL                    0xe0000000      /* always */
+#  define ARM_CC_NV                    0xf0000000      /* reserved */
+#  define THUMB2_IT                    0
+#  define THUMB2_ITT                   1
+#  define THUMB2_ITE                   2
+#  define THUMB2_ITTT                  3
+#  define THUMB2_ITET                  4
+#  define THUMB2_ITTE                  5
+#  define THUMB2_ITEE                  6
+#  define THUMB2_ITTTT                 7
+#  define THUMB2_ITETT                 8
+#  define THUMB2_ITTET                 9
+#  define THUMB2_ITEET                 10
+#  define THUMB2_ITTTE                 11
+#  define THUMB2_ITETE                 12
+#  define THUMB2_ITTEE                 13
+#  define THUMB2_ITEEE                 14
+#  define ARM_MOV                      0x01a00000
+#  define THUMB_MOV                        0x4600
+#  define ARM_MOVWI                    0x03000000      /* v6t2, v7 */
+#  define THUMB_MOVI                       0x2000
+#  define THUMB2_MOVI                  0xf0400000
+#  define THUMB2_MOVWI                 0xf2400000
+#  define ARM_MOVTI                    0x03400000
+#  define THUMB2_MOVTI                 0xf2c00000
+#  define ARM_MVN                      0x01e00000
+#  define THUMB_MVN                        0x43c0
+#  define THUMB2_MVN                   0xea600000
+#  define THUMB2_MVNI                  0xf0600000
+#  define ARM_I                                0x02000000 /* immediate */
+#  define ARM_S                                0x00100000 /* set flags */
+#  define ARM_ADD                      0x00800000
+#  define THUMB_ADD                        0x1800
+#  define THUMB_ADDX                       0x4400
+#  define THUMB2_ADD                   0xeb000000
+#  define THUMB_ADDI3                      0x1c00
+#  define THUMB_ADDI8                      0x3000
+#  define THUMB2_ADDI                  0xf1000000
+#  define THUMB2_ADDWI                 0xf2000000
+#  define ARM_ADC                      0x00a00000
+#  define THUMB_ADC                        0x4140
+#  define THUMB2_ADC                   0xeb400000
+#  define THUMB2_ADCI                  0xf1400000
+#  define ARM_SUB                      0x00400000
+#  define THUMB_SUB                        0x1a00
+#  define THUMB2_SUB                   0xeba00000
+#  define THUMB_SUBI3                      0x1e00
+#  define THUMB_SUBI8                      0x3800
+#  define THUMB2_SUBI                  0xf1a00000
+#  define THUMB2_SUBWI                 0xf2a00000
+#  define ARM_SBC                      0x00c00000
+#  define THUMB_SBC                        0x4180
+#  define THUMB2_SBC                   0xeb600000
+#  define THUMB2_SBCI                  0xf1600000
+#  define ARM_RSB                      0x00600000
+#  define THUMB_RSBI                       0x4240
+/* Thumb-2 RSB (register) opcode; T2_RSB() expands to it and would not
+ * compile without this definition. */
+#  define THUMB2_RSB                   0xebc00000
+#  define THUMB2_RSBI                  0xf1c00000
+#  define ARM_MUL                      0x00000090
+#  define THUMB_MUL                        0x4340
+#  define THUMB2_MUL                   0xfb00f000
+#  define ARM_UMULL                    0x00800090
+#  define THUMB2_UMULL                 0xfba00000
+#  define ARM_SMULL                    0x00c00090
+#  define THUMB2_SMULL                 0xfb800000
+#  define THUMB2_SDIV                  0xfb90f0f0
+#  define THUMB2_UDIV                  0xfbb0f0f0
+#  define ARM_AND                      0x00000000
+#  define THUMB_AND                        0x4000
+#  define THUMB2_AND                   0xea000000
+#  define THUMB2_ANDI                  0xf0000000
+#  define ARM_BIC                      0x01c00000
+#  define THUMB2_BIC                   0xea200000
+#  define THUMB2_BICI                  0xf0200000
+#  define ARM_ORR                      0x01800000
+#  define THUMB_ORR                        0x4300
+#  define THUMB2_ORR                   0xea400000
+#  define THUMB2_ORRI                  0xf0400000
+#  define ARM_EOR                      0x00200000
+#  define THUMB_EOR                        0x4040
+#  define THUMB2_EOR                   0xea800000
+#  define THUMB2_EORI                  0xf0800000
+/* >> ARMv6* */
+#  define ARM_REV                      0x06bf0f30
+#  define THUMB_REV                        0xba00
+#  define THUMB2_REV                   0xfa90f080
+#  define ARM_REV16                    0x06bf0fb0
+#  define THUMB_REV16                      0xba40
+#  define THUMB2_REV16                 0xfa90f090
+#  define ARM_SXTB                     0x06af0070
+#  define THUMB_SXTB                       0xb240
+#  define THUMB2_SXTB                  0xfa40f080
+#  define ARM_UXTB                     0x06ef0070
+#  define THUMB_UXTB                       0xb2c0
+#  define THUMB2_UXTB                  0xfa50f080
+#  define ARM_SXTH                     0x06bf0070
+#  define THUMB_SXTH                       0xb200
+#  define THUMB2_SXTH                  0xfa00f080
+#  define ARM_UXTH                     0x06ff0070
+#  define THUMB_UXTH                       0xb280
+#  define THUMB2_UXTH                  0xfa10f080
+#  define ARM_XTR8                     0x00000400 /* ?xt? rotate 8 bits */
+#  define ARM_XTR16                    0x00000800 /* ?xt? rotate 16 bits */
+#  define ARM_XTR24                    0x00000c00 /* ?xt? rotate 24 bits */
+/* << ARMv6* */
+#  define ARM_SHIFT                    0x01a00000
+#  define ARM_R                                0x00000010 /* register shift */
+#  define ARM_LSL                      0x00000000
+#  define THUMB_LSL                        0x4080
+#  define THUMB2_LSL                   0xfa00f000
+#  define THUMB_LSLI                       0x0000
+#  define THUMB2_LSLI                  0xea4f0000
+#  define ARM_LSR                      0x00000020
+#  define THUMB_LSR                        0x40c0
+#  define THUMB2_LSR                   0xfa20f000
+#  define THUMB_LSRI                       0x0800
+#  define THUMB2_LSRI                  0xea4f0010
+#  define ARM_ASR                      0x00000040
+#  define THUMB_ASR                        0x4100
+#  define THUMB2_ASR                   0xfa40f000
+#  define THUMB_ASRI                       0x1000
+#  define THUMB2_ASRI                  0xea4f0020
+#  define ARM_ROR                      0x00000060
+#  define ARM_CMP                      0x01500000
+#  define THUMB_CMP                        0x4280
+#  define THUMB_CMPX                       0x4500
+#  define THUMB2_CMP                   0xebb00000
+#  define THUMB_CMPI                       0x2800
+#  define THUMB2_CMPI                  0xf1b00000
+#  define ARM_CMN                      0x01700000
+#  define THUMB_CMN                        0x42c0
+#  define THUMB2_CMN                   0xeb100000
+#  define THUMB2_CMNI                  0xf1100000
+#  define ARM_TST                      0x01100000
+#  define THUMB_TST                        0x4200
+#  define THUMB2_TST                   0xea100000
+#  define THUMB2_TSTI                  0xf0100000
+#  define ARM_TEQ                      0x01300000
+/* branch */
+#  define ARM_BX                       0x012fff10
+#  define ARM_BLX                      0x012fff30
+#  define THUMB_BLX                        0x4780
+#  define ARM_BLXI                     0xfa000000
+#  define THUMB2_BLXI                  0xf000c000
+#  define ARM_B                                0x0a000000
+#  define THUMB_CC_B                       0xd000
+#  define THUMB_B                          0xe000
+#  define THUMB2_CC_B                  0xf0008000
+#  define THUMB2_B                     0xf0009000
+#  define ARM_BLI                      0x0b000000
+#  define THUMB2_BLI                   0xf000d000
+/* ldr/str */
+#  define ARM_P                                0x00800000 /* positive offset */
+#  define THUMB2_P                     0x00000400
+#  define THUMB2_U                     0x00000200
+#  define THUMB2_W                     0x00000100
+#  define ARM_LDRSB                    0x011000d0
+#  define THUMB_LDRSB                      0x5600
+#  define THUMB2_LDRSB                 0xf9100000
+#  define ARM_LDRSBI                   0x015000d0
+#  define THUMB2_LDRSBI                        0xf9100c00
+#  define THUMB2_LDRSBWI               0xf9900000
+#  define ARM_LDRB                     0x07500000
+#  define THUMB_LDRB                       0x5c00
+#  define THUMB2_LDRB                  0xf8100000
+#  define ARM_LDRBI                    0x05500000
+#  define THUMB_LDRBI                      0x7800
+#  define THUMB2_LDRBI                 0xf8100c00
+#  define THUMB2_LDRBWI                        0xf8900000
+#  define ARM_LDRSH                    0x011000f0
+#  define THUMB_LDRSH                      0x5e00
+#  define THUMB2_LDRSH                 0xf9300000
+#  define ARM_LDRSHI                   0x015000f0
+#  define THUMB2_LDRSHI                        0xf9300c00
+#  define THUMB2_LDRSHWI               0xf9b00000
+#  define ARM_LDRH                     0x011000b0
+#  define THUMB_LDRH                       0x5a00
+#  define THUMB2_LDRH                  0xf8300000
+#  define ARM_LDRHI                    0x015000b0
+#  define THUMB_LDRHI                      0x8800
+#  define THUMB2_LDRHI                 0xf8300c00
+#  define THUMB2_LDRHWI                        0xf8b00000
+#  define ARM_LDR                      0x07100000
+#  define THUMB_LDR                        0x5800
+#  define THUMB2_LDR                   0xf8500000
+#  define ARM_LDRI                     0x05100000
+#  define THUMB_LDRI                       0x6800
+#  define THUMB_LDRISP                     0x9800
+#  define THUMB2_LDRI                  0xf8500c00
+#  define THUMB2_LDRWI                 0xf8d00000
+#  define ARM_LDRD                     0x010000d0
+#  define ARM_LDRDI                    0x014000d0
+#  define THUMB2_LDRDI                 0xe8500000
+#  define ARM_STRB                     0x07400000
+#  define THUMB_STRB                       0x5400
+#  define THUMB2_STRB                  0xf8000000
+#  define ARM_STRBI                    0x05400000
+#  define THUMB_STRBI                      0x7000
+#  define THUMB2_STRBI                 0xf8000c00
+#  define THUMB2_STRBWI                        0xf8800000
+#  define ARM_STRH                     0x010000b0
+#  define THUMB_STRH                       0x5200
+#  define THUMB2_STRH                  0xf8200000
+#  define ARM_STRHI                    0x014000b0
+#  define THUMB_STRHI                      0x8000
+#  define THUMB2_STRHI                 0xf8200c00
+#  define THUMB2_STRHWI                        0xf8a00000
+#  define ARM_STR                      0x07000000
+#  define THUMB_STR                        0x5000
+#  define THUMB2_STR                   0xf8400000
+#  define ARM_STRI                     0x05000000
+#  define THUMB_STRI                       0x6000
+# define THUMB2_STRWI                  0xf8c00000
+#  define THUMB_STRISP                     0x9000
+#  define THUMB2_STRI                  0xf8400c00
+#  define ARM_STRD                     0x010000f0
+# define ARM_STRDI                     0x014000f0
+#  define THUMB2_STRDI                 0xe8400000
+/* ldm/stm */
+#  define ARM_M                                0x08000000
+#  define ARM_M_L                      0x00100000 /* load; store if not set */
+#  define ARM_M_I                      0x00800000 /* inc; dec if not set */
+#  define ARM_M_B                      0x01000000 /* before; after if not set */
+#  define ARM_M_U                      0x00200000 /* update Rn */
+#  define THUMB2_LDM_W                 0x00200000
+#  define THUMB2_LDM_P                 0x00008000
+#  define THUMB2_LDM_M                 0x00004000
+#  define THUMB_LDMIA                      0xc800
+#  define THUMB2_LDMIA                 0xe8900000
+#  define THUMB2_LDMB                  0xe9100000
+#  define THUMB_PUSH                       0xb400
+#  define THUMB2_PUSH                  0xe92d0000
+#  define THUMB_POP                        0xbc00
+#  define THUMB2_POP                   0xe8bd0000
+/*
+ * Raw emitters: store `i` through the pc.ui / pc.us view of the code
+ * pointer and post-increment that view (presumably 32-bit and 16-bit
+ * units respectively; confirm against the declaration of pc).
+ */
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  define is(i)                                *_jit->pc.us++ = i
+/*
+ * iss() emits two is() units; code2thumb()/thumb2code() copy a pair of
+ * values between the (c0,c1) and (t0,t1) layouts.  On little-endian hosts
+ * the two halves are swapped (j is emitted before i); otherwise the order
+ * is kept as given.
+ */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#    define iss(i, j)                  do { is(j); is(i); } while (0)
+#    define code2thumb(t0, t1, c0, c1) do { t1 = c0; t0 = c1; } while (0)
+#    define thumb2code(t0, t1, c0, c1) do { c0 = t1; c1 = t0; } while (0)
+#  else
+#    define iss(i, j)                  do { is(i); is(j); } while (0)
+#    define code2thumb(t0, t1, c0, c1) do { t0 = c0; t1 = c1; } while (0)
+#    define thumb2code(t0, t1, c0, c1) do { c0 = t0; c1 = t1; } while (0)
+#  endif
+static int encode_arm_immediate(unsigned int v);
+static int encode_thumb_immediate(unsigned int v);
+static int encode_thumb_word_immediate(unsigned int v);
+static int encode_thumb_jump(int v);
+static int encode_thumb_cc_jump(int v);
+static int encode_thumb_shift(int v, int type) maybe_unused;
+#  define corrr(cc,o,rn,rd,rm)         _corrr(_jit,cc,o,rn,rd,rm)
+static void _corrr(jit_state_t*,int,int,int,int,int);
+#  define corri(cc,o,rn,rd,im)         _corri(_jit,cc,o,rn,rd,im)
+static void _corri(jit_state_t*,int,int,int,int,int);
+#define corri8(cc,o,rn,rt,im)  _corri8(_jit,cc,o,rn,rt,im)
+static void _corri8(jit_state_t*,int,int,int,int,int);
+#  define torrr(o,rn,rd,rm)            _torrr(_jit,o,rn,rd,rm)
+static void _torrr(jit_state_t*,int,int,int,int);
+#  define torrrs(o,rn,rd,rm,im)                _torrrs(_jit,o,rn,rd,rm,im)
+static void _torrrs(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define torxr(o,rn,rt,rm)            _torxr(_jit,o,rn,rt,rm)
+static void _torxr(jit_state_t*,int,int,int,int);
+#  define torrrr(o,rn,rl,rh,rm)                _torrrr(_jit,o,rn,rl,rh,rm)
+static void _torrrr(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define torrri8(o,rn,rt,rt2,im)      _torrri8(_jit,o,rn,rt,rt2,im)
+static void _torrri8(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define coriw(cc,o,rd,im)            _coriw(_jit,cc,o,rd,im)
+static void _coriw(jit_state_t*,int,int,int,int);
+#  define torri(o,rd,rn,im)            _torri(_jit,o,rd,rn,im)
+static void _torri(jit_state_t*,int,int,int,int);
+#  define torri8(o,rn,rt,im)           _torri8(_jit,o,rn,rt,im)
+static void _torri8(jit_state_t*,int,int,int,int);
+#  define torri12(o,rn,rt,im)          _torri12(_jit,o,rn,rt,im)
+static void _torri12(jit_state_t*,int,int,int,int);
+#  define tshift(o,rd,rm,im)           _tshift(_jit,o,rd,rm,im)
+static void _tshift(jit_state_t*,int,int,int,int);
+#  define toriw(o,rd,im)               _toriw(_jit,o,rd,im)
+static void _toriw(jit_state_t*,int,int,int);
+#  define tc8(cc,im)                   _tc8(_jit,cc,im)
+static void _tc8(jit_state_t*,int,int) maybe_unused;
+#  define t11(im)                      _t11(_jit,im)
+static void _t11(jit_state_t*,int);
+#  define tcb(cc,im)                   _tcb(_jit,cc,im)
+static void _tcb(jit_state_t*,int,int);
+#  define blxi(im)                     _blxi(_jit,im)
+static void _blxi(jit_state_t*,int) maybe_unused;
+#  define tb(o,im)                     _tb(_jit,o,im)
+static void _tb(jit_state_t*,int,int);
+#  define corrrr(cc,o,rh,rl,rm,rn)     _corrrr(_jit,cc,o,rh,rl,rm,rn)
+static void _corrrr(jit_state_t*,int,int,int,int,int,int);
+#  define corrrs(cc,o,rn,rd,rm,im)     _corrrs(_jit,cc,o,rn,rd,rm,im)
+static void _corrrs(jit_state_t*,int,int,int,int,int,int);
+#  define cshift(cc,o,rd,rm,rn,im)     _cshift(_jit,cc,o,rd,rm,rn,im)
+static void _cshift(jit_state_t*,int,int,int,int,int,int);
+#  define cb(cc,o,im)                  _cb(_jit,cc,o,im)
+static void _cb(jit_state_t*,int,int,int);
+#  define cbx(cc,o,rm)                 _cbx(_jit,cc,o,rm)
+static void _cbx(jit_state_t*,int,int,int);
+#  define corl(cc,o,r0,i0)             _corl(_jit,cc,o,r0,i0)
+static void _corl(jit_state_t*,int,int,int,int);
+#  define c6orr(cc,o,r0,r1)            _c6orr(_jit,cc,o,r0,r1)
+static void _c6orr(jit_state_t*,int,int,int,int);
+#  define tcit(cc,it)                  _tcit(_jit,cc,it)
+static void _tcit(jit_state_t*,unsigned int,int);
+#  define IT(cc)                       tcit(cc,THUMB2_IT)
+#  define ITT(cc)                      tcit(cc,THUMB2_ITT)
+#  define ITE(cc)                      tcit(cc,THUMB2_ITE)
+#  define ITTT(cc)                     tcit(cc,THUMB2_ITTT)
+#  define ITTE(cc)                     tcit(cc,THUMB2_ITTE)
+#  define ITET(cc)                     tcit(cc,THUMB2_ITET)
+#  define ITEE(cc)                     tcit(cc,THUMB2_ITEE)
+#  define ITTTT(cc)                    tcit(cc,THUMB2_ITTTT)
+#  define ITETT(cc)                    tcit(cc,THUMB2_ITETT)
+#  define ITTET(cc)                    tcit(cc,THUMB2_ITTET)
+#  define ITEET(cc)                    tcit(cc,THUMB2_ITEET)
+#  define ITTTE(cc)                    tcit(cc,THUMB2_ITTTE)
+#  define ITETE(cc)                    tcit(cc,THUMB2_ITETE)
+#  define ITTEE(cc)                    tcit(cc,THUMB2_ITTEE)
+#  define ITEEE(cc)                    tcit(cc,THUMB2_ITEEE)
+#  define tpp(o,im)                    _tpp(_jit,o,im)
+static void _tpp(jit_state_t*,int,int);
+#  define torl(o,rn,im)                        _torl(_jit,o,rn,im)
+static void _torl(jit_state_t*,int,int,int) maybe_unused;
+#  define CC_MOV(cc,rd,rm)             corrr(cc,ARM_MOV,0,rd,rm)
+#  define MOV(rd,rm)                   CC_MOV(ARM_CC_AL,rd,rm)
+/* 16-bit MOV: bit 3 of rd goes to bit 7, rm to bits 6:3, the low three bits
+ * of rd to bits 2:0.  _u3() keeps the rd argument parenthesized. */
+#  define T1_MOV(rd,rm)                        is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|_u3(rd))
+#  define T2_MOV(rd,rm)                        T2_ORR(rd,_R15_REGNO,rm)
+#  define CC_MOVI(cc,rd,im)            corri(cc,ARM_MOV|ARM_I,0,rd,im)
+#  define MOVI(rd,im)                  CC_MOVI(ARM_CC_AL,rd,im)
+#  define CC_MOVWI(cc,rd,im)           coriw(cc,ARM_MOVWI,rd,im)
+#  define MOVWI(rd,im)                 CC_MOVWI(ARM_CC_AL,rd,im)
+#  define T1_MOVI(rd,im)               is(THUMB_MOVI|(_u3(rd)<<8)|_u8(im))
+#  define T2_MOVI(rd,im)               torri(THUMB2_MOVI,_R15_REGNO,rd,im)
+#  define T2_MOVWI(rd,im)              toriw(THUMB2_MOVWI,rd,im)
+#  define CC_MOVTI(cc,rd,im)           coriw(cc,ARM_MOVTI,rd,im)
+#  define MOVTI(rd,im)                 CC_MOVTI(ARM_CC_AL,rd,im)
+#  define T2_MOVTI(rd,im)              toriw(THUMB2_MOVTI,rd,im)
+#  define CC_MVN(cc,rd,rm)             corrr(cc,ARM_MVN,0,rd,rm)
+#  define MVN(rd,rm)                   CC_MVN(ARM_CC_AL,rd,rm)
+#  define T1_MVN(rd,rm)                        is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
+#  define T2_MVN(rd,rm)                        torrr(THUMB2_MVN,_R15_REGNO,rd,rm)
+#  define CC_MVNI(cc,rd,im)            corri(cc,ARM_MVN|ARM_I,0,rd,im)
+#  define MVNI(rd,im)                  CC_MVNI(ARM_CC_AL,rd,im)
+#  define T2_MVNI(rd,im)               torri(THUMB2_MVNI,_R15_REGNO,rd,im)
+#  define CC_NOT(cc,rd,rm)             CC_MVN(cc,rd,rm)
+#  define NOT(rd,rm)                   CC_NOT(ARM_CC_AL,rd,rm)
+#  define T1_NOT(rd,rm)                        T1_MVN(rd,rm)
+#  define T2_NOT(rd,rm)                        T2_MVN(rd,rm)
+#  define NOP()                                MOV(_R0_REGNO, _R0_REGNO)
+#  define T1_NOP()                     is(0xbf00)
+#  define CC_ADD(cc,rd,rn,rm)          corrr(cc,ARM_ADD,rn,rd,rm)
+#  define ADD(rd,rn,rm)                        CC_ADD(ARM_CC_AL,rd,rn,rm)
+#  define T1_ADD(rd,rn,rm)             is(THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
+/* 16-bit ADD on high registers: bit 3 of rdn goes to bit 7, rm to bits 6:3,
+ * the low three bits of rdn to bits 2:0.  _u3() keeps rdn parenthesized. */
+#  define T1_ADDX(rdn,rm)              is(THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|_u3(rdn))
+#  define T2_ADD(rd,rn,rm)             torrr(THUMB2_ADD,rn,rd,rm)
+#  define CC_ADDI(cc,rd,rn,im)         corri(cc,ARM_ADD|ARM_I,rn,rd,im)
+#  define ADDI(rd,rn,im)               CC_ADDI(ARM_CC_AL,rd,rn,im)
+#  define T1_ADDI3(rd,rn,im)           is(THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T1_ADDI8(rdn,im)             is(THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im))
+#  define T2_ADDI(rd,rn,im)            torri(THUMB2_ADDI,rn,rd,im)
+#  define T2_ADDWI(rd,rn,im)           torri(THUMB2_ADDWI,rn,rd,im)
+#  define CC_ADDS(cc,rd,rn,rm)         corrr(cc,ARM_ADD|ARM_S,rn,rd,rm)
+#  define ADDS(rd,rn,rm)               CC_ADDS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ADDS(rd,rn,rm)            torrr(THUMB2_ADD|ARM_S,rn,rd,rm)
+#  define ADDSI(rd,rn,im)              corri(ARM_CC_AL,ARM_ADD|ARM_S|ARM_I,rn,rd,im)
+#  define T2_ADDSI(rd,rn,im)           torri(THUMB2_ADDI|ARM_S,rn,rd,im)
+#  define CC_ADC(cc,rd,rn,rm)          corrr(cc,ARM_ADC,rn,rd,rm)
+#  define ADC(rd,rn,rm)                        CC_ADC(ARM_CC_AL,rd,rn,rm)
+#  define T1_ADC(rdn,rm)               is(THUMB_ADC|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ADC(rd,rn,rm)             torrr(THUMB2_ADC,rn,rd,rm)
+#  define CC_ADCI(cc,rd,rn,im)         corri(cc,ARM_ADC|ARM_I,rn,rd,im)
+#  define ADCI(rd,rn,im)               CC_ADCI(ARM_CC_AL,rd,rn,im)
+#  define T2_ADCI(rd,rn,im)            torri(THUMB2_ADCI,rn,rd,im)
+#  define CC_ADCS(cc,rd,rn,rm)         corrr(cc,ARM_ADC|ARM_S,rn,rd,rm)
+#  define ADCS(rd,rn,rm)               CC_ADCS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ADCS(rd,rn,rm)            torrr(THUMB2_ADC|ARM_S,rn,rd,rm)
+#  define CC_ADCSI(cc,rd,rn,im)                corri(cc,ARM_ADC|ARM_S|ARM_I,rn,rd,im)
+#  define ADCSI(rd,rn,im)              CC_ADCSI(ARM_CC_AL,rd,rn,im)
+#  define T2_ADCSI(rd,rn,im)           torri(THUMB2_ADCI|ARM_S,rn,rd,im)
+#  define CC_SUB(cc,rd,rn,rm)          corrr(cc,ARM_SUB,rn,rd,rm)
+#  define SUB(rd,rn,rm)                        CC_SUB(ARM_CC_AL,rd,rn,rm)
+#  define T1_SUB(rd,rn,rm)             is(THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T2_SUB(rd,rn,rm)             torrr(THUMB2_SUB,rn,rd,rm)
+#  define CC_SUBI(cc,rd,rn,im)         corri(cc,ARM_SUB|ARM_I,rn,rd,im)
+#  define SUBI(rd,rn,im)               CC_SUBI(ARM_CC_AL,rd,rn,im)
+#  define T1_SUBI3(rd,rn,im)           is(THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T1_SUBI8(rdn,im)             is(THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im))
+#  define T2_SUBI(rd,rn,im)            torri(THUMB2_SUBI,rn,rd,im)
+#  define T2_SUBWI(rd,rn,im)           torri(THUMB2_SUBWI,rn,rd,im)
+#  define CC_SUBS(cc,rd,rn,rm)         corrr(cc,ARM_SUB|ARM_S,rn,rd,rm)
+#  define SUBS(rd,rn,rm)               CC_SUBS(ARM_CC_AL,rd,rn,rm)
+#  define T2_SUBS(rd,rn,rm)            torrr(THUMB2_SUB|ARM_S,rn,rd,rm)
+#  define CC_SUBSI(cc,rd,rn,im)                corri(cc,ARM_SUB|ARM_S|ARM_I,rn,rd,im)
+#  define SUBSI(rd,rn,im)              CC_SUBSI(ARM_CC_AL,rd,rn,im)
+#  define T2_SUBSI(rd,rn,im)           torri(THUMB2_SUBI|ARM_S,rn,rd,im)
+#  define CC_SBC(cc,rd,rn,rm)          corrr(cc,ARM_SBC,rn,rd,rm)
+#  define SBC(rd,rn,rm)                        CC_SBC(ARM_CC_AL,rd,rn,rm)
+#  define T1_SBC(rdn,rm)               is(THUMB_SBC|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_SBC(rd,rn,rm)             torrr(THUMB2_SBC,rn,rd,rm)
+#  define CC_SBCI(cc,rd,rn,im)         corri(cc,ARM_SBC|ARM_I,rn,rd,im)
+#  define SBCI(rd,rn,im)               CC_SBCI(ARM_CC_AL,rd,rn,im)
+#  define T2_SBCI(rd,rn,im)            torri(THUMB2_SBCI,rn,rd,im)
+#  define CC_SBCS(cc,rd,rn,rm)         corrr(cc,ARM_SBC|ARM_S,rn,rd,rm)
+#  define SBCS(rd,rn,rm)               CC_SBCS(ARM_CC_AL,rd,rn,rm)
+#  define T2_SBCS(rd,rn,rm)            torrr(THUMB2_SBC|ARM_S,rn,rd,rm)
+#  define CC_SBCSI(cc,rd,rn,im)                corri(cc,ARM_SBC|ARM_S|ARM_I,rn,rd,im)
+#  define SBCSI(rd,rn,im)              CC_SBCSI(ARM_CC_AL,rd,rn,im)
+#  define T2_SBCSI(rd,rn,im)           torri(THUMB2_SBCI|ARM_S,rn,rd,im)
+#  define CC_RSB(cc,rd,rn,rm)          corrr(cc,ARM_RSB,rn,rd,rm)
+#  define RSB(rd,rn,rm)                        CC_RSB(ARM_CC_AL,rd,rn,rm)
+#  define T2_RSB(rd,rn,rm)             torrr(THUMB2_RSB,rn,rd,rm)
+#  define CC_RSBI(cc,rd,rn,im)         corri(cc,ARM_RSB|ARM_I,rn,rd,im)
+#  define RSBI(rd,rn,im)               CC_RSBI(ARM_CC_AL,rd,rn,im)
+#  define T1_RSBI(rd,rn)               is(THUMB_RSBI|(_u3(rn)<<3)|_u3(rd))
+#  define T2_RSBI(rd,rn,im)            torri(THUMB2_RSBI,rn,rd,im)
+#  define CC_MUL(cc,rl,rn,rm)          corrrr(cc,ARM_MUL,rl,0,rm,rn)
+#  define MUL(rl,rn,rm)                        CC_MUL(ARM_CC_AL,rl,rn,rm)
+#  define T1_MUL(rdm,rn)               is(THUMB_MUL|(_u3(rn)<<3)|_u3(rdm))
+#  define T2_MUL(rd,rn,rm)             torrr(THUMB2_MUL,rn,rd,rm)
+#  define CC_SMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_SMULL,rh,rl,rm,rn)
+#  define SMULL(rl,rh,rn,rm)           CC_SMULL(ARM_CC_AL,rl,rh,rn,rm)
+#  define T2_SMULL(rl,rh,rn,rm)                torrrr(THUMB2_SMULL,rn,rl,rh,rm)
+#  define CC_UMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
+#  define UMULL(rl,rh,rn,rm)           CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
+#  define T2_UMULL(rl,rh,rn,rm)                torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+#  define T2_SDIV(rd,rn,rm)            torrr(THUMB2_SDIV,rn,rd,rm)
+#  define T2_UDIV(rd,rn,rm)            torrr(THUMB2_UDIV,rn,rd,rm)
+#  define CC_AND(cc,rd,rn,rm)          corrr(cc,ARM_AND,rn,rd,rm)
+#  define AND(rd,rn,rm)                        CC_AND(ARM_CC_AL,rd,rn,rm)
+#  define T1_AND(rdn,rm)               is(THUMB_AND|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_AND(rd,rn,rm)             torrr(THUMB2_AND,rn,rd,rm)
+#  define CC_ANDI(cc,rd,rn,im)         corri(cc,ARM_AND|ARM_I,rn,rd,im)
+#  define ANDI(rd,rn,im)               CC_ANDI(ARM_CC_AL,rd,rn,im)
+#  define T2_ANDI(rd,rn,im)            torri(THUMB2_ANDI,rn,rd,im)
+#  define CC_ANDS(cc,rd,rn,rm)         corrr(cc,ARM_AND|ARM_S,rn,rd,rm)
+#  define ANDS(rd,rn,rm)               CC_ANDS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ANDS(rd,rn,rm)            torrr(THUMB2_AND|ARM_S,rn,rd,rm)
+#  define CC_ANDSI(cc,rd,rn,im)                corri(cc,ARM_AND|ARM_S|ARM_I,rn,rd,im)
+#  define ANDSI(rd,rn,im)              CC_ANDSI(ARM_CC_AL,rd,rn,im)
+/* torri() takes (opcode, rn, rd, im); Thumb-2 encodings carry no condition
+ * field, so no ARM_CC_* argument is passed (same shape as T2_ADDSI). */
+#  define T2_ANDSI(rd,rn,im)           torri(THUMB2_ANDI|ARM_S,rn,rd,im)
+#  define CC_BIC(cc,rd,rn,rm)          corrr(cc,ARM_BIC,rn,rd,rm)
+#  define BIC(rd,rn,rm)                        CC_BIC(ARM_CC_AL,rd,rn,rm)
+#  define T2_BIC(rd,rn,rm)             torrr(THUMB2_BIC,rn,rd,rm)
+#  define CC_BICI(cc,rd,rn,im)         corri(cc,ARM_BIC|ARM_I,rn,rd,im)
+#  define BICI(rd,rn,im)               CC_BICI(ARM_CC_AL,rd,rn,im)
+#  define T2_BICI(rd,rn,im)            torri(THUMB2_BICI,rn,rd,im)
+#  define CC_BICS(cc,rd,rn,rm)         corrr(cc,ARM_BIC|ARM_S,rn,rd,rm)
+#  define BICS(rd,rn,rm)               CC_BICS(ARM_CC_AL,rd,rn,rm)
+#  define T2_BICS(rd,rn,rm)            torrr(THUMB2_BIC|ARM_S,rn,rd,rm)
+#  define CC_BICSI(cc,rd,rn,im)                corri(cc,ARM_BIC|ARM_S|ARM_I,rn,rd,im)
+#  define BICSI(rd,rn,im)              CC_BICSI(ARM_CC_AL,rd,rn,im)
+/* torri() takes (opcode, rn, rd, im); Thumb-2 encodings carry no condition
+ * field, so no ARM_CC_* argument is passed (same shape as T2_ADDSI). */
+#  define T2_BICSI(rd,rn,im)           torri(THUMB2_BICI|ARM_S,rn,rd,im)
+#  define CC_ORR(cc,rd,rn,rm)          corrr(cc,ARM_ORR,rn,rd,rm)
+#  define ORR(rd,rn,rm)                        CC_ORR(ARM_CC_AL,rd,rn,rm)
+#  define T1_ORR(rdn,rm)               is(THUMB_ORR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ORR(rd,rn,rm)             torrr(THUMB2_ORR,rn,rd,rm)
+/* ORR with an immediate-shifted register operand.  The fourth parameter is
+ * named rm so that it matches the name used in the expansion (as in
+ * CC_EOR_SI). */
+#  define CC_ORR_SI(cc,rd,rn,rm,sh,im) corrrs(cc,ARM_ORR|sh,rn,rd,rm,im)
+#  define ORR_SI(r0,r1,r2,sh,im)       CC_ORR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
+#  define CC_ORRI(cc,rd,rn,im)         corri(cc,ARM_ORR|ARM_I,rn,rd,im)
+#  define ORRI(rd,rn,im)               CC_ORRI(ARM_CC_AL,rd,rn,im)
+#  define T2_ORRI(rd,rn,im)            torri(THUMB2_ORRI,rn,rd,im)
+#  define CC_EOR(cc,rd,rn,rm)          corrr(cc,ARM_EOR,rn,rd,rm)
+#  define EOR(rd,rn,rm)                        CC_EOR(ARM_CC_AL,rd,rn,rm)
+#  define T1_EOR(rdn,rm)               is(THUMB_EOR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_EOR(rd,rn,rm)             torrr(THUMB2_EOR,rn,rd,rm)
+#  define CC_EOR_SI(cc,rd,rn,rm,sh,im) corrrs(cc,ARM_EOR|sh,rn,rd,rm,im)
+#  define EOR_SI(r0,r1,r2,sh,im)       CC_EOR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
+#  define CC_EORI(cc,rd,rn,im)         corri(cc,ARM_EOR|ARM_I,rn,rd,im)
+#  define EORI(rd,rn,im)               CC_EORI(ARM_CC_AL,rd,rn,im)
+#  define T2_EORI(rd,rn,im)            torri(THUMB2_EORI,rn,rd,im)
+#  define CC_REV(cc,rd,rm)             c6orr(cc,ARM_REV,rd,rm) /* REV under condition cc, emitted through c6orr(); note rd comes first here */
+#  define REV(rd,rm)                   CC_REV(ARM_CC_AL,rd,rm)
+#  define T1_REV(rd,rm)                        is(THUMB_REV|(_u3(rm)<<3)|_u3(rd)) /* rm in bits 5:3, rd in bits 2:0 */
+#  define T2_REV(rd,rm)                        torrr(THUMB2_REV,rm,rd,rm) /* rm is passed in both the first and last register slots of torrr(); NOTE(review): presumably the encoding repeats Rm -- confirm against the ARM ARM */
+#  define CC_REV16(cc,rd,rm)           c6orr(cc,ARM_REV16,rd,rm) /* REV16: same four shapes as REV with the REV16 opcodes */
+#  define REV16(rd,rm)                 CC_REV16(ARM_CC_AL,rd,rm)
+#  define T1_REV16(rd,rm)              is(THUMB_REV16|(_u3(rm)<<3)|_u3(rd))
+#  define T2_REV16(rd,rm)              torrr(THUMB2_REV16,rm,rd,rm)
+#  define CC_SXTB(cc,rd,rm)            c6orr(cc,ARM_SXTB,rd,rm) /* SXTB/UXTB/SXTH/UXTH all share the four shapes shown for SXTB */
+#  define SXTB(rd,rm)                  CC_SXTB(ARM_CC_AL,rd,rm) /* condition fixed to ARM_CC_AL */
+#  define T1_SXTB(rd,rm)               is(THUMB_SXTB|(_u3(rm)<<3)|_u3(rd)) /* rm in bits 5:3, rd in bits 2:0 */
+#  define T2_SXTB(rd,rm)               torrr(THUMB2_SXTB,_R15_REGNO,rd,rm) /* the rn slot of torrr() is filled with _R15_REGNO since there is no rn operand */
+#  define CC_UXTB(cc,rd,rm)            c6orr(cc,ARM_UXTB,rd,rm)
+#  define UXTB(rd,rm)                  CC_UXTB(ARM_CC_AL,rd,rm)
+#  define T1_UXTB(rd,rm)               is(THUMB_UXTB|(_u3(rm)<<3)|_u3(rd))
+#  define T2_UXTB(rd,rm)               torrr(THUMB2_UXTB,_R15_REGNO,rd,rm)
+#  define CC_SXTH(cc,rd,rm)            c6orr(cc,ARM_SXTH,rd,rm)
+#  define SXTH(rd,rm)                  CC_SXTH(ARM_CC_AL,rd,rm)
+#  define T1_SXTH(rd,rm)               is(THUMB_SXTH|(_u3(rm)<<3)|_u3(rd))
+#  define T2_SXTH(rd,rm)               torrr(THUMB2_SXTH,_R15_REGNO,rd,rm)
+#  define CC_UXTH(cc,rd,rm)            c6orr(cc,ARM_UXTH,rd,rm)
+#  define UXTH(rd,rm)                  CC_UXTH(ARM_CC_AL,rd,rm)
+#  define T1_UXTH(rd,rm)               is(THUMB_UXTH|(_u3(rm)<<3)|_u3(rd))
+#  define T2_UXTH(rd,rm)               torrr(THUMB2_UXTH,_R15_REGNO,rd,rm)
+#  define CC_SHIFT(cc,o,rd,rm,rn,im)   cshift(cc,o,rd,rm,rn,im) /* common entry for the ARM-mode shifts below; forwards unchanged to cshift() */
+#  define CC_LSL(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_LSL|ARM_R,rd,rm,rn,0) /* shift by register: ARM_R is OR-ed in, rm and rn swap position, im is 0 */
+#  define LSL(rd,rn,rm)                        CC_LSL(ARM_CC_AL,rd,rn,rm)
+#  define T1_LSL(rdn,rm)               is(THUMB_LSL|(_u3(rm)<<3)|_u3(rdn)) /* two-operand form: rm in bits 5:3, rdn in bits 2:0 */
+#  define T2_LSL(rd,rn,rm)             torrr(THUMB2_LSL,rn,rd,rm)
+#  define CC_LSLI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_LSL,rd,0,rn,im) /* shift by immediate: no ARM_R, the rm slot is 0 and im carries the amount */
+#  define LSLI(rd,rn,im)               CC_LSLI(ARM_CC_AL,rd,rn,im)
+#  define T1_LSLI(rd,rm,im)            is(THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)) /* im (via _u5) in bits 10:6, rm in bits 5:3, rd in bits 2:0 */
+#  define T2_LSLI(rd,rm,im)            tshift(THUMB2_LSLI,rd,rm,im) /* immediate shifts use tshift() rather than torrr() */
+#  define CC_LSR(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_LSR|ARM_R,rd,rm,rn,0) /* LSR: same shapes as LSL */
+#  define LSR(rd,rn,rm)                        CC_LSR(ARM_CC_AL,rd,rn,rm)
+#  define T1_LSR(rdn,rm)               is(THUMB_LSR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_LSR(rd,rn,rm)             torrr(THUMB2_LSR,rn,rd,rm)
+#  define CC_LSRI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_LSR,rd,0,rn,im)
+#  define LSRI(rd,rn,im)               CC_LSRI(ARM_CC_AL,rd,rn,im)
+#  define T1_LSRI(rd,rm,im)            is(THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
+#  define T2_LSRI(rd,rm,im)            tshift(THUMB2_LSRI,rd,rm,im)
+#  define CC_ASR(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_ASR|ARM_R,rd,rm,rn,0) /* ASR: same shapes as LSL */
+#  define ASR(rd,rn,rm)                        CC_ASR(ARM_CC_AL,rd,rn,rm)
+#  define T1_ASR(rdn,rm)               is(THUMB_ASR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ASR(rd,rn,rm)             torrr(THUMB2_ASR,rn,rd,rm)
+#  define CC_ASRI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_ASR,rd,0,rn,im)
+#  define ASRI(rd,rn,im)               CC_ASRI(ARM_CC_AL,rd,rn,im)
+#  define T1_ASRI(rd,rm,im)            is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
+#  define T2_ASRI(rd,rm,im)            tshift(THUMB2_ASRI,rd,rm,im)
+/* CMP/CMN comparison macros.  They have no destination register: the ARM
+ * forms pass 0 in the rd slot and the T2 forms pass _R15_REGNO.  CC_* take
+ * an explicit condition cc; the unprefixed names fix it to ARM_CC_AL. */
+#  define CC_CMP(cc,rn,rm)             corrr(cc,ARM_CMP,rn,0,rm)
+#  define CMP(rn,rm)                   CC_CMP(ARM_CC_AL,rn,rm)
+/* rm in bits 5:3, rn in bits 2:0. */
+#  define T1_CMP(rn,rm)                        is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
+/* 4-bit register form: bit 3 of rn is moved up to bit 7, rm occupies bits
+ * 6:3 and the low three bits of rn occupy bits 2:0. */
+#  define T1_CMPX(rn,rm)               is(THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|((rn)&7))
+#  define T2_CMP(rn,rm)                        torrr(THUMB2_CMP,rn,_R15_REGNO,rm)
+#  define CC_CMPI(cc,rn,im)            corri(cc,ARM_CMP|ARM_I,rn,0,im)
+#  define CMPI(rn,im)                  CC_CMPI(ARM_CC_AL,rn,im)
+/* rn in bits 10:8, im (via _u8) in bits 7:0. */
+#  define T1_CMPI(rn,im)               is(THUMB_CMPI|(_u3(rn)<<8)|_u8(im))
+#  define T2_CMPI(rn,im)               torri(THUMB2_CMPI,rn,_R15_REGNO,im)
+#  define CC_CMN(cc,rn,rm)             corrr(cc,ARM_CMN,rn,0,rm)
+#  define CMN(rn,rm)                   CC_CMN(ARM_CC_AL,rn,rm)
+/* Same layout as T1_CMP: rm in bits 5:3 and rn (not rm again) in bits 2:0. */
+#  define T1_CMN(rn,rm)                        is(THUMB_CMN|(_u3(rm)<<3)|_u3(rn))
+#  define T2_CMN(rn,rm)                        torrr(THUMB2_CMN,rn,_R15_REGNO,rm)
+#  define CC_CMNI(cc,rn,im)            corri(cc,ARM_CMN|ARM_I,rn,0,im)
+#  define CMNI(rn,im)                  CC_CMNI(ARM_CC_AL,rn,im)
+#  define T2_CMNI(rn,im)               torri(THUMB2_CMNI,rn,_R15_REGNO,im)
+/* TST/TEQ test macros.  Like CMP/CMN they have no destination register:
+ * the ARM forms pass literal 0 in the rd slot and the T2 forms pass
+ * _R15_REGNO.  CC_* take an explicit condition cc; the unprefixed names fix
+ * it to ARM_CC_AL. */
+/* The rd slot is the constant 0, as in CC_CMP/CC_CMN/CC_TEQ; it must not be
+ * an identifier picked up from the caller's scope. */
+#  define CC_TST(cc,rn,rm)             corrr(cc,ARM_TST,rn,0,rm)
+#  define TST(rn,rm)                   CC_TST(ARM_CC_AL,rn,rm)
+/* rm in bits 5:3, rn in bits 2:0. */
+#  define T1_TST(rn,rm)                        is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
+#  define T2_TST(rn,rm)                        torrr(THUMB2_TST,rn,_R15_REGNO,rm)
+#  define CC_TSTI(cc,rn,im)            corri(cc,ARM_TST|ARM_I,rn,0,im)
+#  define TSTI(rn,im)                  CC_TSTI(ARM_CC_AL,rn,im)
+#  define T2_TSTI(rn,im)               torri(THUMB2_TSTI,rn,_R15_REGNO,im)
+#  define CC_TEQ(cc,rn,rm)             corrr(cc,ARM_TEQ,rn,0,rm)
+#  define TEQ(rn,rm)                   CC_TEQ(ARM_CC_AL,rn,rm)
+/* The register parameter is named rn so that the macro argument itself is
+ * what reaches corri(). */
+#  define CC_TEQI(cc,rn,im)            corri(cc,ARM_TEQ|ARM_I,rn,0,im)
+#  define TEQI(rn,im)                  CC_TEQI(ARM_CC_AL,rn,im)
+#  define CC_BX(cc,rm)                 cbx(cc,ARM_BX,rm) /* branch to register rm under condition cc, emitted through cbx() */
+#  define BX(rm)                       CC_BX(ARM_CC_AL,rm)
+#  define T1_BX(rm)                    is(0x4700|(_u4(rm)<<3)) /* rm (via _u4) in bits 6:3; NOTE(review): literal 0x4700 where T1_BLX uses a named THUMB_* constant */
+#  define CC_BLX(cc,rm)                        cbx(cc,ARM_BLX,rm) /* same emitter as BX with the ARM_BLX opcode */
+#  define BLX(rm)                      CC_BLX(ARM_CC_AL,rm)
+#  define T1_BLX(rm)                   is(THUMB_BLX|(_u4(rm)<<3))
+#  define BLXI(im)                     blxi(im) /* immediate-target form; has no CC_ variant in this table */
+#  define T2_BLXI(im)                  tb(THUMB2_BLXI,im)
+#  define CC_B(cc,im)                  cb(cc,ARM_B,im) /* branch with immediate im under condition cc */
+#  define B(im)                                CC_B(ARM_CC_AL,im)
+#  define T1_CC_B(cc,im)               tc8(cc,im) /* the T1/T2 conditional and unconditional branches each use their own emitter */
+#  define T1_B(im)                     t11(im)
+#  define T2_CC_B(cc,im)               tcb(cc,im)
+#  define T2_B(im)                     tb(THUMB2_B,im)
+#  define CC_BLI(cc,im)                        cb(cc,ARM_BLI,im) /* same emitter as CC_B with the ARM_BLI opcode */
+#  define BLI(im)                      CC_BLI(ARM_CC_AL,im)
+#  define T2_BLI(im)                   tb(THUMB2_BLI,im)
+#  define CC_LDRSB(cc,rt,rn,rm)                corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm) /* LDRSB with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define LDRSB(rt,rn,rm)              CC_LDRSB(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRSB(rt,rn,rm)           is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRSB(rt,rn,rm)           torxr(THUMB2_LDRSB,rn,rt,rm)
+#  define CC_LDRSBN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSB,rn,rt,rm) /* "N" form: identical except ARM_P is not set (presumably the offset is subtracted -- confirm) */
+#  define LDRSBN(rt,rn,rm)             CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRSBI(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri8() */
+#  define LDRSBI(rt,rn,im)             CC_LDRSBI(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSBI(rt,rn,im)          torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im) /* THUMB2_U is set here and absent in T2_LDRSBIN, mirroring ARM_P in the ARM forms */
+#  define T2_LDRSBWI(rt,rn,im)         torri12(THUMB2_LDRSBWI,rn,rt,im) /* emitted through torri12() instead of torri8() */
+#  define CC_LDRSBIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSBI,rn,rt,im)
+#  define LDRSBIN(rt,rn,im)            CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSBIN(rt,rn,im)         torri8(THUMB2_LDRSBI,rn,rt,im)
+#  define CC_LDRB(cc,rt,rn,rm)         corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm) /* LDRB with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define LDRB(rt,rn,rm)               CC_LDRB(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRB(rt,rn,rm)            is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRB(rt,rn,rm)            torxr(THUMB2_LDRB,rn,rt,rm)
+#  define CC_LDRBN(cc,rt,rn,rm)                corrr(cc,ARM_LDRB,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define LDRBN(rt,rn,rm)              CC_LDRBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRBI(cc,rt,rn,im)                corri(cc,ARM_LDRBI|ARM_P,rn,rt,im) /* immediate offset; uses corri() where the LDRSB/LDRSH/LDRH forms use corri8() */
+#  define LDRBI(rt,rn,im)              CC_LDRBI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRBI(rt,rn,im)           is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRBI(rt,rn,im)           torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
+#  define T2_LDRBWI(rt,rn,im)          torri12(THUMB2_LDRBWI,rn,rt,im)
+#  define CC_LDRBIN(cc,rt,rn,im)       corri(cc,ARM_LDRBI,rn,rt,im)
+#  define LDRBIN(rt,rn,im)             CC_LDRBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRBIN(rt,rn,im)          torri8(THUMB2_LDRBI,rn,rt,im) /* THUMB2_U not set */
+#  define CC_LDRSH(cc,rt,rn,rm)                corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm) /* LDRSH with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define LDRSH(rt,rn,rm)              CC_LDRSH(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRSH(rt,rn,rm)           is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRSH(rt,rn,rm)           torxr(THUMB2_LDRSH,rn,rt,rm)
+#  define CC_LDRSHN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSH,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define LDRSHN(rt,rn,rm)             CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRSHI(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri8() */
+#  define LDRSHI(rt,rn,im)             CC_LDRSHI(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSHI(rt,rn,im)          torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
+#  define T2_LDRSHWI(rt,rn,im)         torri12(THUMB2_LDRSHWI,rn,rt,im)
+#  define CC_LDRSHIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSHI,rn,rt,im)
+#  define LDRSHIN(rt,rn,im)            CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSHIN(rt,rn,im)         torri8(THUMB2_LDRSHI,rn,rt,im) /* THUMB2_U not set */
+#  define CC_LDRH(cc,rt,rn,rm)         corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm) /* LDRH with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define LDRH(rt,rn,rm)               CC_LDRH(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRH(rt,rn,rm)            is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRH(rt,rn,rm)            torxr(THUMB2_LDRH,rn,rt,rm)
+#  define CC_LDRHN(cc,rt,rn,rm)                corrr(cc,ARM_LDRH,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define LDRHN(rt,rn,rm)              CC_LDRHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRHI(cc,rt,rn,im)                corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri8() */
+#  define LDRHI(rt,rn,im)              CC_LDRHI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRHI(rt,rn,im)           is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDRHI(rt,rn,im)           torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
+#  define T2_LDRHWI(rt,rn,im)          torri12(THUMB2_LDRHWI,rn,rt,im)
+#  define CC_LDRHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRHI,rn,rt,im)
+#  define LDRHIN(rt,rn,im)             CC_LDRHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRHIN(rt,rn,im)          torri8(THUMB2_LDRHI,rn,rt,im) /* THUMB2_U not set */
+#  define CC_LDR(cc,rt,rn,rm)          corrr(cc,ARM_LDR|ARM_P,rn,rt,rm) /* LDR with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define LDR(rt,rn,rm)                        CC_LDR(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDR(rt,rn,rm)             is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_LDR(rt,rn,rm)             torxr(THUMB2_LDR,rn,rt,rm)
+#  define CC_LDRN(cc,rt,rn,rm)         corrr(cc,ARM_LDR,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define LDRN(rt,rn,rm)               CC_LDRN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRI(cc,rt,rn,im)         corri(cc,ARM_LDRI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri() */
+#  define LDRI(rt,rn,im)               CC_LDRI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRI(rt,rn,im)            is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T1_LDRISP(rt,im)             is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im)) /* no rn operand: rt in bits 10:8, im (via _u8) in bits 7:0 (presumably SP-relative, per the name) */
+#  define T2_LDRI(rt,rn,im)            torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
+#  define T2_LDRWI(rt,rn,im)           torri12(THUMB2_LDRWI,rn,rt,im)
+#  define CC_LDRIN(cc,rt,rn,im)                corri(cc,ARM_LDRI,rn,rt,im)
+#  define LDRIN(rt,rn,im)              CC_LDRIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRIN(rt,rn,im)           torri8(THUMB2_LDRI,rn,rt,im) /* THUMB2_U not set */
+/* LDRD family.  ARM forms OR ARM_P into the opcode; the "N" forms omit it
+ * (presumably a subtracted offset -- confirm).  CC_* take an explicit
+ * condition cc; the unprefixed names fix it to ARM_CC_AL. */
+#  define CC_LDRD(cc,rt,rn,rm)         corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
+#  define LDRD(rt,rn,rm)               CC_LDRD(ARM_CC_AL,rt,rn,rm)
+/* The T2 form names both target registers (rt, rt2) explicitly. */
+#  define T2_LDRDI(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
+#  define CC_LDRDN(cc,rt,rn,rm)                corrr(cc,ARM_LDRD,rn,rt,rm)
+/* First parameter is named rt so that the caller's argument, not an
+ * unrelated identifier called rt, is forwarded to CC_LDRDN. */
+#  define LDRDN(rt,rn,rm)              CC_LDRDN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRDI(cc,rt,rn,im)                corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
+#  define LDRDI(rt,rn,im)              CC_LDRDI(ARM_CC_AL,rt,rn,im)
+#  define CC_LDRDIN(cc,rt,rn,im)       corri8(cc,ARM_LDRDI,rn,rt,im)
+#  define LDRDIN(rt,rn,im)             CC_LDRDIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRDIN(rt,rt2,rn,im)      torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
+#  define CC_STRB(cc,rt,rn,rm)         corrr(cc,ARM_STRB|ARM_P,rn,rt,rm) /* STRB with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define STRB(rt,rn,rm)               CC_STRB(ARM_CC_AL,rt,rn,rm)
+#  define T1_STRB(rt,rn,rm)            is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_STRB(rt,rn,rm)            torxr(THUMB2_STRB,rn,rt,rm)
+#  define CC_STRBN(cc,rt,rn,rm)                corrr(cc,ARM_STRB,rn,rt,rm) /* "N" form: ARM_P not set (presumably the offset is subtracted -- confirm) */
+#  define STRBN(rt,rn,rm)              CC_STRBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRBI(cc,rt,rn,im)                corri(cc,ARM_STRBI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri() */
+#  define STRBI(rt,rn,im)              CC_STRBI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRBI(rt,rn,im)           is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_STRBI(rt,rn,im)           torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im) /* THUMB2_U is set here and absent in T2_STRBIN */
+#  define T2_STRBWI(rt,rn,im)          torri12(THUMB2_STRBWI,rn,rt,im)
+#  define CC_STRBIN(cc,rt,rn,im)       corri(cc,ARM_STRBI,rn,rt,im)
+#  define STRBIN(rt,rn,im)             CC_STRBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRBIN(rt,rn,im)          torri8(THUMB2_STRBI,rn,rt,im)
+#  define CC_STRH(cc,rt,rn,rm)         corrr(cc,ARM_STRH|ARM_P,rn,rt,rm) /* STRH with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define STRH(rt,rn,rm)               CC_STRH(ARM_CC_AL,rt,rn,rm)
+#  define T1_STRH(rt,rn,rm)            is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_STRH(rt,rn,rm)            torxr(THUMB2_STRH,rn,rt,rm)
+#  define CC_STRHN(cc,rt,rn,rm)                corrr(cc,ARM_STRH,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define STRHN(rt,rn,rm)              CC_STRHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRHI(cc,rt,rn,im)                corri8(cc,ARM_STRHI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri8() */
+#  define STRHI(rt,rn,im)              CC_STRHI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRHI(rt,rn,im)           is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_STRHI(rt,rn,im)           torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
+#  define T2_STRHWI(rt,rn,im)          torri12(THUMB2_STRHWI,rn,rt,im)
+#  define CC_STRHIN(cc,rt,rn,im)       corri8(cc,ARM_STRHI,rn,rt,im)
+#  define STRHIN(rt,rn,im)             CC_STRHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRHIN(rt,rn,im)          torri8(THUMB2_STRHI,rn,rt,im) /* THUMB2_U not set */
+#  define CC_STR(cc,rt,rn,rm)          corrr(cc,ARM_STR|ARM_P,rn,rt,rm) /* STR with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define STR(rt,rn,rm)                        CC_STR(ARM_CC_AL,rt,rn,rm)
+#  define T1_STR(rt,rn,rm)             is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) /* rm in bits 8:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T2_STR(rt,rn,rm)             torxr(THUMB2_STR,rn,rt,rm)
+#  define CC_STRN(cc,rt,rn,rm)         corrr(cc,ARM_STR,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define STRN(rt,rn,rm)               CC_STRN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRI(cc,rt,rn,im)         corri(cc,ARM_STRI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri() */
+#  define STRI(rt,rn,im)               CC_STRI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRI(rt,rn,im)            is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)) /* im (via _u5) in bits 10:6, rn in bits 5:3, rt in bits 2:0 */
+#  define T1_STRISP(rt,im)             is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im))) /* no rn operand: rt in bits 10:8, im (via _u8) in bits 7:0 (presumably SP-relative, per the name) */
+#  define T2_STRI(rt,rn,im)            torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
+#  define T2_STRWI(rt,rn,im)           torri12(THUMB2_STRWI,rn,rt,im)
+#  define CC_STRIN(cc,rt,rn,im)                corri(cc,ARM_STRI,rn,rt,im)
+#  define STRIN(rt,rn,im)              CC_STRIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRIN(rt,rn,im)           torri8(THUMB2_STRI,rn,rt,im) /* THUMB2_U not set */
+#  define CC_STRD(cc,rt,rn,rm)         corrr(cc,ARM_STRD|ARM_P,rn,rt,rm) /* STRD with register offset rm under condition cc; ARM_P is OR-ed into the opcode */
+#  define STRD(rt,rn,rm)               CC_STRD(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRDN(cc,rt,rn,rm)                corrr(cc,ARM_STRD,rn,rt,rm) /* "N" form: ARM_P not set */
+#  define STRDN(rt,rn,rm)              CC_STRDN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRDI(cc,rt,rn,im)                corri8(cc,ARM_STRDI|ARM_P,rn,rt,im) /* immediate offset, emitted through corri8() */
+#  define STRDI(rt,rn,im)              CC_STRDI(ARM_CC_AL,rt,rn,im)
+#  define T2_STRDI(rt,rt2,rn,im)       torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im) /* T2 form names both source registers (rt, rt2); note it ORs ARM_P, not THUMB2_U, into the THUMB2 opcode */
+#  define CC_STRDIN(cc,rt,rn,im)       corri8(cc,ARM_STRDI,rn,rt,im)
+#  define STRDIN(rt,rn,im)             CC_STRDIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRDIN(rt,rt2,rn,im)      torrri8(THUMB2_STRDI,rn,rt,rt2,im)
+#  define CC_LDMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) /* load-multiple family: every ARM form is corl() with ARM_M|ARM_M_L plus mode flags; im is handed to corl() (presumably the register list) */
+#  define LDMIA(rn,im)                 CC_LDMIA(ARM_CC_AL,rn,im)
+#  define CC_LDM(cc,rn,im)             CC_LDMIA(cc,rn,im) /* LDM is an alias for LDMIA */
+#  define LDM(rn,im)                   LDMIA(rn,im)
+#  define T1_LDMIA(rn,im)              is(THUMB_LDMIA|(_u3(rn)<<8)|im) /* rn in bits 10:8; im is OR-ed in unmasked */
+#  define T2_LDMIA(rn,im)              torl(THUMB2_LDMIA,rn,im)
+#  define CC_LDMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im) /* _U variants add ARM_M_U (presumably base-register writeback -- confirm) */
+#  define LDMIA_U(rn,im)               CC_LDMIA_U(ARM_CC_AL,rn,im)
+#  define LDM_U(r0,i0)                 LDMIA_U(r0,i0)
+#  define CC_LDMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im) /* IB = ARM_M_I and ARM_M_B both set */
+#  define LDMIB(rn,im)                 CC_LDMIB(ARM_CC_AL,rn,im)
+#  define CC_LDMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define LDMIB_U(rn,im)               CC_LDMIB_U(ARM_CC_AL,rn,im)
+#  define CC_LDMDA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L,rn,im) /* DA = neither ARM_M_I nor ARM_M_B set */
+#  define LDMDA(rn,im)                 CC_LDMDA(ARM_CC_AL,rn,im)
+#  define CC_LDMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
+#  define LDMDA_U(rn,im)               CC_LDMDA_U(ARM_CC_AL,rn,im)
+#  define CC_LDMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im) /* DB = ARM_M_B set, ARM_M_I clear */
+#  define LDMDB(rn,im)                 CC_LDMDB(ARM_CC_AL,rn,im)
+#  define T2_LDMDB(rn,im)              torl(THUMB2_LDMDB,rn,im)
+#  define CC_LDMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
+#  define LDMDB_U(rn,im)               CC_LDMDB_U(ARM_CC_AL,rn,im)
+#  define CC_STMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_I,rn,im) /* store-multiple family: same flag scheme as the LDM macros but without ARM_M_L */
+#  define STMIA(rn,im)                 CC_STMIA(ARM_CC_AL,rn,im)
+#  define CC_STM(cc,rn,im)             CC_STMIA(cc,rn,im) /* STM is an alias for STMIA */
+#  define STM(rn,im)                   STMIA(rn,im)
+#  define CC_STMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im) /* _U variants add ARM_M_U (presumably base-register writeback -- confirm) */
+#  define STMIA_U(rn,im)               CC_STMIA_U(ARM_CC_AL,rn,im)
+#  define CC_STM_U(cc,rn,im)           CC_STMIA_U(cc,rn,im)
+#  define STM_U(rn,im)                 STMIA_U(rn,im)
+#  define CC_STMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im) /* IB = ARM_M_I and ARM_M_B both set */
+#  define STMIB(rn,im)                 CC_STMIB(ARM_CC_AL,rn,im)
+#  define CC_STMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define STMIB_U(rn,im)               CC_STMIB_U(ARM_CC_AL,rn,im)
+#  define CC_STMDA(cc,rn,im)           corl(cc,ARM_M,rn,im) /* DA = bare ARM_M, no mode flags */
+#  define STMDA(rn,im)                 CC_STMDA(ARM_CC_AL,rn,im)
+#  define CC_STMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_U,rn,im)
+#  define STMDA_U(rn,im)               CC_STMDA_U(ARM_CC_AL,rn,im)
+#  define CC_STMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_B,rn,im) /* DB = ARM_M_B set, ARM_M_I clear */
+#  define STMDB(rn,im)                 CC_STMDB(ARM_CC_AL,rn,im)
+#  define CC_STMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
+#  define STMDB_U(rn,im)               CC_STMDB_U(ARM_CC_AL,rn,im)
+/* PUSH/POP: the ARM forms are STMDB_U / LDMIA_U on _SP_REGNO.  im is
+ * forwarded to the multiple-transfer macros (presumably the register-list
+ * mask -- confirm in corl). */
+#  define CC_PUSH(cc,im)               CC_STMDB_U(cc,_SP_REGNO,im)
+#  define PUSH(im)                     STMDB_U(_SP_REGNO,im)
+/* Low eight bits of im are kept in place; bit 14 of im is moved down to
+ * bit 8.  im is parenthesized so that an argument such as a|b is masked as a
+ * whole. */
+#  define T1_PUSH(im)                  is(THUMB_PUSH|(((im)&0x4000)>>6)|((im)&0xff))
+#  define T2_PUSH(im)                  tpp(THUMB2_PUSH,im)
+/* The conditional form has to use CC_LDMIA_U: LDMIA_U takes only (rn,im)
+ * and supplies ARM_CC_AL itself. */
+#  define CC_POP(cc,im)                        CC_LDMIA_U(cc,_SP_REGNO,im)
+#  define POP(im)                      LDMIA_U(_SP_REGNO,im)
+/* As T1_PUSH, except that bit 15 of im is the one moved down to bit 8. */
+#  define T1_POP(im)                   is(THUMB_POP|(((im)&0x8000)>>7)|((im)&0xff))
+#  define T2_POP(im)                   tpp(THUMB2_POP,im)
+#  define jit_get_reg_args()                                           \
+    do {                                                               \
+       (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
+       (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
+       (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
+       (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr);           \
+    } while (0) /* calls jit_get_reg() for _R0.._R3 by name (jit_class_named|jit_class_gpr), discarding the results; presumably reserves the argument registers -- confirm against jit_get_reg */
+#  define jit_unget_reg_args()                                         \
+    do {                                                               \
+       jit_unget_reg(_R3);                                             \
+       jit_unget_reg(_R2);                                             \
+       jit_unget_reg(_R1);                                             \
+       jit_unget_reg(_R0);                                             \
+    } while (0) /* counterpart of jit_get_reg_args(): calls jit_unget_reg() on _R3 down to _R0, i.e. in the reverse of the acquisition order */
+#  define nop(i0)                      _nop(_jit,i0) /* each wrapper macro in this group forwards to its _name() function, passing the in-scope _jit state as first argument */
+static void _nop(jit_state_t*,jit_int32_t);
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); /* register operands are jit_int32_t */
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t); /* immediate operands are jit_word_t */
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); /* unlike _movi, returns a jit_word_t (presumably a code address for later patching -- confirm in the definition) */
+#  define comr(r0,r1)                  _comr(_jit,r0,r1)
+static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define negr(r0,r1)                  _negr(_jit,r0,r1)
+static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2) /* add/subtract wrappers: "r" suffix = three registers, "i" suffix = two registers plus a jit_word_t immediate */
+static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              _addcr(_jit,r0,r1,r2) /* addc* / addx*: presumably add setting carry / add with carry -- confirm in the definitions */
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               _subr(_jit,r0,r1,r2)
+static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0) /* only an immediate form of rsb is declared here; there is no rsbr wrapper in this group */
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               _mulr(_jit,r0,r1,r2) /* multiply / divide / remainder wrappers; all forward to _name() with _jit prepended */
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,1) /* q* forms take one extra register; the trailing 1/0 selects the plain vs the _u variant (presumably signed vs unsigned) */
+#  define qmulr_u(r0,r1,r2,r3)         iqmulr(r0,r1,r2,r3,0)
+#  define iqmulr(r0,r1,r2,r3,cc)       _iqmulr(_jit,r0,r1,r2,r3,cc) /* cc here is the jit_bool_t selector, not an ARM condition code */
+static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qmuli(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,1)
+#  define qmuli_u(r0,r1,r2,i0)         iqmuli(r0,r1,r2,i0,0)
+#  define iqmuli(r0,r1,r2,i0,cc)       _iqmuli(_jit,r0,r1,r2,i0,cc)
+static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#  define divrem(d,s,r0,r1,r2)         _divrem(_jit,d,s,r0,r1,r2) /* takes two int selectors d and s ahead of the registers (presumably divide-vs-remainder and signedness -- confirm in _divrem) */
+static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,1) /* same 1/0 selector scheme as qmulr / qmulr_u */
+#  define qdivr_u(r0,r1,r2,r3)         iqdivr(r0,r1,r2,r3,0)
+#  define iqdivr(r0,r1,r2,r3,cc)       _iqdivr(_jit,r0,r1,r2,r3,cc)
+static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qdivi(r0,r1,r2,i0)           iqdivi(r0,r1,r2,i0,1)
+#  define qdivi_u(r0,r1,r2,i0)         iqdivi(r0,r1,r2,i0,0)
+#  define iqdivi(r0,r1,r2,i0,cc)       _iqdivi(_jit,r0,r1,r2,i0,cc)
+static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2) /* bitwise and shift wrappers: "r" = three registers, "i" = two registers plus a jit_word_t immediate */
+static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        _orr(_jit,r0,r1,r2) /* lowercase orr() is this wrapper; the uppercase ORR instruction macro is a separate name */
+static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               _xorr(_jit,r0,r1,r2)
+static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               _lshr(_jit,r0,r1,r2) /* lshr is lsh + "r" (register form of the left shift, paired with lshi), not "logical shift right" */
+static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0,r1,r2)               _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0,r1,r2)             _rshr_u(_jit,r0,r1,r2) /* _u variants of the right shift (presumably unsigned/logical -- confirm in the definitions) */
+static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ccr(ct,cf,r0,r1,r2)          _ccr(_jit,ct,cf,r0,r1,r2) /* generic register compare: ct and cf are two condition codes, always passed below as a complementary pair */
+static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cci(ct,cf,r0,r1,i0)          _cci(_jit,ct,cf,r0,r1,i0) /* same with a jit_word_t immediate as last operand */
+static void _cci(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0, r1, r2)              ccr(ARM_CC_LT,ARM_CC_GE,r0,r1,r2) /* the plain comparisons use LT/GE/LE/GT ... */
+#  define lti(r0, r1, i0)              cci(ARM_CC_LT,ARM_CC_GE,r0,r1,i0)
+#  define ltr_u(r0, r1, r2)            ccr(ARM_CC_LO,ARM_CC_HS,r0,r1,r2) /* ... and the _u comparisons use LO/HS/LS/HI */
+#  define lti_u(r0, r1, i0)            cci(ARM_CC_LO,ARM_CC_HS,r0,r1,i0)
+#  define ler(r0, r1, r2)              ccr(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define lei(r0, r1, i0)              cci(ARM_CC_LE,ARM_CC_GT,r0,r1,i0)
+#  define ler_u(r0, r1, r2)            ccr(ARM_CC_LS,ARM_CC_HI,r0,r1,r2)
+#  define lei_u(r0, r1, i0)            cci(ARM_CC_LS,ARM_CC_HI,r0,r1,i0)
+#  define eqr(r0, r1, r2)              ccr(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define eqi(r0, r1, i0)              cci(ARM_CC_EQ,ARM_CC_NE,r0,r1,i0)
+#  define ger(r0, r1, r2)              ccr(ARM_CC_GE,ARM_CC_LT,r0,r1,r2)
+#  define gei(r0, r1, i0)              cci(ARM_CC_GE,ARM_CC_LT,r0,r1,i0)
+#  define ger_u(r0, r1, r2)            ccr(ARM_CC_HS,ARM_CC_LO,r0,r1,r2)
+#  define gei_u(r0, r1, i0)            cci(ARM_CC_HS,ARM_CC_LO,r0,r1,i0)
+#  define gtr(r0, r1, r2)              ccr(ARM_CC_GT,ARM_CC_LE,r0,r1,r2)
+#  define gti(r0, r1, i0)              cci(ARM_CC_GT,ARM_CC_LE,r0,r1,i0)
+#  define gtr_u(r0, r1, r2)            ccr(ARM_CC_HI,ARM_CC_LS,r0,r1,r2)
+#  define gti_u(r0, r1, i0)            cci(ARM_CC_HI,ARM_CC_LS,r0,r1,i0)
+#  define ner(r0,r1,r2)                        _ner(_jit,r0,r1,r2) /* "not equal" is the one comparison with its own functions instead of a ccr()/cci() mapping */
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define jmpr(r0)                     _jmpr(_jit,r0) /* jump wrappers: register target, immediate target, and a variant that returns a jit_word_t */
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0, i1)               _jmpi_p(_jit,i0, i1) /* second argument is a jit_bool_t; its meaning is not visible here -- see _jmpi_p */
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t,jit_bool_t);
+#  define bccr(cc,i0,r0,r1)            _bccr(_jit,cc,i0,r0,r1) /* generic compare-and-branch on two registers: cc is the condition, i0 a jit_word_t (presumably the target); returns jit_word_t */
+static jit_word_t _bccr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bcci(cc,i0,r0,i1)            _bcci(_jit,cc,i0,r0,i1) /* same, comparing register r0 with immediate i1 */
+static jit_word_t _bcci(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define bltr(i0, r0, r1)             bccr(ARM_CC_LT,i0,r0,r1) /* each named branch fixes one condition code; the _u forms use LO/LS/HS/HI */
+#  define blti(i0, r0, i1)             bcci(ARM_CC_LT,i0,r0,i1)
+#  define bltr_u(i0, r0, r1)           bccr(ARM_CC_LO,i0,r0,r1)
+#  define blti_u(i0, r0, i1)           bcci(ARM_CC_LO,i0,r0,i1)
+#  define bler(i0, r0, r1)             bccr(ARM_CC_LE,i0,r0,r1)
+#  define blei(i0, r0, i1)             bcci(ARM_CC_LE,i0,r0,i1)
+#  define bler_u(i0, r0, r1)           bccr(ARM_CC_LS,i0,r0,r1)
+#  define blei_u(i0, r0, i1)           bcci(ARM_CC_LS,i0,r0,i1)
+#  define beqr(i0, r0, r1)             bccr(ARM_CC_EQ,i0,r0,r1)
+#  define beqi(i0, r0, i1)             bcci(ARM_CC_EQ,i0,r0,i1)
+#  define bger(i0, r0, r1)             bccr(ARM_CC_GE,i0,r0,r1)
+#  define bgei(i0, r0, i1)             bcci(ARM_CC_GE,i0,r0,i1)
+#  define bger_u(i0, r0, r1)           bccr(ARM_CC_HS,i0,r0,r1)
+#  define bgei_u(i0, r0, i1)           bcci(ARM_CC_HS,i0,r0,i1)
+#  define bgtr(i0, r0, r1)             bccr(ARM_CC_GT,i0,r0,r1)
+#  define bgti(i0, r0, i1)             bcci(ARM_CC_GT,i0,r0,i1)
+#  define bgtr_u(i0, r0, r1)           bccr(ARM_CC_HI,i0,r0,r1)
+#  define bgti_u(i0, r0, i1)           bcci(ARM_CC_HI,i0,r0,i1)
+#  define bner(i0, r0, r1)             bccr(ARM_CC_NE,i0,r0,r1) /* unlike ner/nei, the "not equal" branch goes through the generic bccr()/bcci() */
+#  define bnei(i0, r0, i1)             bcci(ARM_CC_NE,i0,r0,i1)
+#  define baddr(cc,i0,r0,r1)           _baddr(_jit,cc,i0,r0,r1) /* generic add-and-branch on condition cc; returns jit_word_t */
+static jit_word_t _baddr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddi(cc,i0,r0,r1)           _baddi(_jit,cc,i0,r0,r1) /* macro parameter is named r1 but _baddi takes a jit_word_t there; callers below pass an immediate i1 */
+static jit_word_t _baddi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr(i0,r0,r1)             baddr(ARM_CC_VS,i0,r0,r1) /* bo* branch on VS (plain) or HS (_u) after the add */
+#  define boaddi(i0,r0,i1)             baddi(ARM_CC_VS,i0,r0,i1)
+#  define boaddr_u(i0,r0,r1)           baddr(ARM_CC_HS,i0,r0,r1)
+#  define boaddi_u(i0,r0,i1)           baddi(ARM_CC_HS,i0,r0,i1)
+#  define bxaddr(i0,r0,r1)             baddr(ARM_CC_VC,i0,r0,r1) /* bx* use the complementary conditions VC / LO */
+#  define bxaddi(i0,r0,i1)             baddi(ARM_CC_VC,i0,r0,i1)
+#  define bxaddr_u(i0,r0,r1)           baddr(ARM_CC_LO,i0,r0,r1)
+#  define bxaddi_u(i0,r0,i1)           baddi(ARM_CC_LO,i0,r0,i1)
+#  define bsubr(cc,i0,r0,r1)           _bsubr(_jit,cc,i0,r0,r1) /* generic subtract-and-branch on condition cc; returns jit_word_t */
+static jit_word_t _bsubr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubi(cc,i0,r0,r1)           _bsubi(_jit,cc,i0,r0,r1) /* as with baddi, the last argument is a jit_word_t immediate despite the r1 name */
+static jit_word_t _bsubi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0,r0,r1)             bsubr(ARM_CC_VS,i0,r0,r1)
+#  define bosubi(i0,r0,i1)             bsubi(ARM_CC_VS,i0,r0,i1)
+#  define bosubr_u(i0,r0,r1)           bsubr(ARM_CC_LO,i0,r0,r1) /* note the _u conditions are swapped relative to add: bosub*_u uses LO, bxsub*_u uses HS */
+#  define bosubi_u(i0,r0,i1)           bsubi(ARM_CC_LO,i0,r0,i1)
+#  define bxsubr(i0,r0,r1)             bsubr(ARM_CC_VC,i0,r0,r1)
+#  define bxsubi(i0,r0,i1)             bsubi(ARM_CC_VC,i0,r0,i1)
+#  define bxsubr_u(i0,r0,r1)           bsubr(ARM_CC_HS,i0,r0,r1)
+#  define bxsubi_u(i0,r0,i1)           bsubi(ARM_CC_HS,i0,r0,i1)
+#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1) /* generic mask-test branch on condition cc; returns jit_word_t */
+static jit_word_t _bmxr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxi(cc,i0,r0,r1)            _bmxi(_jit,cc,i0,r0,r1) /* last argument is a jit_word_t immediate despite the r1 name */
+static jit_word_t _bmxi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0,r0,r1)               bmxr(ARM_CC_NE,i0,r0,r1) /* bms* branch on NE, bmc* on EQ */
+#  define bmsi(i0,r0,i1)               bmxi(ARM_CC_NE,i0,r0,i1)
+#  define bmcr(i0,r0,r1)               bmxr(ARM_CC_EQ,i0,r0,r1)
+#  define bmci(i0,r0,i1)               bmxi(ARM_CC_EQ,i0,r0,i1)
+/* Load entry points.  Each macro supplies the implicit _jit state to the
+ * matching static function.  Four operand shapes exist per suffix:
+ * ldr (two registers), ldi (register, word), ldxr (three registers) and
+ * ldxi (two registers, word).  The _c/_uc/_s/_us/_i suffixes presumably
+ * select operand width and signedness -- confirm against the definitions. */
+#  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
+static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 _ldr_s(_jit,r0,r1)
+static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 _ldr_i(_jit,r0,r1)
+static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Store entry points, same wrapping scheme.  In the sti and stxi
+ * prototypes the jit_word_t operand comes FIRST, so the stxi_* macro
+ * parameter names (r0,r1,i0) do not line up with the prototype's operand
+ * kinds; arguments are forwarded purely by position. */
+#  define str_c(r0,r1)                 _str_c(_jit,r0,r1)
+static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(r0,r1,i0)             _stxi_c(_jit,r0,r1,i0)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_s(r0,r1)                 _str_s(_jit,r0,r1)
+static void _str_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(r0,r1,i0)             _stxi_s(_jit,r0,r1,i0)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_i(r0,r1)                 _str_i(_jit,r0,r1)
+static void _str_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+/* NOTE(review): unlike _stxr_c and _stxr_s, this register-indexed form
+ * declares its first operand as jit_word_t -- confirm it matches the
+ * definition and is intentional. */
+static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(r0,r1,i0)             _stxi_i(_jit,r0,r1,i0)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* Host-to-network conversions.  On little-endian hosts they are real
+ * functions; on big-endian hosts the 16-bit form forwards to extr_us and
+ * the 32-bit form is a plain register move.
+ * The big-endian branch used to define only htonr, leaving htonr_ui
+ * undefined there; htonr is kept so existing users still work. */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  else
+#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#    define htonr(r0,r1)               movr(r0,r1)
+#  endif
+/* Register-to-register extr_* entry points (two register operands each). */
+#  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0,r1)               _extr_uc(_jit,r0,r1)
+static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_s(r0,r1)                        _extr_s(_jit,r0,r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
+static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+/* prolog and epilog each take a jit_node_t pointer. */
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+/* Calls through a register (callr) or to a word-sized target (calli);
+ * calli_p additionally returns a jit_word_t. */
+#  define callr(r0)                    _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* vastart/vaarg entry points (one and two register operands). */
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+/* patch_at takes a kind plus two word operands named jump and label. */
+#  define patch_at(kind,jump,label)    _patch_at(_jit,kind,jump,label)
+static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* from binutils */
+/* Rotate the 32-bit unsigned value v left by n bits, 0 <= n < 32.
+ * The right-shift count is masked so that n == 0 shifts by 0 rather than
+ * by the full width, which would be undefined behaviour in C.  Arguments
+ * are parenthesised so expressions can be passed safely. */
+#  define rotate_left(v, n)    ((v) << (n) | (v) >> ((32 - (n)) & 31))
+/*
+ * Try to express v as an ARM data-processing immediate: an 8-bit value
+ * combined with an even rotation.
+ *
+ * Returns the 8-bit value in bits 0..7 with the (even) left-rotation that
+ * produced it shifted left by 7, i.e. rotation/2 in bits 8..11, or -1
+ * when no even rotation of v fits in 8 bits.
+ */
+static int
+encode_arm_immediate(unsigned int v)
+{
+    unsigned int       a, i;
+
+    for (i = 0; i < 32; i += 2)
+       if ((a = rotate_left(v, i)) <= 0xff)
+           return (a | (i << 7));
+
+    return (-1);
+}
+
+/*
+ * Try to express v as a Thumb-2 modified immediate.
+ *
+ * The result carries the 8-bit payload in bits 0..7, a 3-bit selector in
+ * bits 12..14 and one more selector bit in bit 26.  Returns -1 when v
+ * matches none of the patterns tested below.
+ *
+ * Fix: for the "abcdefgh 00000000 abcdefgh 00000000" pattern the payload
+ * is byte 1 of v; the old code returned byte 0, which that very test
+ * guarantees to be zero.
+ */
+static int
+encode_thumb_immediate(unsigned int v)
+{
+    int                        i;
+    unsigned int       m;
+    unsigned int       n;
+    /* 00000000 00000000 00000000 abcdefgh */
+    if ((v & 0xff) == v)
+       return (v);
+    /* 00000000 abcdefgh 00000000 abcdefgh */
+    if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff))
+       return ((v & 0xff) | (1 << 12));
+    /* abcdefgh 00000000 abcdefgh 00000000 */
+    if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0)
+       return (((v >> 8) & 0xff) | (2 << 12));
+    /* abcdefgh abcdefgh abcdefgh abcdefgh */
+    if ( (v &    0xff)        == ((v &     0xff00) >>  8) &&
+       ((v &   0xff00) >> 8) == ((v &   0xff0000) >> 16) &&
+       ((v & 0xff0000) << 8) ==  (v & 0xff000000))
+       return ((v & 0xff) | (3 << 12));
+    /* 1bcdefgh << 24 ... 1bcdefgh << 1 */
+    /* i is the rotation; m is the 8-bit window it selects and n the
+     * window's top bit, which must be set.
+     * NOTE(review): the loop stops at rotation 22 although the comment
+     * above mentions shifts down to 1 -- confirm the bound is intended. */
+    for (i = 8, m = 0xff000000, n = 0x80000000;
+        i < 23; i++, m >>= 1,  n >>= 1) {
+       if ((v & m) == v && (v & n)) {
+           v >>= 32 - i;
+           /* the low rotation bit shares bit 7 with the payload */
+           if (!(i & 1))
+               v &= 0x7f;
+           i >>= 1;
+           return (((i & 7) << 12) | ((i & 8) << 23) | v);
+       }
+    }
+    return (-1);
+}
+
+/*
+ * Scatter a plain 12-bit immediate into the split field layout used by
+ * the callers: bit 11 moves to bit 26, bits 8..10 move to bits 12..14 and
+ * bits 0..7 stay in place.  Returns -1 if v needs more than 12 bits.
+ */
+static int
+encode_thumb_word_immediate(unsigned int v)
+{
+    unsigned int       top, mid, low;
+
+    if (v & 0xfffff000)
+       return (-1);
+    top = (v & 0x800) << 15;
+    mid = (v & 0x700) << 4;
+    low = v & 0xff;
+    return (top | mid | low);
+}
+
+/*
+ * Pack a signed 24-bit displacement into the split branch fields:
+ * bit 23 -> bit 26, bits 11..20 -> bits 16..25, bits 0..10 unchanged, and
+ * bits 22/21 stored at bits 13/11, inverted when bit 23 is clear.
+ * Returns -1 when v is outside [-0x800000, 0x7fffff].
+ */
+static int
+encode_thumb_jump(int v)
+{
+    int                sign, b22, b21, j1, j2;
+
+    if (v < (int)-0x800000 || v > 0x7fffff)
+       return (-1);
+    sign = (v & 0x800000) != 0;
+    b22  = (v & 0x400000) != 0;
+    b21  = (v & 0x200000) != 0;
+    if (sign) {
+       j1 = b22;
+       j2 = b21;
+    }
+    else {
+       j1 = !b22;
+       j2 = !b21;
+    }
+    return ((sign<<26)|((v&0x1ff800)<<5)|(j1<<13)|(j2<<11)|(v&0x7ff));
+}
+
+/*
+ * Pack a signed 20-bit displacement into the split conditional-branch
+ * fields: bit 19 -> bit 26, bit 17 -> bit 13, bit 18 -> bit 11,
+ * bits 11..16 -> bits 16..21, bits 0..10 unchanged.
+ * Returns -1 when v is outside [-0x80000, 0x7ffff].
+ */
+static int
+encode_thumb_cc_jump(int v)
+{
+    int                sign, j1, j2;
+
+    if (v < (int)-0x80000 || v > 0x7ffff)
+       return (-1);
+    sign = (v & 0x80000) != 0;
+    j1   = (v & 0x20000) != 0;
+    j2   = (v & 0x40000) != 0;
+    return ((sign<<26)|((v&0x1f800)<<5)|(j1<<13)|(j2<<11)|(v&0x7ff));
+}
+
+/*
+ * Combine a shift amount (0..31) and a shift type into one field: the
+ * amount's bits 2..4 go to bits 12..14, its bits 0..1 to bits 6..7, and
+ * the type constant is halved and OR-ed in.  Only ARM_ASR, ARM_LSL and
+ * ARM_LSR are accepted; anything else trips the assertion.
+ */
+static int
+encode_thumb_shift(int v, int type)
+{
+    if (type == ARM_ASR || type == ARM_LSL || type == ARM_LSR)
+       type >>= 1;
+    else
+       assert(!"handled shift");
+    assert(v >= 0 && v <= 31);
+    return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
+}
+
+/*
+ * Emit one halfword (via is()) made of 0xbf00, the condition tc shifted
+ * down into bits 4..7, and a 4-bit mask m selected by `it'.
+ *
+ * tc must only use its top four bits and must not be ARM_CC_NV.  In the
+ * table below every extra "T" slot contributes c (the low bit of the
+ * condition), every "E" slot contributes !c, and a single 1 bit follows
+ * the last slot.  An unknown `it' value aborts.
+ */
+static void
+_tcit(jit_state_t *_jit, unsigned int tc, int it)
+{
+    int                c;
+    int                m;
+    /* c: lowest bit of the 4-bit condition held in bits 28..31 */
+    c = (tc >> 28) & 1;
+    assert(!(tc & 0xfffffff) && tc != ARM_CC_NV);
+    switch (it) {
+       case THUMB2_IT:         m =   1<<3;                     break;
+       case THUMB2_ITT:        m =  (c<<3)| (1<<2);            break;
+       case THUMB2_ITE:        m = (!c<<3)| (1<<2);            break;
+       case THUMB2_ITTT:       m =  (c<<3)| (c<<2)| (1<<1);    break;
+       case THUMB2_ITET:       m = (!c<<3)| (c<<2)| (1<<1);    break;
+       case THUMB2_ITTE:       m =  (c<<3)|(!c<<2)| (1<<1);    break;
+       case THUMB2_ITEE:       m = (!c<<3)|(!c<<2)| (1<<1);    break;
+       case THUMB2_ITTTT:      m =  (c<<3)| (c<<2)| (c<<1)|1;  break;
+       case THUMB2_ITETT:      m = (!c<<3)| (c<<2)| (c<<1)|1;  break;
+       case THUMB2_ITTET:      m =  (c<<3)|(!c<<2)| (c<<1)|1;  break;
+       case THUMB2_ITEET:      m = (!c<<3)|(!c<<2)| (c<<1)|1;  break;
+       case THUMB2_ITTTE:      m =  (c<<3)| (c<<2)|(!c<<1)|1;  break;
+       case THUMB2_ITETE:      m = (!c<<3)| (c<<2)|(!c<<1)|1;  break;
+       case THUMB2_ITTEE:      m =  (c<<3)|(!c<<2)|(!c<<1)|1;  break;
+       case THUMB2_ITEEE:      m = (!c<<3)|(!c<<2)|(!c<<1)|1;  break;
+       default:                abort();
+    }
+    /* with ARM_CC_AL only a single-bit mask is accepted */
+    assert(m && (tc != ARM_CC_AL || !(m & (m - 1))));
+    is(0xbf00 | (tc >> 24) | m);
+}
+
+/* ARM word: cc | o | rn<<16 | rd<<12 | rm.  cc may only use bits 28..31
+ * and o must leave the three register nibbles clear. */
+static void
+_corrr(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fff0f));
+    insn  = cc | o;
+    insn |= _u4(rn) << 16;
+    insn |= _u4(rd) << 12;
+    insn |= _u4(rm);
+    ii(insn);
+}
+
+/* ARM word: cc | o | rn<<16 | rd<<12 | 12-bit immediate in bits 0..11. */
+static void
+_corri(jit_state_t *_jit, int cc, int o, int rn, int rd, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    assert(!(im & 0xfffff000));
+    insn  = cc | o;
+    insn |= _u4(rn) << 16;
+    insn |= _u4(rd) << 12;
+    insn |= _u12(im);
+    ii(insn);
+}
+
+/* ARM word with an 8-bit immediate split in two nibbles: the high nibble
+ * lands in bits 8..11 and the low nibble in bits 0..3. */
+static void
+_corri8(jit_state_t *_jit, int cc, int o, int rn, int rt, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fff0f));
+    assert(!(im & 0xffffff00));
+    insn  = cc | o;
+    insn |= _u4(rn) << 16;
+    insn |= _u4(rt) << 12;
+    insn |= (im & 0xf0) << 4;
+    insn |= im & 0x0f;
+    ii(insn);
+}
+
+/* ARM word with a 16-bit immediate: its top nibble goes to bits 16..19,
+ * the low 12 bits stay in bits 0..11, and rd sits at bits 12..15. */
+static void
+_coriw(jit_state_t *_jit, int cc, int o, int rd, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    assert(!(im & 0xffff0000));
+    insn  = cc | o;
+    insn |= (im & 0xf000) << 4;
+    insn |= _u4(rd) << 12;
+    insn |= im & 0xfff;
+    ii(insn);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rd<<8 | rm, emitted as two halfwords. */
+static void
+_torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0xf0f0f));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rd) << 8;
+    code.i |= _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rd<<8 | im | rm.  im is OR-ed in as given
+ * and may only use bits 4..7 and 12..14. */
+static void
+_torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o  & 0x000f0f0f));
+    assert(!(im & 0xffff8f0f));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rd) << 8;
+    code.i |= im;
+    code.i |= _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rt<<12 | rm. */
+static void
+_torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0xf0f0f));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rt) << 12;
+    code.i |= _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair with four register nibbles:
+ * o | rn<<16 | rl<<12 | rh<<8 | rm. */
+static void
+_torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0x000fff0f));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rl) << 12;
+    code.i |= _u4(rh) << 8;
+    code.i |= _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rt<<12 | rt2<<8 | 8-bit immediate. */
+static void
+_torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o  & 0x000fffff));
+    assert(!(im & 0xffffff00));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rt) << 12;
+    code.i |= _u4(rt2) << 8;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rd<<8 | im, where im is already laid out in
+ * its split form (bits 0..7, 12..14 and 26 only). */
+static void
+_torri(jit_state_t *_jit, int o, int rn, int rd, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o  & 0x0c0f7fff));
+    assert(!(im & 0xfbff8f00));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rd) << 8;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rt<<12 | 8-bit immediate. */
+static void
+_torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o  & 0x000ff0ff));
+    assert(!(im & 0xffffff00));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rt) << 12;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair: o | rn<<16 | rt<<12 | 12-bit immediate. */
+static void
+_torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o  & 0x000fffff));
+    assert(!(im & 0xfffff000));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u4(rt) << 12;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair carrying a 5-bit shift amount: amount bits 2..4 go to
+ * bits 12..14, amount bits 0..1 to bits 6..7; rd at bit 8, rm at bit 0. */
+static void
+_tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0x7fcf));
+    assert(im >= 0 && im < 32);
+    code.i  = o;
+    code.i |= (im & 0x1c) << 10;
+    code.i |= _u4(rd) << 8;
+    code.i |= (im & 3) << 6;
+    code.i |= _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Thumb-2 pair with a 16-bit immediate scattered over four fields:
+ * bits 12..15 -> 16..19, bit 11 -> 26, bits 8..10 -> 12..14,
+ * bits 0..7 in place; rd at bit 8. */
+static void
+_toriw(jit_state_t *_jit, int o, int rd, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(im & 0xffff0000));
+    code.i  = o;
+    code.i |= (im & 0xf000) << 4;
+    code.i |= (im & 0x800) << 15;
+    code.i |= (im & 0x700) << 4;
+    code.i |= _u4(rd) << 8;
+    code.i |= im & 0xff;
+    iss(code.s[0], code.s[1]);
+}
+
+/*
+ * Emit the 16-bit conditional branch THUMB_CC_B with an 8-bit signed
+ * displacement.
+ *
+ * cc holds the condition in bits 28..31 and must be neither ARM_CC_AL
+ * nor ARM_CC_NV; im must fit in [-128, 127].
+ *
+ * The condition is shifted as an unsigned value (as _tcb does).  cc is a
+ * signed int, so a plain cc>>20 sign-extends whenever bit 31 is set and
+ * would smear ones over bits 12..15 of the halfword.
+ */
+static void
+_tc8(jit_state_t *_jit, int cc, int im)
+{
+    assert(!(cc & 0x0fffffff));
+    assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
+    assert(im >= -128 && im <= 127);
+    is(THUMB_CC_B|(((jit_uint32_t)cc)>>20)|(im&0xff));
+}
+
+/* Emit the halfword THUMB_B with an 11-bit immediate in the low bits. */
+static void
+_t11(jit_state_t *_jit, int im)
+{
+    int                half;
+
+    assert(!(im & 0xfffff800));
+    half = THUMB_B | im;
+    is(half);
+}
+
+/* Emit THUMB2_CC_B as a halfword pair.  The condition is moved from
+ * bits 28..31 down by six places; im must not collide with the opcode
+ * or condition bits. */
+static void
+_tcb(jit_state_t *_jit, int cc, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(cc & 0xfffffff));
+    assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
+    cc = ((jit_uint32_t)cc) >> 6;
+    assert(!(im & (THUMB2_CC_B|cc)));
+    code.i  = THUMB2_CC_B;
+    code.i |= cc;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Emit the ARM word ARM_BLXI with a 25-bit immediate in the low bits. */
+static void
+_blxi(jit_state_t *_jit, int im)
+{
+    int                insn;
+
+    assert(!(im & 0xfe000000));
+    insn = ARM_BLXI | im;
+    ii(insn);
+}
+
+/* Emit a Thumb-2 pair o | im; im must not overlap any bit set in o. */
+static void
+_tb(jit_state_t *_jit, int o, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0x07ff2fff));
+    assert(!(o & im));
+    code.i  = o;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* ARM word with four register nibbles:
+ * cc | o | rh<<16 | rl<<12 | rm<<8 | rn. */
+static void
+_corrrr(jit_state_t *_jit, int cc, int o, int rh, int rl, int rm, int rn)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o & 0xf00fff0f));
+    insn  = cc | o;
+    insn |= _u4(rh) << 16;
+    insn |= _u4(rl) << 12;
+    insn |= _u4(rm) << 8;
+    insn |= _u4(rn);
+    ii(insn);
+}
+
+/* ARM word: cc | o | rd<<12 | rn<<16 | im<<7 | rm. */
+static void
+_corrrs(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000ff8f));
+    insn  = cc | o;
+    insn |= _u4(rd) << 12;
+    insn |= _u4(rn) << 16;
+    insn |= im << 7;
+    insn |= _u4(rm);
+    ii(insn);
+}
+
+/* ARM word based on ARM_SHIFT: cc | ARM_SHIFT | o | rd<<12 | rm<<8 |
+ * im<<7 | rn.  The rm<<8 and im<<7 fields must not overlap. */
+static void
+_cshift(jit_state_t *_jit, int cc, int o, int rd, int rm, int rn, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xffe0ff8f));
+    assert(((_u4(rm)<<8)&(im<<7)) == 0);
+    insn  = cc | ARM_SHIFT | o;
+    insn |= _u4(rd) << 12;
+    insn |= _u4(rm) << 8;
+    insn |= im << 7;
+    insn |= _u4(rn);
+    ii(insn);
+}
+
+/* ARM word: cc | o | 24-bit immediate. */
+static void
+_cb(jit_state_t *_jit, int cc, int o, int im)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf0ffffff));
+    insn  = cc | o;
+    insn |= _u24(im);
+    ii(insn);
+}
+
+/* ARM word: cc | o | rm in the low nibble. */
+static void
+_cbx(jit_state_t *_jit, int cc, int o, int rm)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000000f));
+    insn  = cc | o;
+    insn |= _u4(rm);
+    ii(insn);
+}
+
+/* ARM word: cc | o | r0<<16 | 16-bit value i0 in the low half. */
+static void
+_corl(jit_state_t *_jit, int cc, int o, int r0, int i0)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    insn  = cc | o;
+    insn |= _u4(r0) << 16;
+    insn |= _u16(i0);
+    ii(insn);
+}
+
+/* ARM word: cc | o | rd<<12 | rm. */
+static void
+_c6orr(jit_state_t *_jit, int cc, int o, int rd, int rm)
+{
+    int                insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    insn  = cc | o;
+    insn |= _u4(rd) << 12;
+    insn |= _u4(rm);
+    ii(insn);
+}
+
+/* Emit a Thumb-2 pair o | im where im is a bit mask in the low half.
+ * For THUMB2_PUSH bit 15 of the mask must be clear, and at least two of
+ * bits 0..12 must be set. */
+static void
+_tpp(jit_state_t *_jit, int o, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0x0000ffff));
+    if (o == THUMB2_PUSH) {
+       assert(!(im & 0x8000));
+    }
+    assert(__builtin_popcount(im & 0x1fff) > 1);
+    code.i  = o;
+    code.i |= im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Emit a Thumb-2 pair o | rn<<16 | 13-bit mask.  With THUMB2_LDM_W set in
+ * o, rn itself must not appear in the mask; with rn == _R15 the mask must
+ * be empty unless o has both bits 14 and 15 set. */
+static void
+_torl(jit_state_t *_jit, int o, int rn, int im)
+{
+    jit_thumb_t        code;
+
+    assert(!(o & 0xf1fff));
+    assert(rn != _R15 || !im || ((o & 0xc000) == 0xc000));
+    assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
+    code.i  = o;
+    code.i |= _u4(rn) << 16;
+    code.i |= _u13(im);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Pad with no-ops: i0 is consumed 2 at a time through T1_NOP in Thumb
+ * mode and 4 at a time through NOP otherwise; it must reach exactly 0. */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    if (jit_thumb_p()) {
+       while (i0 > 0) {
+           T1_NOP();
+           i0 -= 2;
+       }
+    }
+    else {
+       while (i0 > 0) {
+           NOP();
+           i0 -= 4;
+       }
+    }
+    assert(i0 == 0);
+}
+
+/* Register move r0 <- r1; nothing is emitted when both are the same. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (jit_thumb_p())
+       T1_MOV(r0, r1);
+    else
+       MOV(r0, r1);
+}
+
+/*
+ * Load the constant i0 into r0 using the first form that applies:
+ *   Thumb: T1_MOVI (flags may be set, low register, i0 < 0x80), an
+ *          encodable immediate, the encodable complement through MVN,
+ *          else MOVW plus MOVT when the upper half is non-zero.
+ *   ARM:   MOVW when armv6 and i0 fits 16 bits, an encodable immediate,
+ *          the encodable complement through MVN, MOVW/MOVT when armv6,
+ *          else load_const().
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    int                        enc;
+
+    if (jit_thumb_p()) {
+       if (!jit_no_set_flags() && r0 < 8 && !(i0 & 0xffffff80)) {
+           T1_MOVI(r0, i0);
+           return;
+       }
+       enc = encode_thumb_immediate(i0);
+       if (enc != -1) {
+           T2_MOVI(r0, enc);
+           return;
+       }
+       enc = encode_thumb_immediate(~i0);
+       if (enc != -1) {
+           T2_MVNI(r0, enc);
+           return;
+       }
+       T2_MOVWI(r0, (jit_uint16_t)i0);
+       if (i0 & 0xffff0000)
+           T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+       return;
+    }
+
+    if (jit_armv6_p() && !(i0 & 0xffff0000)) {
+       MOVWI(r0, i0);
+       return;
+    }
+    enc = encode_arm_immediate(i0);
+    if (enc != -1) {
+       MOVI(r0, enc);
+       return;
+    }
+    enc = encode_arm_immediate(~i0);
+    if (enc != -1) {
+       MVNI(r0, enc);
+       return;
+    }
+    if (jit_armv6_p()) {
+       MOVWI(r0, (jit_uint16_t)(i0));
+       if ((i0 & 0xffff0000))
+           MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+       return;
+    }
+    load_const(0, r0, i0);
+}
+
+/*
+ * Fixed-size constant load; returns the code position recorded before
+ * anything is emitted.  Thumb always emits both MOVW and MOVT; ARM goes
+ * through load_const(1, r0, 0), i.e. with a zero placeholder value.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         start = _jit->pc.w;
+
+    if (!jit_thumb_p()) {
+       load_const(1, r0, 0);
+       return (start);
+    }
+    T2_MOVWI(r0, (jit_uint16_t)(i0));
+    T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+    return (start);
+}
+
+/* Emit NOT r0, r1.  In Thumb mode the T1 form is used only when flags
+ * may be set and both registers are below 8. */
+static void
+_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       NOT(r0, r1);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_NOT(r0, r1);
+    else
+       T2_NOT(r0, r1);
+}
+
+/* Negate r1 into r0 through a reverse-subtract from 0.  In Thumb mode
+ * the T1 form is used only when flags may be set and both registers are
+ * below 8. */
+static void
+_negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       RSBI(r0, r1, 0);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_RSBI(r0, r1);
+    else
+       T2_RSBI(r0, r1, 0);
+}
+
+/* r0 = r1 + r2.  Thumb picks T1_ADD for three low registers when flags
+ * may be set, the two-operand T1_ADDX when r0 doubles as a source, and
+ * T2_ADD otherwise. */
+static void
+_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                other;
+
+    if (!jit_thumb_p()) {
+       ADD(r0, r1, r2);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8) {
+       T1_ADD(r0, r1, r2);
+       return;
+    }
+    if (r0 == r1 || r0 == r2) {
+       other = (r0 == r1) ? r2 : r1;
+       T1_ADDX(r0, other);
+       return;
+    }
+    T2_ADD(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 + i0.  Tries, in order, the short T1 forms (only when flags
+ * may be set), an encodable immediate for ADD, the negated immediate for
+ * SUB, the 12-bit ADDW/SUBW forms (Thumb only) and finally materialises
+ * i0 in a register -- r0 itself in ARM mode when it differs from r1,
+ * otherwise a scratch register.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    int                        flags_ok;
+    jit_int32_t                tmp;
+
+    if (!jit_thumb_p()) {
+       if ((enc = encode_arm_immediate(i0)) != -1)
+           ADDI(r0, r1, enc);
+       else if ((enc = encode_arm_immediate(-i0)) != -1)
+           SUBI(r0, r1, enc);
+       else if (r0 == r1) {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           ADD(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       else {
+           movi(r0, i0);
+           ADD(r0, r1, r0);
+       }
+       return;
+    }
+
+    flags_ok = !jit_no_set_flags();
+    if (flags_ok && (r0|r1) < 8 && !(i0 & ~7))
+       T1_ADDI3(r0, r1, i0);
+    else if (flags_ok && (r0|r1) < 8 && !(-i0 & ~7))
+       T1_SUBI3(r0, r1, -i0);
+    else if (flags_ok && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+       T1_ADDI8(r0, i0);
+    else if (flags_ok && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+       T1_SUBI8(r0, -i0);
+    else if ((enc = encode_thumb_immediate(i0)) != -1)
+       T2_ADDI(r0, r1, enc);
+    else if ((enc = encode_thumb_immediate(-i0)) != -1)
+       T2_SUBI(r0, r1, enc);
+    else if ((enc = encode_thumb_word_immediate(i0)) != -1)
+       T2_ADDWI(r0, r1, enc);
+    else if ((enc = encode_thumb_word_immediate(-i0)) != -1)
+       T2_SUBWI(r0, r1, enc);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_ADD(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + r2 with the flag-setting form.  Thumb uses T1_ADD when all
+ * three registers are below 8 and T2_ADDS otherwise. */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ADDS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8)
+       T1_ADD(r0, r1, r2);
+    else
+       T2_ADDS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 + i0 with the flag-setting forms.  Same selection order as the
+ * plain add-immediate path but without consulting jit_no_set_flags() and
+ * without the 12-bit word-immediate forms.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+
+    if (!jit_thumb_p()) {
+       if ((enc = encode_arm_immediate(i0)) != -1)
+           ADDSI(r0, r1, enc);
+       else if ((enc = encode_arm_immediate(-i0)) != -1)
+           SUBSI(r0, r1, enc);
+       else if (r0 == r1) {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           ADDS(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       else {
+           movi(r0, i0);
+           ADDS(r0, r1, r0);
+       }
+       return;
+    }
+
+    if ((r0|r1) < 8 && !(i0 & ~7))
+       T1_ADDI3(r0, r1, i0);
+    else if ((r0|r1) < 8 && !(-i0 & ~7))
+       T1_SUBI3(r0, r1, -i0);
+    else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+       T1_ADDI8(r0, i0);
+    else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+       T1_SUBI8(r0, -i0);
+    else if ((enc = encode_thumb_immediate(i0)) != -1)
+       T2_ADDSI(r0, r1, enc);
+    else if ((enc = encode_thumb_immediate(-i0)) != -1)
+       T2_SUBSI(r0, r1, enc);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_ADDS(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + r2 + carry, always with a flag-setting form (the last ADC of
+ * a chain is not known).  Thumb uses the two-operand T1_ADC when all
+ * registers are low and r0 is also one of the sources. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                other;
+
+    if (!jit_thumb_p()) {
+       ADCS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) >= 8 || (r0 != r1 && r0 != r2)) {
+       T2_ADCS(r0, r1, r2);
+       return;
+    }
+    other = (r0 == r1) ? r2 : r1;
+    T1_ADC(r0, other);
+}
+
+/*
+ * r0 = r1 + i0 + carry, using flag-setting forms throughout.
+ *
+ * When i0 is not encodable the equivalent subtract-with-carry is tried.
+ * SBC computes rn - imm - 1 + C, so the immediate that reproduces
+ * rn + i0 + C is ~i0 (that is, -i0 - 1), not -i0; encoding -i0 here made
+ * the result one too small.
+ *
+ * In Thumb mode jit_no_set_flags() is forced to 1 around the sequence and
+ * restored afterwards, so the movi() used to materialise i0 is told not
+ * to pick a flag-setting encoding.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    int                        no_set_flags;
+    if (jit_thumb_p()) {
+       no_set_flags = jit_no_set_flags();
+       jit_no_set_flags() = 1;
+       if ((i = encode_thumb_immediate(i0)) != -1)
+           T2_ADCSI(r0, r1, i);
+       else if ((i = encode_thumb_immediate(~i0)) != -1)
+           T2_SBCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           T2_ADCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_ADCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       jit_no_set_flags() = no_set_flags;
+    }
+    else {
+       if ((i = encode_arm_immediate(i0)) != -1)
+           ADCSI(r0, r1, i);
+       else if ((i = encode_arm_immediate(~i0)) != -1)
+           SBCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           ADCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ADCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* r0 = r1 - r2.  Thumb uses T1_SUB only when flags may be set and all
+ * three registers are below 8. */
+static void
+_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SUB(r0, r1, r2);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8)
+       T1_SUB(r0, r1, r2);
+    else
+       T2_SUB(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 - i0.  Mirror image of the add-immediate selection: short T1
+ * forms when flags may be set, an encodable immediate for SUB, the
+ * negated immediate for ADD, the 12-bit SUBW/ADDW forms (Thumb only),
+ * and finally i0 materialised in a register.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    int                        flags_ok;
+    jit_int32_t                tmp;
+
+    if (!jit_thumb_p()) {
+       if ((enc = encode_arm_immediate(i0)) != -1)
+           SUBI(r0, r1, enc);
+       else if ((enc = encode_arm_immediate(-i0)) != -1)
+           ADDI(r0, r1, enc);
+       else if (r0 == r1) {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           SUB(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       else {
+           movi(r0, i0);
+           SUB(r0, r1, r0);
+       }
+       return;
+    }
+
+    flags_ok = !jit_no_set_flags();
+    if (flags_ok && (r0|r1) < 8 && !(i0 & ~7))
+       T1_SUBI3(r0, r1, i0);
+    else if (flags_ok && (r0|r1) < 8 && !(-i0 & ~7))
+       T1_ADDI3(r0, r1, -i0);
+    else if (flags_ok && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+       T1_SUBI8(r0, i0);
+    else if (flags_ok && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+       T1_ADDI8(r0, -i0);
+    else if ((enc = encode_thumb_immediate(i0)) != -1)
+       T2_SUBI(r0, r1, enc);
+    else if ((enc = encode_thumb_immediate(-i0)) != -1)
+       T2_ADDI(r0, r1, enc);
+    else if ((enc = encode_thumb_word_immediate(i0)) != -1)
+       T2_SUBWI(r0, r1, enc);
+    else if ((enc = encode_thumb_word_immediate(-i0)) != -1)
+       T2_ADDWI(r0, r1, enc);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_SUB(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 - r2 with the flag-setting form.  Thumb uses T1_SUB when all
+ * three registers are below 8 and T2_SUBS otherwise. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SUBS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8)
+       T1_SUB(r0, r1, r2);
+    else
+       T2_SUBS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 - i0 with the flag-setting forms.  Same selection order as the
+ * plain subtract-immediate path but without consulting
+ * jit_no_set_flags() and without the 12-bit word-immediate forms.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+
+    if (!jit_thumb_p()) {
+       if ((enc = encode_arm_immediate(i0)) != -1)
+           SUBSI(r0, r1, enc);
+       else if ((enc = encode_arm_immediate(-i0)) != -1)
+           ADDSI(r0, r1, enc);
+       else if (r0 == r1) {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           SUBS(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       else {
+           movi(r0, i0);
+           SUBS(r0, r1, r0);
+       }
+       return;
+    }
+
+    if ((r0|r1) < 8 && !(i0 & ~7))
+       T1_SUBI3(r0, r1, i0);
+    else if ((r0|r1) < 8 && !(-i0 & ~7))
+       T1_ADDI3(r0, r1, -i0);
+    else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+       T1_SUBI8(r0, i0);
+    else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+       T1_ADDI8(r0, -i0);
+    else if ((enc = encode_thumb_immediate(i0)) != -1)
+       T2_SUBSI(r0, r1, enc);
+    else if ((enc = encode_thumb_immediate(-i0)) != -1)
+       T2_ADDSI(r0, r1, enc);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_SUBS(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 - r2 with borrow, always with a flag-setting form (the last
+ * SBC of a chain is not known).  Thumb uses the two-operand T1_SBC when
+ * all registers are low and r0 is also the first source. */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SBCS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) >= 8 || r0 != r1) {
+       T2_SBCS(r0, r1, r2);
+       return;
+    }
+    T1_SBC(r0, r2);
+}
+
+/*
+ * r0 = r1 - i0 with borrow, using flag-setting forms throughout.
+ *
+ * Fixes relative to the previous version:
+ *  - the Thumb path now encodes immediates with encode_thumb_immediate()
+ *    (it used the ARM encoder, whose layout the Thumb emitters reject)
+ *    and its scratch-register fallback emits T2_SBCS instead of the ARM
+ *    SBCS;
+ *  - the add-with-carry alternative encodes ~i0 rather than -i0: SBC
+ *    computes rn - i0 - 1 + C while ADC computes rn + imm + C, so the
+ *    matching immediate is -i0 - 1, i.e. ~i0.
+ *
+ * In Thumb mode jit_no_set_flags() is forced to 1 around the sequence and
+ * restored afterwards, so the movi() used to materialise i0 is told not
+ * to pick a flag-setting encoding.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    int                        no_set_flags;
+    if (jit_thumb_p()) {
+       no_set_flags = jit_no_set_flags();
+       jit_no_set_flags() = 1;
+       if ((i = encode_thumb_immediate(i0)) != -1)
+           T2_SBCSI(r0, r1, i);
+       else if ((i = encode_thumb_immediate(~i0)) != -1)
+           T2_ADCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           T2_SBCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_SBCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       jit_no_set_flags() = no_set_flags;
+    }
+    else {
+       if ((i = encode_arm_immediate(i0)) != -1)
+           SBCSI(r0, r1, i);
+       else if ((i = encode_arm_immediate(~i0)) != -1)
+           ADCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           SBCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           SBCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* r0 = i0 - r1, produced as the negation of (r1 - i0). */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* first r0 = r1 - i0 ... */
+    subi(r0, r1, i0);
+    /* ... then flip the sign in place */
+    negr(r0, r0);
+}
+
+/*
+ * r0 = r1 * r2.
+ * Thumb: the two-operand T1_MUL is used when flags may be set, r0 is one
+ * of the sources and the registers involved are low; otherwise T2_MUL.
+ * ARM: when r0 == r1 and the target is not armv6 the operands are
+ * swapped, or r1 is first copied to a scratch register when all three
+ * registers coincide.
+ */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (jit_thumb_p()) {
+       if (!jit_no_set_flags() && r0 == r2 && (r0|r1) < 8)
+           T1_MUL(r0, r1);
+       else if (!jit_no_set_flags() && r0 == r1 && (r0|r2) < 8)
+           T1_MUL(r0, r2);
+       else
+           T2_MUL(r0, r1, r2);
+       return;
+    }
+    if (r0 != r1 || jit_armv6_p()) {
+       MUL(r0, r1, r2);
+       return;
+    }
+    if (r0 != r2) {
+       MUL(r0, r2, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    MOV(rn(tmp), r1);
+    MUL(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 * i0: the constant is loaded into a scratch register and the
+ * register-register multiply does the rest. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Long multiply of r2 by r3 into the register pair (r0, r1); `sign'
+ * selects SMULL over UMULL.
+ *
+ * Operand order passed to the instruction:
+ *   - r2 == r3: r2 is copied to a scratch register, which is used as the
+ *     first source, with r2 as the second;
+ *   - neither result register is r2: (r2, r3) as given;
+ *   - otherwise the sources are swapped to (r3, r2).
+ */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                tmp = 0;
+    jit_int32_t                lhs;
+    jit_int32_t                rhs;
+    int                        borrowed = (r2 == r3);
+
+    if (borrowed) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movr(rn(tmp), r2);
+       lhs = rn(tmp);
+       rhs = r2;
+    }
+    else if (r0 != r2 && r1 != r2) {
+       lhs = r2;
+       rhs = r3;
+    }
+    else {
+       lhs = r3;
+       rhs = r2;
+    }
+
+    if (jit_thumb_p()) {
+       if (sign)
+           T2_SMULL(r0, r1, lhs, rhs);
+       else
+           T2_UMULL(r0, r1, lhs, rhs);
+    }
+    else {
+       if (sign)
+           SMULL(r0, r1, lhs, rhs);
+       else
+           UMULL(r0, r1, lhs, rhs);
+    }
+
+    if (borrowed)
+       jit_unget_reg(tmp);
+}
+
+/* Widening multiply by a constant: stage i0 in a scratch register and
+ * defer to iqmulr, forwarding the signedness flag unchanged. */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqmulr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+static void            /* Divide r1 by r2 by calling the EABI divmod helper; r0 receives R0 when div is set, R1 otherwise. */
+_divrem(jit_state_t *_jit, int div, int sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    jit_word_t         w;
+    jit_get_reg_args();
+    movr(_R0_REGNO, r1);       /* first helper argument */
+    movr(_R1_REGNO, r2);       /* second helper argument; NOTE(review): if r2 were R0 it was just overwritten - confirm callers cannot pass that */
+    if (sign)                  w = (jit_word_t)__aeabi_idivmod;
+    else                       w = (jit_word_t)__aeabi_uidivmod;
+    if (!jit_exchange_p()) {
+       if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;        /* displacement in halfwords */
+       else                    d = ((w - _jit->pc.w) >> 2) - 2;        /* displacement in words */
+       if (_s24P(d)) { /* helper is reachable with a direct BL */
+           if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
+           else                BLI(d & 0x00ffffff);
+       }
+       else                    goto fallback;
+    }
+    else {
+    fallback:  /* indirect call: helper address in R2, then BLX */
+       movi(_R2_REGNO, w);
+       if (jit_thumb_p())      T1_BLX(_R2_REGNO);
+       else                    BLX(_R2_REGNO);
+    }
+    if (div)                   movr(r0, _R0_REGNO);    /* the divr callers pass div = 1 */
+    else                       movr(r0, _R1_REGNO);    /* the remr callers pass div = 0 */
+    jit_unget_reg_args();
+}
+
+/* Signed division r0 = r1 / r2.  T2_SDIV is emitted only when both
+ * jit_armv7r_p() and jit_thumb_p() hold; every other configuration goes
+ * through divrem (quotient requested, signed). */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_armv7r_p() || !jit_thumb_p()) {
+       divrem(1, 1, r0, r1, r2);
+       return;
+    }
+    T2_SDIV(r0, r1, r2);
+}
+
+/* Signed division by a constant: load i0 into a scratch register and
+ * defer to divr. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Unsigned division r0 = r1 / r2.  T2_UDIV is emitted only when both
+ * jit_armv7r_p() and jit_thumb_p() hold; every other configuration goes
+ * through divrem (quotient requested, unsigned). */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_armv7r_p() || !jit_thumb_p()) {
+       divrem(1, 0, r0, r1, r2);
+       return;
+    }
+    T2_UDIV(r0, r1, r2);
+}
+
+/* Unsigned division by a constant: load i0 into a scratch register and
+ * defer to divr_u. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void            /* Divide r2 by r3 by calling the EABI divmod helper; copies R0 into r0 and R1 into r1 afterwards. */
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_word_t         d;
+    jit_word_t         w;
+    jit_get_reg_args();
+    movr(_R0_REGNO, r2);       /* first helper argument */
+    movr(_R1_REGNO, r3);       /* second helper argument; NOTE(review): if r3 were R0 it was just overwritten - confirm callers cannot pass that */
+    if (sign)                  w = (jit_word_t)__aeabi_idivmod;
+    else                       w = (jit_word_t)__aeabi_uidivmod;
+    if (!jit_exchange_p()) {
+       if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;        /* displacement in halfwords */
+       else                    d = ((w - _jit->pc.w) >> 2) - 2;        /* displacement in words */
+       if (_s24P(d)) { /* helper is reachable with a direct BL */
+           if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
+           else                BLI(d & 0x00ffffff);
+       }
+       else                    goto fallback;
+    }
+    else {
+    fallback:  /* indirect call: helper address in R2, then BLX */
+       movi(_R2_REGNO, w);
+       if (jit_thumb_p())      T1_BLX(_R2_REGNO);
+       else                    BLX(_R2_REGNO);
+    }
+    movr(r0, _R0_REGNO);       /* NOTE(review): if r0 were R1, the next move would read the value just written - confirm */
+    movr(r1, _R1_REGNO);
+    jit_unget_reg_args();
+}
+
+/* Combined divide by a constant: stage i0 in a scratch register and defer
+ * to iqdivr, forwarding the signedness flag unchanged. */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqdivr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+static void            /* Signed remainder of r1 / r2 into r0. */
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    divrem(0, 1, r0, r1, r2);  /* div = 0 selects the R1 result, sign = 1 the signed helper */
+}
+
+/* Signed remainder by a constant: load i0 into a scratch register and
+ * defer to remr. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void            /* Unsigned remainder of r1 / r2 into r0. */
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    divrem(0, 0, r0, r1, r2);  /* div = 0 selects the R1 result, sign = 0 the unsigned helper */
+}
+
+/* Unsigned remainder by a constant: load i0 into a scratch register and
+ * defer to remr_u. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Bitwise AND: r0 = r1 & r2.
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all three registers are below 8 and r0 is
+ * also one of the sources; anything else gets the three-operand T2 form. */
+static void
+_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       AND(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || (r0 != r1 && r0 != r2))
+       T2_AND(r0, r1, r2);
+    else
+       T1_AND(r0, r0 == r1 ? r2 : r1);
+}
+
+static void            /* Bitwise AND with a constant: r0 = r1 & i0. */
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((i = encode_thumb_immediate(i0)) != -1)     /* i0 itself has an immediate encoding */
+           T2_ANDI(r0, r1, i);
+       else if ((i = encode_thumb_immediate(~i0)) != -1)       /* ~i0 encodes: AND with i0 is BIC with ~i0 */
+           T2_BICI(r0, r1, i);
+       else if (r0 != r1) {    /* r0 is free to hold the constant */
+           movi(r0, i0);
+           T2_AND(r0, r1, r0);
+       }
+       else {  /* r0 aliases r1: stage the constant in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_AND(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {      /* ARM encoding: the same four cases */
+       if ((i = encode_arm_immediate(i0)) != -1)
+           ANDI(r0, r1, i);
+       else if ((i = encode_arm_immediate(~i0)) != -1)
+           BICI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           AND(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           AND(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Bitwise OR: r0 = r1 | r2.
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all three registers are below 8 and r0 is
+ * also one of the sources; anything else gets the three-operand T2 form. */
+static void
+_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ORR(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || (r0 != r1 && r0 != r2))
+       T2_ORR(r0, r1, r2);
+    else
+       T1_ORR(r0, r0 == r1 ? r2 : r1);
+}
+
+static void            /* Bitwise OR with a constant: r0 = r1 | i0. */
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((i = encode_thumb_immediate(i0)) != -1)     /* i0 has an immediate encoding */
+           T2_ORRI(r0, r1, i);
+       else if (r0 != r1) {    /* r0 is free to hold the constant */
+           movi(r0, i0);
+           T2_ORR(r0, r1, r0);
+       }
+       else {  /* r0 aliases r1: stage the constant in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_ORR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {      /* ARM encoding: the same three cases */
+       if ((i = encode_arm_immediate(i0)) != -1)
+           ORRI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           ORR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ORR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Bitwise exclusive OR: r0 = r1 ^ r2.
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all three registers are below 8 and r0 is
+ * also one of the sources; anything else gets the three-operand T2 form. */
+static void
+_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       EOR(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || (r0 != r1 && r0 != r2))
+       T2_EOR(r0, r1, r2);
+    else
+       T1_EOR(r0, r0 == r1 ? r2 : r1);
+}
+
+static void            /* Bitwise exclusive OR with a constant: r0 = r1 ^ i0. */
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((i = encode_thumb_immediate(i0)) != -1)     /* i0 has an immediate encoding */
+           T2_EORI(r0, r1, i);
+       else if (r0 != r1) {    /* r0 is free to hold the constant */
+           movi(r0, i0);
+           T2_EOR(r0, r1, r0);
+       }
+       else {  /* r0 aliases r1: stage the constant in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_EOR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {      /* ARM encoding: the same three cases */
+       if ((i = encode_arm_immediate(i0)) != -1)
+           EORI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           EOR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           EOR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Left shift by register: r0 = r1 << r2.
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all registers are below 8 and r0 == r1. */
+static void
+_lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LSL(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || r0 != r1)
+       T2_LSL(r0, r1, r2);
+    else
+       T1_LSL(r0, r2);
+}
+
+/* Left shift by a constant: r0 = r1 << i0, with i0 asserted to lie in
+ * 0..31.  A zero shift is emitted as a plain register move. */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       LSLI(r0, r1, i0);
+       return;
+    }
+    /* T1 form only when flags may be set and both registers are below 8. */
+    if (jit_no_set_flags() || (r0|r1) >= 8)
+       T2_LSLI(r0, r1, i0);
+    else
+       T1_LSLI(r0, r1, i0);
+}
+
+/* Arithmetic right shift by register: r0 = r1 >> r2 (ASR).
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all registers are below 8 and r0 == r1. */
+static void
+_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ASR(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || r0 != r1)
+       T2_ASR(r0, r1, r2);
+    else
+       T1_ASR(r0, r2);
+}
+
+/* Arithmetic right shift by a constant: r0 = r1 >> i0 (ASR), with i0
+ * asserted to lie in 0..31.  A zero shift is emitted as a register move. */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       ASRI(r0, r1, i0);
+       return;
+    }
+    /* T1 form only when flags may be set and both registers are below 8. */
+    if (jit_no_set_flags() || (r0|r1) >= 8)
+       T2_ASRI(r0, r1, i0);
+    else
+       T1_ASRI(r0, r1, i0);
+}
+
+/* Logical right shift by register: r0 = r1 >> r2 (LSR).
+ * In Thumb mode the two-operand T1 form is chosen only when
+ * jit_no_set_flags() is false, all registers are below 8 and r0 == r1. */
+static void
+_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LSR(r0, r1, r2);
+       return;
+    }
+    if (jit_no_set_flags() || (r0|r1|r2) >= 8 || r0 != r1)
+       T2_LSR(r0, r1, r2);
+    else
+       T1_LSR(r0, r2);
+}
+
+/* Logical right shift by a constant: r0 = r1 >> i0 (LSR), with i0
+ * asserted to lie in 0..31.  A zero shift is emitted as a register move. */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       LSRI(r0, r1, i0);
+       return;
+    }
+    /* T1 form only when flags may be set and both registers are below 8. */
+    if (jit_no_set_flags() || (r0|r1) >= 8)
+       T2_LSRI(r0, r1, i0);
+    else
+       T1_LSRI(r0, r1, i0);
+}
+
+static void            /* Materialise a comparison: compare r1 with r2, then r0 = 1 under condition ct and 0 under cf. */
+_ccr(jit_state_t *_jit, int ct, int cf,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_thumb_p()) {
+       assert((ct ^ cf) >> 28 == 1);   /* ct and cf may differ only in bit 28; only ct is handed to ITE below */
+       if ((r1|r2) < 8)        /* both registers below 8 */
+           T1_CMP(r1, r2);
+       else if ((r1&r2) & 8)   /* both registers have bit 3 set */
+           T1_CMPX(r1, r2);
+       else
+           T2_CMP(r1, r2);
+       ITE(ct);        /* the two moves that follow form the then/else pair */
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       CMP(r1, r2);
+       CC_MOVI(ct, r0, 1);     /* conditional move under ct */
+       CC_MOVI(cf, r0, 0);     /* conditional move under cf */
+    }
+}
+
+static void            /* Materialise a comparison with a constant: compare r1 with i0, then r0 = 1 under ct and 0 under cf. */
+_cci(jit_state_t *_jit, int ct, int cf,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (r1 < 7 && !(i0 & 0xffffff00))       /* i0 fits 8 bits; NOTE(review): `< 7` sends r7 to the T2 path while other tests here use `< 8` - confirm intent */
+           T1_CMPI(r1, i0);
+       else if ((i = encode_thumb_immediate(i0)) != -1)
+           T2_CMPI(r1, i);
+       else if ((i = encode_thumb_immediate(-i0)) != -1)       /* only -i0 encodes: compare-negative instead */
+           T2_CMNI(r1, i);
+       else {  /* no immediate form: load i0 and let ccr emit everything */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ccr(ct, cf, r0, r1, rn(reg));
+           jit_unget_reg(reg);
+           return;
+       }
+       ITE(ct);        /* the two moves that follow form the then/else pair */
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       if ((i = encode_arm_immediate(i0)) != -1)
+           CMPI(r1, i);
+       else if ((i = encode_arm_immediate(-i0)) != -1) /* only -i0 encodes: compare-negative instead */
+           CMNI(r1, i);
+       else if (r0 != r1) {    /* r0 is overwritten below anyway, so it can hold the constant */
+           movi(r0, i0);
+           CMP(r1, r0);
+       }
+       else {  /* r0 aliases r1: stage the constant in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           CMP(r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       CC_MOVI(ct, r0, 1);     /* conditional move under ct */
+       CC_MOVI(cf, r0, 0);     /* conditional move under cf */
+    }
+}
+
+/* Inequality test: r0 = (r1 != r2).
+ * Thumb mode defers to ccr with the NE/EQ condition pair.  ARM mode needs
+ * no separate compare: SUBS leaves r1 - r2 in r0, which is already 0 when
+ * the operands are equal, and a move conditional on NE replaces any
+ * non-zero difference with 1. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_thumb_p()) {
+       ccr(ARM_CC_NE, ARM_CC_EQ, r0, r1, r2);
+       return;
+    }
+    SUBS(r0, r1, r2);
+    CC_MOVI(ARM_CC_NE, r0, 1);
+}
+
+static void            /* Inequality test against a constant: r0 = (r1 != i0). */
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p())
+       cci(ARM_CC_NE, ARM_CC_EQ, r0, r1, i0);
+    else {     /* ARM: r0 = r1 - i0 with flags set, then force 1 when the difference is non-zero */
+       if ((i = encode_arm_immediate(i0)) != -1)
+           SUBSI(r0, r1, i);
+       else if ((i = encode_arm_immediate(-i0)) != -1) /* only -i0 encodes: add it instead */
+           ADDSI(r0, r1, i);
+       else if (r0 != r1) {    /* r0 is free to hold the constant */
+           movi(r0, i0);
+           SUBS(r0, r1, r0);
+       }
+       else {  /* r0 aliases r1: stage the constant in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           SUBS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       CC_MOVI(ARM_CC_NE, r0, 1);      /* equal operands leave the zero difference in r0 */
+    }
+}
+
+/* Indirect jump: transfer control to the address held in r0 by moving it
+ * into register 15, using the encoding that matches the current mode. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (!jit_thumb_p())
+       MOV(_R15_REGNO, r0);
+    else
+       T1_MOV(_R15_REGNO, r0);
+}
+
+static void            /* Emit an unconditional jump to the address i0, using the shortest form that reaches it. */
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    w = _jit->pc.w;
+    /* if thumb and in thumb mode */
+    if (jit_thumb_p() && _jitc->thumb) {
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       if (d >= -1024 && d <= 1023)    /* fits the 11-bit field of the short branch */
+           T1_B(d & 0x7ff);
+       else if (_s24P(d))
+           T2_B(encode_thumb_jump(d));
+       else {  /* out of direct range: jump through a scratch register */
+           reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+           movi(rn(reg), i0);
+           jmpr(rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       if (_s24P(d))
+           B(d & 0x00ffffff);
+       else {  /* out of direct range: jump through a scratch register */
+           reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+           movi(rn(reg), i0);
+           jmpr(rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+static jit_word_t      /* Emit a jump to i0 and return an address for it: the branch's own pc when i1 is set, else movi_p's result. */
+_jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (i1) {
+       /* Assume jump is not longer than 23 bits if inside jit */
+       w = _jit->pc.w;
+       /* if thumb and in thumb mode */
+       if (jit_thumb_p() && _jitc->thumb) {
+           d = ((i0 - w) >> 1) - 2;    /* displacement in halfwords */
+           assert(_s24P(d));
+           T2_B(encode_thumb_jump(d)); /* always the long form, never T1_B */
+       }
+       else {
+           d = ((i0 - w) >> 2) - 2;    /* displacement in words */
+           assert(_s24P(d));
+           B(d & 0x00ffffff);
+       }
+    }
+    else {     /* indirect: load the target with movi_p and jump through the register */
+       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       w = movi_p(rn(reg), i0);
+       jmpr(rn(reg));
+       jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+static jit_word_t      /* Compare r0 with r1 and branch to i0 under condition cc; returns the address of the branch instruction. */
+_bccr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)        /* both registers below 8 */
+           T1_CMP(r0, r1);
+       else if ((r0&r1) & 8)   /* both registers have bit 3 set */
+           T1_CMPX(r0, r1);
+       else
+           T2_CMP(r0, r1);
+       /* use only thumb2 conditional as does not know if will be patched */
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMP(r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* Compare r0 with the constant i1 and branch to i0 under condition cc; returns the address of the branch instruction. */
+_bcci(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    int                        i;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (r0 < 7 && !(i1 & 0xffffff00))       /* i1 fits 8 bits; NOTE(review): `< 7` sends r7 to the T2 path while other tests here use `< 8` - confirm intent */
+           T1_CMPI(r0, i1);
+       else if ((i = encode_thumb_immediate(i1)) != -1)
+           T2_CMPI(r0, i);
+       else if ((i = encode_thumb_immediate(-i1)) != -1)       /* only -i1 encodes: compare-negative instead */
+           T2_CMNI(r0, i);
+       else {  /* no immediate form: compare against a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           T2_CMP(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       /* use only thumb2 conditional as does not know if will be patched */
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if ((i = encode_arm_immediate(i1)) != -1)
+           CMPI(r0, i);
+       else if ((i = encode_arm_immediate(-i1)) != -1) /* only -i1 encodes: compare-negative instead */
+           CMNI(r0, i);
+       else {  /* no immediate form: compare against a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           CMP(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* r0 += r1 with flags set, then branch to i0 under condition cc; returns the address of the branch instruction. */
+_baddr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)        /* both registers below 8: T1 form */
+           T1_ADD(r0, r0, r1);
+       else
+           T2_ADDS(r0, r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       ADDS(r0, r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* r0 += i1 with flags set, then branch to i0 under condition cc; returns the address of the branch instruction. */
+_baddi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
+{
+    int                        i;
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (r0 < 8 && !(i1 & ~7))       /* constant fits 3 bits */
+           T1_ADDI3(r0, r0, i1);
+       else if (r0 < 8 && !(-i1 & ~7)) /* negated constant fits 3 bits: subtract it */
+           T1_SUBI3(r0, r0, -i1);
+       else if (r0 < 8 && !(i1 & ~0xff))       /* constant fits 8 bits */
+           T1_ADDI8(r0, i1);
+       else if (r0 < 8 && !(-i1 & ~0xff))      /* negated constant fits 8 bits: subtract it */
+           T1_SUBI8(r0, -i1);
+       else if ((i = encode_thumb_immediate(i1)) != -1)
+           T2_ADDSI(r0, r0, i);
+       else if ((i = encode_thumb_immediate(-i1)) != -1)       /* only -i1 encodes: r0 + i1 == r0 - (-i1) */
+           T2_SUBSI(r0, r0, i);
+       else {  /* no immediate form: add a scratch register holding i1 */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           T2_ADDS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if ((i = encode_arm_immediate(i1)) != -1)
+           ADDSI(r0, r0, i);
+       else if ((i = encode_arm_immediate(-i1)) != -1) /* only -i1 encodes: r0 + i1 == r0 - (-i1) */
+           SUBSI(r0, r0, i);
+       else {  /* no immediate form: add a scratch register holding i1 */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           ADDS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* r0 -= r1 with flags set, then branch to i0 under condition cc; returns the address of the branch instruction. */
+_bsubr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)        /* both registers below 8: T1 form */
+           T1_SUB(r0, r0, r1);
+       else
+           T2_SUBS(r0, r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       SUBS(r0, r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+/*
+ * Subtract the constant i1 from r0 in place with the condition flags set,
+ * then emit a branch to i0 under condition cc.
+ * Returns the address of the branch instruction (pc sampled just before
+ * the branch is emitted).
+ * The subtraction takes the first form that applies: a T1 SUB/ADD when r0
+ * is below 8 and the constant (or its negation) fits 3 or 8 bits, an
+ * immediate SUBS when i1 is encodable, an immediate ADDS of -i1 when only
+ * the negated constant is encodable, and otherwise SUBS against a scratch
+ * register holding i1.
+ */
+static jit_word_t
+_bsubi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
+{
+    int                        i;
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (r0 < 8 && !(i1 & ~7))
+           T1_SUBI3(r0, r0, i1);
+       else if (r0 < 8 && !(-i1 & ~7))
+           T1_ADDI3(r0, r0, -i1);
+       else if (r0 < 8 && !(i1 & ~0xff))
+           T1_SUBI8(r0, i1);
+       else if (r0 < 8 && !(-i1 & ~0xff))
+           T1_ADDI8(r0, -i1);
+       else if ((i = encode_thumb_immediate(i1)) != -1)
+           T2_SUBSI(r0, r0, i);
+       else if ((i = encode_thumb_immediate(-i1)) != -1)
+           /* i encodes -i1, so r0 - i1 must be emitted as an add;
+            * SUBS here would compute r0 + i1. */
+           T2_ADDSI(r0, r0, i);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           T2_SUBS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if ((i = encode_arm_immediate(i1)) != -1)
+           SUBSI(r0, r0, i);
+       else if ((i = encode_arm_immediate(-i1)) != -1)
+           ADDSI(r0, r0, i);   /* i encodes -i1: r0 - i1 == r0 + (-i1) */
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           SUBS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* Set flags from r0 & r1 and branch to i0 under condition cc; returns the address of the branch instruction. */
+_bmxr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)        /* both registers below 8: T1 form */
+           T1_TST(r0, r1);
+       else
+           T2_TST(r0, r1);
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if (jit_armv5_p())
+           TST(r0, r1);
+       else {  /* otherwise ANDS into a scratch register; only the flags are kept */
+           reg = jit_get_reg(jit_class_gpr);
+           ANDS(rn(reg), r0, r1);
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t      /* Set flags from r0 & i1 and branch to i0 under condition cc; returns the address of the branch instruction. */
+_bmxi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    int                        i;
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((i = encode_thumb_immediate(i1)) != -1)     /* mask has an immediate encoding */
+           T2_TSTI(r0, i);
+       else {  /* otherwise test against a scratch register holding the mask */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           T2_TST(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 1) - 2;        /* displacement in halfwords */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if (jit_armv5_p()) {
+           if ((i = encode_arm_immediate(i1)) != -1)
+               TSTI(r0, i);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               movi(rn(reg), i1);
+               TST(r0, rn(reg));
+               jit_unget_reg(reg);
+           }
+       }
+       else {  /* otherwise use flag-setting AND/BIC into a scratch register; only the flags are kept */
+           reg = jit_get_reg(jit_class_gpr);
+           if ((i = encode_arm_immediate(i1)) != -1)
+               ANDSI(rn(reg), r0, i);
+           else if ((i = encode_arm_immediate(~i1)) != -1)     /* only ~i1 encodes: AND with i1 is BIC with ~i1 */
+               BICSI(rn(reg), r0, i);
+           else {
+               movi(rn(reg), i1);
+               ANDS(rn(reg), r0, rn(reg));
+           }
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;         /* pc of the branch about to be emitted */
+       d = ((i0 - w) >> 2) - 2;        /* displacement in words */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+/* Load a signed byte from the address in r1 into r0 (LDRSB, offset 0). */
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p())
+       LDRSBI(r0, r1, 0);
+    else
+       T2_LDRSBI(r0, r1, 0);
+}
+
+/* Load a signed byte from the absolute address i0 into r0: the address
+ * is first placed in a scratch register, then used with offset 0. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRSBI(r0, rn(tmp), 0);
+    else
+       T2_LDRSBI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a signed byte from r1 + r2 into r0 (register-offset LDRSB).
+ * Thumb mode uses the T1 form when all three registers are below 8. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LDRSB(r0, r1, r2);
+       return;
+    }
+    if ((r0|r1|r2) >= 8)
+       T2_LDRSB(r0, r1, r2);
+    else
+       T1_LDRSB(r0, r1, r2);
+}
+
+static void            /* Load a signed byte from r1 + i0 into r0, picking an encoding by the size and sign of the offset. */
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)  /* this form is used only when jit_ldrt_strt_p() holds */
+           T2_LDRSBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           T2_LDRSBIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRSBWI(r0, r1, i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDRSB(r0, r1, r0);
+           else
+               T2_LDRSB(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDRSB(r0, r1, rn(reg));
+           else
+               T2_LDRSB(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       if (i0 >= 0 && i0 <= 255)
+           LDRSBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           LDRSBIN(r0, r1, -i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           LDRSB(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDRSB(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Load an unsigned byte from the address in r1 into r0 (LDRB, offset 0). */
+static void
+_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p())
+       LDRBI(r0, r1, 0);
+    else
+       T2_LDRBI(r0, r1, 0);
+}
+
+/* Load an unsigned byte from the absolute address i0 into r0: the address
+ * is first placed in a scratch register, then used with offset 0. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRBI(r0, rn(tmp), 0);
+    else
+       T2_LDRBI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned byte from r1 + r2 into r0 (register-offset LDRB).
+ * Thumb mode uses the T1 form when all three registers are below 8. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LDRB(r0, r1, r2);
+       return;
+    }
+    if ((r0|r1|r2) >= 8)
+       T2_LDRB(r0, r1, r2);
+    else
+       T1_LDRB(r0, r1, r2);
+}
+
+static void            /* Load an unsigned byte from r1 + i0 into r0, picking an encoding by the size and sign of the offset. */
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)        /* T1 form: registers below 8, offset 0..31 */
+           T1_LDRBI(r0, r1, i0);
+       else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)     /* this form is used only when jit_ldrt_strt_p() holds */
+           T2_LDRBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           T2_LDRBIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRBWI(r0, r1, i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDRB(r0, r1, r0);
+           else
+               T2_LDRB(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDRB(r0, r1, rn(reg));
+           else
+               T2_LDRB(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       if (i0 >= 0 && i0 <= 4095)      /* offsets up to 4095 are passed directly here */
+           LDRBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -4095) /* negative offset, passed as its magnitude */
+           LDRBIN(r0, r1, -i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           LDRB(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDRB(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Load a signed halfword from the address in r1 into r0 (LDRSH, offset 0). */
+static void
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p())
+       LDRSHI(r0, r1, 0);
+    else
+       T2_LDRSHI(r0, r1, 0);
+}
+
+/* Load a signed halfword from the absolute address i0 into r0: the address
+ * is first placed in a scratch register, then used with offset 0. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRSHI(r0, rn(tmp), 0);
+    else
+       T2_LDRSHI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a signed halfword from r1 + r2 into r0 (register-offset LDRSH).
+ * Thumb mode uses the T1 form when all three registers are below 8. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LDRSH(r0, r1, r2);
+       return;
+    }
+    if ((r0|r1|r2) >= 8)
+       T2_LDRSH(r0, r1, r2);
+    else
+       T1_LDRSH(r0, r1, r2);
+}
+
+static void            /* Load a signed halfword from r1 + i0 into r0, picking an encoding by the size and sign of the offset. */
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)  /* this form is used only when jit_ldrt_strt_p() holds */
+           T2_LDRSHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           T2_LDRSHIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRSHWI(r0, r1, i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDRSH(r0, r1, r0);
+           else
+               T2_LDRSH(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDRSH(r0, r1, rn(reg));
+           else
+               T2_LDRSH(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       if (i0 >= 0 && i0 <= 255)
+           LDRSHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           LDRSHIN(r0, r1, -i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           LDRSH(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDRSH(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Load an unsigned halfword from the address in r1 into r0 (LDRH, offset 0). */
+static void
+_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p())
+       LDRHI(r0, r1, 0);
+    else
+       T2_LDRHI(r0, r1, 0);
+}
+
+/* Load an unsigned halfword from the absolute address i0 into r0: the
+ * address is first placed in a scratch register, then used with offset 0. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRHI(r0, rn(tmp), 0);
+    else
+       T2_LDRHI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned halfword from r1 + r2 into r0 (register-offset LDRH).
+ * Thumb mode uses the T1 form when all three registers are below 8. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       LDRH(r0, r1, r2);
+       return;
+    }
+    if ((r0|r1|r2) >= 8)
+       T2_LDRH(r0, r1, r2);
+    else
+       T1_LDRH(r0, r1, r2);
+}
+
+static void            /* Load an unsigned halfword from r1 + i0 into r0, picking an encoding by the size and sign of the offset. */
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)    /* T1 form: registers below 8, even offset, passed halved (0..31) */
+           T1_LDRHI(r0, r1, i0 >> 1);
+       else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)     /* this form is used only when jit_ldrt_strt_p() holds */
+           T2_LDRHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           T2_LDRHIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRHWI(r0, r1, i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDRH(r0, r1, r0);
+           else
+               T2_LDRH(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDRH(r0, r1, rn(reg));
+           else
+               T2_LDRH(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       if (i0 >= 0 && i0 <= 255)
+           LDRHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)  /* negative offset, passed as its magnitude */
+           LDRHIN(r0, r1, -i0);
+       else if (r0 != r1) {    /* offset too large: r0 can hold it, since the load overwrites r0 */
+           movi(r0, i0);
+           LDRH(r0, r1, r0);
+       }
+       else {  /* r0 aliases the base: put the offset in a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDRH(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Emit a word load (LDR, immediate form) of [r1 + 0] into r0. */
+static void
+_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       LDRI(r0, r1, 0);
+       return;
+    }
+    T2_LDRI(r0, r1, 0);
+}
+
+/* Word load (LDR) from the absolute address i0 into r0, going through
+ * a temporary GPR that holds the address. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t         addr;
+
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    if (!jit_thumb_p())
+       LDRI(r0, rn(addr), 0);
+    else
+       T2_LDRI(r0, rn(addr), 0);
+    jit_unget_reg(addr);
+}
+
+/* Word load (LDR, register offset) of [r1 + r2] into r0.  In Thumb
+ * mode the T1 form requires all three register numbers below 8. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDR(r0, r1, r2);
+    else if ((r0|r1|r2) < 8)
+       T1_LDR(r0, r1, r2);
+    else
+       T2_LDR(r0, r1, r2);
+}
+
+/* Word load (LDR) of [r1 + i0] into r0.
+ *
+ * Immediate encodings are tried in the order coded below.  If i0 fits
+ * none of them, the offset is loaded into a register and the
+ * register-offset form is used: r0 doubles as the index register
+ * unless it is also the base (r0 == r1), in which case a temporary
+ * GPR is borrowed. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       /* T1 forms take the offset in words, hence the alignment
+        * test and the shift by 2 */
+       if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
+           T1_LDRI(r0, r1, i0 >> 2);
+       else if (r1 == _R13_REGNO && r0 < 8 &&
+                i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
+           T1_LDRISP(r0, i0 >> 2);
+       else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
+           T2_LDRI(r0, r1, i0);
+       /* -255 is accepted here, as in every other negative-offset
+        * check of the T2 *IN forms in this file; it was previously
+        * excluded and fell through to the register-offset path */
+       else if (i0 < 0 && i0 >= -255)
+           T2_LDRIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRWI(r0, r1, i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDR(r0, r1, r0);
+           else
+               T2_LDR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDR(r0, r1, rn(reg));
+           else
+               T2_LDR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* ARM mode: immediate forms cover offsets in [-4095, 4095] */
+       if (i0 >= 0 && i0 <= 4095)
+           LDRI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -4095)
+           LDRIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* Emit a byte store (STRB, immediate form) of r1 to [r0 + 0]. */
+static void
+_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       STRBI(r1, r0, 0);
+       return;
+    }
+    T2_STRBI(r1, r0, 0);
+}
+
+/* Byte store (STRB) of r0 to the absolute address i0, going through a
+ * temporary GPR that holds the address. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         addr;
+
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    if (!jit_thumb_p())
+       STRBI(r0, rn(addr), 0);
+    else
+       T2_STRBI(r0, rn(addr), 0);
+    jit_unget_reg(addr);
+}
+
+/* Byte store (STRB, register offset): r2 is the value, r1 and r0 are
+ * handed to the emitter as base and index.  In Thumb mode the T1 form
+ * requires all three register numbers below 8. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       STRB(r2, r1, r0);
+    else if ((r0|r1|r2) < 8)
+       T1_STRB(r2, r1, r0);
+    else
+       T2_STRB(r2, r1, r0);
+}
+
+/* Byte store (STRB) of r1 to [r0 + i0].
+ *
+ * Immediate encodings are tried in the order coded below; if i0 fits
+ * none of them it is loaded into a temporary GPR and the
+ * register-offset form is emitted. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+       /* ARM mode: immediate forms cover offsets in [-4095, 4095] */
+       if (i0 >= 0 && i0 <= 4095)
+           STRBI(r1, r0, i0);
+       else if (i0 < 0 && i0 >= -4095)
+           STRBIN(r1, r0, -i0);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           STRB(r1, r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+
+    /* Thumb mode */
+    if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
+       T1_STRBI(r1, r0, i0);
+    else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
+       T2_STRBI(r1, r0, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_STRBIN(r1, r0, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_STRBWI(r1, r0, i0);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_STRB(r1, r0, rn(tmp));
+       else
+           T2_STRB(r1, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* Emit a halfword store (STRH, immediate form) of r1 to [r0 + 0]. */
+static void
+_str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       STRHI(r1, r0, 0);
+       return;
+    }
+    T2_STRHI(r1, r0, 0);
+}
+
+/* Halfword store (STRH) of r0 to the absolute address i0, going
+ * through a temporary GPR that holds the address. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         addr;
+
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    if (!jit_thumb_p())
+       STRHI(r0, rn(addr), 0);
+    else
+       T2_STRHI(r0, rn(addr), 0);
+    jit_unget_reg(addr);
+}
+
+/* Halfword store (STRH, register offset): r2 is the value, r1 and r0
+ * are handed to the emitter as base and index.  In Thumb mode the T1
+ * form requires all three register numbers below 8. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       STRH(r2, r1, r0);
+    else if ((r0|r1|r2) < 8)
+       T1_STRH(r2, r1, r0);
+    else
+       T2_STRH(r2, r1, r0);
+}
+
+/* Halfword store (STRH) of r1 to [r0 + i0].
+ *
+ * Immediate encodings are tried in the order coded below; if i0 fits
+ * none of them it is loaded into a temporary GPR and the
+ * register-offset form is emitted. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+       /* ARM mode: immediate forms cover offsets in [-255, 255] */
+       if (i0 >= 0 && i0 <= 255)
+           STRHI(r1, r0, i0);
+       else if (i0 < 0 && i0 >= -255)
+           STRHIN(r1, r0, -i0);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           STRH(r1, r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+
+    /* Thumb mode */
+    if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
+       T1_STRHI(r1, r0, i0 >> 1);      /* offset passed in halfwords */
+    else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
+       T2_STRHI(r1, r0, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_STRHIN(r1, r0, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_STRHWI(r1, r0, i0);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_STRH(r1, r0, rn(tmp));
+       else
+           T2_STRH(r1, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* Emit a word store (STR, immediate form) of r1 to [r0 + 0]. */
+static void
+_str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       STRI(r1, r0, 0);
+       return;
+    }
+    T2_STRI(r1, r0, 0);
+}
+
+/* Word store (STR) of r0 to the absolute address i0, going through a
+ * temporary GPR that holds the address. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         addr;
+
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    if (!jit_thumb_p())
+       STRI(r0, rn(addr), 0);
+    else
+       T2_STRI(r0, rn(addr), 0);
+    jit_unget_reg(addr);
+}
+
+/* Word store (STR, register offset): r2 is the value, r1 and r0 are
+ * handed to the emitter as base and index.  In Thumb mode the T1 form
+ * requires all three register numbers below 8. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       STR(r2, r1, r0);
+    else if ((r0|r1|r2) < 8)
+       T1_STR(r2, r1, r0);
+    else
+       T2_STR(r2, r1, r0);
+}
+
+/* Word store (STR) of r1 to [r0 + i0].
+ *
+ * Immediate encodings are tried in the order coded below, including
+ * the T1 stack-pointer-relative form when the base is r13; if i0 fits
+ * none of them it is loaded into a temporary GPR and the
+ * register-offset form is emitted. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+       /* ARM mode: immediate forms cover offsets in [-4095, 4095] */
+       if (i0 >= 0 && i0 <= 4095)
+           STRI(r1, r0, i0);
+       else if (i0 < 0 && i0 >= -4095)
+           STRIN(r1, r0, -i0);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           STR(r1, r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+
+    /* Thumb mode; T1 forms take the offset in words */
+    if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
+       T1_STRI(r1, r0, i0 >> 2);
+    else if (r0 == _R13_REGNO && r1 < 8 &&
+            i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
+       T1_STRISP(r1, i0 >> 2);
+    else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
+       T2_STRI(r1, r0, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_STRIN(r1, r0, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_STRWI(r1, r0, i0);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_STR(r1, r0, rn(tmp));
+       else
+           T2_STR(r1, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+/* 16 bit byte swap of r1 into r0 (little-endian hosts only).
+ *
+ * With REV available (Thumb, or ARM when jit_armv6_p()) the whole
+ * word is byte-reversed and the result shifted right by 16.
+ * Otherwise the two low bytes are extracted and recombined by hand
+ * using a temporary GPR. */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         byte1;
+
+    if (jit_thumb_p() || jit_armv6_p()) {
+       if (!jit_thumb_p())
+           REV(r0, r1);
+       else if ((r0|r1) < 8)
+           T1_REV(r0, r1);
+       else
+           T2_REV(r0, r1);
+       rshi_u(r0, r0, 16);
+       return;
+    }
+
+    /* r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff) */
+    byte1 = jit_get_reg(jit_class_gpr);
+    rshi(rn(byte1), r1, 8);
+    andi(r0, r1, 0xff);
+    andi(rn(byte1), rn(byte1), 0xff);
+    lshi(r0, r0, 8);
+    orr(r0, r0, rn(byte1));
+    jit_unget_reg(byte1);
+}
+
+/* 32 bit byte swap of r1 into r0 (little-endian hosts only).
+ *
+ * Uses REV when available; otherwise emits the inline glibc htonl
+ * sequence (without register clobber), which needs one temporary
+ * GPR. */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_REV(r0, r1);
+       else
+           T2_REV(r0, r1);
+       return;
+    }
+    if (jit_armv6_p()) {
+       REV(r0, r1);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    EOR_SI(rn(tmp), r1, r1, ARM_ROR, 16);
+    LSRI(rn(tmp), rn(tmp), 8);
+    BICI(rn(tmp), rn(tmp), encode_arm_immediate(0xff00));
+    EOR_SI(r0, rn(tmp), r1, ARM_ROR, 8);
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Sign extend the low byte of r1 into r0: SXTB when available,
+ * otherwise a left shift by 24 followed by an arithmetic right shift
+ * by 24. */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_SXTB(r0, r1);
+       else
+           T2_SXTB(r0, r1);
+       return;
+    }
+    if (jit_armv6_p()) {
+       SXTB(r0, r1);
+       return;
+    }
+    LSLI(r0, r1, 24);
+    ASRI(r0, r0, 24);
+}
+
+/* Zero extend the low byte of r1 into r0: UXTB when available,
+ * otherwise an AND with 0xff. */
+static void
+_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_UXTB(r0, r1);
+       else
+           T2_UXTB(r0, r1);
+       return;
+    }
+    if (jit_armv6_p()) {
+       UXTB(r0, r1);
+       return;
+    }
+    ANDI(r0, r1, 0xff);
+}
+
+/* Sign extend the low halfword of r1 into r0: SXTH when available,
+ * otherwise a left shift by 16 followed by an arithmetic right shift
+ * by 16. */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_SXTH(r0, r1);
+       else
+           T2_SXTH(r0, r1);
+       return;
+    }
+    if (jit_armv6_p()) {
+       SXTH(r0, r1);
+       return;
+    }
+    LSLI(r0, r1, 16);
+    ASRI(r0, r0, 16);
+}
+
+/* Zero extend the low halfword of r1 into r0: UXTH when available,
+ * otherwise a left shift by 16 followed by a logical right shift by
+ * 16. */
+static void
+_extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_UXTH(r0, r1);
+       else
+           T2_UXTH(r0, r1);
+       return;
+    }
+    if (jit_armv6_p()) {
+       UXTH(r0, r1);
+       return;
+    }
+    LSLI(r0, r1, 16);
+    LSRI(r0, r0, 16);
+}
+
+/* Emit an indirect call (BLX) through register r0. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (!jit_thumb_p()) {
+       BLX(r0);
+       return;
+    }
+    T1_BLX(r0);
+}
+
+/* Emit a call to the absolute address i0.
+ *
+ * A direct BL is used only in ARM mode, when jit_exchange_p() is
+ * false and the word displacement (taken relative to pc + 8) passes
+ * _s24P().  In every other case the address is loaded into a
+ * temporary GPR and called through BLX. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t          disp;
+    jit_int32_t         target;
+
+    disp = ((i0 - _jit->pc.w) >> 2) - 2;
+    if (jit_exchange_p() || jit_thumb_p() || !_s24P(disp)) {
+       target = jit_get_reg(jit_class_gpr);
+       movi(rn(target), i0);
+       if (jit_thumb_p())
+           T1_BLX(rn(target));
+       else
+           BLX(rn(target));
+       jit_unget_reg(target);
+       return;
+    }
+    BLI(disp & 0x00ffffff);
+}
+
+/* Emit a call to i0 through a temporary GPR loaded with movi_p().
+ *
+ * Returns the code address at which the movi_p() sequence starts.
+ * The address is sampled after the temporary has been acquired and
+ * immediately before movi_p() is emitted; keep that order. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t          where;
+    jit_int32_t         target;
+
+    target = jit_get_reg(jit_class_gpr);
+    where = _jit->pc.w;
+    movi_p(rn(target), i0);
+    if (!jit_thumb_p())
+       BLX(rn(target));
+    else
+       T1_BLX(rn(target));
+    jit_unget_reg(target);
+    return (where);
+}
+
+/* Emit the function prologue.
+ *
+ * - With assume_frame set nothing is emitted; only the start of thumb
+ *   code is recorded if it was not known yet.
+ * - Otherwise the frame size is computed (8 byte aligned), registers
+ *   are pushed, fp is set to sp, the stack is reserved and, for
+ *   functions using allocar, the current aoff is stored in the frame
+ *   slot at aoffoff.
+ *
+ * The register mask 0xf is pushed first and separately; _epilog
+ * drops those 16 bytes again with an add to sp. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t         tmp;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame) {
+           if (jit_thumb_p() && !_jitc->thumb)
+               _jitc->thumb = _jit->pc.w;
+           return;
+       }
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
+    /* align stack at 8 bytes */
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             _jitc->function->self.aoff) + 7) & -8;
+
+    if (jit_thumb_p()) {
+       /*  switch to thumb mode (better approach would be to
+        * ORR 1 address being called, but no clear distinction
+        * of what is a pointer to a jit function, or if patching
+        * a pointer to a jit function) */
+       ADDI(_R12_REGNO, _R15_REGNO, 1);
+       BX(_R12_REGNO);
+       if (!_jitc->thumb)
+           _jitc->thumb = _jit->pc.w;
+       T2_PUSH(0xf);
+       T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+    }
+    else {
+       PUSH(0xf);
+       PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+    }
+    /* the float register save is emitted only when jit_cpu.abi is set,
+     * and always after the integer pushes */
+    if (jit_cpu.abi)
+       VPUSH_F64(_D8_REGNO, 8);
+
+    movr(_FP_REGNO, _SP_REGNO);
+    if (_jitc->function->stack)
+       subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* Emit the function epilogue, undoing _prolog: restore sp from fp,
+ * pop the float registers (only when jit_cpu.abi is set) and the
+ * integer registers, skip the 16 bytes pushed with mask 0xf, and
+ * return through lr.  Nothing is emitted with assume_frame set.
+ *
+ * In Thumb mode a NOP is appended when bit 1 of the code pointer is
+ * set after the return instruction. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+
+    movr(_SP_REGNO, _FP_REGNO);
+    if (jit_cpu.abi)
+       VPOP_F64(_D8_REGNO, 8);
+    if (jit_thumb_p()) {
+       T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+       addi(_SP_REGNO, _SP_REGNO, 16);
+       T1_BX(_LR_REGNO);
+       if (_jit->pc.w & 2)
+           T1_NOP();
+    }
+    else {
+       POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+       addi(_SP_REGNO, _SP_REGNO, 16);
+       BX(_LR_REGNO);
+    }
+}
+
+/* Set r0 to the address of the first variadic stack argument of the
+ * current function, which must have been declared varargs. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_word_t          first;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Offset from fp of the first stack argument: the -16 accounts
+     * for the 4 argument registers that are always saved, and
+     * _jitc->function->vagp accounts for declared arguments. */
+    first = _jitc->function->self.size - 16 + _jitc->function->vagp;
+    addi(r0, _FP_REGNO, first);
+}
+
+/* Fetch the next variadic argument: load the word r1 points at into
+ * r0, then step r1 forward by one jit_word_t. */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          step;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    step = sizeof(jit_word_t);
+    ldr(r0, r1);               /* the argument itself */
+    addi(r1, r1, step);                /* advance the argument pointer */
+}
+
+/* Patch the already emitted instruction(s) at address `instr' so that
+ * they refer to `label'.  `kind' selects what is rewritten:
+ *
+ *   arm_patch_jump  a branch; its displacement field is recomputed.
+ *                   In thumb code a MOVW found in place of a branch
+ *                   is handled like arm_patch_word.
+ *   arm_patch_load  an ARM mode LDR (immediate); its 12 bit offset is
+ *                   recomputed relative to instr + 8.
+ *   arm_patch_word  in thumb mode a MOVW/MOVT pair receiving the low
+ *                   and high 16 bits of `label'; in ARM mode a raw
+ *                   32 bit word overwritten with `label'.
+ *
+ * Any other kind, an unexpected opcode or an out of range
+ * displacement trips an assertion. */
+static void
+_patch_at(jit_state_t *_jit,
+         jit_int32_t kind, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          d;
+    jit_thumb_t                 thumb;
+    /* views of the code at `instr' as halfwords, words or an integer */
+    union {
+       jit_int16_t     *s;
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    if (kind == arm_patch_jump) {
+       /* only addresses at or above _jitc->thumb are decoded as thumb */
+       if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           if ((thumb.i & THUMB2_B) == THUMB2_B) {
+               /* displacement in halfwords, relative to instr + 4 */
+               d = ((label - instr) >> 1) - 2;
+               assert(_s24P(d));
+               thumb.i = THUMB2_B | encode_thumb_jump(d);
+               thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           }
+           else if ((thumb.i & THUMB2_B) == THUMB2_CC_B) {
+               d = ((label - instr) >> 1) - 2;
+               assert(_s20P(d));
+               /* bits selected by 0x3c00000 are carried over from the
+                * existing instruction */
+               thumb.i = THUMB2_CC_B | (thumb.i & 0x3c00000) |
+                         encode_thumb_cc_jump(d);
+               thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           }
+           else {
+               /* for the sake of simplicity in case choose to
+                * movw+movt+[bx|blx], e.g. if changing to instead
+                * of asserting target is reachable, load constant
+                * and do indirect jump if not reachable */
+               if ((thumb.i & 0xfbf00000) == THUMB2_MOVWI)
+                   goto indirect_jump;
+               assert(!"handled branch opcode");
+           }
+       }
+       else {
+           thumb.i = u.i[0];
+           /* 0x0e000000 because 0x01000000 is (branch&) link modifier */
+           assert((thumb.i & 0x0e000000) == ARM_B);
+           /* displacement in words, relative to instr + 8 */
+           d = ((label - instr) >> 2) - 2;
+           assert(_s24P(d));
+           /* keep the top 8 bits, replace the 24 bit displacement */
+           u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+       }
+    }
+    else if (kind == arm_patch_load) {
+       /* offset may be negative for a forward patch because it
+        * is relative to pc + 8, for example:
+        *          ldr r0, [pc, #-4]
+        *          bx r0               ;; [pc, #-8]
+        *          .data ...           ;; [pc, #-4]
+        *          ...                 ;; [pc]
+        */
+       assert(!jit_thumb_p());
+       thumb.i = u.i[0];
+       assert((thumb.i & 0x0f700000) == ARM_LDRI);
+       d = label - (instr + 8);
+       /* the sign of the offset is carried by the ARM_P bit, the
+        * field itself holds the magnitude */
+       if (d < 0) {
+           thumb.i &= ~ARM_P;
+           d = -d;
+       }
+       else
+           thumb.i |= ARM_P;
+       /* magnitude must fit the 12 bit offset field */
+       assert(!(d & 0xfffff000));
+       u.i[0] = (thumb.i & 0xfffff000) | d;
+    }
+    else if (kind == arm_patch_word) {
+       if (jit_thumb_p()) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           assert((thumb.i & 0xfbf00000) == THUMB2_MOVWI);
+           /* also entered from the arm_patch_jump case above, with
+            * thumb.i already holding the decoded MOVW */
+       indirect_jump:
+           /* scatter the low 16 bits of label over the instruction:
+            * bits 12-15 -> 16-19, bit 11 -> 26, bits 8-10 -> 12-14,
+            * bits 0-7 stay in place */
+           thumb.i = ((thumb.i & 0xfbf00f00) |
+                      ( (label & 0x0000f000) <<  4) |
+                      ( (label & 0x00000800) << 15) |
+                      ( (label & 0x00000700) <<  4) |
+                      (  label & 0x000000ff));
+           thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           /* same treatment for the high 16 bits, in the instruction
+            * that follows, which must be a MOVT */
+           label >>= 16;
+           code2thumb(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
+           assert((thumb.i & 0xfbf00000) == THUMB2_MOVTI);
+           thumb.i = ((thumb.i & 0xfbf00f00) |
+                      ( (label & 0x0000f000) <<  4) |
+                      ( (label & 0x00000800) << 15) |
+                      ( (label & 0x00000700) <<  4) |
+                      (  label & 0x000000ff));
+           thumb2code(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
+       }
+       else
+           u.i[0] = label;
+    }
+    else
+       assert(!"handled patch");
+}
+#endif
diff --git a/deps/lightning/lib/jit_arm-swf.c b/deps/lightning/lib/jit_arm-swf.c
new file mode 100644 (file)
index 0000000..bf86ca1
--- /dev/null
@@ -0,0 +1,2640 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* match vfpv3 result */
+#define NAN_TO_INT_IS_ZERO             1
+/* Prototypes of the floating point routines used by this file,
+ * declared here directly rather than through a header.  Several of
+ * them are passed as the function pointer argument of the swf_*
+ * helper macros defined further down.
+ * NOTE(review): sqrtf/sqrt are presumably the libm functions and the
+ * __addsf3/__adddf3/__aeabi_* names the compiler runtime soft float
+ * helpers -- confirm against the toolchain. */
+extern float   sqrtf(float);
+extern double  sqrt(double);
+extern float   __addsf3(float, float);
+extern double  __adddf3(double, double);
+extern float   __aeabi_fsub(float, float);
+extern double  __aeabi_dsub(double, double);
+extern float   __aeabi_fmul(float, float);
+extern double  __aeabi_dmul(double, double);
+extern float   __aeabi_fdiv(float, float);
+extern double  __aeabi_ddiv(double, double);
+extern float   __aeabi_i2f(int);
+extern double  __aeabi_i2d(int);
+extern float   __aeabi_d2f(double);
+extern double  __aeabi_f2d(float);
+extern int     __aeabi_f2iz(float);
+extern int     __aeabi_d2iz(double);
+extern int     __aeabi_fcmplt(float, float);
+extern int     __aeabi_dcmplt(double, double);
+extern int     __aeabi_fcmple(float, float);
+extern int     __aeabi_dcmple(double, double);
+extern int     __aeabi_fcmpeq(float, float);
+extern int     __aeabi_dcmpeq(double, double);
+extern int     __aeabi_fcmpge(float, float);
+extern int     __aeabi_dcmpge(double, double);
+extern int     __aeabi_fcmpgt(float, float);
+extern int     __aeabi_dcmpgt(double, double);
+extern int     __aeabi_fcmpun(float, float);
+extern int     __aeabi_dcmpun(double, double);
+/* Generic helpers.  Every swf_xxx() macro forwards to the matching
+ * _swf_xxx() function, supplying the implicit `_jit' state as first
+ * argument.
+ *
+ * The letters after "swf_" mirror the signature of the function
+ * pointer the helper receives: ff is float(float), dd is
+ * double(double), fff is float(float,float), ddd is
+ * double(double,double), iff/iunff/bff are int(float,float) and
+ * idd/iundd/bdd are int(double,double).  A trailing underscore marks
+ * the variant whose last operand is a jit_float32_t or jit_float64_t
+ * value instead of a jit_int32_t.
+ * NOTE(review): the jit_int32_t operands are presumably register
+ * numbers and the function pointer the routine the generated code
+ * calls -- confirm in the CODE section of this file. */
+#  define swf_ff(i0,r0,r1)             _swf_ff(_jit,i0,r0,r1)
+static void
+_swf_ff(jit_state_t*,float(*)(float),jit_int32_t,jit_int32_t) maybe_unused;
+#  define swf_dd(i0,r0,r1)             _swf_dd(_jit,i0,r0,r1)
+static void
+_swf_dd(jit_state_t*,double(*)(double),jit_int32_t,jit_int32_t) maybe_unused;
+#  define swf_fff(i0,r0,r1,r2)         _swf_fff(_jit,i0,r0,r1,r2)
+static void _swf_fff(jit_state_t*,float(*)(float,float),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ddd(i0,r0,r1,r2)         _swf_ddd(_jit,i0,r0,r1,r2)
+static void _swf_ddd(jit_state_t*,double(*)(double,double),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_fff_(i0,r0,r1,i1)                _swf_fff_(_jit,i0,r0,r1,i1)
+static void _swf_fff_(jit_state_t*,float(*)(float,float),
+                     jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ddd_(i0,r0,r1,i1)                _swf_ddd_(_jit,i0,r0,r1,i1)
+static void _swf_ddd_(jit_state_t*,double(*)(double,double),
+                     jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_iff(i0,r0,r1,r2)         _swf_iff(_jit,i0,r0,r1,r2)
+static void _swf_iff(jit_state_t*,int(*)(float,float),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_idd(i0,r0,r1,r2)         _swf_idd(_jit,i0,r0,r1,r2)
+static void _swf_idd(jit_state_t*,int(*)(double,double),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+/* note: the last macro parameter of swf_iff_/swf_idd_ is spelled r2
+ * but is passed on as the jit_float32_t/jit_float64_t operand */
+#  define swf_iff_(i0,r0,r1,r2)                _swf_iff_(_jit,i0,r0,r1,r2)
+static void _swf_iff_(jit_state_t*,int(*)(float,float),
+                     jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_idd_(i0,r0,r1,r2)                _swf_idd_(_jit,i0,r0,r1,r2)
+static void _swf_idd_(jit_state_t*,int(*)(double,double),
+                     jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_iunff(i0,r0,r1,r2)       _swf_iunff(_jit,i0,r0,r1,r2)
+static void _swf_iunff(jit_state_t*,int(*)(float,float),
+                      jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_iundd(i0,r0,r1,r2)       _swf_iundd(_jit,i0,r0,r1,r2)
+static void _swf_iundd(jit_state_t*,int(*)(double,double),
+                      jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_iunff_(i0,r0,r1,i1)      _swf_iunff_(_jit,i0,r0,r1,i1)
+static void _swf_iunff_(jit_state_t*,int(*)(float,float),
+                       jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_iundd_(i0,r0,r1,i1)      _swf_iundd_(_jit,i0,r0,r1,i1)
+static void _swf_iundd_(jit_state_t*,int(*)(double,double),
+                       jit_int32_t,jit_int32_t,jit_float64_t);
+/* The swf_b* helpers additionally take an int (cc or eq) and a
+ * jit_word_t, and return a jit_word_t. */
+#  define swf_bff(i0,cc,i1,r0,r1)      _swf_bff(_jit,i0,cc,i1,r0,r1)
+static jit_word_t _swf_bff(jit_state_t*,int(*)(float,float),int,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bdd(i0,cc,i1,r0,r1)      _swf_bdd(_jit,i0,cc,i1,r0,r1)
+static jit_word_t _swf_bdd(jit_state_t*,int(*)(double,double),int,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bff_(i0,cc,i1,r0,i2)     _swf_bff_(_jit,i0,cc,i1,r0,i2)
+static jit_word_t _swf_bff_(jit_state_t*,int(*)(float,float),int,
+                           jit_word_t,jit_int32_t,jit_float32_t);
+#  define swf_bdd_(i0,cc,i1,r0,i2)     _swf_bdd_(_jit,i0,cc,i1,r0,i2)
+static jit_word_t _swf_bdd_(jit_state_t*,int(*)(double,double),int,
+                           jit_word_t,jit_int32_t,jit_float64_t);
+#  define swf_bunff(eq,i0,r0,r1)       _swf_bunff(_jit,eq,i0,r0,r1)
+static jit_word_t _swf_bunff(jit_state_t*,int,
+                            jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bundd(eq,i0,r0,r1)       _swf_bundd(_jit,eq,i0,r0,r1)
+static jit_word_t _swf_bundd(jit_state_t*,int,
+                            jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bunff_(eq,i0,r0,i1)      _swf_bunff_(_jit,eq,i0,r0,i1)
+static jit_word_t _swf_bunff_(jit_state_t*,int,
+                             jit_word_t,jit_int32_t,jit_float32_t);
+#  define swf_bundd_(eq,i0,r0,i1)      _swf_bundd_(_jit,eq,i0,r0,i1)
+static jit_word_t _swf_bundd_(jit_state_t*,int,
+                             jit_word_t,jit_int32_t,jit_float64_t);
+/* Conversion, truncation, move, absolute value and negation helpers;
+ * each has its own implementation instead of going through the
+ * generic helpers above. */
+#  define swf_extr_f(r0,r1)            _swf_extr_f(_jit,r0,r1)
+static void _swf_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_d(r0,r1)            _swf_extr_d(_jit,r0,r1)
+static void _swf_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_d_f(r0,r1)          _swf_extr_d_f(_jit,r0,r1)
+static void _swf_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_f_d(r0,r1)          _swf_extr_f_d(_jit,r0,r1)
+static void _swf_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_truncr_f_i(r0,r1)                _swf_truncr_f_i(_jit,r0,r1)
+static void _swf_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_truncr_d_i(r0,r1)                _swf_truncr_d_i(_jit,r0,r1)
+static void _swf_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_f(r0,r1)            _swf_movr_f(_jit,r0,r1)
+static void _swf_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_d(r0,r1)            _swf_movr_d(_jit,r0,r1)
+static void _swf_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movi_f(r0,i0)            _swf_movi_f(_jit,r0,i0)
+static void _swf_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define swf_movi_d(r0,i0)            _swf_movi_d(_jit,r0,i0)
+static void _swf_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define swf_absr_f(r0,r1)            _swf_absr_f(_jit,r0,r1)
+static void _swf_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_absr_d(r0,r1)            _swf_absr_d(_jit,r0,r1)
+static void _swf_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_negr_f(r0,r1)            _swf_negr_f(_jit,r0,r1)
+static void _swf_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_negr_d(r0,r1)            _swf_negr_d(_jit,r0,r1)
+static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+/* Square root and arithmetic: each operation hands the matching
+ * runtime routine to one of the generic swf_ff/swf_dd/swf_fff/swf_ddd
+ * helpers (underscore suffixed helper for the *i_* forms). */
+#  define swf_sqrtr_f(r0,r1)           swf_ff(sqrtf,r0,r1)
+#  define swf_sqrtr_d(r0,r1)           swf_dd(sqrt,r0,r1)
+#  define swf_addr_f(r0,r1,r2)         swf_fff(__addsf3,r0,r1,r2)
+#  define swf_addi_f(r0,r1,i0)         swf_fff_(__addsf3,r0,r1,i0)
+#  define swf_addr_d(r0,r1,r2)         swf_ddd(__adddf3,r0,r1,r2)
+#  define swf_addi_d(r0,r1,i0)         swf_ddd_(__adddf3,r0,r1,i0)
+#  define swf_subr_f(r0,r1,r2)         swf_fff(__aeabi_fsub,r0,r1,r2)
+#  define swf_subi_f(r0,r1,i0)         swf_fff_(__aeabi_fsub,r0,r1,i0)
+#  define swf_subr_d(r0,r1,r2)         swf_ddd(__aeabi_dsub,r0,r1,r2)
+#  define swf_subi_d(r0,r1,i0)         swf_ddd_(__aeabi_dsub,r0,r1,i0)
+/* register form of reverse subtract is subtract with the two source
+ * operands swapped; the immediate forms have their own functions */
+#  define swf_rsbr_f(r0, r1, r2)       swf_subr_f(r0, r2, r1)
+#  define swf_rsbi_f(r0, r1, i0)       _swf_rsbi_f(_jit, r0, r1, i0)
+static void _swf_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_rsbr_d(r0, r1, r2)       swf_subr_d(r0, r2, r1)
+#  define swf_rsbi_d(r0, r1, i0)       _swf_rsbi_d(_jit, r0, r1, i0)
+static void _swf_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_mulr_f(r0,r1,r2)         swf_fff(__aeabi_fmul,r0,r1,r2)
+#  define swf_muli_f(r0,r1,i0)         swf_fff_(__aeabi_fmul,r0,r1,i0)
+#  define swf_mulr_d(r0,r1,r2)         swf_ddd(__aeabi_dmul,r0,r1,r2)
+#  define swf_muli_d(r0,r1,i0)         swf_ddd_(__aeabi_dmul,r0,r1,i0)
+#  define swf_divr_f(r0,r1,r2)         swf_fff(__aeabi_fdiv,r0,r1,r2)
+#  define swf_divi_f(r0,r1,i0)         swf_fff_(__aeabi_fdiv,r0,r1,i0)
+#  define swf_divr_d(r0,r1,r2)         swf_ddd(__aeabi_ddiv,r0,r1,r2)
+#  define swf_divi_d(r0,r1,i0)         swf_ddd_(__aeabi_ddiv,r0,r1,i0)
+/* Comparisons lt, le, eq, ge, gt go through swf_iff/swf_idd with the
+ * corresponding __aeabi_[fd]cmp* routine. */
+#  define swf_ltr_f(r0,r1,r2)          swf_iff(__aeabi_fcmplt,r0,r1,r2)
+#  define swf_lti_f(r0,r1,i0)          swf_iff_(__aeabi_fcmplt,r0,r1,i0)
+#  define swf_ltr_d(r0,r1,r2)          swf_idd(__aeabi_dcmplt,r0,r1,r2)
+#  define swf_lti_d(r0,r1,i0)          swf_idd_(__aeabi_dcmplt,r0,r1,i0)
+#  define swf_ler_f(r0,r1,r2)          swf_iff(__aeabi_fcmple,r0,r1,r2)
+#  define swf_lei_f(r0,r1,i0)          swf_iff_(__aeabi_fcmple,r0,r1,i0)
+#  define swf_ler_d(r0,r1,r2)          swf_idd(__aeabi_dcmple,r0,r1,r2)
+#  define swf_lei_d(r0,r1,i0)          swf_idd_(__aeabi_dcmple,r0,r1,i0)
+#  define swf_eqr_f(r0,r1,r2)          swf_iff(__aeabi_fcmpeq,r0,r1,r2)
+#  define swf_eqi_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpeq,r0,r1,i0)
+#  define swf_eqr_d(r0,r1,r2)          swf_idd(__aeabi_dcmpeq,r0,r1,r2)
+#  define swf_eqi_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpeq,r0,r1,i0)
+#  define swf_ger_f(r0,r1,r2)          swf_iff(__aeabi_fcmpge,r0,r1,r2)
+#  define swf_gei_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpge,r0,r1,i0)
+#  define swf_ger_d(r0,r1,r2)          swf_idd(__aeabi_dcmpge,r0,r1,r2)
+#  define swf_gei_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpge,r0,r1,i0)
+#  define swf_gtr_f(r0,r1,r2)          swf_iff(__aeabi_fcmpgt,r0,r1,r2)
+#  define swf_gti_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpgt,r0,r1,i0)
+#  define swf_gtr_d(r0,r1,r2)          swf_idd(__aeabi_dcmpgt,r0,r1,r2)
+#  define swf_gti_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpgt,r0,r1,i0)
+/* ne has dedicated functions */
+#  define swf_ner_f(r0,r1,r2)          _swf_ner_f(_jit,r0,r1,r2)
+static void _swf_ner_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_nei_f(r0,r1,i0)          _swf_nei_f(_jit,r0,r1,i0)
+static void _swf_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ner_d(r0,r1,r2)          _swf_ner_d(_jit,r0,r1,r2)
+static void _swf_ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_nei_d(r0,r1,i0)          _swf_nei_d(_jit,r0,r1,i0)
+static void _swf_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* The un* comparisons reuse the same __aeabi_[fd]cmp* routines but go
+ * through the swf_iunff/swf_iundd helpers. */
+#  define swf_unltr_f(r0,r1,r2)                swf_iunff(__aeabi_fcmplt,r0,r1,r2)
+#  define swf_unlti_f(r0,r1,i0)                swf_iunff_(__aeabi_fcmplt,r0,r1,i0)
+#  define swf_unltr_d(r0,r1,r2)                swf_iundd(__aeabi_dcmplt,r0,r1,r2)
+#  define swf_unlti_d(r0,r1,i0)                swf_iundd_(__aeabi_dcmplt,r0,r1,i0)
+#  define swf_unler_f(r0,r1,r2)                swf_iunff(__aeabi_fcmple,r0,r1,r2)
+#  define swf_unlei_f(r0,r1,i0)                swf_iunff_(__aeabi_fcmple,r0,r1,i0)
+#  define swf_unler_d(r0,r1,r2)                swf_iundd(__aeabi_dcmple,r0,r1,r2)
+#  define swf_unlei_d(r0,r1,i0)                swf_iundd_(__aeabi_dcmple,r0,r1,i0)
+#  define swf_uneqr_f(r0,r1,r2)                swf_iunff(__aeabi_fcmpeq,r0,r1,r2)
+#  define swf_uneqi_f(r0,r1,i0)                swf_iunff_(__aeabi_fcmpeq,r0,r1,i0)
+#  define swf_uneqr_d(r0,r1,r2)                swf_iundd(__aeabi_dcmpeq,r0,r1,r2)
+#  define swf_uneqi_d(r0,r1,i0)                swf_iundd_(__aeabi_dcmpeq,r0,r1,i0)
+#  define swf_unger_f(r0,r1,r2)                swf_iunff(__aeabi_fcmpge,r0,r1,r2)
+#  define swf_ungei_f(r0,r1,i0)                swf_iunff_(__aeabi_fcmpge,r0,r1,i0)
+#  define swf_unger_d(r0,r1,r2)                swf_iundd(__aeabi_dcmpge,r0,r1,r2)
+#  define swf_ungei_d(r0,r1,i0)                swf_iundd_(__aeabi_dcmpge,r0,r1,i0)
+#  define swf_ungtr_f(r0,r1,r2)                swf_iunff(__aeabi_fcmpgt,r0,r1,r2)
+#  define swf_ungti_f(r0,r1,i0)                swf_iunff_(__aeabi_fcmpgt,r0,r1,i0)
+#  define swf_ungtr_d(r0,r1,r2)                swf_iundd(__aeabi_dcmpgt,r0,r1,r2)
+#  define swf_ungti_d(r0,r1,i0)                swf_iundd_(__aeabi_dcmpgt,r0,r1,i0)
+/* ltgt and ord have dedicated functions */
+#  define swf_ltgtr_f(r0,r1,r2)                _swf_ltgtr_f(_jit,r0,r1,r2)
+static void _swf_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ltgti_f(r0,r1,i0)                _swf_ltgti_f(_jit,r0,r1,i0)
+static void _swf_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ltgtr_d(r0,r1,r2)                _swf_ltgtr_d(_jit,r0,r1,r2)
+static void _swf_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ltgti_d(r0,r1,i0)                _swf_ltgti_d(_jit,r0,r1,i0)
+static void _swf_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_ordr_f(r0,r1,r2)         _swf_ordr_f(_jit,r0,r1,r2)
+static void _swf_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ordi_f(r0,r1,i0)         _swf_ordi_f(_jit,r0,r1,i0)
+static void _swf_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ordr_d(r0,r1,r2)         _swf_ordr_d(_jit,r0,r1,r2)
+static void _swf_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ordi_d(r0,r1,i0)         _swf_ordi_d(_jit,r0,r1,i0)
+static void _swf_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_unordr_f(r0,r1,r2)       swf_iunff(__aeabi_fcmpun,r0,r1,r2)
+#  define swf_unordi_f(r0,r1,i0)       swf_iunff_(__aeabi_fcmpun,r0,r1,i0)
+#  define swf_unordr_d(r0,r1,r2)       swf_iundd(__aeabi_dcmpun,r0,r1,r2)
+#  define swf_unordi_d(r0,r1,i0)       swf_iundd_(__aeabi_dcmpun,r0,r1,i0)
+/*
+ * Branch forms.  swf_bff/swf_bdd (and the immediate `_' variants) receive a
+ * comparison helper plus an ARM condition code.  The _swf_bff emitter calls
+ * the helper, compares its integer result in _R0_REGNO with 0 and emits a
+ * conditional branch with that code, so ARM_CC_NE branches when the helper
+ * returned nonzero and ARM_CC_EQ when it returned 0.  (Assumes swf_bff
+ * expands to _swf_bff -- the wrapper macro is defined outside this part.)
+ */
+#  define swf_bltr_f(i0,r0,r1)         swf_bff(__aeabi_fcmplt,ARM_CC_NE,i0,r0,r1)
+#  define swf_blti_f(i0,r0,i1)         swf_bff_(__aeabi_fcmplt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bltr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmplt,ARM_CC_NE,i0,r0,r1)
+#  define swf_blti_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmplt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bler_f(i0,r0,r1)         swf_bff(__aeabi_fcmple,ARM_CC_NE,i0,r0,r1)
+#  define swf_blei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmple,ARM_CC_NE,i0,r0,i1)
+#  define swf_bler_d(i0,r0,r1)         swf_bdd(__aeabi_dcmple,ARM_CC_NE,i0,r0,r1)
+#  define swf_blei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmple,ARM_CC_NE,i0,r0,i1)
+#  define swf_beqr_f(i0,r0,r1)         swf_bff(__aeabi_fcmpeq,ARM_CC_NE,i0,r0,r1)
+#  define swf_beqi_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpeq,ARM_CC_NE,i0,r0,i1)
+#  define swf_beqr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpeq,ARM_CC_NE,i0,r0,r1)
+#  define swf_beqi_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpeq,ARM_CC_NE,i0,r0,i1)
+#  define swf_bger_f(i0,r0,r1)         swf_bff(__aeabi_fcmpge,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpge,ARM_CC_NE,i0,r0,i1)
+#  define swf_bger_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpge,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpge,ARM_CC_NE,i0,r0,i1)
+#  define swf_bgtr_f(i0,r0,r1)         swf_bff(__aeabi_fcmpgt,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgti_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpgt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bgtr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpgt,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgti_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpgt,ARM_CC_NE,i0,r0,i1)
+/* bne: branch when the equality helper returns 0. */
+#  define swf_bner_f(i0,r0,r1)         swf_bff(__aeabi_fcmpeq,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bnei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpeq,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bner_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpeq,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bnei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpeq,ARM_CC_EQ,i0,r0,i1)
+/*
+ * Unordered-or-<relation> branches pair the opposite ordered helper with
+ * ARM_CC_EQ: e.g. bunlt branches when the "ge" helper returns 0.
+ */
+#  define swf_bunltr_f(i0,r0,r1)       swf_bff(__aeabi_fcmpge,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlti_f(i0,r0,i1)       swf_bff_(__aeabi_fcmpge,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunltr_d(i0,r0,r1)       swf_bdd(__aeabi_dcmpge,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlti_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmpge,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunler_f(i0,r0,r1)       swf_bff(__aeabi_fcmpgt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlei_f(i0,r0,i1)       swf_bff_(__aeabi_fcmpgt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunler_d(i0,r0,r1)       swf_bdd(__aeabi_dcmpgt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlei_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmpgt,ARM_CC_EQ,i0,r0,i1)
+/* buneq (flag 1) and bltgt (flag 0, further down) share swf_bunff/swf_bundd. */
+#  define swf_buneqr_f(i0,r0,r1)       swf_bunff(1,i0,r0,r1)
+#  define swf_buneqi_f(i0,r0,i1)       swf_bunff_(1,i0,r0,i1)
+#  define swf_buneqr_d(i0,r0,r1)       swf_bundd(1,i0,r0,r1)
+#  define swf_buneqi_d(i0,r0,i1)       swf_bundd_(1,i0,r0,i1)
+#  define swf_bunger_f(i0,r0,r1)       swf_bff(__aeabi_fcmplt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungei_f(i0,r0,i1)       swf_bff_(__aeabi_fcmplt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunger_d(i0,r0,r1)       swf_bdd(__aeabi_dcmplt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungei_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmplt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bungtr_f(i0,r0,r1)       swf_bff(__aeabi_fcmple,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungti_f(i0,r0,i1)       swf_bff_(__aeabi_fcmple,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bungtr_d(i0,r0,r1)       swf_bdd(__aeabi_dcmple,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungti_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmple,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bltgtr_f(i0,r0,r1)       swf_bunff(0,i0,r0,r1)
+#  define swf_bltgti_f(i0,r0,i1)       swf_bunff_(0,i0,r0,i1)
+#  define swf_bltgtr_d(i0,r0,r1)       swf_bundd(0,i0,r0,r1)
+#  define swf_bltgti_d(i0,r0,i1)       swf_bundd_(0,i0,r0,i1)
+/* bord / bunord: branch on the cmpun helper returning 0 / nonzero. */
+#  define swf_bordr_f(i0,r0,r1)                swf_bff(__aeabi_fcmpun,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bordi_f(i0,r0,i1)                swf_bff_(__aeabi_fcmpun,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bordr_d(i0,r0,r1)                swf_bdd(__aeabi_dcmpun,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bordi_d(i0,r0,i1)                swf_bdd_(__aeabi_dcmpun,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunordr_f(i0,r0,r1)      swf_bff(__aeabi_fcmpun,ARM_CC_NE,i0,r0,r1)
+#  define swf_bunordi_f(i0,r0,i1)      swf_bff_(__aeabi_fcmpun,ARM_CC_NE,i0,r0,i1)
+#  define swf_bunordr_d(i0,r0,r1)      swf_bdd(__aeabi_dcmpun,ARM_CC_NE,i0,r0,r1)
+#  define swf_bunordi_d(i0,r0,i1)      swf_bdd_(__aeabi_dcmpun,ARM_CC_NE,i0,r0,i1)
+/*
+ * Load/store and va_arg emitters.  Each macro supplies the implicit _jit
+ * argument to the emitter declared right below it.  For sti and stxi the
+ * jit_word_t immediate is the first operand, as the prototypes show.
+ */
+#  define swf_ldr_f(r0, r1)            _swf_ldr_f(_jit, r0, r1)
+static void _swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define swf_ldr_d(r0, r1)            _swf_ldr_d(_jit, r0, r1)
+static void _swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define swf_ldi_f(r0, i0)            _swf_ldi_f(_jit, r0, i0)
+static void _swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0);
+#  define swf_ldi_d(r0, i0)            _swf_ldi_d(_jit, r0, i0)
+static void _swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0);
+#  define swf_ldxr_f(r0, r1, r2)       _swf_ldxr_f(_jit, r0, r1, r2)
+static void _swf_ldxr_f(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define swf_ldxr_d(r0, r1, r2)       _swf_ldxr_d(_jit, r0, r1, r2)
+static void _swf_ldxr_d(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define swf_ldxi_f(r0, r1, i0)       _swf_ldxi_f(_jit, r0, r1, i0)
+static void _swf_ldxi_f(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_word_t i0);
+#  define swf_ldxi_d(r0, r1, i0)       _swf_ldxi_d(_jit, r0, r1, i0)
+static void _swf_ldxi_d(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_word_t i0);
+#  define swf_str_f(r0, r1)            _swf_str_f(_jit, r0, r1)
+static void _swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define swf_str_d(r0, r1)            _swf_str_d(_jit, r0, r1)
+static void _swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define swf_sti_f(i0, r0)            _swf_sti_f(_jit, i0, r0)
+static void _swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0);
+#  define swf_sti_d(i0, r0)            _swf_sti_d(_jit, i0, r0)
+static void _swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0);
+#  define swf_stxr_f(r0, r1, r2)       _swf_stxr_f(_jit, r0, r1, r2)
+static void _swf_stxr_f(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define swf_stxr_d(r0, r1, r2)       _swf_stxr_d(_jit, r0, r1, r2)
+static void _swf_stxr_d(jit_state_t *_jit,
+                        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define swf_stxi_f(i0, r0, r1)       _swf_stxi_f(_jit, i0, r0, r1)
+static void _swf_stxi_f(jit_state_t *_jit,
+                        jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define swf_stxi_d(i0, r0, r1)       _swf_stxi_d(_jit, i0, r0, r1)
+static void _swf_stxi_d(jit_state_t *_jit,
+                        jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define swf_vaarg_d(r0, r1)          _swf_vaarg_d(_jit, r0, r1)
+static void _swf_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#endif
+
+#if CODE
+/*
+ * Byte offset associated with soft-float register number rn: its distance
+ * from 16, scaled by 4.  The argument is parenthesized so that any
+ * expression can be passed safely.
+ */
+#define swf_off(rn)                    (((rn) - 16) << 2)
+
+/*
+ * Emit a call to `function'.
+ *
+ * Unless jit_exchange_p() holds, the displacement from the current pc is
+ * computed (halfword units in Thumb mode, word units otherwise, minus 2)
+ * and, when it passes _s24P(), an immediate BL is emitted.  In every other
+ * case the address is loaded into `regno' with movi and a BLX through that
+ * register is emitted.  `label' names the register-call path and must be
+ * unique within the enclosing function.
+ */
+#define swf_call(function, label, regno)                               \
+    do {                                                               \
+        jit_word_t      d;                                             \
+        if (jit_exchange_p())                                          \
+            goto label;                                                \
+        d = (((jit_word_t)function - _jit->pc.w) >>                    \
+             (jit_thumb_p() ? 1 : 2)) - 2;                             \
+        if (_s24P(d)) {                                                \
+            if (jit_thumb_p()) {                                       \
+                T2_BLI(encode_thumb_jump(d));                          \
+            }                                                          \
+            else {                                                     \
+                BLI(d & 0x00ffffff);                                   \
+            }                                                          \
+            break;                                                     \
+        }                                                              \
+    label:                                                             \
+        movi(regno, (jit_word_t)function);                             \
+        if (jit_thumb_p()) {                                           \
+            T1_BLX(regno);                                             \
+        }                                                              \
+        else {                                                         \
+            BLX(regno);                                                \
+        }                                                              \
+    } while (0)
+/*
+ * Same as swf_call, except that the register used for the BLX path is not
+ * supplied by the caller: it is obtained with jit_get_reg(jit_class_gpr)
+ * only when that path is taken, and released again right after the call
+ * instruction has been emitted.
+ */
+#define swf_call_with_get_reg(function, label)                         \
+    do {                                                               \
+        jit_word_t      d;                                             \
+        jit_int32_t     reg;                                           \
+        if (jit_exchange_p())                                          \
+            goto label;                                                \
+        d = (((jit_word_t)function - _jit->pc.w) >>                    \
+             (jit_thumb_p() ? 1 : 2)) - 2;                             \
+        if (_s24P(d)) {                                                \
+            if (jit_thumb_p()) {                                       \
+                T2_BLI(encode_thumb_jump(d));                          \
+            }                                                          \
+            else {                                                     \
+                BLI(d & 0x00ffffff);                                   \
+            }                                                          \
+            break;                                                     \
+        }                                                              \
+    label:                                                             \
+        reg = jit_get_reg(jit_class_gpr);                              \
+        movi(rn(reg), (jit_word_t)function);                           \
+        if (jit_thumb_p()) {                                           \
+            T1_BLX(rn(reg));                                           \
+        }                                                              \
+        else {                                                         \
+            BLX(rn(reg));                                              \
+        }                                                              \
+        jit_unget_reg(reg);                                            \
+    } while (0)
+/* Emit LDRIN, using the T2_ encoding when generating Thumb code. */
+#define swf_ldrin(rt, rn, im)                                          \
+    do {                                                               \
+        if (!jit_thumb_p()) {                                          \
+            LDRIN(rt, rn, im);                                         \
+        }                                                              \
+        else {                                                         \
+            T2_LDRIN(rt, rn, im);                                      \
+        }                                                              \
+    } while (0)
+/* Emit STRIN, using the T2_ encoding when generating Thumb code. */
+#define swf_strin(rt, rn, im)                                          \
+    do {                                                               \
+        if (!jit_thumb_p()) {                                          \
+            STRIN(rt, rn, im);                                         \
+        }                                                              \
+        else {                                                         \
+            T2_STRIN(rt, rn, im);                                      \
+        }                                                              \
+    } while (0)
+/*
+ * Emit BICI with the immediate run through the encoder matching the
+ * current instruction set (encode_arm_immediate or encode_thumb_immediate).
+ */
+#define swf_bici(rt, rn, im)                                           \
+    do {                                                               \
+        if (!jit_thumb_p()) {                                          \
+            BICI(rt, rn, encode_arm_immediate(im));                    \
+        }                                                              \
+        else {                                                         \
+            T2_BICI(rt, rn, encode_thumb_immediate(im));               \
+        }                                                              \
+    } while (0)
+
+#if !defined(__GNUC__)
+/*
+ * Plain C definitions of the soft-float helper entry points referenced by
+ * this file, compiled only when __GNUC__ is not defined.  Each one simply
+ * applies the corresponding C operator or conversion.
+ */
+
+/* Arithmetic. */
+float
+__addsf3(float a, float b)
+{
+    return a + b;
+}
+
+double
+__adddf3(double a, double b)
+{
+    return a + b;
+}
+
+float
+__aeabi_fsub(float a, float b)
+{
+    return a - b;
+}
+
+double
+__aeabi_dsub(double a, double b)
+{
+    return a - b;
+}
+
+float
+__aeabi_fmul(float a, float b)
+{
+    return a * b;
+}
+
+double
+__aeabi_dmul(double a, double b)
+{
+    return a * b;
+}
+
+float
+__aeabi_fdiv(float a, float b)
+{
+    return a / b;
+}
+
+double
+__aeabi_ddiv(double a, double b)
+{
+    return a / b;
+}
+
+/* Conversions. */
+float
+__aeabi_i2f(int a)
+{
+    return (float)a;
+}
+
+double
+__aeabi_i2d(int a)
+{
+    return (double)a;
+}
+
+float
+__aeabi_d2f(double a)
+{
+    return (float)a;
+}
+
+double
+__aeabi_f2d(float a)
+{
+    return (double)a;
+}
+
+int
+__aeabi_f2iz(float a)
+{
+    return (int)a;
+}
+
+int
+__aeabi_d2iz(double a)
+{
+    return (int)a;
+}
+
+/* Comparisons: 1 when the relation holds, 0 otherwise. */
+int
+__aeabi_fcmplt(float a, float b)
+{
+    return a < b;
+}
+
+int
+__aeabi_dcmplt(double a, double b)
+{
+    return a < b;
+}
+
+int
+__aeabi_fcmple(float a, float b)
+{
+    return a <= b;
+}
+
+int
+__aeabi_dcmple(double a, double b)
+{
+    return a <= b;
+}
+
+int
+__aeabi_fcmpeq(float a, float b)
+{
+    return a == b;
+}
+
+int
+__aeabi_dcmpeq(double a, double b)
+{
+    return a == b;
+}
+
+int
+__aeabi_fcmpge(float a, float b)
+{
+    return a >= b;
+}
+
+int
+__aeabi_dcmpge(double a, double b)
+{
+    return a >= b;
+}
+
+int
+__aeabi_fcmpgt(float a, float b)
+{
+    return a > b;
+}
+
+int
+__aeabi_dcmpgt(double a, double b)
+{
+    return a > b;
+}
+
+/* Unordered test: true when either operand compares unequal to itself. */
+int
+__aeabi_fcmpun(float a, float b)
+{
+    return a != a || b != b;
+}
+
+int
+__aeabi_dcmpun(double a, double b)
+{
+    return a != a || b != b;
+}
+#endif
+
+/*
+ * Emit a call to the unary single-precision helper i0.
+ *
+ * The operand r1 is placed in _R0_REGNO and, after the call, the value in
+ * _R0_REGNO is written to r0.  A register for which jit_fpr_p() holds is
+ * accessed through swf_ldrin/swf_strin at swf_off(reg) + 8 relative to
+ * _FP_REGNO; any other register is copied with movr.  _R1_REGNO is the
+ * register handed to swf_call for its register-call path.
+ */
+static void
+_swf_ff(jit_state_t *_jit, float(*i0)(float),
+        jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    /* operand -> R0 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call(i0, fallback, _R1_REGNO);
+    /* R0 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+    }
+    else {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the unary double-precision helper i0.
+ *
+ * The operand r1 is placed in _R0_REGNO/_R1_REGNO and the pair is written
+ * to r0 after the call.  For a register satisfying jit_fpr_p() the two
+ * words live at swf_off(reg) + 8 and swf_off(reg) + 4 relative to
+ * _FP_REGNO; a single LDRDIN/STRDIN is used when not in Thumb mode and
+ * jit_armv5e_p() holds, two word accesses otherwise.  Any other register
+ * is treated as the pair reg, reg + 1.
+ */
+static void
+_swf_dd(jit_state_t *_jit, double (*i0)(double),
+        jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    /* operand -> R0:R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call(i0, fallback, _R2_REGNO);
+    /* R0:R1 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+        movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+        swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+        STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the binary single-precision helper i0, i.e. r0 = i0(r1, r2).
+ *
+ * r1 is placed in _R0_REGNO and r2 in _R1_REGNO; after the call the value in
+ * _R0_REGNO is written to r0.  A register for which jit_fpr_p() holds is
+ * accessed through swf_ldrin/swf_strin at swf_off(reg) + 8 relative to
+ * _FP_REGNO; any other register is copied with movr.  _R3_REGNO is the
+ * register handed to swf_call for its register-call path.
+ */
+static void
+_swf_fff(jit_state_t *_jit, float (*i0)(float, float),
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    if (jit_fpr_p(r2))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    else
+       /* Second argument comes from r2 (this used to copy r1 by mistake). */
+       movr(_R1_REGNO, r2);
+    swf_call(i0, fallback, _R3_REGNO);
+    if (jit_fpr_p(r0))
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the binary double-precision helper i0, i.e.
+ * r0 = i0(r1, r2).
+ *
+ * r1 goes to _R0_REGNO/_R1_REGNO and r2 to _R2_REGNO/_R3_REGNO; the result
+ * pair _R0_REGNO/_R1_REGNO is written to r0.  Registers satisfying
+ * jit_fpr_p() are accessed at swf_off(reg) + 8 / + 4 relative to
+ * _FP_REGNO (one LDRDIN/STRDIN when not in Thumb mode and jit_armv5e_p()
+ * holds); other registers are treated as the pair reg, reg + 1.  All four
+ * argument registers are in use, so the call goes through
+ * swf_call_with_get_reg.
+ */
+static void
+_swf_ddd(jit_state_t *_jit, double (*i0)(double, double),
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    /* first operand -> R0:R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* second operand -> R2:R3 */
+    if (!jit_fpr_p(r2)) {
+        movr(_R2_REGNO, r2);
+        movr(_R3_REGNO, r2 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+        swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+    }
+    else {
+        LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call_with_get_reg(i0, fallback);
+    /* R0:R1 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+        movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+        swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+        STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the binary single-precision helper i0 with an immediate
+ * second operand, i.e. r0 = i0(r1, i1).
+ *
+ * r1 is placed in _R0_REGNO; the bit pattern of i1, obtained through a
+ * union, is loaded into _R1_REGNO with movi.  The value left in _R0_REGNO
+ * by the call is written to r0.
+ */
+static void
+_swf_fff_(jit_state_t *_jit, float (*i0)(float, float),
+          jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    union {
+        jit_int32_t     word;
+        jit_float32_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i1;
+    /* register operand -> R0 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* immediate operand -> R1 */
+    movi(_R1_REGNO, bits.word);
+    swf_call(i0, fallback, _R3_REGNO);
+    /* R0 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+    }
+    else {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a single-precision reverse subtract with an immediate: the bit
+ * pattern of i0 is loaded into _R0_REGNO (first argument), r1 is placed in
+ * _R1_REGNO (second argument) and __aeabi_fsub is called, so the helper
+ * sees (i0, r1).  The value left in _R0_REGNO is written to r0.
+ */
+static void
+_swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    union {
+        jit_int32_t     word;
+        jit_float32_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i0;
+    /* immediate -> R0 */
+    movi(_R0_REGNO, bits.word);
+    /* register operand -> R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R1_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call(__aeabi_fsub, fallback, _R3_REGNO);
+    /* R0 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+    }
+    else {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the binary double-precision helper i0 with an immediate
+ * second operand, i.e. r0 = i0(r1, i1).
+ *
+ * r1 goes to _R0_REGNO/_R1_REGNO.  The two 32-bit halves of i1, read
+ * through a union, are loaded into _R2_REGNO (element 0) and _R3_REGNO
+ * (element 1).  The result pair _R0_REGNO/_R1_REGNO is written to r0.
+ */
+static void
+_swf_ddd_(jit_state_t *_jit, double (*i0)(double, double),
+          jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    union {
+        jit_int32_t     word[2];
+        jit_float64_t   value;
+    } bits;
+    jit_get_reg_args();
+
+    bits.value = i1;
+    /* register operand -> R0:R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* immediate operand -> R2:R3 */
+    movi(_R2_REGNO, bits.word[0]);
+    movi(_R3_REGNO, bits.word[1]);
+    swf_call_with_get_reg(i0, fallback);
+    /* R0:R1 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+        movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+        swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+        STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a double-precision reverse subtract with an immediate: the two
+ * halves of i0 are loaded into _R0_REGNO/_R1_REGNO (first argument), r1 is
+ * placed in _R2_REGNO/_R3_REGNO (second argument) and __aeabi_dsub is
+ * called, so the helper sees (i0, r1).  The result pair
+ * _R0_REGNO/_R1_REGNO is written to r0.
+ */
+static void
+_swf_rsbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    union {
+        jit_int32_t     word[2];
+        jit_float64_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i0;
+    /* immediate -> R0:R1 */
+    movi(_R0_REGNO, bits.word[0]);
+    movi(_R1_REGNO, bits.word[1]);
+    /* register operand -> R2:R3 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R2_REGNO, r1);
+        movr(_R3_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call_with_get_reg(__aeabi_dsub, fallback);
+    /* R0:R1 -> result */
+    if (!jit_fpr_p(r0)) {
+        movr(r0, _R0_REGNO);
+        movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+        swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+        STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the single-precision comparison helper i0 on (r1, r2) and
+ * copy its integer result from _R0_REGNO into r0 with movr.
+ */
+static void
+_swf_iff(jit_state_t *_jit, int (*i0)(float, float),
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    /* first operand -> R0 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* second operand -> R1 */
+    if (!jit_fpr_p(r2)) {
+        movr(_R1_REGNO, r2);
+    }
+    else {
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the double-precision comparison helper i0 on (r1, r2) and
+ * copy its integer result from _R0_REGNO into r0 with movr.  r1 is passed
+ * in _R0_REGNO/_R1_REGNO and r2 in _R2_REGNO/_R3_REGNO.
+ */
+static void
+_swf_idd(jit_state_t *_jit, int (*i0)(double, double),
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    /* first operand -> R0:R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* second operand -> R2:R3 */
+    if (!jit_fpr_p(r2)) {
+        movr(_R2_REGNO, r2);
+        movr(_R3_REGNO, r2 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+        swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+    }
+    else {
+        LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the single-precision comparison helper i0 on (r1, i1),
+ * where i1 is an immediate whose bit pattern is loaded into _R1_REGNO, and
+ * copy the integer result from _R0_REGNO into r0 with movr.
+ */
+static void
+_swf_iff_(jit_state_t *_jit, int (*i0)(float, float),
+          jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    union {
+        jit_int32_t     word;
+        jit_float32_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i1;
+    /* register operand -> R0 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* immediate operand -> R1 */
+    movi(_R1_REGNO, bits.word);
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the double-precision comparison helper i0 on (r1, i1),
+ * where i1 is an immediate whose two halves are loaded into
+ * _R2_REGNO/_R3_REGNO, and copy the integer result from _R0_REGNO into r0
+ * with movr.
+ */
+static void
+_swf_idd_(jit_state_t *_jit, int (*i0)(double, double),
+          jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    union {
+        jit_int32_t     word[2];
+        jit_float64_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i1;
+    /* register operand -> R0:R1 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    /* immediate operand -> R2:R3 */
+    movi(_R2_REGNO, bits.word[0]);
+    movi(_R3_REGNO, bits.word[1]);
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <relation>" single-precision comparison of r1 and
+ * r2 into r0.
+ *
+ * __aeabi_fcmpun is called first.  When it returns nonzero, r0 is set to 1
+ * by a conditional move and a conditional branch skips the rest.
+ * Otherwise the operands are loaded again, the helper i0 is called and its
+ * result is copied from _R0_REGNO to r0.  The branch is emitted with a
+ * zero displacement and patched afterwards to land after the final movr.
+ */
+static void
+_swf_iunff(jit_state_t *_jit, int (*i0)(float, float),
+           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          skip;
+    jit_get_reg_args();
+    /* operands for the unordered test */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    if (!jit_fpr_p(r2)) {
+        movr(_R1_REGNO, r2);
+    }
+    else {
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* unordered: r0 = 1 and jump to the end */
+    if (!jit_thumb_p()) {
+        CMPI(_R0_REGNO, 0);
+        CC_MOVI(ARM_CC_NE, r0, 1);
+        skip = _jit->pc.w;
+        CC_B(ARM_CC_NE, 0);
+    }
+    else {
+        T1_CMPI(_R0_REGNO, 0);
+        IT(ARM_CC_NE);
+        if (r0 >= 8) {
+            T2_MOVI(r0, 1);
+        }
+        else {
+            T1_MOVI(r0, 1);
+        }
+        skip = _jit->pc.w;
+        T2_CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: reload the operands and run the requested comparison */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    if (!jit_fpr_p(r2)) {
+        movr(_R1_REGNO, r2);
+    }
+    else {
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, skip, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <relation>" double-precision comparison of r1 and
+ * r2 into r0.
+ *
+ * __aeabi_dcmpun is called first.  When it returns nonzero, r0 is set to 1
+ * by a conditional move and a conditional branch skips the rest.
+ * Otherwise the operands are loaded again, the helper i0 is called and its
+ * result is copied from _R0_REGNO to r0.  The branch is emitted with a
+ * zero displacement and patched afterwards to land after the final movr.
+ */
+static void
+_swf_iundd(jit_state_t *_jit, int (*i0)(double, double),
+           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          skip;
+    jit_get_reg_args();
+    /* operands for the unordered test: r1 -> R0:R1, r2 -> R2:R3 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    if (!jit_fpr_p(r2)) {
+        movr(_R2_REGNO, r2);
+        movr(_R3_REGNO, r2 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+        swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+    }
+    else {
+        LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    /* unordered: r0 = 1 and jump to the end */
+    if (!jit_thumb_p()) {
+        CMPI(_R0_REGNO, 0);
+        CC_MOVI(ARM_CC_NE, r0, 1);
+        skip = _jit->pc.w;
+        CC_B(ARM_CC_NE, 0);
+    }
+    else {
+        T1_CMPI(_R0_REGNO, 0);
+        IT(ARM_CC_NE);
+        if (r0 >= 8) {
+            T2_MOVI(r0, 1);
+        }
+        else {
+            T1_MOVI(r0, 1);
+        }
+        skip = _jit->pc.w;
+        T2_CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: reload the operands and run the requested comparison */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    if (!jit_fpr_p(r2)) {
+        movr(_R2_REGNO, r2);
+        movr(_R3_REGNO, r2 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+        swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+    }
+    else {
+        LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    }
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, skip, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <relation>" single-precision comparison of r1 with
+ * the immediate i1 into r0.
+ *
+ * __aeabi_fcmpun is called first.  When it returns nonzero, r0 is set to 1
+ * by a conditional move and a conditional branch skips the rest.
+ * Otherwise the operands are loaded again, the helper i0 is called and its
+ * result is copied from _R0_REGNO to r0.  The branch is emitted with a
+ * zero displacement and patched afterwards to land after the final movr.
+ */
+static void
+_swf_iunff_(jit_state_t *_jit, int (*i0)(float, float),
+            jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    jit_word_t          skip;
+    union {
+        jit_int32_t     word;
+        jit_float32_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i1;
+    /* operands for the unordered test */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    movi(_R1_REGNO, bits.word);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* unordered: r0 = 1 and jump to the end */
+    if (!jit_thumb_p()) {
+        CMPI(_R0_REGNO, 0);
+        CC_MOVI(ARM_CC_NE, r0, 1);
+        skip = _jit->pc.w;
+        CC_B(ARM_CC_NE, 0);
+    }
+    else {
+        T1_CMPI(_R0_REGNO, 0);
+        IT(ARM_CC_NE);
+        if (r0 >= 8) {
+            T2_MOVI(r0, 1);
+        }
+        else {
+            T1_MOVI(r0, 1);
+        }
+        skip = _jit->pc.w;
+        T2_CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: reload the operands and run the requested comparison */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+    }
+    else {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    movi(_R1_REGNO, bits.word);
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, skip, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <relation>" double-precision comparison of r1 with
+ * the immediate i1 into r0.
+ *
+ * __aeabi_dcmpun is called first.  When it returns nonzero, r0 is set to 1
+ * by a conditional move and a conditional branch skips the rest.
+ * Otherwise the operands are loaded again, the helper i0 is called and its
+ * result is copied from _R0_REGNO to r0.  The branch is emitted with a
+ * zero displacement and patched afterwards to land after the final movr.
+ */
+static void
+_swf_iundd_(jit_state_t *_jit, int (*i0)(double, double),
+            jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    jit_word_t          skip;
+    union {
+        jit_int32_t     word[2];
+        jit_float64_t   value;
+    } bits;
+    jit_get_reg_args();
+    bits.value = i1;
+    /* operands for the unordered test: r1 -> R0:R1, i1 -> R2:R3 */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    movi(_R2_REGNO, bits.word[0]);
+    movi(_R3_REGNO, bits.word[1]);
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    /* unordered: r0 = 1 and jump to the end */
+    if (!jit_thumb_p()) {
+        CMPI(_R0_REGNO, 0);
+        CC_MOVI(ARM_CC_NE, r0, 1);
+        skip = _jit->pc.w;
+        CC_B(ARM_CC_NE, 0);
+    }
+    else {
+        T1_CMPI(_R0_REGNO, 0);
+        IT(ARM_CC_NE);
+        if (r0 >= 8) {
+            T2_MOVI(r0, 1);
+        }
+        else {
+            T1_MOVI(r0, 1);
+        }
+        skip = _jit->pc.w;
+        T2_CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: reload the operands and run the requested comparison */
+    if (!jit_fpr_p(r1)) {
+        movr(_R0_REGNO, r1);
+        movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+        swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+        LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    movi(_R2_REGNO, bits.word[0]);
+    movi(_R3_REGNO, bits.word[1]);
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, skip, _jit->pc.w);
+    jit_unget_reg_args();
+}
+/*
+ * Emit a call to the single-precision comparison helper i0 on operands
+ * r0 and r1, compare the helper's result in r0 with zero, and emit a
+ * conditional branch (condition cc) to address i1.
+ * Returns the code position of the emitted branch instruction.
+ */
+static jit_word_t
+_swf_bff(jit_state_t *_jit, int (*i0)(float, float), int cc,
+        jit_word_t i1, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(i0, fallback, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 1) - 2;  /* displacement in 2-byte units */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 2) - 2;  /* displacement in 4-byte units */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Double-precision counterpart of _swf_bff: place operand r0 in r0/r1 and
+ * operand r1 in r2/r3, call the comparison helper i0, compare its result
+ * with zero and emit a conditional branch (condition cc) to address i1.
+ * Returns the code position of the emitted branch instruction.
+ */
+static jit_word_t
+_swf_bdd(jit_state_t *_jit, int (*i0)(double, double), int cc,
+        jit_word_t i1, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 1) - 2;  /* displacement in 2-byte units */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 2) - 2;  /* displacement in 4-byte units */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Variant of _swf_bff whose second operand is the float immediate i2:
+ * call the comparison helper i0 on operand r0 and i2, compare the result
+ * with zero and emit a conditional branch (condition cc) to address i1.
+ * Returns the code position of the emitted branch instruction.
+ */
+static jit_word_t
+_swf_bff_(jit_state_t *_jit, int (*i0)(float, float), int cc,
+         jit_word_t i1, jit_int32_t r0, jit_float32_t i2)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    data.f = i2;  /* reinterpret the immediate as a 32-bit word */
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(i0, fallback, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 1) - 2;  /* displacement in 2-byte units */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 2) - 2;  /* displacement in 4-byte units */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Variant of _swf_bdd whose second operand is the double immediate i2:
+ * operand r0 goes to r0/r1, the two words of i2 go to r2/r3, then the
+ * helper i0 is called, its result compared with zero, and a conditional
+ * branch (condition cc) to address i1 is emitted.
+ * Returns the code position of the emitted branch instruction.
+ */
+static jit_word_t
+_swf_bdd_(jit_state_t *_jit, int (*i0)(double, double), int cc,
+         jit_word_t i1, jit_int32_t r0, jit_float64_t i2)
+{
+    jit_word_t         w, d;
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i2;  /* reinterpret the immediate as two 32-bit words */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 1) - 2;  /* displacement in 2-byte units */
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       d = ((i1 - w) >> 2) - 2;  /* displacement in 4-byte units */
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Emit a branch to address i0 that combines an unordered test with an
+ * equality test on the single-precision operands r0 and r1.
+ * With eq non-zero the branch is taken when __aeabi_fcmpun returns
+ * non-zero or __aeabi_fcmpeq returns non-zero; with eq zero it is taken
+ * only when both helpers return zero.
+ * Returns the code position of the unconditional branch to i0.
+ */
+static jit_word_t
+_swf_bunff(jit_state_t *_jit, int eq,
+          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d, j0, j1;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;  /* forward branch, patched below */
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;  /* forward branch, patched below */
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are set up again for the equality call */
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(__aeabi_fcmpeq, fcmpeq, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: go to the branch below */
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: go to the branch below */
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: skip the branch */
+    patch_at(arm_patch_jump, j1, _jit->pc.w);  /* j1 skips the branch to i0 */
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a branch to address i0 that combines an unordered test with an
+ * equality test on the double-precision operands r0 and r1.
+ * With eq non-zero the branch is taken when __aeabi_dcmpun returns
+ * non-zero or __aeabi_dcmpeq returns non-zero; with eq zero it is taken
+ * only when both helpers return zero.
+ * Operand r0 is passed in r0/r1 and operand r1 in r2/r3.
+ * Returns the code position of the unconditional branch to i0.
+ */
+static jit_word_t
+_swf_bundd(jit_state_t *_jit, int eq,
+          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d, j0, j1;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       /* second operand comes from the r1 pair (was wrongly r0) */
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are set up again for the equality call */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       /* second operand comes from the r1 pair (was wrongly r0) */
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpeq, dcmpeq);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered: go to the branch below */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered: go to the branch below */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* unordered: skip the branch */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Variant of _swf_bunff whose second operand is the float immediate i1.
+ * With eq non-zero the branch to i0 is taken when __aeabi_fcmpun returns
+ * non-zero or __aeabi_fcmpeq returns non-zero; with eq zero it is taken
+ * only when both helpers return zero.
+ * Returns the code position of the unconditional branch to i0.
+ */
+static jit_word_t
+_swf_bunff_(jit_state_t *_jit, int eq,
+           jit_word_t i0, jit_int32_t r0, jit_float32_t i1)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_word_t         w, d, j0, j1;
+    data.f = i1;  /* reinterpret the immediate as a 32-bit word */
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;  /* forward branch, patched below */
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;  /* forward branch, patched below */
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are set up again for the equality call */
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(__aeabi_fcmpeq, fcmpeq, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: go to the branch below */
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: go to the branch below */
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);  /* unordered: skip the branch */
+    patch_at(arm_patch_jump, j1, _jit->pc.w);  /* j1 skips the branch to i0 */
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Variant of _swf_bundd whose second operand is the double immediate i1.
+ * With eq non-zero the branch to i0 is taken when __aeabi_dcmpun returns
+ * non-zero or __aeabi_dcmpeq returns non-zero; with eq zero it is taken
+ * only when both helpers return zero.
+ * Operand r0 is passed in r0/r1 and the two words of i1 in r2/r3.
+ * Returns the code position of the unconditional branch to i0.
+ */
+static jit_word_t
+_swf_bundd_(jit_state_t *_jit, int eq,
+           jit_word_t i0, jit_int32_t r0, jit_float64_t i1)
+{
+    jit_word_t         w, d, j0, j1;
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i1;
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    /* second argument named after the double helper, as in _swf_bundd */
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are set up again for the equality call */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(__aeabi_dcmpeq, dcmpeq);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered: go to the branch below */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered: go to the branch below */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* unordered: skip the branch */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+/*
+ * Emit code that passes the integer in GPR r1 to __aeabi_i2f and stores
+ * the returned word in r0, which may be an FPR stack slot or a GPR.
+ */
+static void
+_swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    movr(_R0_REGNO, r1);
+    swf_call(__aeabi_i2f, i2f, _R1_REGNO);
+    if (jit_fpr_p(r0))
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+/*
+ * Emit code that passes the integer in GPR r1 to __aeabi_i2d and stores
+ * the returned r0/r1 pair in r0, which may be an FPR stack slot or a
+ * GPR pair (r0, r0 + 1).
+ */
+static void
+_swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    movr(_R0_REGNO, r1);
+    swf_call(__aeabi_i2d, i2d, _R2_REGNO);
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);  /* both words in one store */
+       else {
+           swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    jit_unget_reg_args();
+}
+/*
+ * Emit code that passes the double operand r1 (FPR stack slot or GPR
+ * pair) in r0/r1 to __aeabi_d2f and stores the returned word in r0,
+ * which may be an FPR stack slot or a GPR.
+ */
+static void
+_swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);  /* both words in one load */
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    swf_call(__aeabi_d2f, d2f, _R2_REGNO);
+    if (jit_fpr_p(r0))
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+/*
+ * Emit code that passes the float operand r1 (FPR stack slot or GPR) to
+ * __aeabi_f2d and stores the returned r0/r1 pair in r0, which may be an
+ * FPR stack slot or a GPR pair (r0, r0 + 1).
+ */
+static void
+_swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    swf_call(__aeabi_f2d, f2d, _R1_REGNO);
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);  /* both words in one store */
+       else {
+           swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    jit_unget_reg_args();
+}
+/*
+ * Emit code that converts the float operand r1 to an integer in GPR r0
+ * by calling __aeabi_f2iz.
+ * When NAN_TO_INT_IS_ZERO is false, an inline test (modelled on
+ * __aeabi_fcmpun, per the markers below) runs first; if neither of its
+ * two "not NaN" branches is taken, r0 is set to 0x80000000 and the
+ * helper call is skipped.
+ */
+static void
+_swf_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if !NAN_TO_INT_IS_ZERO
+    jit_word_t         is_nan;
+    jit_word_t         fast_not_nan;
+    jit_word_t         slow_not_nan;
+#endif
+    jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+#if !NAN_TO_INT_IS_ZERO
+    /* >> based on fragment of __aeabi_fcmpun */
+    lshi(_R2_REGNO, _R0_REGNO, 1);  /* shift the sign bit out */
+    if (jit_thumb_p())
+       torrrs(THUMB2_MVN|ARM_S, _R0_REGNO, _R3_REGNO, _R2_REGNO,
+              encode_thumb_shift(24, ARM_ASR));
+    else
+       corrrs(ARM_CC_AL, ARM_MVN|ARM_S|ARM_ASR,
+              _R0_REGNO, _R3_REGNO, _R2_REGNO, 24);
+    fast_not_nan = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_NE, 0);
+       tshift(THUMB2_LSLI|ARM_S, _R0_REGNO, _R3_REGNO, 9);
+    }
+    else {
+       CC_B(ARM_CC_NE, 0);
+       cshift(ARM_CC_AL, ARM_S|ARM_LSL, _R0_REGNO, _R3_REGNO, _R0_REGNO, 9);
+    }
+    slow_not_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_CC_B(ARM_CC_EQ, 0);
+    else
+       CC_B(ARM_CC_EQ, 0);
+    movi(r0, 0x80000000);  /* result used when the input is NaN */
+    is_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_B(0);
+    else
+       B(0);
+    patch_at(arm_patch_jump, fast_not_nan, _jit->pc.w);
+    patch_at(arm_patch_jump, slow_not_nan, _jit->pc.w);
+    /* << based on fragment of __aeabi_fcmpun */
+#endif
+    swf_call(__aeabi_f2iz, f2iz, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+#if !NAN_TO_INT_IS_ZERO
+    patch_at(arm_patch_jump, is_nan, _jit->pc.w);  /* NaN path rejoins after the call */
+#endif
+    jit_unget_reg_args();
+}
+/*
+ * Emit code that converts the double operand r1 to an integer in GPR r0
+ * by calling __aeabi_d2iz.
+ * When NAN_TO_INT_IS_ZERO is false, an inline test (modelled on
+ * __aeabi_dcmpun, per the markers below) runs first; if neither of its
+ * two "not NaN" branches is taken, r0 is set to 0x80000000 and the
+ * helper call is skipped.
+ */
+static void
+_swf_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if !NAN_TO_INT_IS_ZERO
+    jit_word_t         is_nan;
+    jit_word_t         fast_not_nan;
+    jit_word_t         slow_not_nan;
+#endif
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+#if !NAN_TO_INT_IS_ZERO
+    /* >> based on fragment of __aeabi_dcmpun */
+    lshi(_R3_REGNO, _R1_REGNO, 1);  /* shift the top bit of r1 out */
+    if (jit_thumb_p())
+       torrrs(THUMB2_MVN|ARM_S, _R0_REGNO, _R3_REGNO, _R3_REGNO,
+              encode_thumb_shift(21, ARM_ASR));
+    else
+       corrrs(ARM_CC_AL, ARM_MVN|ARM_S|ARM_ASR,
+              _R0_REGNO, _R3_REGNO, _R3_REGNO, 21);
+    fast_not_nan = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_NE, 0);
+       torrrs(THUMB2_ORR|ARM_S, _R0_REGNO, _R3_REGNO, _R1_REGNO,
+              encode_thumb_shift(12, ARM_LSL));
+    }
+    else {
+       CC_B(ARM_CC_NE, 0);
+       corrrs(ARM_CC_AL, ARM_ORR|ARM_S|ARM_LSL,
+              _R0_REGNO, _R3_REGNO, _R1_REGNO, 12);
+    }
+    slow_not_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_CC_B(ARM_CC_EQ, 0);
+    else
+       CC_B(ARM_CC_EQ, 0);
+    movi(r0, 0x80000000);  /* result used when the input is NaN */
+    is_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_B(0);
+    else
+       B(0);
+    patch_at(arm_patch_jump, fast_not_nan, _jit->pc.w);
+    patch_at(arm_patch_jump, slow_not_nan, _jit->pc.w);
+    /* << based on fragment of __aeabi_dcmpun */
+#endif
+    swf_call(__aeabi_d2iz, d2iz, _R3_REGNO);
+    movr(r0, _R0_REGNO);
+#if !NAN_TO_INT_IS_ZERO
+    patch_at(arm_patch_jump, is_nan, _jit->pc.w);  /* NaN path rejoins after the call */
+#endif
+    jit_unget_reg_args();
+}
+/*
+ * Emit a single-word float move from r1 to r0. Either operand may be an
+ * FPR stack slot (FP-relative) or a GPR; nothing is emitted when
+ * r0 == r1.
+ */
+static void
+_swf_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 != r1) {
+       if (jit_fpr_p(r1)) {
+           reg = jit_get_reg(jit_class_gpr);  /* scratch for the slot load */
+           swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+           if (jit_fpr_p(r0))
+               swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           else
+               movr(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else if (jit_fpr_p(r0))
+           swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+       else
+           movr(r0, r1);
+    }
+}
+/*
+ * Emit a two-word double move from r1 to r0. Either operand may be an
+ * FPR stack slot (words at swf_off + 8 and swf_off + 4) or a GPR pair
+ * (r, r + 1); nothing is emitted when r0 == r1.
+ * LDRD/STRD is used only outside Thumb mode on ARMv5E and, where a
+ * scratch pair is needed, only when one is available.
+ */
+static void
+_swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 != r1) {
+       if (jit_fpr_p(r1)) {
+           if (!jit_thumb_p() && jit_armv5e_p() &&
+               (reg = jit_get_reg_pair()) != JIT_NOREG) {
+               LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               if (jit_fpr_p(r0))
+                   STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               else {
+                   movr(r0, rn(reg));
+                   movr(r0 + 1, rn(reg) + 1);
+               }
+               jit_unget_reg_pair(reg);
+           }
+           else {
+               /* one scratch register, words copied one at a time */
+               reg = jit_get_reg(jit_class_gpr);
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               if (jit_fpr_p(r0))
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               else
+                   movr(r0, rn(reg));
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
+               if (jit_fpr_p(r0))
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+               else
+                   movr(r0 + 1, rn(reg));
+               jit_unget_reg(reg);
+           }
+       }
+       else if (jit_fpr_p(r0)) {
+           if (!jit_thumb_p() && jit_armv5e_p() && !(r1 & 1))  /* STRD only from an even register */
+               STRDIN(r1, _FP_REGNO, swf_off(r0) + 8);
+           else {
+               swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+               swf_strin(r1 + 1, _FP_REGNO, swf_off(r0) + 4);
+           }
+       }
+       else {
+           movr(r0, r1);
+           movr(r0 + 1, r1 + 1);
+       }
+    }
+}
+/*
+ * Emit code that loads the bit pattern of the float immediate i0 into
+ * r0, which may be an FPR stack slot (written through a scratch GPR) or
+ * a GPR.
+ */
+static void
+_swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_int32_t                reg;
+    data.f = i0;  /* reinterpret the immediate as a 32-bit word */
+    if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), data.i);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else
+       movi(r0, data.i);
+}
+/*
+ * Emit code that loads the two 32-bit words of the double immediate i0
+ * into r0, which may be an FPR stack slot or a GPR pair (r0, r0 + 1).
+ * data.i[0] goes to the swf_off + 8 word / register r0, data.i[1] to the
+ * swf_off + 4 word / register r0 + 1.
+ */
+static void
+_swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    jit_int32_t                reg;
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    data.d = i0;  /* reinterpret the immediate as two 32-bit words */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           movi(rn(reg), data.i[0]);
+           movi(rn(reg) + 1, data.i[1]);
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           /* one scratch register, words stored one at a time */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), data.i[0]);
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           movi(rn(reg), data.i[1]);
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       movi(r0, data.i[0]);
+       movi(r0 + 1, data.i[1]);
+    }
+}
+/*
+ * Emit code for a single-precision absolute value: copy r1 to r0 with
+ * bit 31 (mask 0x80000000) cleared. Either operand may be an FPR stack
+ * slot or a GPR.
+ */
+static void
+_swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       reg = jit_get_reg(jit_class_gpr);
+       swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+       swf_bici(rn(reg), rn(reg), 0x80000000);  /* clear bit 31 */
+       if (jit_fpr_p(r0))
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       else
+           movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       swf_bici(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else
+       swf_bici(r0, r1, 0x80000000);
+}
+/*
+ * Emit code for a double-precision absolute value: copy the two words of
+ * r1 to r0 with bit 31 (mask 0x80000000) of one word cleared. Either
+ * operand may be an FPR stack slot or a GPR pair.
+ *
+ * NOTE(review): when r1 is an FPR the cleared word is the one at
+ * swf_off + 4 (pair register reg + 1), but when r0 is a GPR that word is
+ * written to r0 and the swf_off + 8 word to r0 + 1. When r1 is a GPR the
+ * bit is cleared in r1 itself, not r1 + 1. _swf_movr_d maps swf_off + 8
+ * to r and swf_off + 4 to r + 1, so confirm which word order the GPR
+ * paths are meant to use.
+ */
+static void
+_swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+           r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+           swf_bici(rn(reg) + 1, rn(reg) + 1, 0x80000000);
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
+           swf_bici(rn(reg), rn(reg), 0x80000000);
+           if (jit_fpr_p(r0)) {
+               swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+               if (r0 != r1) {  /* other word only needs copying when slots differ */
+                   swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               }
+           }
+           else {
+               movr(r0, rn(reg));
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               movr(r0 + 1, rn(reg));
+           }
+           jit_unget_reg(reg);
+       }
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       swf_bici(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+       movr(rn(reg), r1 + 1);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else {
+       swf_bici(r0, r1, 0x80000000);
+       if (r0 != r1)
+           movr(r0 + 1, r1 + 1);
+    }
+}
+/*
+ * Emit code for a single-precision negation: copy r1 to r0 with bit 31
+ * (mask 0x80000000) inverted. Either operand may be an FPR stack slot or
+ * a GPR.
+ */
+static void
+_swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       reg = jit_get_reg(jit_class_gpr);
+       swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+       xori(rn(reg), rn(reg), 0x80000000);  /* flip bit 31 */
+       if (jit_fpr_p(r0))
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       else
+           movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       xori(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else
+       xori(r0, r1, 0x80000000);
+}
+/*
+ * Emit code for a double-precision negation: copy the two words of r1 to
+ * r0 with bit 31 (mask 0x80000000) of one word inverted. Either operand
+ * may be an FPR stack slot or a GPR pair.
+ *
+ * NOTE(review): when r1 is an FPR the flipped word is the one at
+ * swf_off + 4 (pair register reg + 1), but when r0 is a GPR that word is
+ * written to r0 and the swf_off + 8 word to r0 + 1. When r1 is a GPR the
+ * bit is flipped in r1 itself, not r1 + 1. _swf_movr_d maps swf_off + 8
+ * to r and swf_off + 4 to r + 1, so confirm which word order the GPR
+ * paths are meant to use.
+ */
+static void
+_swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+           r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+           EORI(rn(reg) + 1, rn(reg) + 1, encode_arm_immediate(0x80000000));
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
+           xori(rn(reg), rn(reg), 0x80000000);
+           if (jit_fpr_p(r0)) {
+               swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+               if (r0 != r1) {  /* other word only needs copying when slots differ */
+                   swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               }
+           }
+           else {
+               movr(r0, rn(reg));
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               movr(r0 + 1, rn(reg));
+           }
+           jit_unget_reg(reg);
+       }
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       xori(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+       movr(rn(reg), r1 + 1);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else {
+       xori(r0, r1, 0x80000000);
+       if (r0 != r1)
+           movr(r0 + 1, r1 + 1);
+    }
+}
+/*
+ * Float "not equal", register operands: run swf_iff with __aeabi_fcmpeq,
+ * then flip bit 0 of r0. swf_iff is defined outside this view;
+ * presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_iff(__aeabi_fcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Float "not equal", immediate second operand: run swf_iff_ with
+ * __aeabi_fcmpeq, then flip bit 0 of r0. swf_iff_ is defined outside
+ * this view; presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_nei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    swf_iff_(__aeabi_fcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "not equal", register operands: run swf_idd with
+ * __aeabi_dcmpeq, then flip bit 0 of r0. swf_idd is defined outside this
+ * view; presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_idd(__aeabi_dcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "not equal", immediate second operand: run swf_idd_ with
+ * __aeabi_dcmpeq, then flip bit 0 of r0. swf_idd_ is defined outside
+ * this view; presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_nei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    swf_idd_(__aeabi_dcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Float "less or greater" (ltgt), register operands: run swf_iunff with
+ * __aeabi_fcmpeq, then flip bit 0 of r0. swf_iunff is defined outside
+ * this view; presumably it yields 1 for unordered-or-equal, as the
+ * visible _swf_iundd_ does for doubles.
+ */
+static void
+_swf_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_iunff(__aeabi_fcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Float "less or greater" (ltgt), immediate second operand: run
+ * swf_iunff_ with __aeabi_fcmpeq, then flip bit 0 of r0. swf_iunff_ is
+ * defined outside this view; presumably it yields 1 for
+ * unordered-or-equal.
+ */
+static void
+_swf_ltgti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    swf_iunff_(__aeabi_fcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "less or greater" (ltgt), register operands: run swf_iundd with
+ * __aeabi_dcmpeq, then flip bit 0 of r0. swf_iundd is defined outside
+ * this view; presumably it yields 1 for unordered-or-equal.
+ */
+static void
+_swf_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_iundd(__aeabi_dcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "less or greater" (ltgt), immediate second operand: run
+ * swf_iundd_ with __aeabi_dcmpeq, then flip bit 0 of r0. NOTE(review):
+ * swf_iundd_ presumably expands to _swf_iundd_, which sets r0 to 1 when
+ * __aeabi_dcmpun is non-zero and otherwise to the helper's result.
+ */
+static void
+_swf_ltgti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    swf_iundd_(__aeabi_dcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Float "ordered", register operands: run swf_iff with __aeabi_fcmpun,
+ * then flip bit 0 of r0. swf_iff is defined outside this view;
+ * presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_iff(__aeabi_fcmpun, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Float "ordered", immediate second operand: run swf_iff_ with
+ * __aeabi_fcmpun, then flip bit 0 of r0. swf_iff_ is defined outside
+ * this view; presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ordi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    swf_iff_(__aeabi_fcmpun, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "ordered", register operands: run swf_idd with __aeabi_dcmpun,
+ * then flip bit 0 of r0. swf_idd is defined outside this view;
+ * presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    swf_idd(__aeabi_dcmpun, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+/*
+ * Double "ordered", immediate second operand: run swf_idd_ with
+ * __aeabi_dcmpun, then flip bit 0 of r0. swf_idd_ is defined outside
+ * this view; presumably it leaves the helper's 0/1 result in r0.
+ */
+static void
+_swf_ordi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    swf_idd_(__aeabi_dcmpun, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+/*
+ * Emit a single-word float load from the address in GPR r1 into r0,
+ * which may be an FPR stack slot (written through a scratch GPR) or a
+ * GPR.
+ */
+static void
+_swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       ldxi_i(rn(reg), r1, 0);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else
+       ldxi_i(r0, r1, 0);
+}
+/*
+ * Emit a two-word double load from the address in GPR r1 into r0, which
+ * may be an FPR stack slot or a GPR pair (r0, r0 + 1). The word at
+ * offset 0 goes to the swf_off + 8 word / register r0, the word at
+ * offset 4 to the swf_off + 4 word / register r0 + 1.
+ *
+ * NOTE(review): in the final two-load path, if r0 == r1 the first load
+ * overwrites the base register before the second load uses it; confirm
+ * callers never pass that combination.
+ */
+static void
+_swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDI(rn(reg), r1, 0);
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           ldxi_i(rn(reg), r1, 0);
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(reg), r1, 4);
+           swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(reg);
+       }
+    }
+    else if (!jit_thumb_p() && jit_armv5e_p() && !(r0 & 1))  /* LDRD only into an even register */
+       LDRDI(r0, r1, 0);
+    else {
+       ldxi_i(r0, r1, 0);
+       ldxi_i(r0 + 1, r1, 4);
+    }
+}
+/*
+ * Emit a single-word float load from the absolute address i0 into r0,
+ * which may be an FPR stack slot (written through a scratch GPR) or a
+ * GPR.
+ */
+static void
+_swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       ldi_i(rn(reg), i0);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else
+       ldi_i(r0, i0);
+}
+
+/*
+ * Emit a two-word double load from the absolute address i0 into r0,
+ * which may be an FPR stack slot or a GPR pair (r0, r0 + 1). The word at
+ * i0 goes to the swf_off + 8 word / register r0, the word at i0 + 4 to
+ * the swf_off + 4 word / register r0 + 1.
+ */
+static void
+_swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+       (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+       /* the pair's first register holds the address, then the data */
+       movi(rn(rg0), i0);
+       LDRDI(rn(rg0), rn(rg0), 0);
+       STRDIN(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(rg0);
+    }
+    else {
+       /* rg1 holds the address for every remaining path */
+       rg1 = jit_get_reg(jit_class_gpr);
+       movi(rn(rg1), i0);
+       if (jit_fpr_p(r0)) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           ldxi_i(rn(rg0), rn(rg1), 0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), rn(rg1), 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       else if (!jit_thumb_p() && jit_armv5e_p() && !(r0 & 1))
+           LDRDI(r0, rn(rg1), 0);
+       else {
+           ldxi_i(r0, rn(rg1), 0);
+           /* second word is at offset 4 (was 0, duplicating the first) */
+           ldxi_i(r0 + 1, rn(rg1), 4);
+       }
+       jit_unget_reg(rg1);
+    }
+}
+
+/*
+ * Load one word (a float) from the address r1 + r2 into r0.
+ * When jit_fpr_p(r0) holds, the word goes through a scratch GPR into r0's
+ * slot, addressed off _FP_REGNO at swf_off(r0) + 8.
+ */
+static void
+_swf_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r0)) {
+       /* Destination is not an FPR: indexed load straight into it. */
+       ldxr_i(r0, r1, r2);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxr_i(rn(tmp), r1, r2);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the two-word double at address r1 + r2 into r0.
+ *
+ * Non-FPR destination: the value lands in r0 / r0 + 1, using a single LDRD
+ * when not in Thumb mode, on ARMv5E and with an even r0; otherwise the sum
+ * r1 + r2 is computed into a scratch register and two word loads are used.
+ *
+ * FPR destination (jit_fpr_p): the words are copied into r0's slot addressed
+ * off _FP_REGNO (offsets swf_off(r0) + 8 and swf_off(r0) + 4), through a
+ * scratch register pair when one can be obtained, else word by word.
+ */
+static void
+_swf_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                val, base;
+
+    if (!jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p() && !(r0 & 1)) {
+           LDRD(r0, r1, r2);
+           return;
+       }
+       base = jit_get_reg(jit_class_gpr);
+       addr(rn(base), r1, r2);
+       ldxi_i(r0, rn(base), 0);
+       ldxi_i(r0 + 1, rn(base), 4);
+       jit_unget_reg(base);
+       return;
+    }
+
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (val = jit_get_reg_pair()) != JIT_NOREG) {
+       /* Pair available: one double-word load, one double-word store. */
+       LDRD(rn(val), r1, r2);
+       STRDIN(rn(val), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(val);
+       return;
+    }
+
+    /* No pair: form the address once, then move each word in turn. */
+    base = jit_get_reg(jit_class_gpr);
+    addr(rn(base), r1, r2);
+    val = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(val), rn(base), 0);
+    swf_strin(rn(val), _FP_REGNO, swf_off(r0) + 8);
+    ldxi_i(rn(val), rn(base), 4);
+    swf_strin(rn(val), _FP_REGNO, swf_off(r0) + 4);
+    jit_unget_reg(val);
+    jit_unget_reg(base);
+}
+
+/*
+ * Load one word (a float) from the address r1 + i0 into r0.
+ * When jit_fpr_p(r0) holds, the word goes through a scratch GPR into r0's
+ * slot, addressed off _FP_REGNO at swf_off(r0) + 8.
+ */
+static void
+_swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r0)) {
+       /* Destination is not an FPR: displaced load straight into it. */
+       ldxi_i(r0, r1, i0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(tmp), r1, i0);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the two-word double at address r1 + i0 into r0.
+ *
+ * The function picks the cheapest addressing form the offset allows:
+ *   - a double-word load (LDRDI / LDRDIN) when not in Thumb mode, on
+ *     ARMv5E and with |i0| <= 255;
+ *   - two word loads with a positive displacement while i0 + 4 <= 4095;
+ *   - two word loads with a negative displacement (swf_ldrin takes the
+ *     magnitude of the offset) down to -255 in Thumb mode and -4095
+ *     otherwise;
+ *   - else r1 + i0 is computed into a scratch register first.
+ *
+ * If jit_fpr_p(r0) holds, the words are written to r0's slot addressed off
+ * _FP_REGNO (offsets swf_off(r0) + 8 and swf_off(r0) + 4); otherwise they
+ * land in r0 and r0 + 1.
+ *
+ * NOTE(review): in the negative-displacement paths, i0 in [-3, -1] makes the
+ * second magnitude -(i0 + 4) negative -- confirm callers never pass such
+ * offsets.
+ */
+static void
+_swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
+           (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+           if (i0 >= 0)
+               LDRDI(rn(rg0), r1, i0);
+           else
+               LDRDIN(rn(rg0), r1, -i0);
+           STRDIN(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(rg0);
+       }
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           ldxi_i(rn(rg0), r1, i0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), r1, i0 + 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       else if (i0 < 0 && ((jit_thumb_p() && i0 >= -255) ||
+                           (!jit_thumb_p() && i0 >= -4095))) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), r1, -i0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(rn(rg0), r1, -(i0 + 4));
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       else {
+           rg0 = jit_get_reg(jit_class_gpr);
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r1, i0);
+           ldxi_i(rn(rg0), rn(rg1), 0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), rn(rg1), 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg1);
+           jit_unget_reg(rg0);
+       }
+    }
+    else {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           i0 >= 0 && i0 <= 255 && !(r0 & 1))
+           LDRDI(r0, r1, i0);
+       else if (!jit_thumb_p() && jit_armv5e_p() &&
+                i0 < 0 && i0 >= -255 && !(r0 & 1))
+           LDRDIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           ldxi_i(r0, r1, i0);
+           ldxi_i(r0 + 1, r1, i0 + 4);
+       }
+       /* Same Thumb/ARM offset limits as the FPR path above; anything
+        * outside them falls through to the generic addi sequence. */
+       else if (i0 < 0 && ((jit_thumb_p() && i0 >= -255) ||
+                           (!jit_thumb_p() && i0 >= -4095))) {
+           swf_ldrin(r0, r1, -i0);
+           swf_ldrin(r0 + 1, r1, -(i0 + 4));
+       }
+       else {
+           rg0 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg0), r1, i0);
+           ldxi_i(r0, rn(rg0), 0);
+           ldxi_i(r0 + 1, rn(rg0), 4);
+           jit_unget_reg(rg0);
+       }
+    }
+}
+
+/*
+ * Store the float held in r1 as one word at the address in r0.
+ * When jit_fpr_p(r1) holds, the word is first read from r1's slot
+ * (addressed off _FP_REGNO at swf_off(r1) + 8) into a scratch GPR.
+ */
+static void
+_swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r1)) {
+       /* Source is not an FPR: store it directly. */
+       str_i(r0, r1);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the two-word double held in r1 at the address in r0.
+ *
+ * Non-FPR source: the words come from r1 / r1 + 1, written with one STRD
+ * when not in Thumb mode, on ARMv5E and with an even r1, else with two word
+ * stores at offsets 0 and 4.
+ *
+ * FPR source (jit_fpr_p): the words are read from r1's slot addressed off
+ * _FP_REGNO (offsets swf_off(r1) + 8 and swf_off(r1) + 4), through a
+ * scratch register pair when one can be obtained, else one word at a time.
+ */
+static void
+_swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p() && !(r1 & 1)) {
+           STRDI(r1, r0, 0);
+           return;
+       }
+       stxi_i(0, r0, r1);
+       stxi_i(4, r0, r1 + 1);
+       return;
+    }
+
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (tmp = jit_get_reg_pair()) != JIT_NOREG) {
+       /* Pair available: one double-word load, one double-word store. */
+       LDRDIN(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+       STRDI(rn(tmp), r0, 0);
+       jit_unget_reg_pair(tmp);
+       return;
+    }
+
+    /* No pair: shuttle each word through a single scratch register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(0, r0, rn(tmp));
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 4);
+    stxi_i(4, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the float held in r0 as one word at the immediate address i0.
+ * When jit_fpr_p(r0) holds, the word is first read from r0's slot
+ * (addressed off _FP_REGNO at swf_off(r0) + 8) into a scratch GPR.
+ */
+static void
+_swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r0)) {
+       /* Source is not an FPR: store it directly. */
+       sti_i(i0, r0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    sti_i(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the two-word double held in r0 at the immediate address i0.
+ *
+ * In every path the address is first materialised in a scratch register
+ * with movi.  A non-FPR source is taken from r0 / r0 + 1 (one STRD when not
+ * in Thumb mode, on ARMv5E and with an even r0; two word stores otherwise).
+ * An FPR source (jit_fpr_p) is read from its slot addressed off _FP_REGNO
+ * (offsets swf_off(r0) + 8 and swf_off(r0) + 4), through a scratch register
+ * pair when one can be obtained, else one word at a time.
+ */
+static void
+_swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                val, base;
+
+    if (!jit_fpr_p(r0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       if (!jit_thumb_p() && jit_armv5e_p() && !(r0 & 1))
+           STRDI(r0, rn(base), 0);
+       else {
+           stxi_i(0, rn(base), r0);
+           stxi_i(4, rn(base), r0 + 1);
+       }
+       jit_unget_reg(base);
+       return;
+    }
+
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (val = jit_get_reg_pair()) != JIT_NOREG) {
+       /* Pair already reserved by the test above; now get the address. */
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       LDRDIN(rn(val), _FP_REGNO, swf_off(r0) + 8);
+       STRDI(rn(val), rn(base), 0);
+       jit_unget_reg(base);
+       jit_unget_reg_pair(val);
+       return;
+    }
+
+    /* No pair: address register first, then one scratch for the data. */
+    base = jit_get_reg(jit_class_gpr);
+    movi(rn(base), i0);
+    val = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r0) + 8);
+    stxi_i(0, rn(base), rn(val));
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r0) + 4);
+    stxi_i(4, rn(base), rn(val));
+    jit_unget_reg(base);
+    jit_unget_reg(val);
+}
+
+/*
+ * Store the float held in r2 as one word at the address r0 + r1.
+ * When jit_fpr_p(r2) holds, the word is first read from r2's slot
+ * (addressed off _FP_REGNO at swf_off(r2) + 8) into a scratch GPR.
+ */
+static void
+_swf_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r2)) {
+       /* Source is not an FPR: indexed store of r2 itself. */
+       stxr_i(r0, r1, r2);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r2) + 8);
+    /* The two address registers are passed as r1, r0 here (the reverse
+     * of the non-FPR path); kept exactly as it was. */
+    stxr_i(r1, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the two-word double held in r2 at the address r0 + r1.
+ *
+ * Non-FPR source: one STRD when not in Thumb mode, on ARMv5E and with an
+ * even r2; otherwise r0 + r1 is computed into a scratch register and r2 /
+ * r2 + 1 are stored at offsets 0 and 4.
+ *
+ * FPR source (jit_fpr_p): the words are read from r2's slot addressed off
+ * _FP_REGNO (offsets swf_off(r2) + 8 and swf_off(r2) + 4), through a
+ * scratch register pair when one can be obtained, else one word at a time.
+ */
+static void
+_swf_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                val, base;
+
+    if (!jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p() && !(r2 & 1)) {
+           STRD(r0, r1, r2);
+           return;
+       }
+       base = jit_get_reg(jit_class_gpr);
+       addr(rn(base), r0, r1);
+       stxi_i(0, rn(base), r2);
+       stxi_i(4, rn(base), r2 + 1);
+       jit_unget_reg(base);
+       return;
+    }
+
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (val = jit_get_reg_pair()) != JIT_NOREG) {
+       /* Pair available: one double-word load, one double-word store. */
+       LDRDIN(rn(val), _FP_REGNO, swf_off(r2) + 8);
+       STRD(rn(val), r0, r1);
+       jit_unget_reg_pair(val);
+       return;
+    }
+
+    /* No pair: form the address once, then move each word in turn. */
+    base = jit_get_reg(jit_class_gpr);
+    addr(rn(base), r0, r1);
+    val = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r2) + 8);
+    stxi_i(0, rn(base), rn(val));
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r2) + 4);
+    stxi_i(4, rn(base), rn(val));
+    jit_unget_reg(val);
+    jit_unget_reg(base);
+}
+
+/*
+ * Store the float held in r1 as one word at the address r0 + i0.
+ * When jit_fpr_p(r1) holds, the word is first read from r1's slot
+ * (addressed off _FP_REGNO at swf_off(r1) + 8) into a scratch GPR.
+ */
+static void
+_swf_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!jit_fpr_p(r1)) {
+       /* Source is not an FPR: displaced store of r1 itself. */
+       stxi_i(i0, r0, r1);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the two-word double held in r1 at the address r0 + i0.
+ *
+ * The function picks the cheapest addressing form the offset allows:
+ *   - a double-word store (STRDI / STRDIN) when not in Thumb mode, on
+ *     ARMv5E and with |i0| <= 255;
+ *   - two word stores with a positive displacement while i0 + 4 <= 4095;
+ *   - two word stores with a negative displacement (swf_strin takes the
+ *     magnitude of the offset) down to -255 in Thumb mode and -4095
+ *     otherwise;
+ *   - else r0 + i0 is computed into a scratch register first.
+ *
+ * If jit_fpr_p(r1) holds, the words are read from r1's slot addressed off
+ * _FP_REGNO (offsets swf_off(r1) + 8 and swf_off(r1) + 4); otherwise they
+ * come from r1 and r1 + 1.
+ *
+ * NOTE(review): in the negative-displacement paths, i0 in [-3, -1] makes the
+ * second magnitude -(i0 + 4) negative -- confirm callers never pass such
+ * offsets.
+ */
+static void
+_swf_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
+           (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           if (i0 >= 0 && i0 <= 255)
+               STRDI(rn(rg0), r0, i0);
+           else
+               STRDIN(rn(rg0), r0, -i0);
+           jit_unget_reg_pair(rg0);
+       }
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           stxi_i(i0, r0, rn(rg0));
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           stxi_i(i0 + 4, r0, rn(rg0));
+           jit_unget_reg(rg0);
+       }
+       else if (i0 < 0 && ((jit_thumb_p() && i0 >= -255) ||
+                           (!jit_thumb_p() && i0 >= -4095))) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           swf_strin(rn(rg0), r0, -i0);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           swf_strin(rn(rg0), r0, -(i0 + 4));
+           jit_unget_reg(rg0);
+       }
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r0, i0);
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           stxi_i(0, rn(rg1), rn(rg0));
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           stxi_i(4, rn(rg1), rn(rg0));
+           jit_unget_reg(rg0);
+           jit_unget_reg(rg1);
+       }
+    }
+    else {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           i0 >= 0 && i0 <= 255 && !(r1 & 1))
+           STRDI(r1, r0, i0);
+       else if (!jit_thumb_p() && jit_armv5e_p() &&
+                i0 < 0 && i0 >= -255 && !(r1 & 1))
+           STRDIN(r1, r0, -i0);
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           stxi_i(i0, r0, r1);
+           stxi_i(i0 + 4, r0, r1 + 1);
+       }
+       /* The Thumb lower bound is -255, as in the FPR path above; with a
+        * bound of +255 the Thumb half of this test could never be true
+        * for a negative i0. */
+       else if (i0 < 0 && ((jit_thumb_p() && i0 >= -255) ||
+                           (!jit_thumb_p() && i0 >= -4095))) {
+           swf_strin(r1, r0, -i0);
+           swf_strin(r1 + 1, r0, -(i0 + 4));
+       }
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r0, i0);
+           stxi_i(0, rn(rg1), r1);
+           stxi_i(4, rn(rg1), r1 + 1);
+           jit_unget_reg(rg1);
+       }
+    }
+}
+
+/*
+ * Fetch the next double from a variadic argument area.
+ * r1 is the argument pointer: it is adjusted, the double it then addresses
+ * is loaded into r0, and r1 is advanced past it.  Only legal inside a
+ * function whose call flags include jit_call_varargs.
+ */
+static void
+_swf_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                pad;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Add the low three bits of r1 back onto r1.  Presumably r1 is word
+     * aligned, so this rounds it up to a multiple of 8 -- TODO confirm. */
+    pad = jit_get_reg(jit_class_gpr);
+    andi(rn(pad), r1, 7);
+    addr(r1, r1, rn(pad));
+    jit_unget_reg(pad);
+
+    /* Read the double the adjusted pointer designates. */
+    swf_ldr_d(r0, r1);
+
+    /* Step the pointer over the argument just consumed. */
+    addi(r1, r1, sizeof(jit_float64_t));
+}
+
+#endif
diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c
new file mode 100644 (file)
index 0000000..9f0d012
--- /dev/null
@@ -0,0 +1,808 @@
+
+/* Size table selected when __ARM_PCS_VFP is defined.  There is one entry
+ * per opcode, named by the trailing comment on each row; the values are
+ * presumably the largest number of bytes emitted for that opcode -- TODO
+ * confirm against the file that includes this table. */
+#if __WORDSIZE == 32
+#if defined(__ARM_PCS_VFP)
+/* Equals the largest entry in the rows that follow (divi and remi). */
+#define JIT_INSTR_MAX 48
+    0, /* data */
+    0, /* live */
+    2, /* align */
+    0, /* save */
+    0, /* load */
+    2, /* #name */
+    0, /* #note */
+    0, /* label */
+    34,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    16,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    8, /* addci */
+    4, /* addxr */
+    4, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    8, /* subci */
+    4, /* subxr */
+    4, /* subxi */
+    16,        /* rsbi */
+    4, /* mulr */
+    12,        /* muli */
+    4, /* qmulr */
+    12,        /* qmuli */
+    4, /* qmulr_u */
+    8, /* qmuli_u */
+    40,        /* divr */
+    48,        /* divi */
+    40,        /* divr_u */
+    44,        /* divi_u */
+    34,        /* qdivr */
+    38,        /* qdivi */
+    34,        /* qdivr_u */
+    38,        /* qdivi_u */
+    40,        /* remr */
+    48,        /* remi */
+    40,        /* remr_u */
+    44,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    14,        /* ltr */
+    14,        /* lti */
+    14,        /* ltr_u */
+    14,        /* lti_u */
+    14,        /* ler */
+    14,        /* lei */
+    14,        /* ler_u */
+    14,        /* lei_u */
+    14,        /* eqr */
+    14,        /* eqi */
+    14,        /* ger */
+    14,        /* gei */
+    14,        /* ger_u */
+    14,        /* gei_u */
+    14,        /* gtr */
+    14,        /* gti */
+    14,        /* gtr_u */
+    14,        /* gti_u */
+    14,        /* ner */
+    14,        /* nei */
+    4, /* movr */
+    8, /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    8, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    4, /* ldxr_c */
+    12,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    8, /* bosubr */
+    8, /* bosubi */
+    8, /* bosubr_u */
+    8, /* bosubi_u */
+    8, /* bxsubr */
+    8, /* bxsubi */
+    8, /* bxsubr_u */
+    8, /* bxsubi_u */
+    4, /* jmpr */
+    8, /* jmpi */
+    4, /* callr */
+    20,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    24,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    8, /* addi_f */
+    4, /* subr_f */
+    8, /* subi_f */
+    8, /* rsbi_f */
+    4, /* mulr_f */
+    8, /* muli_f */
+    4, /* divr_f */
+    8, /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    18,        /* ltr_f */
+    30,        /* lti_f */
+    20,        /* ler_f */
+    32,        /* lei_f */
+    18,        /* eqr_f */
+    30,        /* eqi_f */
+    18,        /* ger_f */
+    30,        /* gei_f */
+    18,        /* gtr_f */
+    30,        /* gti_f */
+    18,        /* ner_f */
+    30,        /* nei_f */
+    18,        /* unltr_f */
+    30,        /* unlti_f */
+    18,        /* unler_f */
+    30,        /* unlei_f */
+    24,        /* uneqr_f */
+    36,        /* uneqi_f */
+    18,        /* unger_f */
+    30,        /* ungei_f */
+    18,        /* ungtr_f */
+    30,        /* ungti_f */
+    24,        /* ltgtr_f */
+    36,        /* ltgti_f */
+    18,        /* ordr_f */
+    30,        /* ordi_f */
+    18,        /* unordr_f */
+    30,        /* unordi_f */
+    8, /* truncr_f_i */
+    0, /* truncr_f_l */
+    8, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    16,        /* stxi_f */
+    12,        /* bltr_f */
+    24,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    24,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    12,        /* bgtr_f */
+    24,        /* bgti_f */
+    12,        /* bner_f */
+    24,        /* bnei_f */
+    16,        /* bunltr_f */
+    28,        /* bunlti_f */
+    16,        /* bunler_f */
+    28,        /* bunlei_f */
+    20,        /* buneqr_f */
+    32,        /* buneqi_f */
+    16,        /* bunger_f */
+    28,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    20,        /* bltgtr_f */
+    32,        /* bltgti_f */
+    12,        /* bordr_f */
+    24,        /* bordi_f */
+    12,        /* bunordr_f */
+    24,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    20,        /* addi_d */
+    4, /* subr_d */
+    20,        /* subi_d */
+    20,        /* rsbi_d */
+    4, /* mulr_d */
+    20,        /* muli_d */
+    4, /* divr_d */
+    20,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    18,        /* ltr_d */
+    34,        /* lti_d */
+    20,        /* ler_d */
+    36,        /* lei_d */
+    18,        /* eqr_d */
+    34,        /* eqi_d */
+    18,        /* ger_d */
+    34,        /* gei_d */
+    18,        /* gtr_d */
+    34,        /* gti_d */
+    18,        /* ner_d */
+    34,        /* nei_d */
+    18,        /* unltr_d */
+    34,        /* unlti_d */
+    18,        /* unler_d */
+    34,        /* unlei_d */
+    24,        /* uneqr_d */
+    40,        /* uneqi_d */
+    18,        /* unger_d */
+    34,        /* ungei_d */
+    18,        /* ungtr_d */
+    34,        /* ungti_d */
+    24,        /* ltgtr_d */
+    40,        /* ltgti_d */
+    18,        /* ordr_d */
+    34,        /* ordi_d */
+    18,        /* unordr_d */
+    34,        /* unordi_d */
+    8, /* truncr_d_i */
+    0, /* truncr_d_l */
+    8, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    16,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    8, /* ldxr_d */
+    16,        /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    16,        /* stxi_d */
+    12,        /* bltr_d */
+    28,        /* blti_d */
+    12,        /* bler_d */
+    28,        /* blei_d */
+    12,        /* beqr_d */
+    28,        /* beqi_d */
+    12,        /* bger_d */
+    28,        /* bgei_d */
+    12,        /* bgtr_d */
+    28,        /* bgti_d */
+    12,        /* bner_d */
+    28,        /* bnei_d */
+    16,        /* bunltr_d */
+    32,        /* bunlti_d */
+    16,        /* bunler_d */
+    32,        /* bunlei_d */
+    20,        /* buneqr_d */
+    36,        /* buneqi_d */
+    16,        /* bunger_d */
+    32,        /* bungei_d */
+    12,        /* bungtr_d */
+    28,        /* bungti_d */
+    20,        /* bltgtr_d */
+    36,        /* bltgti_d */
+    12,        /* bordr_d */
+    28,        /* bordi_d */
+    12,        /* bunordr_d */
+    28,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    4, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    4, /* movr_d_ww */
+    12,        /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __ARM_PCS_VFP */
+#endif /* __WORDSIZE */
+
+/* Size table selected when __ARM_PCS_VFP is not defined.  There is one
+ * entry per opcode, named by the trailing comment on each row; the values
+ * are presumably the largest number of bytes emitted for that opcode --
+ * TODO confirm against the file that includes this table. */
+#if __WORDSIZE == 32
+#if !defined(__ARM_PCS_VFP)
+/* Equals the largest entry in the rows that follow (epilog). */
+#define JIT_INSTR_MAX 160
+    0, /* data */
+    0, /* live */
+    2, /* align */
+    0, /* save */
+    0, /* load */
+    2, /* #name */
+    0, /* #note */
+    0, /* label */
+    30,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    28,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    8, /* addci */
+    4, /* addxr */
+    4, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    8, /* subci */
+    4, /* subxr */
+    4, /* subxi */
+    16,        /* rsbi */
+    8, /* mulr */
+    12,        /* muli */
+    4, /* qmulr */
+    12,        /* qmuli */
+    4, /* qmulr_u */
+    8, /* qmuli_u */
+    40,        /* divr */
+    48,        /* divi */
+    40,        /* divr_u */
+    44,        /* divi_u */
+    34,        /* qdivr */
+    38,        /* qdivi */
+    34,        /* qdivr_u */
+    38,        /* qdivi_u */
+    40,        /* remr */
+    48,        /* remi */
+    40,        /* remr_u */
+    44,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    14,        /* ltr */
+    14,        /* lti */
+    14,        /* ltr_u */
+    14,        /* lti_u */
+    14,        /* ler */
+    14,        /* lei */
+    14,        /* ler_u */
+    14,        /* lei_u */
+    14,        /* eqr */
+    14,        /* eqi */
+    14,        /* ger */
+    14,        /* gei */
+    14,        /* ger_u */
+    14,        /* gei_u */
+    14,        /* gtr */
+    14,        /* gti */
+    14,        /* gtr_u */
+    14,        /* gti_u */
+    14,        /* ner */
+    14,        /* nei */
+    4, /* movr */
+    8, /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    20,        /* htonr_us */
+    16,        /* htonr_ui */
+    0, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    4, /* ldxr_c */
+    12,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    8, /* bosubr */
+    8, /* bosubi */
+    8, /* bosubr_u */
+    8, /* bosubi_u */
+    8, /* bxsubr */
+    8, /* bxsubi */
+    8, /* bxsubr_u */
+    8, /* bxsubi_u */
+    12,        /* jmpr */
+    72,        /* jmpi */
+    4, /* callr */
+    20,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    160,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    40,        /* addr_f */
+    40,        /* addi_f */
+    40,        /* subr_f */
+    40,        /* subi_f */
+    40,        /* rsbi_f */
+    40,        /* mulr_f */
+    40,        /* muli_f */
+    40,        /* divr_f */
+    40,        /* divi_f */
+    12,        /* negr_f */
+    12,        /* absr_f */
+    36,        /* sqrtr_f */
+    40,        /* ltr_f */
+    44,        /* lti_f */
+    40,        /* ler_f */
+    44,        /* lei_f */
+    40,        /* eqr_f */
+    44,        /* eqi_f */
+    40,        /* ger_f */
+    44,        /* gei_f */
+    40,        /* gtr_f */
+    44,        /* gti_f */
+    44,        /* ner_f */
+    48,        /* nei_f */
+    72,        /* unltr_f */
+    80,        /* unlti_f */
+    72,        /* unler_f */
+    80,        /* unlei_f */
+    72,        /* uneqr_f */
+    80,        /* uneqi_f */
+    72,        /* unger_f */
+    80,        /* ungei_f */
+    72,        /* ungtr_f */
+    80,        /* ungti_f */
+    76,        /* ltgtr_f */
+    84,        /* ltgti_f */
+    44,        /* ordr_f */
+    48,        /* ordi_f */
+    72,        /* unordr_f */
+    80,        /* unordi_f */
+    36,        /* truncr_f_i */
+    0, /* truncr_f_l */
+    36,        /* extr_f */
+    38,        /* extr_d_f */
+    8, /* movr_f */
+    12,        /* movi_f */
+    8, /* ldr_f */
+    16,        /* ldi_f */
+    8, /* ldxr_f */
+    16,        /* ldxi_f */
+    8, /* str_f */
+    16,        /* sti_f */
+    8, /* stxr_f */
+    16,        /* stxi_f */
+    44,        /* bltr_f */
+    48,        /* blti_f */
+    44,        /* bler_f */
+    48,        /* blei_f */
+    44,        /* beqr_f */
+    52,        /* beqi_f */
+    44,        /* bger_f */
+    48,        /* bgei_f */
+    44,        /* bgtr_f */
+    48,        /* bgti_f */
+    44,        /* bner_f */
+    48,        /* bnei_f */
+    44,        /* bunltr_f */
+    48,        /* bunlti_f */
+    44,        /* bunler_f */
+    48,        /* bunlei_f */
+    76,        /* buneqr_f */
+    84,        /* buneqi_f */
+    44,        /* bunger_f */
+    48,        /* bungei_f */
+    44,        /* bungtr_f */
+    48,        /* bungti_f */
+    76,        /* bltgtr_f */
+    84,        /* bltgti_f */
+    44,        /* bordr_f */
+    48,        /* bordi_f */
+    44,        /* bunordr_f */
+    48,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    50,        /* addr_d */
+    52,        /* addi_d */
+    50,        /* subr_d */
+    52,        /* subi_d */
+    52,        /* rsbi_d */
+    50,        /* mulr_d */
+    52,        /* muli_d */
+    50,        /* divr_d */
+    52,        /* divi_d */
+    20,        /* negr_d */
+    20,        /* absr_d */
+    42,        /* sqrtr_d */
+    44,        /* ltr_d */
+    48,        /* lti_d */
+    44,        /* ler_d */
+    48,        /* lei_d */
+    44,        /* eqr_d */
+    48,        /* eqi_d */
+    44,        /* ger_d */
+    48,        /* gei_d */
+    44,        /* gtr_d */
+    48,        /* gti_d */
+    48,        /* ner_d */
+    52,        /* nei_d */
+    82,        /* unltr_d */
+    88,        /* unlti_d */
+    82,        /* unler_d */
+    88,        /* unlei_d */
+    82,        /* uneqr_d */
+    88,        /* uneqi_d */
+    82,        /* unger_d */
+    88,        /* ungei_d */
+    82,        /* ungtr_d */
+    88,        /* ungti_d */
+    86,        /* ltgtr_d */
+    92,        /* ltgti_d */
+    48,        /* ordr_d */
+    52,        /* ordi_d */
+    82,        /* unordr_d */
+    88,        /* unordi_d */
+    36,        /* truncr_d_i */
+    0, /* truncr_d_l */
+    36,        /* extr_d */
+    38,        /* extr_f_d */
+    16,        /* movr_d */
+    20,        /* movi_d */
+    16,        /* ldr_d */
+    24,        /* ldi_d */
+    20,        /* ldxr_d */
+    28,        /* ldxi_d */
+    16,        /* str_d */
+    24,        /* sti_d */
+    20,        /* stxr_d */
+    28,        /* stxi_d */
+    48,        /* bltr_d */
+    52,        /* blti_d */
+    48,        /* bler_d */
+    52,        /* blei_d */
+    48,        /* beqr_d */
+    60,        /* beqi_d */
+    48,        /* bger_d */
+    52,        /* bgei_d */
+    48,        /* bgtr_d */
+    52,        /* bgti_d */
+    48,        /* bner_d */
+    52,        /* bnei_d */
+    48,        /* bunltr_d */
+    52,        /* bunlti_d */
+    48,        /* bunler_d */
+    52,        /* bunlei_d */
+    84,        /* buneqr_d */
+    92,        /* buneqi_d */
+    48,        /* bunger_d */
+    52,        /* bungei_d */
+    48,        /* bungtr_d */
+    52,        /* bungti_d */
+    84,        /* bltgtr_d */
+    92,        /* bltgti_d */
+    48,        /* bordr_d */
+    52,        /* bordi_d */
+    48,        /* bunordr_d */
+    52,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    4, /* movr_w_f */
+    8, /* movr_ww_d */
+    0, /* movr_w_d */
+    8, /* movr_f_w */
+    8, /* movi_f_w */
+    16,        /* movr_d_ww */
+    12,        /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __ARM_PCS_VFP */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_arm-vfp.c b/deps/lightning/lib/jit_arm-vfp.c
new file mode 100644 (file)
index 0000000..743a3ef
--- /dev/null
@@ -0,0 +1,2330 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* As per the vfp_regno macro; required due to "support" for soft float
+ * registers, or for using integer registers as arguments to float operations */
+#  define _D8_REGNO                    32
+#  define ARM_V_Q                      0x00000040
+#  define FPSCR_N                      0x80000000 /* Negative flag */
+#  define FPSCR_Z                      0x40000000 /* Zero flag */
+#  define FPSCR_C                      0x20000000 /* Carry flag */
+#  define FPSCR_V                      0x10000000 /* Overflow flag */
+#  define FPSCR_QC                     0x08000000 /* Cumulative saturation */
+#  define FPSCR_AHP                    0x04000000 /* Alt. half-precision */
+#  define FPSCR_DN                     0x02000000 /* Default NaN mode */
+#  define FPSCR_FZ                     0x01000000 /* Flush to zero */
+#  define FPSCR_RMASK                  0x00c00000
+#    define FPSCR_RN                   0x00000000 /* Round to Nearest */
+#    define FPSCR_RP                   0x00400000 /* Round to Plus Infinity */
+#    define FPSCR_RM                   0x00800000 /* Round to Minus Infinity */
+#    define FPSCR_RZ                   0x00c00000 /* Round towards Zero */
+#  define FPSCR_STRIDE                 0x00300000
+#  define FPSCR_RES1                   0x00080000 /* Reserved, UNK/SBZP */
+#  define FPSCR_LEN                    0x00070000
+#  define FPSCR_IDE                    0x00008000 /* Input Denormal trap */
+#  define FPSCR_IXE                    0x00001000 /* Inexact trap */
+#  define FPSCR_UFE                    0x00000800 /* Underflow trap */
+#  define FPSCR_OFE                    0x00000400 /* Overflow trap */
+#  define FPSCR_DZE                    0x00000200 /* Division by zero trap */
+#  define FPSCR_IOE                    0x00000100 /* Invalid Operation trap */
+#  define FPSCR_IDC                    0x00000080 /* Input Denormal flag */
+#  define FPSCR_RES0                   0x00000060 /* Reserved, UNK/SBZP */
+#  define FPSCR_IXC                    0x00000010 /* Inexact flag */
+#  define FPSCR_UFC                    0x00000008 /* Underflow flag */
+#  define FPSCR_OFC                    0x00000004 /* Overflow flag */
+#  define FPSCR_DZC                    0x00000002 /* Division by zero flag */
+#  define FPSCR_IOC                    0x00000001 /* Invalid Operation flag */
+#  define ARM_V_E                      0x00000080 /* ARM_VCMP except if NaN */
+#  define ARM_V_Z                      0x00010000 /* ARM_VCMP with zero */
+#  define ARM_V_F64                    0x00000100
+#  define ARM_VADD_F                   0x0e300a00
+#  define ARM_VSUB_F                   0x0e300a40
+#  define ARM_VMUL_F                   0x0e200a00
+#  define ARM_VDIV_F                   0x0e800a00
+#  define ARM_VABS_F                   0x0eb00ac0
+#  define ARM_VNEG_F                   0x0eb10a40
+#  define ARM_VSQRT_F                  0x0eb10ac0
+#  define ARM_VMOV_F                   0x0eb00a40
+#  define ARM_VMOV_A_S                 0x0e100a10 /* vmov rn, sn */
+#  define ARM_VMOV_S_A                 0x0e000a10 /* vmov sn, rn */
+#  define ARM_VMOV_AA_D                        0x0c500b10 /* vmov rn,rn, dn */
+#  define ARM_VMOV_D_AA                        0x0c400b10 /* vmov dn, rn,rn */
+#  define ARM_VCMP                     0x0eb40a40
+#  define ARM_VMRS                     0x0ef10a10
+#  define ARM_VMSR                     0x0ee10a10
+#  define ARM_VCVT_2I                  0x00040000 /* to integer */
+#  define ARM_VCVT_2S                  0x00010000 /* to signed */
+#  define ARM_VCVT_RS                  0x00000080 /* round to zero or signed */
+#  define ARM_VCVT                     0x0eb80a40 /* base opcode; composites below are parenthesized so each expands as a single operand */
+#  define ARM_VCVT_S32_F32             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
+#  define ARM_VCVT_U32_F32             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
+#  define ARM_VCVT_S32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
+#  define ARM_VCVT_U32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
+#  define ARM_VCVT_F32_S32             (ARM_VCVT|ARM_VCVT_RS)
+#  define ARM_VCVT_F32_U32             ARM_VCVT
+#  define ARM_VCVT_F64_S32             (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
+#  define ARM_VCVT_F64_U32             (ARM_VCVT|ARM_V_F64)
+#  define ARM_VCVT_F                   0x0eb70ac0 /* base opcode of the F32<->F64 forms below */
+#  define ARM_VCVT_F32_F64             ARM_VCVT_F
+#  define ARM_VCVT_F64_F32             (ARM_VCVT_F|ARM_V_F64)
+#  define ARM_VCVTR_S32_F32            (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S) /* VCVTR forms: same as VCVT_* minus ARM_VCVT_RS */
+#  define ARM_VCVTR_U32_F32            (ARM_VCVT|ARM_VCVT_2I)
+#  define ARM_VCVTR_S32_F64            (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
+#  define ARM_VCVTR_U32_F64            (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
+#  define ARM_V_D                      0x00400000
+#  define ARM_V_N                      0x00000080
+#  define ARM_V_Q                      0x00000040 /* ORed in by the *Q (voqqq/vo_qq) macro variants; repeats the identical definition near the top of this section */
+#  define ARM_V_M                      0x00000020
+#  define ARM_V_U                      0x01000000 /* ORed in by the unsigned (_U8/_U16/_U32/_U64) macro variants */
+#  define ARM_V_I16                    0x00100000 /* 16-bit element selector used by the three-operand add/sub/mul macros */
+#  define ARM_V_I32                    0x00200000 /* 32-bit element selector, same macros */
+#  define ARM_V_I64                    0x00300000 /* 64-bit element selector, same macros */
+#  define ARM_V_S16                    0x00040000 /* 16-bit element selector used by the two-operand VABS/VQABS/VNEG/VQNEG macros */
+#  define ARM_V_S32                    0x00080000 /* 32-bit element selector, same macros */
+#  define ARM_VADD_I                   0x02000800
+#  define ARM_VQADD_I                  0x02000010 /* set flag on over/carry */
+#  define ARM_VADDL_I                  0x02800000 /* q=d+d */
+#  define ARM_VADDW_I                  0x02800100 /* q=q+d */
+#  define ARM_VSUB_I                   0x03000800
+#  define ARM_VQSUB_I                  0x02000210 /* set flag on over/carry */
+#  define ARM_VSUBL_I                  0x02800200
+#  define ARM_VSUBW_I                  0x02800300
+#  define ARM_VMUL_I                   0x02000910
+#  define ARM_VMULL_I                  0x02800c00
+#  define ARM_VABS_I                   0x03b10300
+#  define ARM_VQABS_I                  0x03b00700 /* sets flag on overflow */
+#  define ARM_VNEG_I                   0x03b10380
+#  define ARM_VQNEG_I                  0x03b00780 /* sets flag on overflow */
+#  define ARM_VAND                     0x02000110
+#  define ARM_VBIC                     0x02100110
+#  define ARM_VORR                     0x02200110
+#  define ARM_VORN                     0x02300110
+#  define ARM_VEOR                     0x03000110
+#  define ARM_VMOVL_S8                 0x00080000
+#  define ARM_VMOVL_S16                        0x00100000
+#  define ARM_VMOVL_S32                        0x00200000
+#  define ARM_VMOVL_I                  0x02800a10
+#  define ARM_VMOVI                    0x02800010
+#  define ARM_VMVNI                    0x02800030
+#  define ARM_VLDR                     0x0d100a00
+#  define ARM_VSTR                     0x0d000a00
+#  define ARM_VM                       0x0c000a00
+#  define ARM_VMOV_ADV_U               0x00800000 /* zero extend */
+#  define ARM_VMOV_ADV_8               0x00400000
+#  define ARM_VMOV_ADV_16              0x00000020
+#  define ARM_VMOV_A_D                 0x0e100b10
+#  define ARM_VMOV_D_A                 0x0e000b10
+
+#  define vodi(oi,r0)                  _vodi(_jit,oi,r0)
+static void _vodi(jit_state_t*,int,int) maybe_unused;
+#  define voqi(oi,r0)                  _voqi(_jit,oi,r0)
+static void _voqi(jit_state_t*,int,int) maybe_unused;
+#  define vo_ss(o,r0,r1)               _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_ss(cc,o,r0,r1)         _cc_vo_ss(_jit,cc,o,r0,r1)
+static void _cc_vo_ss(jit_state_t*,int,int,int,int);
+#  define vo_dd(o,r0,r1)               _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_dd(cc,o,r0,r1)         _cc_vo_dd(_jit,cc,o,r0,r1)
+static void _cc_vo_dd(jit_state_t*,int,int,int,int);
+#  define vo_qd(o,r0,r1)               _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_qd(cc,o,r0,r1)         _cc_vo_qd(_jit,cc,o,r0,r1)
+static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vo_qq(o,r0,r1)               _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_qq(cc,o,r0,r1)         _cc_vo_qq(_jit,cc,o,r0,r1)
+static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vorr_(o,r0,r1)               _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vorr_(cc,o,r0,r1)         _cc_vorr_(_jit,cc,o,r0,r1)
+static void _cc_vorr_(jit_state_t*,int,int,int,int);
+#  define vors_(o,r0,r1)               _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vors_(cc,o,r0,r1)         _cc_vors_(_jit,cc,o,r0,r1)
+static void _cc_vors_(jit_state_t*,int,int,int,int);
+#  define vorv_(o,r0,r1)               _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vorv_(cc,o,r0,r1)         _cc_vorv_(_jit,cc,o,r0,r1)
+static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vori_(o,r0,r1)               _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vori_(cc,o,r0,r1)         _cc_vori_(_jit,cc,o,r0,r1)
+static void _cc_vori_(jit_state_t*,int,int,int,int);
+#  define vorrd(o,r0,r1,r2)            _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_vorrd(cc,o,r0,r1,r2)      _cc_vorrd(_jit,cc,o,r0,r1,r2)
+static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
+#  define vosss(o,r0,r1,r2)            _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_vosss(cc,o,r0,r1,r2)      _cc_vosss(_jit,cc,o,r0,r1,r2)
+static void _cc_vosss(jit_state_t*,int,int,int,int,int);
+#  define voddd(o,r0,r1,r2)            _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voddd(cc,o,r0,r1,r2)      _cc_voddd(_jit,cc,o,r0,r1,r2)
+static void _cc_voddd(jit_state_t*,int,int,int,int,int);
+#  define voqdd(o,r0,r1,r2)            _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqdd(cc,o,r0,r1,r2)      _cc_voqdd(_jit,cc,o,r0,r1,r2)
+static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define voqqd(o,r0,r1,r2)            _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqqd(cc,o,r0,r1,r2)      _cc_voqqd(_jit,cc,o,r0,r1,r2)
+static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define voqqq(o,r0,r1,r2)            _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqqq(cc,o,r0,r1,r2)      _cc_voqqq(_jit,cc,o,r0,r1,r2)
+static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define cc_vldst(cc,o,r0,r1,i0)      _cc_vldst(_jit,cc,o,r0,r1,i0)
+static void _cc_vldst(jit_state_t*,int,int,int,int,int);
+#  define cc_vorsl(cc,o,r0,r1,i0)      _cc_vorsl(_jit,cc,o,r0,r1,i0)
+static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
+#  define CC_VADD_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
+#  define VADD_F32(r0,r1,r2)           CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VADD_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
+#  define VADD_F64(r0,r1,r2)           CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VSUB_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
+#  define VSUB_F32(r0,r1,r2)           CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VSUB_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
+#  define VSUB_F64(r0,r1,r2)           CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMUL_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
+#  define VMUL_F32(r0,r1,r2)           CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMUL_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
+#  define VMUL_F64(r0,r1,r2)           CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VDIV_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
+#  define VDIV_F32(r0,r1,r2)           CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VDIV_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
+#  define VDIV_F64(r0,r1,r2)           CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VABS_F32(cc,r0,r1)                cc_vo_ss(cc,ARM_VABS_F,r0,r1)
+#  define VABS_F32(r0,r1)              CC_VABS_F32(ARM_CC_AL,r0,r1)
+#  define CC_VABS_F64(cc,r0,r1)                cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
+#  define VABS_F64(r0,r1)              CC_VABS_F64(ARM_CC_AL,r0,r1)
+#  define CC_VNEG_F32(cc,r0,r1)                cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
+#  define VNEG_F32(r0,r1)              CC_VNEG_F32(ARM_CC_AL,r0,r1)
+#  define CC_VNEG_F64(cc,r0,r1)                cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
+#  define VNEG_F64(r0,r1)              CC_VNEG_F64(ARM_CC_AL,r0,r1)
+#  define CC_VSQRT_F32(cc,r0,r1)       cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
+#  define VSQRT_F32(r0,r1)             CC_VSQRT_F32(ARM_CC_AL,r0,r1)
+#  define CC_VSQRT_F64(cc,r0,r1)       cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
+#  define VSQRT_F64(r0,r1)             CC_VSQRT_F64(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_F32(cc,r0,r1)                cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
+#  define VMOV_F32(r0,r1)              CC_VMOV_F32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_F64(cc,r0,r1)                cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
+#  define VMOV_F64(r0,r1)              CC_VMOV_F64(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_AA_D(cc,r0,r1,r2)    cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
+#  define VMOV_AA_D(r0,r1,r2)          CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMOV_D_AA(cc,r0,r1,r2)    cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
+#  define VMOV_D_AA(r0,r1,r2)          CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMOV_A_S(cc,r0,r1)                cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
+#  define VMOV_A_S(r0,r1)              CC_VMOV_A_S(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_S_A(cc,r0,r1)                cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
+#  define VMOV_S_A(r0,r1)              CC_VMOV_S_A(ARM_CC_AL,r0,r1)
+#  define CC_VCMP_F32(cc,r0,r1)                cc_vo_ss(cc,ARM_VCMP,r0,r1)
+#  define VCMP_F32(r0,r1)              CC_VCMP_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCMP_F64(cc,r0,r1)                cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
+#  define VCMP_F64(r0,r1)              CC_VCMP_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCMPE_F32(cc,r0,r1)       cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
+#  define VCMPE_F32(r0,r1)             CC_VCMPE_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCMPE_F64(cc,r0,r1)       cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
+#  define VCMPE_F64(r0,r1)             CC_VCMPE_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCMPZ_F32(cc,r0)          cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
+#  define VCMPZ_F32(r0)                        CC_VCMPZ_F32(ARM_CC_AL,r0)
+#  define CC_VCMPZ_F64(cc,r0)          cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
+#  define VCMPZ_F64(r0)                        CC_VCMPZ_F64(ARM_CC_AL,r0)
+#  define CC_VCMPEZ_F32(cc,r0)         cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
+#  define VCMPEZ_F32(r0)               CC_VCMPEZ_F32(ARM_CC_AL,r0)
+#  define CC_VCMPEZ_F64(cc,r0)         cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
+#  define VCMPEZ_F64(r0)               CC_VCMPEZ_F64(ARM_CC_AL,r0)
+#  define CC_VMRS(cc,r0)               cc_vorr_(cc,ARM_VMRS,r0,0)
+#  define VMRS(r0)                     CC_VMRS(ARM_CC_AL,r0)
+#  define CC_VMSR(cc,r0)               cc_vorr_(cc,ARM_VMSR,r0,0)
+#  define VMSR(r0)                     CC_VMSR(ARM_CC_AL,r0)
+#  define CC_VCVT_S32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
+#  define VCVT_S32_F32(r0,r1)          CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_U32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
+#  define VCVT_U32_F32(r0,r1)          CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_S32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
+#  define VCVT_S32_F64(r0,r1)          CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_U32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
+#  define VCVT_U32_F64(r0,r1)          CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_S32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
+#  define VCVT_F32_S32(r0,r1)          CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_U32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
+#  define VCVT_F32_U32(r0,r1)          CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_S32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
+#  define VCVT_F64_S32(r0,r1)          CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_U32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
+#  define VCVT_F64_U32(r0,r1)          CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
+#  define VCVT_F32_F64(r0,r1)          CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
+#  define VCVT_F64_F32(r0,r1)          CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_S32_F32(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
+#  define VCVTR_S32_F32(r0,r1)         CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_U32_F32(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
+#  define VCVTR_U32_F32(r0,r1)         CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_S32_F64(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
+#  define VCVTR_S32_F64(r0,r1)         CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_U32_F64(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
+#  define VCVTR_U32_F64(r0,r1)         CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VLDMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
+#  define VLDMIA_F32(r0,r1,i0)         CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
+#  define VLDMIA_F64(r0,r1,i0)         CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
+#  define VSTMIA_F32(r0,r1,i0)         CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
+#  define VSTMIA_F64(r0,r1,i0)         CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
+#  define VLDMIA_U_F32(r0,r1,i0)       CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VLDMIA_U_F64(r0,r1,i0)       CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
+#  define VSTMIA_U_F32(r0,r1,i0)       CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VSTMIA_U_F64(r0,r1,i0)       CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
+#  define VLDMDB_U_F32(r0,r1,i0)       CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VLDMDB_U_F64(r0,r1,i0)       CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
+#  define VSTMDB_U_F32(r0,r1,i0)       CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VSTMDB_U_F64(r0,r1,i0)       CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VPUSH_F32(cc,r0,i0)       CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
+#  define VPUSH_F32(r0,i0)             CC_VPUSH_F32(ARM_CC_AL,r0,i0)
+#  define CC_VPUSH_F64(cc,r0,i0)       CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
+#  define VPUSH_F64(r0,i0)             CC_VPUSH_F64(ARM_CC_AL,r0,i0)
+#  define CC_VPOP_F32(cc,r0,i0)                CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
+#  define VPOP_F32(r0,i0)              CC_VPOP_F32(ARM_CC_AL,r0,i0)
+#  define CC_VPOP_F64(cc,r0,i0)                CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
+#  define VPOP_F64(r0,i0)              CC_VPOP_F64(ARM_CC_AL,r0,i0)
+#  define CC_VMOV_A_S8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
+#  define VMOV_A_S8(r0,r1)             CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U8(r0,r1)             CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_S16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
+#  define VMOV_A_S16(r0,r1)            CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U16(r0,r1)            CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_S32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
+#  define VMOV_A_S32(r0,r1)            CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U32(r0,r1)            CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
+#  define VMOV_V_I8(r0,r1)             CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
+#  define VMOV_V_I16(r0,r1)            CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
+#  define VMOV_V_I32(r0,r1)            CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
+#  define VADD_I8(r0,r1,r2)            voddd(ARM_VADD_I,r0,r1,r2)
+#  define VADDQ_I8(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
+#  define VADD_I16(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
+#  define VADDQ_I16(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VADD_I32(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
+#  define VADDQ_I32(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VADD_I64(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
+#  define VADDQ_I64(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S8(r0,r1,r2)           voddd(ARM_VQADD_I,r0,r1,r2)
+#  define VQADDQ_S8(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U8(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U8(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S16(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
+#  define VQADDQ_S16(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U16(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U16(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S32(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
+#  define VQADDQ_S32(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U32(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U32(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S64(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
+#  define VQADDQ_S64(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U64(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U64(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VADDL_S8(r0,r1,r2)           voqdd(ARM_VADDL_I,r0,r1,r2)
+#  define VADDL_U8(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
+#  define VADDL_S16(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
+#  define VADDL_U16(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VADDL_S32(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
+#  define VADDL_U32(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VADDW_S8(r0,r1,r2)           voqqd(ARM_VADDW_I,r0,r1,r2)
+#  define VADDW_U8(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
+#  define VADDW_S16(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
+#  define VADDW_U16(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VADDW_S32(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
+#  define VADDW_U32(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VSUB_I8(r0,r1,r2)            voddd(ARM_VSUB_I,r0,r1,r2)
+#  define VSUBQ_I8(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I16(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBQ_I16(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I32(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBQ_I32(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I64(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
+#  define VSUBQ_I64(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S8(r0,r1,r2)           voddd(ARM_VQSUB_I,r0,r1,r2)
+#  define VQSUBQ_S8(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U8(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U8(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S16(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
+#  define VQSUBQ_S16(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U16(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U16(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S32(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
+#  define VQSUBQ_S32(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U32(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U32(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S64(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
+#  define VQSUBQ_S64(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U64(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U64(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VSUBL_S8(r0,r1,r2)           voqdd(ARM_VSUBL_I,r0,r1,r2)
+#  define VSUBL_U8(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
+#  define VSUBL_S16(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBL_U16(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VSUBL_S32(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBL_U32(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S8(r0,r1,r2)           voqqd(ARM_VSUBW_I,r0,r1,r2)
+#  define VSUBW_U8(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S16(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBW_U16(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S32(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBW_U32(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VMUL_I8(r0,r1,r2)            voddd(ARM_VMUL_I,r0,r1,r2)
+#  define VMULQ_I8(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
+#  define VMUL_I16(r0,r1,r2)           voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
+#  define VMULQ_I16(r0,r1,r2)          voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
+#  define VMUL_I32(r0,r1,r2)           voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
+#  define VMULQ_I32(r0,r1,r2)          voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
+#  define VMULL_S8(r0,r1,r2)           voqdd(ARM_VMULL_I,r0,r1,r2) /* widening multiply, q=d*d: same operand shape as VADDL/VSUBL */
+#  define VMULL_U8(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2) /* unsigned forms differ from signed only by ARM_V_U */
+#  define VMULL_S16(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
+#  define VMULL_U16(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
+#  define VMULL_S32(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
+#  define VMULL_U32(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
+#  define VABS_S8(r0,r1)               vo_dd(ARM_VABS_I,r0,r1)
+#  define VABSQ_S8(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
+#  define VABS_S16(r0,r1)              vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
+#  define VABSQ_S16(r0,r1)             vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VABS_S32(r0,r1)              vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
+#  define VABSQ_S32(r0,r1)             vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VQABS_S8(r0,r1)              vo_dd(ARM_VQABS_I,r0,r1)
+#  define VQABSQ_S8(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
+#  define VQABS_S16(r0,r1)             vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
+#  define VQABSQ_S16(r0,r1)            vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VQABS_S32(r0,r1)             vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
+#  define VQABSQ_S32(r0,r1)            vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VNEG_S8(r0,r1)               vo_dd(ARM_VNEG_I,r0,r1)
+#  define VNEGQ_S8(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
+#  define VNEG_S16(r0,r1)              vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
+#  define VNEGQ_S16(r0,r1)             vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VNEG_S32(r0,r1)              vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
+#  define VNEGQ_S32(r0,r1)             vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VQNEG_S8(r0,r1)              vo_dd(ARM_VQNEG_I,r0,r1)
+#  define VQNEGQ_S8(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
+#  define VQNEG_S16(r0,r1)             vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
+#  define VQNEGQ_S16(r0,r1)            vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VQNEG_S32(r0,r1)             vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
+#  define VQNEGQ_S32(r0,r1)            vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VAND(r0,r1,r2)               voddd(ARM_VAND,r0,r1,r2)
+#  define VANDQ(r0,r1,r2)              voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
+#  define VBIC(r0,r1,r2)               voddd(ARM_VBIC,r0,r1,r2)
+#  define VBICQ(r0,r1,r2)              voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
+#  define VORR(r0,r1,r2)               voddd(ARM_VORR,r0,r1,r2)
+#  define VORRQ(r0,r1,r2)              voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
+#  define VORN(r0,r1,r2)               voddd(ARM_VORN,r0,r1,r2)
+#  define VORNQ(r0,r1,r2)              voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
+#  define VEOR(r0,r1,r2)               voddd(ARM_VEOR,r0,r1,r2)
+#  define VEORQ(r0,r1,r2)              voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)
+#  define VMOV(r0,r1)                  VORR(r0,r1,r1)
+#  define VMOVQ(r0,r1)                 VORRQ(r0,r1,r1)
+#  define VMOVL_S8(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
+#  define VMOVL_U8(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
+#  define VMOVL_S16(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
+#  define VMOVL_U16(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
+#  define VMOVL_S32(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
+#  define VMOVL_U32(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
+/* "oi" should be the result of encode_vfp_double */
+#  define VIMM(oi,r0)                  vodi(oi,r0)
+#  define VIMMQ(oi,r0)                 voqi(oi|ARM_V_Q,r0)
+/* index is multiplied by four */
+#  define CC_VLDRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR,r0,r1,i0)
+#  define VLDRN_F32(r0,r1,i0)          CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
+#  define VLDR_F32(r0,r1,i0)           CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
+#  define VLDRN_F64(r0,r1,i0)          CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define VLDR_F64(r0,r1,i0)           CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR,r0,r1,i0)
+#  define VSTRN_F32(r0,r1,i0)          CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
+#  define VSTR_F32(r0,r1,i0)           CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
+#  define VSTRN_F64(r0,r1,i0)          CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define VSTR_F64(r0,r1,i0)           CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
+#  define vfp_movr_f(r0,r1)            _vfp_movr_f(_jit,r0,r1)
+static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_movr_d(r0,r1)            _vfp_movr_d(_jit,r0,r1)
+static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_movi_f(r0,i0)            _vfp_movi_f(_jit,r0,i0)
+static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define vfp_movi_d(r0,i0)            _vfp_movi_d(_jit,r0,i0)
+static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define vfp_extr_f(r0,r1)            _vfp_extr_f(_jit,r0,r1)
+static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_d(r0,r1)            _vfp_extr_d(_jit,r0,r1)
+static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_d_f(r0,r1)          _vfp_extr_d_f(_jit,r0,r1)
+static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_f_d(r0,r1)          _vfp_extr_f_d(_jit,r0,r1)
+static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_truncr_f_i(r0,r1)                _vfp_truncr_f_i(_jit,r0,r1)
+static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_truncr_d_i(r0,r1)                _vfp_truncr_d_i(_jit,r0,r1)
+static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_absr_f(r0,r1)            VABS_F32(r0,r1)
+#  define vfp_absr_d(r0,r1)            VABS_F64(r0,r1)
+#  define vfp_negr_f(r0,r1)            VNEG_F32(r0,r1)
+#  define vfp_negr_d(r0,r1)            VNEG_F64(r0,r1)
+#  define vfp_sqrtr_f(r0,r1)           VSQRT_F32(r0,r1)
+#  define vfp_sqrtr_d(r0,r1)           VSQRT_F64(r0,r1)
+#  define vfp_addr_f(r0,r1,r2)         VADD_F32(r0,r1,r2)
+#  define vfp_addi_f(r0,r1,i0)         _vfp_addi_f(_jit,r0,r1,i0)
+static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_addr_d(r0,r1,r2)         VADD_F64(r0,r1,r2)
+#  define vfp_addi_d(r0,r1,i0)         _vfp_addi_d(_jit,r0,r1,i0)
+static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_subr_f(r0,r1,r2)         VSUB_F32(r0,r1,r2)
+#  define vfp_subi_f(r0,r1,i0)         _vfp_subi_f(_jit,r0,r1,i0)
+static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_subr_d(r0,r1,r2)         VSUB_F64(r0,r1,r2)
+#  define vfp_subi_d(r0,r1,i0)         _vfp_subi_d(_jit,r0,r1,i0)
+static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_rsbr_f(r0,r1,r2)         vfp_subr_f(r0,r2,r1)
+#  define vfp_rsbi_f(r0,r1,i0)         _vfp_rsbi_f(_jit,r0,r1,i0)
+static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_rsbr_d(r0,r1,r2)         vfp_subr_d(r0,r2,r1)
+#  define vfp_rsbi_d(r0,r1,i0)         _vfp_rsbi_d(_jit,r0,r1,i0)
+static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_mulr_f(r0,r1,r2)         VMUL_F32(r0,r1,r2)
+#  define vfp_muli_f(r0,r1,i0)         _vfp_muli_f(_jit,r0,r1,i0)
+static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_mulr_d(r0,r1,r2)         VMUL_F64(r0,r1,r2)
+#  define vfp_muli_d(r0,r1,i0)         _vfp_muli_d(_jit,r0,r1,i0)
+static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_divr_f(r0,r1,r2)         VDIV_F32(r0,r1,r2)
+#  define vfp_divi_f(r0,r1,i0)         _vfp_divi_f(_jit,r0,r1,i0)
+static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_divr_d(r0,r1,r2)         VDIV_F64(r0,r1,r2)
+#  define vfp_divi_d(r0,r1,i0)         _vfp_divi_d(_jit,r0,r1,i0)
+static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_cmp_f(r0,r1)             _vfp_cmp_f(_jit,r0,r1)
+static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_cmp_d(r0,r1)             _vfp_cmp_d(_jit,r0,r1)
+static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vcmp01_x(c0,c1,r0)           _vcmp01_x(_jit,c0,c1,r0)
+static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
+#  define vcmp01_f(c0,c1,r0,r1,r2)     _vcmp01_f(_jit,c0,c1,r0,r1,r2)
+static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp01_d(c0,c1,r0,r1,r2)     _vcmp01_d(_jit,c0,c1,r0,r1,r2)
+static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltr_f(r0,r1,r2)          vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
+#  define vfp_lti_f(r0,r1,i0)          _vfp_lti_f(_jit,r0,r1,i0)
+static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ltr_d(r0,r1,r2)          vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
+#  define vfp_lti_d(r0,r1,i0)          _vfp_lti_d(_jit,r0,r1,i0)
+static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ler_f(r0,r1,r2)          vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
+#  define vfp_lei_f(r0,r1,i0)          _vfp_lei_f(_jit,r0,r1,i0)
+static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ler_d(r0,r1,r2)          vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
+#  define vfp_lei_d(r0,r1,i0)          _vfp_lei_d(_jit,r0,r1,i0)
+static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_eqr_f(r0,r1,r2)          vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
+#  define vfp_eqi_f(r0,r1,i0)          _vfp_eqi_f(_jit,r0,r1,i0)
+static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_eqr_d(r0,r1,r2)          vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
+#  define vfp_eqi_d(r0,r1,i0)          _vfp_eqi_d(_jit,r0,r1,i0)
+static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ger_f(r0,r1,r2)          vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
+#  define vfp_gei_f(r0,r1,i0)          _vfp_gei_f(_jit,r0,r1,i0)
+static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ger_d(r0,r1,r2)          vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
+#  define vfp_gei_d(r0,r1,i0)          _vfp_gei_d(_jit,r0,r1,i0)
+static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_gtr_f(r0,r1,r2)          vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define vfp_gti_f(r0,r1,i0)          _vfp_gti_f(_jit,r0,r1,i0)
+static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_gtr_d(r0,r1,r2)          vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define vfp_gti_d(r0,r1,i0)          _vfp_gti_d(_jit,r0,r1,i0)
+static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ner_f(r0,r1,r2)          vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define vfp_nei_f(r0,r1,i0)          _vfp_nei_f(_jit,r0,r1,i0)
+static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ner_d(r0,r1,r2)          vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define vfp_nei_d(r0,r1,i0)          _vfp_nei_d(_jit,r0,r1,i0)
+static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vcmp10_x(c0,r0)              _vcmp10_x(_jit,c0,r0)
+static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
+#  define vcmp_10_f(c0,r0,r1,r2)       _vcmp_10_f(_jit,c0,r0,r1,r2)
+static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp_10_d(c0,r0,r1,r2)       _vcmp_10_d(_jit,c0,r0,r1,r2)
+static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unltr_f(r0,r1,r2)                vcmp_10_f(ARM_CC_GE,r0,r1,r2)
+#  define vfp_unlti_f(r0,r1,i0)                _vfp_unlti_f(_jit,r0,r1,i0)
+static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unltr_d(r0,r1,r2)                vcmp_10_d(ARM_CC_GE,r0,r1,r2)
+#  define vfp_unlti_d(r0,r1,i0)                _vfp_unlti_d(_jit,r0,r1,i0)
+static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_unler_f(r0,r1,r2)                vcmp_10_f(ARM_CC_GT,r0,r1,r2)
+#  define vfp_unlei_f(r0,r1,i0)                _vfp_unlei_f(_jit,r0,r1,i0)
+static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unler_d(r0,r1,r2)                vcmp_10_d(ARM_CC_GT,r0,r1,r2)
+#  define vfp_unlei_d(r0,r1,i0)                _vfp_unlei_d(_jit,r0,r1,i0)
+static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_uneqr_x(r0)              _vfp_uneqr_x(_jit,r0)
+static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
+#  define vfp_uneqr_f(r0,r1,r2)                _vfp_uneqr_f(_jit,r0,r1,r2)
+static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_uneqi_f(r0,r1,i0)                _vfp_uneqi_f(_jit,r0,r1,i0)
+static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_uneqr_d(r0,r1,r2)                _vfp_uneqr_d(_jit,r0,r1,r2)
+static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_uneqi_d(r0,r1,i0)                _vfp_uneqi_d(_jit,r0,r1,i0)
+static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vcmp_01_x(c0,r0)             _vcmp_01_x(_jit,c0,r0)
+static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
+#  define vcmp_01_f(c0,r0,r1,r2)       _vcmp_01_f(_jit,c0,r0,r1,r2)
+static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp_01_d(c0,r0,r1,r2)       _vcmp_01_d(_jit,c0,r0,r1,r2)
+static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unger_f(r0,r1,r2)                vcmp_01_f(ARM_CC_CS,r0,r1,r2)
+#  define vfp_ungei_f(r0,r1,i0)                _vfp_ungei_f(_jit,r0,r1,i0)
+static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unger_d(r0,r1,r2)                vcmp_01_d(ARM_CC_CS,r0,r1,r2)
+#  define vfp_ungei_d(r0,r1,i0)                _vfp_ungei_d(_jit,r0,r1,i0)
+static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ungtr_f(r0,r1,r2)                vcmp_01_f(ARM_CC_HI,r0,r1,r2)
+#  define vfp_ungti_f(r0,r1,i0)                _vfp_ungti_f(_jit,r0,r1,i0)
+static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ungtr_d(r0,r1,r2)                vcmp_01_d(ARM_CC_HI,r0,r1,r2)
+#  define vfp_ungti_d(r0,r1,i0)                _vfp_ungti_d(_jit,r0,r1,i0)
+static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ltgtr_x(r0)              _vfp_ltgtr_x(_jit,r0)
+static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
+#  define vfp_ltgtr_f(r0,r1,r2)                _vfp_ltgtr_f(_jit,r0,r1,r2)
+static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltgti_f(r0,r1,i0)                _vfp_ltgti_f(_jit,r0,r1,i0)
+static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ltgtr_d(r0,r1,r2)                _vfp_ltgtr_d(_jit,r0,r1,r2)
+static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltgti_d(r0,r1,i0)                _vfp_ltgti_d(_jit,r0,r1,i0)
+static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ordr_f(r0,r1,r2)         _vfp_ordr_f(_jit,r0,r1,r2)
+static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ordi_f(r0,r1,i0)         _vfp_ordi_f(_jit,r0,r1,i0)
+static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ordr_d(r0,r1,r2)         _vfp_ordr_d(_jit,r0,r1,r2)
+static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ordi_d(r0,r1,i0)         _vfp_ordi_d(_jit,r0,r1,i0)
+static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_unordr_f(r0,r1,r2)       _vfp_unordr_f(_jit,r0,r1,r2)
+static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unordi_f(r0,r1,i0)       _vfp_unordi_f(_jit,r0,r1,i0)
+static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unordr_d(r0,r1,r2)       _vfp_unordr_d(_jit,r0,r1,r2)
+static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unordi_d(r0,r1,i0)       _vfp_unordi_d(_jit,r0,r1,i0)
+static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* Helpers behind the vfp_b<cond>r_f / vfp_b<cond>r_d macros that follow:
+ * each takes an ARM condition code (cc) plus i0 and returns a jit_word_t.
+ * vbcmp_x is declared once here and shared by the _f and _d variants. */
+#  define vbcmp_x(cc,i0)               _vbcmp_x(_jit,cc,i0)
+static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
+#  define vbcmp_f(cc,i0,r0,r1)         _vbcmp_f(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vbcmp_d(cc,i0,r0,r1)         _vbcmp_d(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltr_f(i0,r0,r1)         vbcmp_f(ARM_CC_MI,i0,r0,r1)
+#  define vfp_blti_f(i0,r0,i1)         _vfp_blti_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bltr_d(i0,r0,r1)         vbcmp_d(ARM_CC_MI,i0,r0,r1)
+static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_blti_d(i0,r0,i1)         _vfp_blti_d(_jit,i0,r0,i1)
+#  define vfp_bler_f(i0,r0,r1)         vbcmp_f(ARM_CC_LS,i0,r0,r1)
+#  define vfp_blei_f(i0,r0,i1)         _vfp_blei_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bler_d(i0,r0,r1)         vbcmp_d(ARM_CC_LS,i0,r0,r1)
+#  define vfp_blei_d(i0,r0,i1)         _vfp_blei_d(_jit,i0,r0,i1)
+static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_beqr_f(i0,r0,r1)         vbcmp_f(ARM_CC_EQ,i0,r0,r1)
+#  define vfp_beqi_f(i0,r0,i1)         _vfp_beqi_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_beqr_d(i0,r0,r1)         vbcmp_d(ARM_CC_EQ,i0,r0,r1)
+#  define vfp_beqi_d(i0,r0,i1)         _vfp_beqi_d(_jit,i0,r0,i1)
+static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bger_f(i0,r0,r1)         vbcmp_f(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bgei_f(i0,r0,i1)         _vfp_bgei_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bger_d(i0,r0,r1)         vbcmp_d(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bgei_d(i0,r0,i1)         _vfp_bgei_d(_jit,i0,r0,i1)
+static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bgtr_f(i0,r0,r1)         vbcmp_f(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bgti_f(i0,r0,i1)         _vfp_bgti_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bgtr_d(i0,r0,r1)         vbcmp_d(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bgti_d(i0,r0,i1)         _vfp_bgti_d(_jit,i0,r0,i1)
+static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bner_f(i0,r0,r1)         vbcmp_f(ARM_CC_NE,i0,r0,r1)
+#  define vfp_bnei_f(i0,r0,i1)         _vfp_bnei_f(_jit,i0,r0,i1)
+static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bner_d(i0,r0,r1)         vbcmp_d(ARM_CC_NE,i0,r0,r1)
+#  define vfp_bnei_d(i0,r0,i1)         _vfp_bnei_d(_jit,i0,r0,i1)
+static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vbncmp_x(cc,i0)              _vbncmp_x(_jit,cc,i0)
+static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
+#  define vbncmp_f(cc,i0,r0,r1)                _vbncmp_f(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vbncmp_d(cc,i0,r0,r1)                _vbncmp_d(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bunltr_f(i0,r0,r1)       vbncmp_f(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bunlti_f(i0,r0,i1)       _vfp_bunlti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunltr_d(i0,r0,r1)       vbncmp_d(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bunlti_d(i0,r0,i1)       _vfp_bunlti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunler_f(i0,r0,r1)       vbncmp_f(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bunlei_f(i0,r0,i1)       _vfp_bunlei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunler_d(i0,r0,r1)       vbncmp_d(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bunlei_d(i0,r0,i1)       _vfp_bunlei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_buneqr_x(i0)             _vfp_buneqr_x(_jit,i0)
+static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
+#  define vfp_buneqr_f(i0,r0,r1)       _vfp_buneqr_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_buneqi_f(i0,r0,i1)       _vfp_buneqi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_buneqr_d(i0,r0,r1)       _vfp_buneqr_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_buneqi_d(i0,r0,i1)       _vfp_buneqi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunger_x(i0)             _vfp_bunger_x(_jit,i0)
+static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
+#  define vfp_bunger_f(i0,r0,r1)       _vfp_bunger_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bungei_f(i0,r0,i1)       _vfp_bungei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunger_d(i0,r0,r1)       _vfp_bunger_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bungei_d(i0,r0,i1)       _vfp_bungei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bungtr_f(i0,r0,r1)       vbcmp_f(ARM_CC_HI,i0,r0,r1)
+#  define vfp_bungti_f(i0,r0,i1)       _vfp_bungti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bungtr_d(i0,r0,r1)       vbcmp_d(ARM_CC_HI,i0,r0,r1)
+#  define vfp_bungti_d(i0,r0,i1)       _vfp_bungti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bltgtr_x(i0)             _vfp_bltgtr_x(_jit,i0)
+static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
+#  define vfp_bltgtr_f(i0,r0,r1)       _vfp_bltgtr_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltgti_f(i0,r0,i1)       _vfp_bltgti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bltgtr_d(i0,r0,r1)       _vfp_bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltgti_d(i0,r0,i1)       _vfp_bltgti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bordr_f(i0,r0,r1)                vbcmp_f(ARM_CC_VC,i0,r0,r1)
+#  define vfp_bordi_f(i0,r0,i1)                _vfp_bordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bordr_d(i0,r0,r1)                vbcmp_d(ARM_CC_VC,i0,r0,r1)
+#  define vfp_bordi_d(i0,r0,i1)                _vfp_bordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunordr_f(i0,r0,r1)      vbcmp_f(ARM_CC_VS,i0,r0,r1)
+#  define vfp_bunordi_f(i0,r0,i1)      _vfp_bunordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunordr_d(i0,r0,r1)      vbcmp_d(ARM_CC_VS,i0,r0,r1)
+#  define vfp_bunordi_d(i0,r0,i1)      _vfp_bunordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_ldr_f(r0,r1)             VLDR_F32(r0,r1,0)
+#  define vfp_ldr_d(r0,r1)             VLDR_F64(r0,r1,0)
+#  define vfp_ldi_f(r0,i0)             _vfp_ldi_f(_jit,r0,i0)
+static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define vfp_ldi_d(r0,i0)             _vfp_ldi_d(_jit,r0,i0)
+static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define vfp_ldxr_f(r0,r1,r2)         _vfp_ldxr_f(_jit,r0,r1,r2)
+static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ldxr_d(r0,r1,r2)         _vfp_ldxr_d(_jit,r0,r1,r2)
+static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ldxi_f(r0,r1,i0)         _vfp_ldxi_f(_jit,r0,r1,i0)
+static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define vfp_ldxi_d(r0,r1,i0)         _vfp_ldxi_d(_jit,r0,r1,i0)
+static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define vfp_str_f(r0,r1)             VSTR_F32(r1,r0,0)
+#  define vfp_str_d(r0,r1)             VSTR_F64(r1,r0,0)
+#  define vfp_sti_f(i0,r0)             _vfp_sti_f(_jit,i0,r0)
+static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define vfp_sti_d(i0,r0)             _vfp_sti_d(_jit,i0,r0)
+static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define vfp_stxr_f(r0,r1,r2)         _vfp_stxr_f(_jit,r0,r1,r2)
+static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxr_d(r0,r1,r2)         _vfp_stxr_d(_jit,r0,r1,r2)
+static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxi_f(i0,r0,r1)         _vfp_stxi_f(_jit,i0,r0,r1)
+static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxi_d(i0,r0,r1)         _vfp_stxi_d(_jit,i0,r0,r1)
+static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_vaarg_d(r0, r1)          _vfp_vaarg_d(_jit, r0, r1)
+static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/* Register field value used by the emitters below: (rn - 16) / 2, which
+ * drops the low bit of rn.  Callers inspect that low bit themselves, either
+ * asserting it is clear or turning it into ARM_V_D / ARM_V_N / ARM_V_M. */
+#  define vfp_regno(rn)                (((rn) - 16) >> 1)
+
+/*
+ * Try to express the 64-bit constant hi:lo as a NEON "modified immediate"
+ * instruction word.
+ *
+ *   mov  non-zero when the caller wants a plain move; when zero, bit 0x100
+ *        is added to the mode field of the I32/I16 encodings
+ *   inv  non-zero to build on ARM_VMVNI instead of ARM_VMOVI
+ *   lo   low 32 bits of the constant
+ *   hi   high 32 bits of the constant
+ *
+ * Returns the instruction word with bits 12-15 (destination register) left
+ * clear for _vodi()/_voqi() to fill in, or -1 when the constant does not
+ * match any supported pattern and the caller must use another approach.
+ */
+static int
+encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
+{
+    int                code, mode, imm, shift;
+    /* unsigned so that shifting the byte mask up to (and past) bit 31
+     * is well defined */
+    unsigned           mask, bits;
+
+    if (hi != lo) {
+       if (mov && !inv) {
+           /* (I64)
+            *  aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
+            *  every byte must be either 0x00 or 0xff
+            */
+           for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+               bits = lo & mask;
+               if (bits != mask && bits != 0)
+                   goto fail;
+               bits = hi & mask;
+               if (bits != mask && bits != 0)
+                   goto fail;
+           }
+           mode = 0xe20;
+           /* one immediate bit per byte, most significant byte first */
+           imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
+                  ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
+                  ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
+                  ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
+           goto success;
+       }
+       goto fail;
+    }
+    /*  (I32)
+     *  00000000 00000000 00000000 abcdefgh
+     *  00000000 00000000 abcdefgh 00000000
+     *  00000000 abcdefgh 00000000 00000000
+     *  abcdefgh 00000000 00000000 00000000 */
+    for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+       if ((lo & mask) == lo) {
+           imm = lo >> (mode << 3);
+           mode <<= 9;
+           goto success;
+       }
+    }
+    /*  (I16)
+     *  00000000 abcdefgh 00000000 abcdefgh
+     *  abcdefgh 00000000 abcdefgh 00000000 */
+    for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
+       bits = lo & mask;
+       /* the whole word must be the byte replicated in both halfwords;
+        * comparing only the two byte positions would accept values with
+        * stray bits in the other two bytes */
+       if (bits && lo == (bits | (bits << 16))) {
+           imm = bits >> (mode << 3);
+           mode = 0x800 | (mode << 9);
+           goto success;
+       }
+    }
+    if (mov) {
+       /*  (I32)
+        *  00000000 00000000 abcdefgh 11111111
+        *  00000000 abcdefgh 11111111 11111111 */
+       for (mode = 0, mask = 0xff; mode < 2;
+            mask = (mask << 8) | 0xff, mode++) {
+           /* bit position of "abcdefgh": 8, then 16 */
+           shift = 8 + (mode << 3);
+           if ((lo & mask) != mask)
+               continue;
+           /* nothing may be set above the "abcdefgh" byte */
+           if (lo >> (shift + 8))
+               continue;
+           imm = lo >> shift;
+           /* "abcdefgh" must not be zero */
+           if (imm) {
+               mode = 0xc00 | (mode << 8);
+               goto success;
+           }
+       }
+       if (!inv) {
+           /* (F32)
+            *  aBbbbbbc defgh000 00000000 00000000
+            *  from the ARM Architecture Reference Manual:
+            *  In this entry, B = NOT(b). The bit pattern represents the
+            *  floating-point number (-1)^s* 2^exp * mantissa, where
+            *  S = UInt(a),
+            *  exp = UInt(NOT(b):c:d)-3 and
+            *  mantissa = (16+UInt(e:f:g:h))/16. */
+           if ((lo & 0x7ffff) == 0 &&
+               (((lo & 0x7e000000) == 0x3e000000) ||
+                ((lo & 0x7e000000) == 0x40000000))) {
+               mode = 0xf00;
+               imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
+               goto success;
+           }
+       }
+    }
+
+fail:
+    /* need another approach (load from memory, move from arm register, etc) */
+    return (-1);
+
+success:
+    code = inv ? ARM_VMVNI : ARM_VMOVI;
+    switch ((mode & 0xf00) >> 8) {
+       case 0x0:       case 0x2:       case 0x4:       case 0x6:
+       case 0x8:       case 0xa:
+           if (inv)    mode |= 0x20;
+           if (!mov)   mode |= 0x100;
+           break;
+       case 0x1:       case 0x3:       case 0x5:       case 0x7:
+           /* should actually not reach here */
+           assert(!inv);
+           /* fallthrough */
+       case 0x9:       case 0xb:
+           assert(!mov);
+           break;
+       case 0xc:       case 0xd:
+           /* only produced by the "trailing ones" search, which runs for
+            * moves only; the inverted form uses the same mode values */
+           assert(mov);
+           if (inv)    mode |= 0x20;
+           break;
+       case 0xe:
+           assert(mode & 0x20);
+           assert(mov && !inv);
+           break;
+       default:
+           assert(!(mode & 0x20));
+           break;
+    }
+    /* scatter the 8 immediate bits: bit 7 -> bit 24, bits 6-4 -> bits 18-16,
+     * bits 3-0 stay in place */
+    imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
+    code |= mode | imm;
+    if (jit_thumb_p()) {
+       if (code & 0x1000000)
+           code |= 0xff000000;
+       else
+           code |= 0xef000000;
+    }
+    else
+       code |= ARM_CC_NV;
+    return (code);
+}
+
+/*
+ * Emit the pre-encoded immediate instruction "oi" (see encode_vfp_double)
+ * with destination r0.  "oi" must have bits 12-15 clear and r0 must be an
+ * even register number.
+ */
+static void
+_vodi(jit_state_t *_jit, int oi, int r0)
+{
+    jit_thumb_t        insn;
+    int                vd;
+
+    assert(!(oi  & 0x0000f000));
+    assert(!(r0 & 1));
+    vd = vfp_regno(r0);
+    /* destination register field lives at bits 12-15 */
+    insn.i = oi | (_u4(vd) << 12);
+    if (jit_thumb_p()) {
+       /* thumb mode: hand both s[] parts to iss() */
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Same as _vodi(), but r0 must be a multiple of four (the VIMMQ macro
+ * passes "oi" with ARM_V_Q already or'ed in).
+ */
+static void
+_voqi(jit_state_t *_jit, int oi, int r0)
+{
+    jit_thumb_t        insn;
+    int                vd;
+
+    assert(!(oi  & 0x0000f000));
+    assert(!(r0 & 3));
+    vd = vfp_regno(r0);
+    /* destination register field lives at bits 12-15 */
+    insn.i = oi | (_u4(vd) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a two-operand instruction: condition "cc", opcode "o", r0 in bits
+ * 12-15 and r1 in bits 0-3.  An odd r0 adds ARM_V_D to the opcode and an
+ * odd r1 adds ARM_V_M.
+ */
+static void
+_cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vd, vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    /* the low bit of each register number travels in the opcode */
+    if (r0 & 1)
+       o |= ARM_V_D;
+    if (r1 & 1)
+       o |= ARM_V_M;
+    vd = vfp_regno(r0);
+    vm = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vd) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a two-operand instruction: condition "cc", opcode "o", r0 in bits
+ * 12-15 and r1 in bits 0-3.  Both register numbers must be even.
+ */
+static void
+_cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vd, vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 1) && !(r1 & 1));
+    vd = vfp_regno(r0);
+    vm = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vd) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a two-operand instruction: condition "cc", opcode "o", r0 in bits
+ * 12-15 and r1 in bits 0-3.  r0 must be a multiple of four, r1 even.
+ */
+static void
+_cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vd, vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 3) && !(r1 & 1));
+    vd = vfp_regno(r0);
+    vm = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vd) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a two-operand instruction: condition "cc", opcode "o", r0 in bits
+ * 12-15 and r1 in bits 0-3.  Both register numbers must be multiples of
+ * four.
+ */
+static void
+_cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vd, vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 3) && !(r1 & 3));
+    vd = vfp_regno(r0);
+    vm = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vd) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit an instruction with condition "cc" and opcode "o" whose two
+ * register numbers are used as given (no vfp_regno() translation):
+ * r1 goes to bits 16-19 and r0 to bits 12-15.
+ */
+static void
+_cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit an instruction with condition "cc" and opcode "o": r0 is used as
+ * given in bits 12-15, r1 is translated with vfp_regno() into bits 16-19.
+ * An odd r1 adds ARM_V_N to the opcode.
+ */
+static void
+_cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    if (r1 & 1)
+       o |= ARM_V_N;
+    vn = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vn) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit an instruction with condition "cc" and opcode "o": r0 is used as
+ * given in bits 12-15, r1 is translated with vfp_regno() into bits 16-19.
+ * An odd r1 adds ARM_V_M to the opcode.
+ */
+static void
+_cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    if (r1 & 1)
+       o |= ARM_V_M;
+    vn = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vn) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit an instruction with condition "cc" and opcode "o": r0 is used as
+ * given in bits 12-15, r1 is translated with vfp_regno() into bits 16-19.
+ * An odd r1 adds ARM_V_I32 to the opcode.
+ */
+static void
+_cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+    int                vn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    /* use same bit pattern, to set opc1... */
+    if (r1 & 1)
+       o |= ARM_V_I32;
+    vn = vfp_regno(r1);
+    insn.i = cc | o | (_u4(vn) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a three-register instruction with condition "cc" and opcode "o":
+ * r1 (bits 16-19) and r0 (bits 12-15) are used as given, while r2 must be
+ * even and is translated with vfp_regno() into bits 0-3.
+ */
+static void
+_cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    int                vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r2 & 1));
+    vm = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a three-operand instruction with condition "cc" and opcode "o":
+ * r1 goes to bits 16-19, r0 to bits 12-15 and r2 to bits 0-3, all after
+ * vfp_regno() translation.  Odd r0/r1/r2 add ARM_V_D/ARM_V_N/ARM_V_M to
+ * the opcode respectively.
+ */
+static void
+_cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    int                vd, vn, vm;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    /* the low bit of each register number travels in the opcode */
+    if (r0 & 1)
+       o |= ARM_V_D;
+    if (r1 & 1)
+       o |= ARM_V_N;
+    if (r2 & 1)
+       o |= ARM_V_M;
+    vd = vfp_regno(r0);
+    vn = vfp_regno(r1);
+    vm = vfp_regno(r2);
+    insn.i = cc | o | (_u4(vn) << 16) | (_u4(vd) << 12) | _u4(vm);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Three-register form requiring all of r0, r1 and r2 to be even; each
+ * is reduced with vfp_regno() and packed at bit 12, bit 16 and bit 0.
+ */
+static void
+_cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Three-register form requiring r0 to be a multiple of 4 and r1, r2 to
+ * be even; each is reduced with vfp_regno() and packed into the word.
+ */
+static void
+_cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Three-register form requiring r0 and r1 to be multiples of 4 and r2
+ * to be even; each is reduced with vfp_regno() and packed into the word.
+ */
+static void
+_cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Three-register form requiring r0, r1 and r2 to all be multiples of
+ * 4; each is reduced with vfp_regno() and packed into the word.
+ */
+static void
+_cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Load/store form: r1 goes at bit 16, r0 (reduced with vfp_regno()) at
+ * bit 12 and i0 in the low 8 bits.  An odd r0 sets ARM_V_D and is only
+ * accepted when ARM_V_F64 is not set in the opcode.
+ */
+static void
+_cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
+{
+    jit_thumb_t        insn;
+    /* i0 << 2 is byte offset */
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff0ff));
+    if (r0 & 1) {
+       assert(!(o & ARM_V_F64));
+       o |= ARM_V_D;
+    }
+    r0 = vfp_regno(r0);
+    insn.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Register-list form: r0 goes at bit 16, r1 (reduced with vfp_regno())
+ * at bit 12 and the count i0 in the low 8 bits.  The count is doubled
+ * when ARM_V_F64 is set, and must end up non-zero, even and within 32
+ * registers starting at r1.
+ */
+static void
+_cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff0ff));
+    /* save i0 double precision registers */
+    if (o & ARM_V_F64)
+       i0 <<= 1;
+    /* if (r1 & 1) cc & ARM_V_F64 must be false */
+    if (r1 & 1)
+       o |= ARM_V_D;
+    r1 = vfp_regno(r1);
+    assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
+    insn.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Copy a single-precision value from r1 to r0, choosing the move by
+ * whether each side satisfies jit_fpr_p().  Nothing is emitted when
+ * both registers are the same.
+ */
+static void
+_vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (!jit_fpr_p(r1)) {
+       /* source is not an FPR */
+       if (jit_fpr_p(r0))
+           VMOV_S_A(r0, r1);
+       else
+           movr(r0, r1);
+    }
+    else if (jit_fpr_p(r0))
+       VMOV_F32(r0, r1);
+    else
+       VMOV_A_S(r0, r1);
+}
+
+/*
+ * Copy a double-precision value from r1 to r0.  A side that is not an
+ * FPR is handled as the register pair (r, r + 1).  Nothing is emitted
+ * when both registers are the same.
+ */
+static void
+_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (!jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0))
+           VMOV_D_AA(r0, r1, r1 + 1);
+       else {
+           /* minor consistency check */
+           assert(r0 + 1 != r1 && r0 -1 != r1);
+           movr(r0, r1);
+           movr(r0 + 1, r1 + 1);
+       }
+    }
+    else if (jit_fpr_p(r0))
+       VMOV_F64(r0, r1);
+    else
+       VMOV_AA_D(r0, r0 + 1, r1);
+}
+
+/*
+ * Load the single-precision constant i0 into r0.  A non-FPR target
+ * receives the raw bit pattern through movi().  An FPR target uses
+ * VIMM when encode_vfp_double() accepts the pattern (or its
+ * complement); otherwise the bits go through a temporary GPR.
+ */
+static void
+_vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+    jit_int32_t                tmp;
+    jit_int32_t                code;
+    bits.f = i0;
+    if (!jit_fpr_p(r0)) {
+       movi(r0, bits.i);
+       return;
+    }
+    /* float arguments are packed, for others,
+     * lightning only address even registers */
+    if (!(r0 & 1) && (r0 - 16) >= 0 &&
+       ((code = encode_vfp_double(1, 0, bits.i, bits.i)) != -1 ||
+        (code = encode_vfp_double(1, 1, ~bits.i, ~bits.i)) != -1)) {
+       VIMM(code, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i);
+    VMOV_S_A(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the double-precision constant i0 into r0.  A non-FPR target
+ * receives the two 32-bit halves in r0 and r0 + 1.  An FPR target uses
+ * VIMM when encode_vfp_double() accepts the halves (or their
+ * complements); otherwise the halves go through two temporary GPRs.
+ */
+static void
+_vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } bits;
+    jit_int32_t                code;
+    jit_int32_t                lo, hi;
+    bits.d = i0;
+    if (!jit_fpr_p(r0)) {
+       movi(r0, bits.i[0]);
+       movi(r0 + 1, bits.i[1]);
+       return;
+    }
+    if ((code = encode_vfp_double(1, 0, bits.i[0], bits.i[1])) != -1 ||
+       (code = encode_vfp_double(1, 1, ~bits.i[0], ~bits.i[1])) != -1) {
+       VIMM(code, r0);
+       return;
+    }
+    lo = jit_get_reg(jit_class_gpr);
+    hi = jit_get_reg(jit_class_gpr);
+    movi(rn(lo), bits.i[0]);
+    movi(rn(hi), bits.i[1]);
+    VMOV_D_AA(r0, rn(lo), rn(hi));
+    jit_unget_reg(hi);
+    jit_unget_reg(lo);
+}
+
+/*
+ * Emit a VCVT_F64_F32 conversion from r1 into r0.  Operands that are
+ * not FPRs are staged through a temporary FPR obtained with
+ * jit_get_reg(jit_class_fpr).
+ *
+ * NOTE(review): every GPR/FPR transfer here uses the single-precision
+ * forms (VMOV_S_A, VMOV_A_S, VMOV_F32) on both the input and the output
+ * side of the conversion, while _vfp_extr_f_d uses the double forms on
+ * both sides.  A precision-changing conversion would be expected to mix
+ * the two -- confirm against the VCVT/VMOV macro definitions.
+ */
+static void
+_vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0))
+           VCVT_F64_F32(r0, r1);
+       else {
+           /* convert in a scratch FPR, then copy out to r0 */
+           reg = jit_get_reg(jit_class_fpr);
+           VCVT_F64_F32(rn(reg), r1);
+           VMOV_A_S(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* source is not an FPR: bring it into a scratch FPR first */
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_S_A(rn(reg), r1);
+       VCVT_F64_F32(rn(reg), rn(reg));
+       if (jit_fpr_p(r0))
+           VMOV_F32(r0, rn(reg));
+       else
+           VMOV_A_S(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Emit a VCVT_F32_F64 conversion from r1 into r0.  Operands that are
+ * not FPRs are staged through a temporary FPR obtained with
+ * jit_get_reg(jit_class_fpr); GPR operands are addressed as the pair
+ * (r, r + 1).
+ *
+ * NOTE(review): every GPR/FPR transfer here uses the double-precision
+ * forms (VMOV_D_AA, VMOV_AA_D, VMOV_F64) on both the input and the
+ * output side of the conversion, while _vfp_extr_d_f uses the single
+ * forms on both sides.  A precision-changing conversion would be
+ * expected to mix the two -- confirm against the VCVT/VMOV macro
+ * definitions.
+ */
+static void
+_vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0))
+           VCVT_F32_F64(r0, r1);
+       else {
+           /* convert in a scratch FPR, then copy out to r0/r0+1 */
+           reg = jit_get_reg(jit_class_fpr);
+           VCVT_F32_F64(rn(reg), r1);
+           VMOV_AA_D(r0, r0 + 1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* source is not an FPR: bring r1/r1+1 into a scratch FPR first */
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_D_AA(rn(reg), r1, r1 + 1);
+       VCVT_F32_F64(rn(reg), rn(reg));
+       if (jit_fpr_p(r0))
+           VMOV_F64(r0, rn(reg));
+       else
+           VMOV_AA_D(r0, r0 + 1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Convert the 32-bit integer in r1 to single precision in r0
+ * (VMOV_V_I32 followed by VCVT_F32_S32).  When r0 is not an FPR the
+ * conversion is done in a temporary FPR and the result copied out.
+ */
+static void
+_vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       VMOV_V_I32(r0, r1);
+       VCVT_F32_S32(r0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_V_I32(rn(reg), r1);
+       VCVT_F32_S32(rn(reg), rn(reg));
+       /* r0 is not an FPR in this branch, so use the FPR -> GPR
+        * single move (as _vfp_movr_f does) instead of the
+        * FPR -> FPR VMOV_F32 */
+       VMOV_A_S(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Convert the 32-bit integer in r1 to double precision in r0
+ * (VMOV_V_I32 followed by VCVT_F64_S32).  When r0 is not an FPR the
+ * conversion is done in a temporary FPR and the result copied out to
+ * the pair (r0, r0 + 1).
+ */
+static void
+_vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       VMOV_V_I32(r0, r1);
+       VCVT_F64_S32(r0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_V_I32(rn(reg), r1);
+       VCVT_F64_S32(rn(reg), rn(reg));
+       /* r0 is not an FPR in this branch, so use the FPR -> GPR pair
+        * move (as _vfp_movr_d does) instead of the FPR -> FPR
+        * VMOV_F64 */
+       VMOV_AA_D(r0, r0 + 1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Truncate the single-precision value in r1 to a 32-bit integer in r0.
+ * The conversion result is produced in a temporary FPR and then moved
+ * to r0 with VMOV_A_S32; a non-FPR source is first loaded into that
+ * temporary.
+ */
+static void
+_vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_fpr);
+    if (!jit_fpr_p(r1)) {
+       VMOV_V_I32(rn(tmp), r1);
+       VCVT_S32_F32(rn(tmp), rn(tmp));
+    }
+    else
+       VCVT_S32_F32(rn(tmp), r1);
+    VMOV_A_S32(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Truncate the double-precision value in r1 to a 32-bit integer in r0.
+ * The conversion result is produced in a temporary FPR and then moved
+ * to r0 with VMOV_A_S32; a non-FPR source is first loaded into that
+ * temporary.
+ */
+static void
+_vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    if (jit_fpr_p(r1))
+       VCVT_S32_F64(rn(reg), r1);
+    else {
+       /* a double held outside the FPRs occupies the pair
+        * (r1, r1 + 1); load both words as _vfp_cmp_d does, not
+        * only the first one */
+       VMOV_D_AA(rn(reg), r1, r1 + 1);
+       VCVT_S32_F64(rn(reg), rn(reg));
+    }
+    VMOV_A_S32(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* fopi(name): define _vfp_<name>i_f(), which loads the float immediate
+ * i0 into a temporary FPR and forwards to vfp_<name>r_f(). */
+#  define fopi(name)                                                   \
+static void                                                            \
+_vfp_##name##i_f(jit_state_t *_jit,                                    \
+                jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)      \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    vfp_movi_f(rn(reg), i0);                                           \
+    vfp_##name##r_f(r0, r1, rn(reg));                                  \
+    jit_unget_reg(reg);                                                        \
+}
+/* dopi(name): double-precision counterpart of fopi(), defining
+ * _vfp_<name>i_d() on top of vfp_<name>r_d(). */
+#  define dopi(name)                                                   \
+static void                                                            \
+_vfp_##name##i_d(jit_state_t *_jit,                                    \
+                jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)      \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    vfp_movi_d(rn(reg), i0);                                           \
+    vfp_##name##r_d(r0, r1, rn(reg));                                  \
+    jit_unget_reg(reg);                                                        \
+}
+/* fbopi(name): define _vfp_b<name>i_f(), which loads the float
+ * immediate i0 into a temporary FPR requested with jit_class_nospill,
+ * forwards to vfp_b<name>r_f() and returns the word it yields. */
+#  define fbopi(name)                                                  \
+static jit_word_t                                                      \
+_vfp_b##name##i_f(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)     \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
+                                         jit_class_nospill);           \
+    vfp_movi_f(rn(reg), i0);                                           \
+    word = vfp_b##name##r_f(r0, r1, rn(reg));                          \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+/* dbopi(name): double-precision counterpart of fbopi(), defining
+ * _vfp_b<name>i_d() on top of vfp_b<name>r_d(). */
+#  define dbopi(name)                                                  \
+static jit_word_t                                                      \
+_vfp_b##name##i_d(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)     \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
+                                         jit_class_nospill);           \
+    vfp_movi_d(rn(reg), i0);                                           \
+    word = vfp_b##name##r_d(r0, r1, rn(reg));                          \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+
+/* Immediate-operand variants of the arithmetic operations
+ * (_vfp_addi_f, _vfp_addi_d, ...), generated by fopi()/dopi(). */
+fopi(add)
+dopi(add)
+fopi(sub)
+fopi(rsb)
+dopi(rsb)
+dopi(sub)
+fopi(mul)
+dopi(mul)
+fopi(div)
+dopi(div)
+
+/*
+ * Emit a single-precision compare (VCMP_F32) of r0 against r1.  Any
+ * operand that is not an FPR is first copied with VMOV_S_A into a
+ * temporary FPR, which is released after the compare (second operand's
+ * temporary first).
+ */
+static void
+_vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                lhs, rhs;
+    jit_int32_t                tmp0, tmp1;
+    lhs = r0;
+    rhs = r1;
+    if (!jit_fpr_p(r0)) {
+       tmp0 = jit_get_reg(jit_class_fpr);
+       VMOV_S_A(rn(tmp0), r0);
+       lhs = rn(tmp0);
+    }
+    if (!jit_fpr_p(r1)) {
+       tmp1 = jit_get_reg(jit_class_fpr);
+       VMOV_S_A(rn(tmp1), r1);
+       rhs = rn(tmp1);
+    }
+    VCMP_F32(lhs, rhs);
+    /* release in reverse order of acquisition */
+    if (!jit_fpr_p(r1))
+       jit_unget_reg(tmp1);
+    if (!jit_fpr_p(r0))
+       jit_unget_reg(tmp0);
+}
+
+/*
+ * Emit a double-precision compare (VCMP_F64) of r0 against r1.  Any
+ * operand that is not an FPR is first copied, as the pair (r, r + 1),
+ * with VMOV_D_AA into a temporary FPR, which is released after the
+ * compare (second operand's temporary first).
+ */
+static void
+_vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                lhs, rhs;
+    jit_int32_t                tmp0, tmp1;
+    lhs = r0;
+    rhs = r1;
+    if (!jit_fpr_p(r0)) {
+       tmp0 = jit_get_reg(jit_class_fpr);
+       VMOV_D_AA(rn(tmp0), r0, r0 + 1);
+       lhs = rn(tmp0);
+    }
+    if (!jit_fpr_p(r1)) {
+       tmp1 = jit_get_reg(jit_class_fpr);
+       VMOV_D_AA(rn(tmp1), r1, r1 + 1);
+       rhs = rn(tmp1);
+    }
+    VCMP_F64(lhs, rhs);
+    /* release in reverse order of acquisition */
+    if (!jit_fpr_p(r1))
+       jit_unget_reg(tmp1);
+    if (!jit_fpr_p(r0))
+       jit_unget_reg(tmp0);
+}
+
+/*
+ * After a compare, set r0 to 0 under condition c0 and to 1 under
+ * condition c1.  VMRS(_R15_REGNO) is issued first (presumably to copy
+ * the VFP status flags where the conditional moves can test them --
+ * confirm against the VMRS macro).
+ */
+static void
+_vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       /* the two conditions differ only in the lowest condition bit:
+        * a single ITE block covers both moves */
+       if ((c0 ^ c1) >> 28 == 1) {
+           ITE(c0);
+           if (r0 < 8) {
+               T1_MOVI(r0, 0);
+               T1_MOVI(r0, 1);
+           }
+           else {
+               T2_MOVI(r0, 0);
+               T2_MOVI(r0, 1);
+           }
+       }
+       else {
+           /* otherwise each move gets its own IT block */
+           if (r0 < 8) {
+               IT(c0);
+               T1_MOVI(r0, 0);
+               IT(c1);
+               T1_MOVI(r0, 1);
+           }
+           else {
+               IT(c0);
+               T2_MOVI(r0, 0);
+               IT(c1);
+               T2_MOVI(r0, 1);
+           }
+       }
+    }
+    else {
+       CC_MOVI(c0, r0, 0);
+       CC_MOVI(c1, r0, 1);
+    }
+}
+
+/* Single-precision compare of r1 with r2, then r0 = 0 under c0 and
+ * r0 = 1 under c1 (see _vcmp01_x). */
+static void
+_vcmp01_f(jit_state_t *_jit, int c0, int c1,
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp01_x(c0, c1, r0);
+}
+
+/* Double-precision compare of r1 with r2, then r0 = 0 under c0 and
+ * r0 = 1 under c1 (see _vcmp01_x). */
+static void
+_vcmp01_d(jit_state_t *_jit, int c0, int c1,
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp01_x(c0, c1, r0);
+}
+
+/*
+ * After a compare, set r0 to 1 unconditionally and then to 0 when
+ * condition cc holds.  In Thumb mode the unconditional move is emitted
+ * before VMRS; in ARM mode it is emitted after it.
+ */
+static void
+_vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
+{
+    if (jit_thumb_p()) {
+       /* r0 < 8 selects the T1 move form, otherwise T2 */
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       VMRS(_R15_REGNO);
+       MOVI(r0, 1);
+       CC_MOVI(cc, r0, 0);
+    }
+}
+/* Single-precision compare of r1 with r2, then r0 = 1 unless cc holds,
+ * in which case r0 = 0 (see _vcmp10_x). */
+static void
+_vcmp_10_f(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp10_x(cc, r0);
+}
+
+/* Double-precision compare of r1 with r2, then r0 = 1 unless cc holds,
+ * in which case r0 = 0 (see _vcmp10_x). */
+static void
+_vcmp_10_d(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp10_x(cc, r0);
+}
+
+/* Immediate-operand variants of the comparison operations
+ * (_vfp_lti_f, _vfp_lti_d, ...), generated by fopi()/dopi(). */
+fopi(lt)
+dopi(lt)
+fopi(le)
+dopi(le)
+fopi(eq)
+dopi(eq)
+fopi(ge)
+dopi(ge)
+fopi(gt)
+dopi(gt)
+fopi(ne)
+dopi(ne)
+fopi(unlt)
+dopi(unlt)
+fopi(unle)
+dopi(unle)
+
+/*
+ * After a compare, set r0 to 0 under ARM_CC_NE, to 1 otherwise, and
+ * finally to 1 under ARM_CC_VS (so the VS case overrides the NE
+ * result).
+ */
+static void
+_vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       /* ITE covers the first two moves; the VS move has its own IT */
+       ITE(ARM_CC_NE);
+       if (r0 < 8) {
+           T1_MOVI(r0, 0);
+           T1_MOVI(r0, 1);
+           IT(ARM_CC_VS);
+           T1_MOVI(r0, 1);
+       }
+       else {
+           T2_MOVI(r0, 0);
+           T2_MOVI(r0, 1);
+           IT(ARM_CC_VS);
+           T2_MOVI(r0, 1);
+       }
+    }
+    else {
+       CC_MOVI(ARM_CC_NE, r0, 0);
+       CC_MOVI(ARM_CC_EQ, r0, 1);
+       CC_MOVI(ARM_CC_VS, r0, 1);
+    }
+}
+
+/* Single-precision compare of r1 with r2, result materialized in r0 by
+ * _vfp_uneqr_x. */
+static void
+_vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vfp_uneqr_x(r0);
+}
+
+/* immediate-operand variant: _vfp_uneqi_f */
+fopi(uneq)
+
+/* Double-precision compare of r1 with r2, result materialized in r0 by
+ * _vfp_uneqr_x. */
+static void
+_vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vfp_uneqr_x(r0);
+}
+
+/* immediate-operand variant: _vfp_uneqi_d */
+dopi(uneq)
+
+/*
+ * After a compare, set r0 to 0 unconditionally and then to 1 when
+ * condition cc holds.  The unconditional move is emitted before VMRS
+ * in both Thumb and ARM modes.
+ */
+static void
+_vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
+{
+    if (jit_thumb_p()) {
+       /* r0 < 8 selects the T1 move form, otherwise T2 */
+       if (r0 < 8) {
+           T1_MOVI(r0, 0);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T1_MOVI(r0, 1);
+       }
+       else {
+           T2_MOVI(r0, 0);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T2_MOVI(r0, 1);
+       }
+    }
+    else {
+       MOVI(r0, 0);
+       VMRS(_R15_REGNO);
+       CC_MOVI(cc, r0, 1);
+    }
+}
+
+/* Single-precision compare of r1 with r2, then r0 = 0 unless cc holds,
+ * in which case r0 = 1 (see _vcmp_01_x). */
+static void
+_vcmp_01_f(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp_01_x(cc, r0);
+}
+
+/* Double-precision compare of r1 with r2, then r0 = 0 unless cc holds,
+ * in which case r0 = 1 (see _vcmp_01_x). */
+static void
+_vcmp_01_d(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp_01_x(cc, r0);
+}
+
+/* Immediate-operand variants _vfp_ungei_{f,d} and _vfp_ungti_{f,d},
+ * generated by fopi()/dopi(). */
+fopi(unge)
+dopi(unge)
+fopi(ungt)
+dopi(ungt)
+
+/*
+ * After a compare, set r0 to 1 under ARM_CC_NE, to 0 otherwise, and
+ * finally to 0 under ARM_CC_VS (so the VS case overrides the NE
+ * result).
+ */
+static void
+_vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       /* ITE covers the first two moves; the VS move has its own IT */
+       ITE(ARM_CC_NE);
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           T1_MOVI(r0, 0);
+           IT(ARM_CC_VS);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           T2_MOVI(r0, 0);
+           IT(ARM_CC_VS);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       CC_MOVI(ARM_CC_EQ, r0, 0);
+       CC_MOVI(ARM_CC_VS, r0, 0);
+    }
+}
+
+/* Single-precision compare of r1 with r2, result materialized in r0 by
+ * _vfp_ltgtr_x. */
+static void
+_vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vfp_ltgtr_x(r0);
+}
+
+/* immediate-operand variant: _vfp_ltgti_f */
+fopi(ltgt)
+
+/* Double-precision compare of r1 with r2, result materialized in r0 by
+ * _vfp_ltgtr_x. */
+static void
+_vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vfp_ltgtr_x(r0);
+}
+
+/* immediate-operand variant: _vfp_ltgti_d */
+dopi(ltgt)
+
+/* Single-precision compare of r1 with r2; r0 = 1 unless ARM_CC_VS
+ * holds, in which case r0 = 0. */
+static void
+_vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp10_x(ARM_CC_VS, r0);
+}
+
+/* immediate-operand variant: _vfp_ordi_f */
+fopi(ord)
+
+/* Double-precision compare of r1 with r2; r0 = 1 unless ARM_CC_VS
+ * holds, in which case r0 = 0. */
+static void
+_vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp10_x(ARM_CC_VS, r0);
+}
+
+/* immediate-operand variant: _vfp_ordi_d */
+dopi(ord)
+
+/* Single-precision compare of r1 with r2; r0 = 0 unless ARM_CC_VS
+ * holds, in which case r0 = 1. */
+static void
+_vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp_01_x(ARM_CC_VS, r0);
+}
+
+/* immediate-operand variant: _vfp_unordi_f */
+fopi(unord)
+
+/* Double-precision compare of r1 with r2; r0 = 0 unless ARM_CC_VS
+ * holds, in which case r0 = 1. */
+static void
+_vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp_01_x(ARM_CC_VS, r0);
+}
+
+/* immediate-operand variant: _vfp_unordi_d */
+dopi(unord)
+
+/*
+ * After a compare, emit a conditional branch on `cc` towards i0 and
+ * return the code address of that branch instruction.  The
+ * displacement is derived from the branch address in halfwords (Thumb)
+ * or words (ARM), minus 2, and asserted to fit the encoding.
+ */
+static jit_word_t
+_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
+{
+    jit_word_t         d, here;
+    VMRS(_R15_REGNO);
+    here = _jit->pc.w;
+    if (!jit_thumb_p()) {
+       d = ((i0 - here) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+       return (here);
+    }
+    d = ((i0 - here) >> 1) - 2;
+    assert(_s20P(d));
+    T2_CC_B(cc, encode_thumb_cc_jump(d));
+    return (here);
+}
+
+
+/* Single-precision compare of r0 with r1, then branch to i0 when cc
+ * holds; returns the value produced by vbcmp_x(). */
+static jit_word_t
+_vbcmp_f(jit_state_t *_jit, int cc,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vbcmp_x(cc, i0));
+}
+
+/* Double-precision compare of r0 with r1, then branch to i0 when cc
+ * holds; returns the value produced by vbcmp_x(). */
+static jit_word_t
+_vbcmp_d(jit_state_t *_jit, int cc,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vbcmp_x(cc, i0));
+}
+
+/*
+ * After a compare, branch to i0 unless condition `cc` holds: a
+ * conditional branch on `cc` is emitted with a zero placeholder and
+ * later patched to land just past the unconditional branch to i0.
+ * Returns the code address of the unconditional branch.
+ */
+static jit_word_t
+_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
+{
+    jit_word_t         d, p, w;
+    VMRS(_R15_REGNO);
+    /* p: address of the skip branch, patched at the end */
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(cc, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(cc, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    return (w);
+}
+
+/* Single-precision compare of r0 with r1, then branch to i0 unless cc
+ * holds; returns the value produced by vbncmp_x(). */
+static jit_word_t
+_vbncmp_f(jit_state_t *_jit, int cc,
+         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vbncmp_x(cc, i0));
+}
+
+/* Double-precision compare of r0 with r1, then branch to i0 unless cc
+ * holds; returns the value produced by vbncmp_x(). */
+static jit_word_t
+_vbncmp_d(jit_state_t *_jit, int cc,
+         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vbncmp_x(cc, i0));
+}
+
+/* Immediate-operand variants of the compare-and-branch operations
+ * (_vfp_blti_f, _vfp_blti_d, ...), generated by fbopi()/dbopi(). */
+fbopi(lt)
+dbopi(lt)
+fbopi(le)
+dbopi(le)
+fbopi(eq)
+dbopi(eq)
+fbopi(ge)
+dbopi(ge)
+fbopi(gt)
+dbopi(gt)
+fbopi(ne)
+dbopi(ne)
+fbopi(unlt)
+dbopi(unlt)
+fbopi(unle)
+dbopi(unle)
+
+/*
+ * After a compare, emit the branch sequence for "unordered or equal":
+ * the ARM_CC_VS branch is patched to land on the unconditional branch
+ * to i0, while the ARM_CC_NE branch is patched to skip past it.
+ * Returns the code address of the unconditional branch.
+ */
+static jit_word_t
+_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, q, w;
+    VMRS(_R15_REGNO);
+    /* p: VS branch (taken -> go to the jump); q: NE branch (taken -> skip) */
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+       patch_at(arm_patch_jump, p, _jit->pc.w);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+       patch_at(arm_patch_jump, p, _jit->pc.w);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, q, _jit->pc.w);
+    return (w);
+}
+
+/* Single-precision compare of r0 with r1 followed by the
+ * _vfp_buneqr_x branch sequence towards i0. */
+static jit_word_t
+_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_buneqr_x(i0));
+}
+
+/* immediate-operand variant: _vfp_buneqi_f */
+fbopi(uneq)
+
+/* Double-precision compare of r0 with r1 followed by the
+ * _vfp_buneqr_x branch sequence towards i0. */
+static jit_word_t
+_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_buneqr_x(i0));
+}
+
+/* immediate-operand variant: _vfp_buneqi_d */
+dbopi(uneq)
+
+/*
+ * After a compare, emit the branch sequence for "unordered or greater
+ * or equal": an ARM_CC_MI branch (patched to skip the next
+ * instruction) followed by an ARM_CC_HS conditional branch to i0.
+ * Returns the code address of the ARM_CC_HS branch.
+ */
+static jit_word_t
+_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, w;
+    VMRS(_R15_REGNO);
+    /* p: address of the MI skip branch, patched at the end */
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_MI, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_MI, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(ARM_CC_HS, d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    return (w);
+}
+
+/* Single-precision compare of r0 with r1 followed by the
+ * _vfp_bunger_x branch sequence towards i0. */
+static jit_word_t
+_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_bunger_x(i0));
+}
+
+/* immediate-operand variant: _vfp_bungei_f */
+fbopi(unge)
+
+/* Double-precision compare of r0 with r1 followed by the
+ * _vfp_bunger_x branch sequence towards i0. */
+static jit_word_t
+_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_bunger_x(i0));
+}
+
+/* immediate-operand variant: _vfp_bungei_d */
+dbopi(unge)
+
+/*
+ * After a compare, emit the branch sequence for "less or greater":
+ * both the ARM_CC_VS and the ARM_CC_EQ branches are patched to skip
+ * past the unconditional branch to i0.  Returns the code address of
+ * the unconditional branch.
+ */
+static jit_word_t
+_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, q, w;
+    VMRS(_R15_REGNO);
+    /* p: VS skip branch; q: EQ skip branch; both patched at the end */
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       T2_CC_B(ARM_CC_EQ, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       CC_B(ARM_CC_EQ, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    patch_at(arm_patch_jump, q, _jit->pc.w);
+    return (w);
+}
+
+/* Single-precision compare of r0 with r1 followed by the
+ * _vfp_bltgtr_x branch sequence towards i0. */
+static jit_word_t
+_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_bltgtr_x(i0));
+}
+
+/* immediate-operand variants: _vfp_bungti_f, _vfp_bungti_d,
+ * _vfp_bltgti_f */
+fbopi(ungt)
+dbopi(ungt)
+fbopi(ltgt)
+
+/* Double-precision compare of r0 with r1 followed by the
+ * _vfp_bltgtr_x branch sequence towards i0. */
+static jit_word_t
+_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_bltgtr_x(i0));
+}
+
+/* immediate-operand variants: _vfp_bltgti_d, _vfp_bordi_{f,d},
+ * _vfp_bunordi_{f,d} */
+dbopi(ltgt)
+fbopi(ord)
+dbopi(ord)
+fbopi(unord)
+dbopi(unord)
+
+/*
+ * Load a single-precision value from absolute address i0 into r0.  A
+ * non-FPR target is delegated to ldi_i(); an FPR target goes through a
+ * temporary GPR holding the address.
+ */
+static void
+_vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldi_i(r0, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    VLDR_F32(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from absolute address i0 into r0.  The
+ * address is placed in a temporary GPR; a non-FPR target is filled as
+ * two words into r0 and r0 + 1 (offsets 0 and 4).
+ */
+static void
+_vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_fpr_p(r0)) {
+       ldr_i(r0, rn(tmp));
+       ldxi_i(r0 + 1, rn(tmp), 4);
+    }
+    else
+       VLDR_F64(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a single-precision value from address r1 + r2 into r0.  A
+ * non-FPR target is delegated to ldxr_i(); an FPR target goes through
+ * a temporary GPR holding the summed address.
+ */
+static void
+_vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldxr_i(r0, r1, r2);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r1, r2);
+    VLDR_F32(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from address r1 + r2 into r0.  The
+ * summed address is placed in a temporary GPR; a non-FPR target is
+ * filled as two words into r0 and r0 + 1 (offsets 0 and 4).
+ */
+static void
+_vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r1, r2);
+    if (!jit_fpr_p(r0)) {
+       ldr_i(r0, rn(tmp));
+       ldxi_i(r0 + 1, rn(tmp), 4);
+    }
+    else
+       VLDR_F64(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a single-precision value from address r1 + i0 into r0.  A
+ * non-FPR target is delegated to ldxi_i().  For an FPR target, offsets
+ * whose magnitude is below 1024 are encoded directly (as i0 >> 2, with
+ * VLDRN_F32 for negative offsets); larger ones are folded into a
+ * temporary GPR first.  The offset must be a multiple of 4.
+ */
+static void
+_vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldxi_i(r0, r1, i0);
+       return;
+    }
+    if (i0 < 0) {
+       /* work with the magnitude of a negative offset */
+       i0 = -i0;
+       assert(!(i0 & 3));
+       if (i0 >= 1024) {
+           tmp = jit_get_reg(jit_class_gpr);
+           subi(rn(tmp), r1, i0);
+           VLDR_F32(r0, rn(tmp), 0);
+           jit_unget_reg(tmp);
+       }
+       else
+           VLDRN_F32(r0, r1, i0 >> 2);
+       return;
+    }
+    assert(!(i0 & 3));
+    if (i0 >= 1024) {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r1, i0);
+       VLDR_F32(r0, rn(tmp), 0);
+       jit_unget_reg(tmp);
+    }
+    else
+       VLDR_F32(r0, r1, i0 >> 2);
+}
+
+/*
+ * Load a double-precision value from address r1 + i0 into r0.  A
+ * non-FPR target is filled as two words into r0 and r0 + 1 from a
+ * temporary GPR holding the address.  For an FPR target, offsets whose
+ * magnitude is below 1024 are encoded directly (as i0 >> 2, with
+ * VLDRN_F64 for negative offsets); larger ones are folded into a
+ * temporary GPR first.  The offset must be a multiple of 4.
+ */
+static void
+_vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r1, i0);
+       ldr_i(r0, rn(tmp));
+       ldxi_i(r0 + 1, rn(tmp), 4);
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (i0 < 0) {
+       /* work with the magnitude of a negative offset */
+       i0 = -i0;
+       assert(!(i0 & 3));
+       if (i0 >= 1024) {
+           tmp = jit_get_reg(jit_class_gpr);
+           subi(rn(tmp), r1, i0);
+           VLDR_F64(r0, rn(tmp), 0);
+           jit_unget_reg(tmp);
+       }
+       else
+           VLDRN_F64(r0, r1, i0 >> 2);
+       return;
+    }
+    assert(!(i0 & 3));
+    if (i0 >= 1024) {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r1, i0);
+       VLDR_F64(r0, rn(tmp), 0);
+       jit_unget_reg(tmp);
+    }
+    else
+       VLDR_F64(r0, r1, i0 >> 2);
+}
+
+/*
+ * Store the single-precision value in r0 to absolute address i0.  A
+ * non-FPR source is delegated to sti_i(); an FPR source goes through a
+ * temporary GPR holding the address.
+ */
+static void
+_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       sti_i(i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    VSTR_F32(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value in r0 to absolute address i0.  The
+ * address is placed in a temporary GPR; a non-FPR source is written as
+ * two words from r0 and r0 + 1 (offsets 0 and 4).
+ */
+static void
+_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_fpr_p(r0)) {
+       str_i(rn(tmp), r0);
+       stxi_i(4, rn(tmp), r0 + 1);
+    }
+    else
+       VSTR_F64(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the single-precision value in r2 to address r0 + r1.  A
+ * non-FPR source is delegated to stxr_i(); an FPR source goes through
+ * a temporary GPR holding the summed address.
+ */
+static void
+_vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r2)) {
+       stxr_i(r0, r1, r2);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    VSTR_F32(r2, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value in r2 to address r0 + r1.  The
+ * summed address is placed in a temporary GPR; a non-FPR source is
+ * written as two words from r2 and r2 + 1 (offsets 0 and 4).
+ */
+static void
+_vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    if (!jit_fpr_p(r2)) {
+       str_i(rn(tmp), r2);
+       stxi_i(4, rn(tmp), r2 + 1);
+    }
+    else
+       VSTR_F64(r2, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the single-precision value in r1 to address r0 + i0.  A
+ * non-FPR source is delegated to stxi_i().  For an FPR source, offsets
+ * whose magnitude is below 1024 are encoded directly (as i0 >> 2, with
+ * VSTRN_F32 for negative offsets); larger ones are folded into a
+ * temporary GPR first.  The offset must be a multiple of 4.
+ */
+static void
+_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1)) {
+       stxi_i(i0, r0, r1);
+       return;
+    }
+    if (i0 < 0) {
+       /* work with the magnitude of a negative offset */
+       i0 = -i0;
+       assert(!(i0 & 3));
+       if (i0 >= 1024) {
+           tmp = jit_get_reg(jit_class_gpr);
+           subi(rn(tmp), r0, i0);
+           VSTR_F32(r1, rn(tmp), 0);
+           jit_unget_reg(tmp);
+       }
+       else
+           VSTRN_F32(r1, r0, i0 >> 2);
+       return;
+    }
+    assert(!(i0 & 3));
+    if (i0 >= 1024) {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r0, i0);
+       VSTR_F32(r1, rn(tmp), 0);
+       jit_unget_reg(tmp);
+    }
+    else
+       VSTR_F32(r1, r0, i0 >> 2);
+}
+
+/*
+ * Store the double-precision value in r1 to address r0 + i0.  A
+ * non-FPR source is written as two words from r1 and r1 + 1 through a
+ * temporary GPR holding the address.  For an FPR source, offsets whose
+ * magnitude is below 1024 are encoded directly (as i0 >> 2, with
+ * VSTRN_F64 for negative offsets); larger ones are folded into a
+ * temporary GPR first.  The offset must be a multiple of 4.
+ */
+static void
+_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (i0 >= 0) {
+           assert(!(i0 & 3));
+           /* same 1024 limit as the negative branch and the other
+            * ldxi/stxi helpers (was the octal literal 0124, i.e. 84) */
+           if (i0 < 1024)
+               VSTR_F64(r1, r0, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               addi(rn(reg), r0, i0);
+               VSTR_F64(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+       else {
+           /* work with the magnitude of a negative offset */
+           i0 = -i0;
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VSTRN_F64(r1, r0, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               subi(rn(reg), r0, i0);
+               VSTR_F64(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_i(rn(reg), r1);
+       stxi_i(4, rn(reg), r1 + 1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Fetch the next double from the variadic argument pointer held in r1
+ * into r0, then advance r1 past it.  Only valid inside a function
+ * flagged with jit_call_varargs (asserted).
+ */
+static void
+_vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Adjust pointer. */
+    /* adds (r1 & 7) to r1; presumably r1 is at least 4-byte aligned so
+     * this rounds it up to a multiple of 8 -- confirm with callers */
+    reg = jit_get_reg(jit_class_gpr);
+    andi(rn(reg), r1, 7);
+    addr(r1, r1, rn(reg));
+    jit_unget_reg(reg);
+
+    /* Load argument. */
+    vfp_ldr_d(r0, r1);
+
+    /* Update stack pointer. */
+    addi(r1, r1, sizeof(jit_float64_t));
+}
+/* the generator macros are only needed while defining the helpers above */
+#  undef dbopi
+#  undef fbopi
+#  undef dopi
+#  undef fopi
+#endif
diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c
new file mode 100644 (file)
index 0000000..783fa90
--- /dev/null
@@ -0,0 +1,2274 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if defined(__linux__)
+#  include <stdio.h>
+#endif
+
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 4)
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 16)
+#define jit_arg_d_reg_p(i)             ((i) >= 0 && (i) < 15)
+
+#define arm_patch_node                 0x80000000
+#define arm_patch_word                 0x40000000
+#define arm_patch_jump                 0x20000000
+#define arm_patch_load                 0x00000000
+
+#define jit_fpr_p(rn)                  ((rn) > 15)
+
+#define arg_base()                                                     \
+    (stack_framesize - 16 + (jit_cpu.abi ? 64 : 0))
+#define arg_offset(n)                                                  \
+    ((n) < 4 ? arg_base() + ((n) << 2) : (n))
+
+/* Assume called functions never match the jit instruction set, that is,
+ * libc, gmp, mpfr, etc. functions are in thumb mode while the jit is in
+ * arm mode, which may cause a crash upon return from such a function
+ * if the jit generated a relative jump.
+ */
+#define jit_exchange_p()               1
+
+/* FIXME is it really required to not touch _R10? */
+
+/*
+ * Types
+ */
+typedef union _jit_thumb_t {
+    jit_int32_t                i;
+    jit_int16_t                s[2];
+} jit_thumb_t;
+
+typedef jit_pointer_t  jit_va_list;
+
+/*
+ * Prototypes
+ */
+#define jit_make_arg(node)             _jit_make_arg(_jit,node)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
+static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
+#define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
+static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define jit_get_reg_pair()             _jit_get_reg_pair(_jit)
+static jit_int32_t _jit_get_reg_pair(jit_state_t*);
+#define jit_unget_reg_pair(rn)         _jit_unget_reg_pair(_jit,rn)
+static void _jit_unget_reg_pair(jit_state_t*,jit_int32_t);
+# define must_align_p(node)            _must_align_p(_jit, node)
+static jit_bool_t _must_align_p(jit_state_t*,jit_node_t*);
+#define load_const(uniq,r0,i0)         _load_const(_jit,uniq,r0,i0)
+static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t);
+#define flush_consts()                 _flush_consts(_jit)
+static void _flush_consts(jit_state_t*);
+#define invalidate_consts()            _invalidate_consts(_jit)
+static void _invalidate_consts(jit_state_t*);
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+#if defined(__GNUC__)
+/* libgcc */
+extern void __clear_cache(void *, void *);
+#endif
+
+#define PROTO                          1
+#  include "jit_rewind.c"
+#  include "jit_arm-cpu.c"
+#  include "jit_arm-swf.c"
+#  include "jit_arm-vfp.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Detected cpu features; filled in by jit_get_cpu(). */
+jit_cpu_t              jit_cpu;
+/* Register description table.  Each entry pairs a spec word (rc() class
+ * flags or'ed with a register number) with a printable name.  The last
+ * entry is _NOREG and is not counted in reglen (see _jit_init()).
+ * _jit_init() edits the spec of some entries at runtime depending on
+ * jit_cpu.vfp and jit_cpu.abi, so the entry order matters.
+ */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0x0c,                  "ip" },
+    { rc(sav) | rc(gpr) | 0x04,                "r4" },
+    { rc(sav) | rc(gpr) | 0x05,                "r5" },
+    { rc(sav) | rc(gpr) | 0x06,                "r6" },
+    { rc(sav) | rc(gpr) | 0x07,                "r7" },
+    { rc(sav) | rc(gpr) | 0x08,                "r8" },
+    { rc(sav) | rc(gpr) | 0x09,                "r9" },
+    { rc(sav) | 0x0a,                  "sl" },
+    { rc(sav) | 0x0b,                  "fp" },
+    { rc(sav) | 0x0d,                  "sp" },
+    { rc(sav) | 0x0e,                  "lr" },
+    { 0x0f,                            "pc" },
+    { rc(arg) | rc(gpr) | 0x03,                "r3" },
+    { rc(arg) | rc(gpr) | 0x02,                "r2" },
+    { rc(arg) | rc(gpr) | 0x01,                "r1" },
+    { rc(arg) | rc(gpr) | 0x00,                "r0" },
+    { rc(fpr) | 0x20,                  "d8" },
+    { 0x21,                            "s17" },
+    { rc(fpr) | 0x22,                  "d9" },
+    { 0x23,                            "s19" },
+    { rc(fpr) | 0x24,                  "d10" },
+    { 0x25,                            "s21" },
+    { rc(fpr) | 0x26,                  "d11" },
+    { 0x27,                            "s23" },
+    { rc(fpr) | 0x28,                  "d12" },
+    { 0x29,                            "s25" },
+    { rc(fpr) | 0x2a,                  "d13" },
+    { 0x2b,                            "s27" },
+    { rc(fpr) | 0x2c,                  "d14" },
+    { 0x2d,                            "s29" },
+    { rc(fpr) | 0x2e,                  "d15" },
+    { 0x2f,                            "s31" },
+    { rc(arg) | 0x1f,                  "s15" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1e,    "d7" },
+    { rc(arg) | 0x1d,                  "s13" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1c,    "d6" },
+    { rc(arg) | 0x1b,                  "s11" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1a,    "d5" },
+    { rc(arg) | 0x19,                  "s9" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x18,    "d4" },
+    { rc(arg) | 0x17,                  "s7" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x16,    "d3" },
+    { rc(arg) | 0x15,                  "s5" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x14,    "d2" },
+    { rc(arg) | 0x13,                  "s3" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x12,    "d1" },
+    { rc(arg) | 0x11,                  "s1" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x10,    "d0" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* Fill the global jit_cpu description.  On Linux the "CPU architecture:"
+ * and "Features" lines of /proc/cpuinfo are parsed; compile time macros
+ * (__ARM_PCS_VFP, __thumb2__) then provide defaults and overrides.
+ */
+void
+jit_get_cpu(void)
+{
+#if defined(__linux__)
+    FILE       *cpuinfo;
+    char       *cursor;
+    char        line[128];
+
+    cpuinfo = fopen("/proc/cpuinfo", "r");
+    if (cpuinfo != NULL) {
+       while (fgets(line, sizeof(line), cpuinfo)) {
+           if (strncmp(line, "CPU architecture:", 17) == 0) {
+               jit_cpu.version = strtol(line + 17, &cursor, 10);
+               /* Scan the suffix after the number for T/E markers. */
+               for (; *cursor; ++cursor) {
+                   if (*cursor == 'T' || *cursor == 't')
+                       jit_cpu.thumb = 1;
+                   else if (*cursor == 'E' || *cursor == 'e')
+                       jit_cpu.extend = 1;
+               }
+           }
+           else if (strncmp(line, "Features\t:", 10) == 0) {
+               cursor = strstr(line + 10, "vfpv");
+               if (cursor != NULL)
+                   jit_cpu.vfp = strtol(cursor + 4, NULL, 0);
+               if (strstr(line + 10, "neon") != NULL)
+                   jit_cpu.neon = 1;
+               if (strstr(line + 10, "thumb") != NULL)
+                   jit_cpu.thumb = 1;
+           }
+       }
+       fclose(cpuinfo);
+    }
+#endif
+#if defined(__ARM_PCS_VFP)
+    /* Built for the hard float calling convention: supply defaults
+     * for whatever /proc/cpuinfo did not tell. */
+    if (!jit_cpu.vfp)
+       jit_cpu.vfp = 3;
+    if (!jit_cpu.version)
+       jit_cpu.version = 7;
+    jit_cpu.abi = 1;
+#endif
+#if defined(__thumb2__)
+    jit_cpu.thumb = 1;
+#endif
+    /* armv6t2 todo (software float and thumb2) */
+    if (!jit_cpu.vfp)
+       jit_cpu.thumb = 0;
+}
+
+/* Per-state initialization.  Sets reglen and, on the first call only,
+ * adjusts the shared _rvs table according to jit_cpu.
+ */
+void
+_jit_init(jit_state_t *_jit)
+{
+    static jit_bool_t  pending = 1;
+    jit_int32_t                idx;
+
+    _jitc->reglen = jit_size(_rvs) - 1;
+    if (!pending)
+       return;
+
+    /* jit_get_cpu() should have been already called, and only once */
+    if (!jit_cpu.vfp) {
+       /* cause register to never be allocated, because simple
+        * software float only allocates stack space for 8 slots  */
+       for (idx = _D8; idx < _D7; idx++)
+           _rvs[idx].spec = 0;
+    }
+    if (!jit_cpu.abi) {
+       /* Without the hard float abi no vfp register carries arguments. */
+       for (idx = _S15; idx <= _D0; idx++)
+           _rvs[idx].spec &= ~rc(arg);
+    }
+    pending = 0;
+}
+
+/* Start a new function: close any function still open, reserve a slot
+ * in the functions vector, reset its bookkeeping and create the prolog
+ * and epilog nodes.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+
+    /* A previous function left open is terminated implicitly. */
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+
+    /* Grow the functions vector 16 entries at a time. */
+    index = _jitc->functions.offset;
+    if (index >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + index;
+    ++_jitc->functions.offset;
+
+    /* The hard float abi adds 64 bytes to the base frame size. */
+    _jitc->function->self.size = stack_framesize + (jit_cpu.abi ? 64 : 0);
+    _jitc->function->self.alen = 0;
+    _jitc->function->self.argf = 0;
+    _jitc->function->self.argi = 0;
+    /* 8 soft float registers take the first 64 bytes of locals */
+    _jitc->function->self.aoff = jit_swf_p() ? -64 : 0;
+    _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = index;
+
+    /* The epilog node is created now but linked by jit_epilog().
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    _jitc->function->epilog->w.w = index;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve length bytes of local storage and return its (negative)
+ * offset.  The running offset is first aligned to 2, 4 or 8 bytes
+ * depending on length.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+    /* Lengths 0 and 1 need no alignment; anything above 4 gets 8. */
+    if (length == 2)
+       _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       _jitc->function->self.aoff &= -8;
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/* Runtime sized stack allocation: v holds the requested size, u
+ * receives the updated offset.  The first use in a function reserves
+ * an int slot (aoffoff) that tracks the running offset.
+ */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 delta;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    delta = jit_get_reg(jit_class_gpr);
+    /* delta = -v with the low three bits cleared */
+    jit_negr(delta, v);
+    jit_andi(delta, delta, -8);
+    /* u = saved offset + delta; sp moves by the same amount */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, delta);
+    jit_addr(JIT_SP, JIT_SP, delta);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(delta);
+    jit_dec_synth();
+}
+
+/* Return from the current function by jumping to its epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the integer held in register u. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    /* Skip the move when the value already sits in JIT_RET. */
+    if (u != JIT_RET)
+       jit_movr(JIT_RET, u);
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate integer u: load it in JIT_RET and jump to the
+ * epilog through jit_ret().
+ */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the float held in register u.  With the hard float abi the
+ * value goes in JIT_FRET, otherwise it is moved to the gpr JIT_RET.
+ */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (jit_cpu.abi) {
+       if (u == JIT_FRET)
+           jit_live(JIT_FRET);
+       else
+           jit_movr_f(JIT_FRET, u);
+    }
+    else if (u == JIT_RET)
+       jit_live(JIT_RET);
+    else
+       jit_movr_f_w(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate float u, in JIT_FRET for the hard float abi or
+ * in the gpr JIT_RET otherwise.
+ */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    if (!jit_cpu.abi)
+       jit_movi_f_w(JIT_RET, u);
+    else
+       jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double held in register u.  With the hard float abi the
+ * value goes in JIT_FRET, otherwise in the gpr pair JIT_RET/_R1.
+ */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (jit_cpu.abi) {
+       if (u == JIT_FRET)
+           jit_live(JIT_FRET);
+       else
+           jit_movr_d(JIT_FRET, u);
+    }
+    else if (u == JIT_RET)
+       jit_live(JIT_RET);
+    else
+       jit_movr_d_ww(JIT_RET, _R1, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate double u, in JIT_FRET for the hard float abi or
+ * in the gpr pair JIT_RET/_R1 otherwise.
+ */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    if (!jit_cpu.abi)
+       jit_movi_d_ww(JIT_RET, _R1, u);
+    else
+       jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Close the current function: link its epilog node, created unlinked
+ * by _jit_prolog(), and clear the current function pointer.
+ */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether the argument described by node u lives in a register.
+ * Float and double arguments use the vfp register ranges only with the
+ * hard float abi; everything else is checked against the gpr range.
+ */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code == jit_code_arg_f) {
+       if (jit_cpu.abi)
+           return (jit_arg_f_reg_p(u->u.w));
+    }
+    else if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_d);
+       if (jit_cpu.abi)
+           return (jit_arg_d_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Describe the next integer argument of the current function.  Uses
+ * the next argument register if one is free, else a word on the stack.
+ * A null node asks for a fresh jit_code_arg node; otherwise the given
+ * node is linked and reused.
+ */
+static jit_node_t *
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 where;
+    if (!jit_arg_reg_p(_jitc->function->self.argi)) {
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+       where = _jitc->function->self.argi++;
+    if (node != (jit_node_t *)0)
+       link_node(node);
+    else
+       node = jit_new_node(jit_code_arg);
+    node->u.w = where;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/* Describe the next float argument of the current function.  With the
+ * hard float abi and no varargs it takes the next vfp argument
+ * register, otherwise the next gpr; when none is free it takes a
+ * float sized slot on the stack.
+ */
+jit_node_t *
+_jit_make_arg_f(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 where;
+    jit_bool_t          in_reg = 0;
+    if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
+       if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+           where = _jitc->function->self.argf++;
+           in_reg = 1;
+       }
+    }
+    else if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       where = _jitc->function->self.argi++;
+       in_reg = 1;
+    }
+    if (!in_reg) {
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_float32_t);
+    }
+    if (node != (jit_node_t *)0)
+       link_node(node);
+    else
+       node = jit_new_node(jit_code_arg_f);
+    node->u.w = where;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/* Describe the next double argument of the current function.  Register
+ * indexes are rounded up to an even number and two are consumed; a
+ * stack slot is aligned to 8 bytes.
+ */
+jit_node_t *
+_jit_make_arg_d(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 where;
+    jit_bool_t          in_reg = 0;
+    if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
+       if (jit_arg_d_reg_p(_jitc->function->self.argf)) {
+           if (_jitc->function->self.argf & 1)
+               ++_jitc->function->self.argf;
+           where = _jitc->function->self.argf;
+           _jitc->function->self.argf += 2;
+           in_reg = 1;
+       }
+    }
+    else {
+       /* The gpr index is made even whether or not a pair is left. */
+       if (_jitc->function->self.argi & 1)
+           ++_jitc->function->self.argi;
+       if (jit_arg_reg_p(_jitc->function->self.argi)) {
+           where = _jitc->function->self.argi;
+           _jitc->function->self.argi += 2;
+           in_reg = 1;
+       }
+    }
+    if (!in_reg) {
+       if (_jitc->function->self.size & 7)
+           _jitc->function->self.size += 4;
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_float64_t);
+    }
+    if (node != (jit_node_t *)0)
+       link_node(node);
+    else
+       node = jit_new_node(jit_code_arg_d);
+    node->u.w = where;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/* Mark either the call being prepared or the current function as
+ * taking variable arguments.  With the hard float abi, float arguments
+ * already assigned are redone through rewind_prepare()/rewind_prolog().
+ */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    if (!_jitc->prepare) {
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+       if (jit_cpu.abi &&  _jitc->function->self.argf)
+           rewind_prolog();
+       /* First 4 stack addresses are always spilled r0-r3 */
+       _jitc->function->vagp =
+           jit_arg_reg_p(_jitc->function->self.argi) ?
+           _jitc->function->self.argi * 4 : 16;
+    }
+    else {
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+       if (jit_cpu.abi && _jitc->function->call.argf)
+           rewind_prepare();
+    }
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare)
+       jit_link_prolog();
+    else
+       jit_link_prepare();
+    jit_dec_synth();
+}
+
+/* Pass the va_list held in register u as a call argument; it is pushed
+ * like any other integer register argument.
+ */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the current function. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* A null node makes jit_make_arg() allocate a new one. */
+    return (jit_make_arg(NULL));
+}
+
+/* Declare the next float argument of the current function. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* A null node makes jit_make_arg_f() allocate a new one. */
+    return (jit_make_arg_f(NULL));
+}
+
+/* Declare the next double argument of the current function. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* A null node makes jit_make_arg_d() allocate a new one. */
+    return (jit_make_arg_d(NULL));
+}
+
+/* Load integer argument v into u as a signed char.  Software float
+ * builds always read the frame slot given by arg_offset(); otherwise
+ * the value comes from its argument register or its stack offset.
+ */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_ldxi_c(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_c(u, JIT_FP, slot);
+    else
+       jit_extr_c(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into u as an unsigned char.  Software float
+ * builds always read the frame slot given by arg_offset(); otherwise
+ * the value comes from its argument register or its stack offset.
+ */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_ldxi_uc(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_uc(u, JIT_FP, slot);
+    else
+       jit_extr_uc(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into u as a signed short.  Software float
+ * builds always read the frame slot given by arg_offset(); otherwise
+ * the value comes from its argument register or its stack offset.
+ */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_ldxi_s(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_s(u, JIT_FP, slot);
+    else
+       jit_extr_s(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into u as an unsigned short.  Software float
+ * builds always read the frame slot given by arg_offset(); otherwise
+ * the value comes from its argument register or its stack offset.
+ */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_ldxi_us(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_us(u, JIT_FP, slot);
+    else
+       jit_extr_us(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into u as a full word.  Software float
+ * builds always read the frame slot given by arg_offset(); otherwise
+ * the value comes from its argument register or its stack offset.
+ */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_ldxi_i(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_i(u, JIT_FP, slot);
+    else
+       jit_movr(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the value of register u, writing
+ * to the frame slot (software float), the argument register, or the
+ * stack offset as appropriate.
+ */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    slot = v->u.w;
+    if (jit_swf_p())
+       jit_stxi(arg_offset(slot), JIT_FP, u);
+    else if (!jit_arg_reg_p(slot))
+       jit_stxi(slot, JIT_FP, u);
+    else
+       jit_movr(JIT_RA0 - slot, u);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the immediate u.  When the
+ * argument lives in memory the immediate goes through a scratch gpr.
+ */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_word_t         slot;
+    jit_word_t         disp;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    slot = v->u.w;
+    if (!jit_swf_p() && jit_arg_reg_p(slot))
+       jit_movi(JIT_RA0 - slot, u);
+    else {
+       /* Software float always uses the frame slot of the argument. */
+       if (jit_swf_p())
+           disp = arg_offset(slot);
+       else
+           disp = slot;
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(disp, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Load float argument v into u.  The source is a vfp argument register
+ * (hard float abi, no varargs), the frame slot (software float), a gpr
+ * argument register, or the stack offset recorded in v.
+ */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
+       if (!jit_arg_f_reg_p(slot))
+           jit_ldxi_f(u, JIT_FP, slot);
+       else
+           jit_movr_f(u, JIT_FA0 - slot);
+    }
+    else if (jit_swf_p())
+       jit_ldxi_f(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_f(u, JIT_FP, slot);
+    else
+       jit_movr_w_f(u, JIT_RA0 - slot);
+    jit_dec_synth();
+}
+
+/* Overwrite float argument v with the value of register u.  The
+ * destination is a vfp argument register or stack offset (hard float
+ * abi), the frame slot (software float), or a gpr argument register
+ * or stack offset.
+ */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi) {
+       if (!jit_arg_f_reg_p(slot))
+           jit_stxi_f(slot, JIT_FP, u);
+       else
+           jit_movr_f(JIT_FA0 - slot, u);
+    }
+    else if (jit_swf_p())
+       jit_stxi_f(arg_offset(slot), JIT_FP, u);
+    else if (!jit_arg_reg_p(slot))
+       jit_stxi_f(slot, JIT_FP, u);
+    else
+       jit_movr_f_w(JIT_RA0 - slot, u);
+    jit_dec_synth();
+}
+
+/* Overwrite float argument v with the immediate u.  Only a vfp
+ * argument register takes the immediate directly; every other
+ * destination is written through a scratch fpr.
+ */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_word_t         slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi && jit_arg_f_reg_p(slot))
+       jit_movi_f(JIT_FA0 - slot, u);
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       if (jit_cpu.abi)
+           jit_stxi_f(slot, JIT_FP, tmp);
+       else if (jit_swf_p())
+           jit_stxi_f(arg_offset(slot), JIT_FP, tmp);
+       else if (jit_arg_reg_p(slot))
+           jit_movr_f_w(JIT_RA0 - slot, tmp);
+       else
+           jit_stxi_f(slot, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Load double argument v into u.  The source is a vfp argument
+ * register (hard float abi, no varargs), the frame slot (software
+ * float), a pair of gpr argument registers, or a stack offset.
+ */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
+       if (!jit_arg_f_reg_p(slot))
+           jit_ldxi_d(u, JIT_FP, slot);
+       else
+           jit_movr_d(u, JIT_FA0 - slot);
+    }
+    else if (jit_swf_p())
+       jit_ldxi_d(u, JIT_FP, arg_offset(slot));
+    else if (!jit_arg_reg_p(slot))
+       jit_ldxi_d(u, JIT_FP, slot);
+    else
+       jit_movr_ww_d(u, JIT_RA0 - slot, JIT_RA0 - (slot + 1));
+    jit_dec_synth();
+}
+
+/* Overwrite double argument v with the value of register u.  The
+ * destination is a vfp argument register or stack offset (hard float
+ * abi), the frame slot (software float), or a gpr pair or stack
+ * offset.
+ */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi) {
+       if (!jit_arg_f_reg_p(slot))
+           jit_stxi_d(slot, JIT_FP, u);
+       else
+           jit_movr_d(JIT_FA0 - slot, u);
+    }
+    else if (jit_swf_p())
+       jit_stxi_d(arg_offset(slot), JIT_FP, u);
+    else if (!jit_arg_reg_p(slot))
+       jit_stxi_d(slot, JIT_FP, u);
+    else
+       jit_movr_d_ww(JIT_RA0 - slot, JIT_RA0 - (slot + 1), u);
+    jit_dec_synth();
+}
+
+/* Overwrite double argument v with the immediate u.  Only a vfp
+ * argument register takes the immediate directly; every other
+ * destination is written through a scratch fpr.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_word_t         slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    slot = v->u.w;
+    if (jit_cpu.abi && jit_arg_f_reg_p(slot))
+       jit_movi_d(JIT_FA0 - slot, u);
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       if (jit_cpu.abi)
+           jit_stxi_d(slot, JIT_FP, tmp);
+       else if (jit_swf_p())
+           jit_stxi_d(arg_offset(slot), JIT_FP, tmp);
+       else if (jit_arg_reg_p(slot))
+           jit_movr_d_ww(JIT_RA0 - slot, JIT_RA0 - (slot + 1), tmp);
+       else
+           jit_stxi_d(slot, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register u as the next integer argument of the call being
+ * prepared: in the next free argument register, else on the stack.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       /* Out of argument registers: store in the outgoing area. */
+       jit_stxi(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next integer argument of the call being
+ * prepared: in the next free argument register, else on the stack
+ * through a scratch gpr.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 tmp;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(_jitc->function->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the float in register u as the next argument of the call being
+ * prepared.  With the hard float abi and a non varargs call it goes in
+ * the next vfp argument register, otherwise in the next gpr; when the
+ * chosen register file is exhausted it is stored in a word on the
+ * stack.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_bool_t          vfp_args;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    vfp_args = jit_cpu.abi &&
+       !(_jitc->function->call.call & jit_call_varargs);
+    if (vfp_args && jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+    else if (!vfp_args && jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr_f_w(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate float u as the next argument of the call being
+ * prepared.  Register selection follows _jit_pushargr_f(); memory and
+ * vfp register destinations are written through a scratch fpr.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         vfp_args;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    vfp_args = jit_cpu.abi &&
+       !(_jitc->function->call.call & jit_call_varargs);
+    if (vfp_args && jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       /* cannot jit_movi_f in the argument register because
+        * float arguments are packed, and that would cause
+        * either an assertion in debug mode, or overwriting
+        * two registers */
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_movr_f(JIT_FA0 - _jitc->function->call.argf, tmp);
+       jit_unget_reg(tmp);
+       ++_jitc->function->call.argf;
+    }
+    else if (!vfp_args && jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi_f_w(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the double in register u as the next argument of the call being
+ * prepared.  Register indexes are rounded up to an even number and two
+ * are consumed; a stack slot is aligned to 8 bytes.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_bool_t          in_regs = 0;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) {
+       if (jit_arg_d_reg_p(_jitc->function->call.argf)) {
+           if (_jitc->function->call.argf & 1)
+               ++_jitc->function->call.argf;
+           jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+           _jitc->function->call.argf += 2;
+           in_regs = 1;
+       }
+    }
+    else {
+       /* The gpr index is made even whether or not a pair is left. */
+       if (_jitc->function->call.argi & 1)
+           ++_jitc->function->call.argi;
+       if (jit_arg_reg_p(_jitc->function->call.argi)) {
+           jit_movr_d_ww(JIT_RA0 - _jitc->function->call.argi,
+                         JIT_RA0 - (_jitc->function->call.argi + 1),
+                         u);
+           _jitc->function->call.argi += 2;
+           in_regs = 1;
+       }
+    }
+    if (!in_regs) {
+       if (_jitc->function->call.size & 7)
+           _jitc->function->call.size += 4;
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate double u as the next argument of the call being
+ * prepared.  Register selection and alignment follow
+ * _jit_pushargr_d(); a stack slot is written through a scratch fpr.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         in_regs = 0;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) {
+       if (jit_arg_d_reg_p(_jitc->function->call.argf)) {
+           if (_jitc->function->call.argf & 1)
+               ++_jitc->function->call.argf;
+           jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+           _jitc->function->call.argf += 2;
+           in_regs = 1;
+       }
+    }
+    else {
+       /* The gpr index is made even whether or not a pair is left. */
+       if (_jitc->function->call.argi & 1)
+           ++_jitc->function->call.argi;
+       if (jit_arg_reg_p(_jitc->function->call.argi)) {
+           jit_movi_d_ww(JIT_RA0 - _jitc->function->call.argi,
+                         JIT_RA0 - (_jitc->function->call.argi + 1),
+                         u);
+           _jitc->function->call.argi += 2;
+           in_regs = 1;
+       }
+    }
+    if (!in_regs) {
+       if (_jitc->function->call.size & 7)
+           _jitc->function->call.size += 4;
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+/* Tell whether register regno carries an argument of the call node.
+ * node->v.w and node->w.w are the gpr and fpr argument counts stored
+ * by _jit_finishr()/_jit_finishi().
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                klass;
+    jit_int32_t                index;
+
+    klass = jit_class(_rvs[regno].spec);
+    if (!(klass & jit_class_arg))
+       return (0);
+
+    index = JIT_RA0 - regno;
+    if (index >= 0 && index < node->v.w)
+       return (1);
+    if (jit_cpu.abi && (klass & jit_class_fpr)) {
+       /* NOTE(review): the fpr index is derived from the already
+        * converted gpr index, not from the original regno; kept
+        * as is -- confirm this is intended. */
+       index = JIT_FA0 - index;
+       if (index >= 0 && index < node->w.w)
+           return (1);
+    }
+
+    return (0);
+}
+
+/* Finish the call being prepared with an indirect call through
+ * register r0.  Grows the outgoing argument area of the current
+ * function if this call needs more, records the argument register
+ * counts in the call node and resets the prepare state.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_callr(r0);
+    /* Counts of gpr/fpr argument registers used by *this call*, as
+     * read back by _jit_regarg_p(); they must come from call.*, like
+     * in _jit_finishi(), not from the enclosing function's own
+     * argument count (self.argi). */
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Issue a call to the immediate address `i0` and close the pending
+ * call-argument state of the current function.
+ *
+ * The integer and float argument counters of the call are stored in the
+ * v.w and w.w fields of the new call node before call.argi, call.argf,
+ * call.size and _jitc->prepare are cleared.
+ *
+ * Returns the call node.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+
+    /* Grow the recorded outgoing argument area if this call needs more. */
+    if (_jitc->function->call.size > _jitc->function->self.alen)
+       _jitc->function->self.alen = _jitc->function->call.size;
+
+    call = jit_calli(i0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+
+    /* Start from a clean slate for the next call sequence. */
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+
+    jit_dec_synth();
+    return (call);
+}
+
+/*
+ * Fetch a call's return value into `r0` as a char: applies jit_extr_c
+ * with JIT_RET as the source (by its name a signed 8-bit extension --
+ * confirm against the jit_extr_c definition).  The operation is issued
+ * between jit_inc_synth_w() and jit_dec_synth().
+ */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0` as an unsigned char: applies
+ * jit_extr_uc with JIT_RET as the source (by its name an unsigned 8-bit
+ * extension -- confirm against the jit_extr_uc definition).  The
+ * operation is issued between jit_inc_synth_w() and jit_dec_synth().
+ */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0` as a short: applies jit_extr_s
+ * with JIT_RET as the source (by its name a signed 16-bit extension --
+ * confirm against the jit_extr_s definition).  The operation is issued
+ * between jit_inc_synth_w() and jit_dec_synth().
+ */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0` as an unsigned short: applies
+ * jit_extr_us with JIT_RET as the source (by its name an unsigned 16-bit
+ * extension -- confirm against the jit_extr_us definition).  The
+ * operation is issued between jit_inc_synth_w() and jit_dec_synth().
+ */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's word-sized return value into `r0`.
+ *
+ * Nothing is moved when `r0` already is JIT_RET; otherwise jit_movr is
+ * used with JIT_RET as the source.  The whole sequence sits between
+ * jit_inc_synth_w() and jit_dec_synth().
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    if (r0 == JIT_RET) {
+       /* Value is already where the caller wants it. */
+    }
+    else {
+       jit_movr(r0, JIT_RET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's single precision return value into `r0`.
+ *
+ * When jit_cpu.abi is set the value is taken from JIT_FRET with
+ * jit_movr_f; otherwise it is taken from JIT_RET with jit_movr_w_f.
+ * In both cases no move is issued if `r0` already is the source
+ * register.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    if (!jit_cpu.abi) {
+       /* Result is read from the integer return register. */
+       if (r0 != JIT_RET)
+           jit_movr_w_f(r0, JIT_RET);
+    }
+    else if (r0 != JIT_FRET) {
+       /* Result is read from the float return register. */
+       jit_movr_f(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's double precision return value into `r0`.
+ *
+ * When jit_cpu.abi is set the value is taken from JIT_FRET with
+ * jit_movr_d; otherwise it is assembled from the JIT_RET/_R1 pair with
+ * jit_movr_ww_d.  In both cases no move is issued if `r0` already is
+ * JIT_FRET (abi set) or JIT_RET (abi clear).
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    if (!jit_cpu.abi) {
+       /* Result is read from two integer registers. */
+       if (r0 != JIT_RET)
+           jit_movr_ww_d(r0, JIT_RET, _R1);
+    }
+    else if (r0 != JIT_FRET) {
+       /* Result is read from the float return register. */
+       jit_movr_d(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_uword_t      thumb;
+#if DISASSEMBLER
+       jit_int32_t      info_offset;
+#endif
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+    _jitc->thumb = 0;
+
+    jit_reglive_setup();
+
+    _jitc->consts.data = NULL;
+    _jitc->consts.offset = _jitc->consts.length = 0;
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.data = NULL;
+    undo.thumb = 0;
+#if DISASSEMBLER
+    undo.info_offset =
+#endif
+       undo.const_offset = undo.patch_offset = 0;
+#  define assert_data(node)            /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_vv(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break
+#define case_vw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(rn(node->u.w), node->v.w);      \
+               else                                                    \
+                   vfp_##name##i##type(rn(node->u.w), node->v.w);      \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_wv(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(node->u.w, rn(node->v.w));      \
+               else                                                    \
+                   vfp_##name##i##type(node->u.w, rn(node->v.w));      \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_vvv(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_vvw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               else                                                    \
+                   vfp_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               break
+#define case_vvf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               if (jit_swf_p())                                        \
+                   swf_##name##i_f(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.f);                         \
+               else                                                    \
+                   vfp_##name##i_f(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.f);                         \
+               break
+#define case_vvd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               if (jit_swf_p())                                        \
+                   swf_##name##i_d(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.d);                         \
+               else                                                    \
+                   vfp_##name##i_d(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.d);                         \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_wvv(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   vfp_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_bvv(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##r##type(temp->u.w, rn(node->v.w),   \
+                                           rn(node->w.w));             \
+                   else                                                \
+                       vfp_##name##r##type(temp->u.w, rn(node->v.w),   \
+                                           rn(node->w.w));             \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##r##type(_jit->pc.w,          \
+                                                  rn(node->v.w),       \
+                                                  rn(node->w.w));      \
+                   else                                                \
+                       word = vfp_##name##r##type(_jit->pc.w,          \
+                                                  rn(node->v.w),       \
+                                                  rn(node->w.w));      \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_bvf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##i_f(temp->u.w, rn(node->v.w),       \
+                                       node->w.f);                     \
+                   else                                                \
+                       vfp_##name##i_f(temp->u.w, rn(node->v.w),       \
+                                       node->w.f);                     \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##i_f(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.f);              \
+                   else                                                \
+                       word = vfp_##name##i_f(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.f);              \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_bvd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##i_d(temp->u.w, rn(node->v.w),       \
+                                       node->w.d);                     \
+                   else                                                \
+                       vfp_##name##i_d(temp->u.w, rn(node->v.w),       \
+                                       node->w.d);                     \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##i_d(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.d);              \
+                   else                                                \
+                       word = vfp_##name##i_d(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.d);              \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               if (must_align_p(node->next))
+                   nop(2);
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               if (must_align_p(node->next))
+                   nop(2);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_vv(trunc, _f_i);
+               case_vv(trunc, _d_i);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_vvv(add, _f);
+               case_vvf(add);
+               case_vvv(sub, _f);
+               case_vvf(sub);
+               case_vvf(rsb);
+               case_vvv(mul, _f);
+               case_vvf(mul);
+               case_vvv(div, _f);
+               case_vvf(div);
+               case_vv(abs, _f);
+               case_vv(neg, _f);
+               case_vv(sqrt, _f);
+               case_vv(ext, _f);
+               case_vv(ld, _f);
+               case_vw(ld, _f);
+               case_vvv(ldx, _f);
+               case_vvw(ldx, _f);
+               case_vv(st, _f);
+               case_wv(st, _f);
+               case_vvv(stx, _f);
+               case_wvv(stx, _f);
+               case_vv(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_f(rn(node->u.w), node->v.f);
+               else
+                   vfp_movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_vv(ext, _d_f);
+               case_vvv(lt, _f);
+               case_vvf(lt);
+               case_vvv(le, _f);
+               case_vvf(le);
+               case_vvv(eq, _f);
+               case_vvf(eq);
+               case_vvv(ge, _f);
+               case_vvf(ge);
+               case_vvv(gt, _f);
+               case_vvf(gt);
+               case_vvv(ne, _f);
+               case_vvf(ne);
+               case_vvv(unlt, _f);
+               case_vvf(unlt);
+               case_vvv(unle, _f);
+               case_vvf(unle);
+               case_vvv(uneq, _f);
+               case_vvf(uneq);
+               case_vvv(unge, _f);
+               case_vvf(unge);
+               case_vvv(ungt, _f);
+               case_vvf(ungt);
+               case_vvv(ltgt, _f);
+               case_vvf(ltgt);
+               case_vvv(ord, _f);
+               case_vvf(ord);
+               case_vvv(unord, _f);
+               case_vvf(unord);
+               case_bvv(blt, _f);
+               case_bvf(blt);
+               case_bvv(ble, _f);
+               case_bvf(ble);
+               case_bvv(beq, _f);
+               case_bvf(beq);
+               case_bvv(bge, _f);
+               case_bvf(bge);
+               case_bvv(bgt, _f);
+               case_bvf(bgt);
+               case_bvv(bne, _f);
+               case_bvf(bne);
+               case_bvv(bunlt, _f);
+               case_bvf(bunlt);
+               case_bvv(bunle, _f);
+               case_bvf(bunle);
+               case_bvv(buneq, _f);
+               case_bvf(buneq);
+               case_bvv(bunge, _f);
+               case_bvf(bunge);
+               case_bvv(bungt, _f);
+               case_bvf(bungt);
+               case_bvv(bltgt, _f);
+               case_bvf(bltgt);
+               case_bvv(bord, _f);
+               case_bvf(bord);
+               case_bvv(bunord, _f);
+               case_bvf(bunord);
+               case_vvv(add, _d);
+               case_vvd(add);
+               case_vvv(sub, _d);
+               case_vvd(sub);
+               case_vvd(rsb);
+               case_vvv(mul, _d);
+               case_vvd(mul);
+               case_vvv(div, _d);
+               case_vvd(div);
+               case_vv(abs, _d);
+               case_vv(neg, _d);
+               case_vv(sqrt, _d);
+               case_vv(ext, _d);
+               case_vv(ld, _d);
+               case_vw(ld, _d);
+               case_vvv(ldx, _d);
+               case_vvw(ldx, _d);
+               case_vv(st, _d);
+               case_wv(st, _d);
+               case_vvv(stx, _d);
+               case_wvv(stx, _d);
+               case_vv(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_d(rn(node->u.w), node->v.d);
+               else
+                   vfp_movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_vv(ext, _f_d);
+               case_vvv(lt, _d);
+               case_vvd(lt);
+               case_vvv(le, _d);
+               case_vvd(le);
+               case_vvv(eq, _d);
+               case_vvd(eq);
+               case_vvv(ge, _d);
+               case_vvd(ge);
+               case_vvv(gt, _d);
+               case_vvd(gt);
+               case_vvv(ne, _d);
+               case_vvd(ne);
+               case_vvv(unlt, _d);
+               case_vvd(unlt);
+               case_vvv(unle, _d);
+               case_vvd(unle);
+               case_vvv(uneq, _d);
+               case_vvd(uneq);
+               case_vvv(unge, _d);
+               case_vvd(unge);
+               case_vvv(ungt, _d);
+               case_vvd(ungt);
+               case_vvv(ltgt, _d);
+               case_vvd(ltgt);
+               case_vvv(ord, _d);
+               case_vvd(ord);
+               case_vvv(unord, _d);
+               case_vvd(unord);
+               case_bvv(blt, _d);
+               case_bvd(blt);
+               case_bvv(ble, _d);
+               case_bvd(ble);
+               case_bvv(beq, _d);
+               case_bvd(beq);
+               case_bvv(bge, _d);
+               case_bvd(bge);
+               case_bvv(bgt, _d);
+               case_bvd(bgt);
+               case_bvv(bne, _d);
+               case_bvd(bne);
+               case_bvv(bunlt, _d);
+               case_bvd(bunlt);
+               case_bvv(bunle, _d);
+               case_bvd(bunle);
+               case_bvv(buneq, _d);
+               case_bvd(buneq);
+               case_bvv(bunge, _d);
+               case_bvd(bunge);
+               case_bvv(bungt, _d);
+               case_bvd(bungt);
+               case_bvv(bltgt, _d);
+               case_bvd(bltgt);
+               case_bvv(bord, _d);
+               case_bvd(bord);
+               case_bvv(bunord, _d);
+               case_bvd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               flush_consts();
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w, 1);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               flush_consts();
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.data = _jitc->consts.data;
+               undo.thumb = _jitc->thumb;
+               undo.const_offset = _jitc->consts.offset;
+               undo.patch_offset = _jitc->patches.offset;
+#if DISASSEMBLER
+               if (_jitc->data_info.ptr)
+                   undo.info_offset = _jitc->data_info.offset;
+#endif
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   invalidate_consts();
+                   _jitc->consts.data = undo.data;
+                   _jitc->thumb = undo.thumb;
+                   _jitc->consts.offset = undo.const_offset;
+                   _jitc->patches.offset = undo.patch_offset;
+#if DISASSEMBLER
+                   if (_jitc->data_info.ptr)
+                       _jitc->data_info.offset = undo.info_offset;
+#endif
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               flush_consts();
+               break;
+           case jit_code_movr_w_f:
+               if (jit_swf_p())
+                   swf_movr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_movr_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_f_w:
+               if (jit_swf_p())
+                   swf_movr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_movr_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_f_w:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_f(rn(node->u.w), node->v.f);
+               else
+                   vfp_movi_f(rn(node->u.w), node->v.f);
+               break;
+           case jit_code_movr_ww_d:
+               if (jit_swf_p())
+                   swf_movr_d(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_movr_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_d_ww:
+               if (jit_swf_p())
+                   swf_movr_d(rn(node->u.w), rn(node->w.w));
+               else
+                   vfp_movr_d(rn(node->u.w), rn(node->w.w));
+               break;
+           case jit_code_movi_d_ww:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_d(rn(node->u.w), node->w.d);
+               else
+                   vfp_movi_d(rn(node->u.w), node->w.d);
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               if (jit_swf_p())
+                   swf_vaarg_d(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+
+       if (_jitc->consts.length &&
+           (_jit->pc.uc - _jitc->consts.data >= 3968 ||
+            (jit_uword_t)_jit->pc.uc -
+            (jit_uword_t)_jitc->consts.patches[0] >= 3968)) {
+           /* longest sequence should be 64 bytes, but preventively
+            * do not let it go past 128 remaining bytes before a flush */
+           if (node->next &&
+               node->next->code != jit_code_jmpi &&
+               node->next->code != jit_code_jmpr &&
+               node->next->code != jit_code_epilog) {
+               /* insert a jump, flush constants and continue */
+               word = _jit->pc.w;
+               assert(!jit_thumb_p());
+               B(0);
+               flush_consts();
+               patch_at(arm_patch_jump, word, _jit->pc.w);
+           }
+       }
+    }
+#undef case_bvd
+#undef case_bvf
+#undef case_brw
+#undef case_bvv
+#undef case_brr
+#undef case_wvv
+#undef case_wrr
+#undef case_vvd
+#undef case_vvf
+#undef case_vvw
+#undef case_rrw
+#undef case_vvv
+#undef case_rrr
+#undef case_wv
+#undef case_wr
+#undef case_vw
+#undef case_vv
+#undef case_rw
+#undef case_rr
+
+    flush_consts();
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       assert(_jitc->patches.ptr[offset].kind & arm_patch_node);
+       node = _jitc->patches.ptr[offset].node;
+       word = _jitc->patches.ptr[offset].inst;
+       if (!jit_thumb_p() &&
+           (node->code == jit_code_movi || node->code == jit_code_calli)) {
+           /* calculate where to patch word */
+           value = *(jit_int32_t *)word;
+           assert((value & 0x0f700000) == ARM_LDRI);
+           /* offset may become negative (-4) if last instruction
+            * before unconditional branch and data following
+            * FIXME can this cause issues in the preprocessor prefetch
+            * or something else? should not, as the constants are after
+            * an unconditional jump */
+           if (value & ARM_P)  value =   value & 0x00000fff;
+           else                value = -(value & 0x00000fff);
+           word = word + 8 + value;
+       }
+       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].kind & ~arm_patch_node, word, value);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_rewind.c"
+#  include "jit_arm-cpu.c"
+#  include "jit_arm-swf.c"
+#  include "jit_arm-vfp.c"
+#undef CODE
+
+/*   Flush the instruction cache for every page overlapping the
+ * [fptr, tptr) range, issuing one __clear_cache() call per page.
+ *   The body is compiled out when __GNUC__ is not defined. */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_uword_t                 page = sysconf(_SC_PAGE_SIZE);
+    jit_uword_t                 mask = -page;
+    /* round the start down and the end up to page boundaries */
+    jit_uword_t                 addr = (jit_uword_t)fptr & mask;
+    jit_uword_t                 last = ((jit_uword_t)tptr + page - 1) & mask;
+
+    while (addr < last) {
+       __clear_cache((void *)addr, (void *)(addr + page));
+       addr += page;
+    }
+#endif
+}
+
+/*   Wrapper that forwards to the ldxi_i() emitter, passing both register
+ * arguments through rn() and the displacement i0 unchanged.
+ *   NOTE(review): presumably emits a word load of r0 from r1 + i0, and
+ * rn() presumably maps jit register ids to hardware register numbers;
+ * neither definition is visible here -- confirm. */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_i(rn(r0), rn(r1), i0);
+}
+
+/*   Wrapper that forwards to the stxi_i() emitter, passing both register
+ * arguments through rn() and the displacement i0 unchanged.
+ *   NOTE(review): presumably emits a word store of r1 at r0 + i0;
+ * stxi_i() and rn() are defined outside this view -- confirm. */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_i(i0, rn(r0), rn(r1));
+}
+
+/*   Emit the double variant of ldxi, selecting the vfp_ or the swf_
+ * emitter according to jit_swf_p(). */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (!jit_swf_p()) {
+       vfp_ldxi_d(rn(r0), rn(r1), i0);
+       return;
+    }
+    swf_ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*   Emit the double variant of stxi, selecting the vfp_ or the swf_
+ * emitter according to jit_swf_p(). */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_swf_p()) {
+       vfp_stxi_d(i0, rn(r0), rn(r1));
+       return;
+    }
+    swf_stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*   Reserve a free register pair without going through jit_get_reg().
+ *   The candidate pairs are tried in the order listed below; the first
+ * pair whose two registers are both free is marked in _jitc->regarg
+ * and its first register is returned.  JIT_NOREG is returned when no
+ * pair is free, as the cost of spills is greater than splitting a
+ * double load/store in two operations. */
+static jit_int32_t
+_jit_get_reg_pair(jit_state_t *_jit)
+{
+    static const jit_int32_t    pairs[][2] = {
+       { _R0, _R1 },
+       { _R2, _R3 },
+       { _R4, _R5 },
+       { _R6, _R7 },
+       { _R8, _R9 }
+    };
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 index;
+    jit_int32_t                 count;
+
+    count = sizeof(pairs) / sizeof(pairs[0]);
+    for (index = 0; index < count; index++) {
+       lo = pairs[index][0];
+       hi = pairs[index][1];
+       if (!jit_reg_free_p(lo) || !jit_reg_free_p(hi))
+           continue;
+       jit_regset_setbit(&_jitc->regarg, lo);
+       jit_regset_setbit(&_jitc->regarg, hi);
+       return (lo);
+    }
+    return (JIT_NOREG);
+}
+
+/*   Release a pair obtained from jit_get_reg_pair(): reg is released
+ * first, then the register paired with it.  Aborts if reg is not the
+ * first register of one of the known pairs. */
+static void
+_jit_unget_reg_pair(jit_state_t *_jit, jit_int32_t reg)
+{
+    jit_int32_t                 pair;
+
+    jit_unget_reg(reg);
+    if (reg == _R0)
+       pair = _R1;
+    else if (reg == _R2)
+       pair = _R3;
+    else if (reg == _R4)
+       pair = _R5;
+    else if (reg == _R6)
+       pair = _R7;
+    else if (reg == _R8)
+       pair = _R9;
+    else
+       abort();
+    jit_unget_reg(pair);
+}
+
+/*   A prolog must start at an address that is a multiple of 4 bytes.
+ *   Earlier code happened to work without testing this condition, but
+ * the jit_frame and jit_tramp code requires it.
+ *   Returns 1 when in thumb mode, the current pc is not a multiple of
+ * 4, and the first node that is not a note, name or label, starting
+ * at node, is a prolog; returns 0 otherwise. */
+static jit_bool_t
+_must_align_p(jit_state_t *_jit, jit_node_t *node)
+{
+    if (!jit_thumb_p() || !(_jit->pc.w & 3))
+       return (0);
+    while (node) {
+       if (node->code == jit_code_prolog)
+           return (1);
+       /* only note, name and label nodes may precede the prolog */
+       if (node->code != jit_code_note &&
+           node->code != jit_code_name &&
+           node->code != jit_code_label)
+           return (0);
+       node = node->next;
+    }
+    return (0);
+}
+
+/*   Emit a pc-relative load of the constant i0 into r0 (ARM mode only,
+ * see the assertion below).
+ *   When uniq is false and i0 is found in the previously flushed
+ * constant pool within backward reach, a backward LDRIN is emitted and
+ * nothing is queued.  Otherwise an LDRI with a zero placeholder offset
+ * is emitted and an (instruction address, value index) pair is pushed
+ * to _jitc->consts.patches; flush_consts() later resolves these pairs.
+ *   When uniq is true, i0 must be zero (asserted) and a new slot is
+ * always appended instead of sharing an existing one. */
+static void
+_load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          w;
+    jit_word_t          d;
+    jit_word_t          base;
+    jit_int32_t                *data;
+    jit_int32_t                 size;
+    jit_int32_t                 offset;
+
+    assert(!jit_thumb_p());
+    if (!uniq) {
+       /* use zero, a valid directly encoded immediate, to avoid the
+        * need of a bitmask to know what offsets will be patched, so
+        * that comparison will always fail for constants that cannot
+        * be encoded */
+       assert(i0 != 0);
+
+       /* Actually, code is (currently at least) not self modifying,
+        * so, any value reachable backwards is valid as a constant. */
+
+       /* FIXME a quickly updateable/mutable hash table could be
+        * better here, but most times only a few comparisons
+        * should be done
+        */
+
+       /* search in previous constant pool */
+       if ((data = (jit_int32_t *)_jitc->consts.data)) {
+           w = (jit_word_t)data;
+           /* maximum backwards offset */
+           base = (_jit->pc.w + 8) - 4092;
+           if (base <= w)
+               /* can scan all possible available backward constants */
+               base = 0;
+           else
+               /* first word index of the pool still within reach */
+               base = (base - w) >> 2;
+           size = _jitc->consts.size >> 2;
+           /* scan from the entry nearest to pc towards the farthest */
+           for (offset = size - 1; offset >= base; offset--) {
+               if (data[offset] == i0) {
+                   w = (jit_word_t)(data + offset);
+                   /* distance back from pc + 8 to the matching entry */
+                   d = (_jit->pc.w + 8) - w;
+                   LDRIN(r0, _R15_REGNO, d);
+                   return;
+               }
+           }
+       }
+    }
+    else
+       assert(i0 == 0);
+
+    /* first half of the patch pair: address of the load to fix up */
+    _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w;
+    /* (probably) positive forward offset */
+    LDRI(r0, _R15_REGNO, 0);
+
+    if (!uniq) {
+       /* search already requested values */
+       for (offset = 0; offset < _jitc->consts.length; offset++) {
+           if (_jitc->consts.values[offset] == i0) {
+               /* second half: index of the pending value to share */
+               _jitc->consts.patches[_jitc->consts.offset++] = offset;
+               return;
+           }
+       }
+    }
+
+#if DEBUG
+    /* cannot run out of space because of limited range
+     * but assert anyway to catch logic errors */
+    assert(_jitc->consts.length < 1024);
+    assert(_jitc->consts.offset < 2048);
+#endif
+    /* second half: index of the new value appended just below */
+    _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length;
+    _jitc->consts.values[_jitc->consts.length++] = i0;
+}
+
+/*   Write the pending constants at the current pc, advance pc past
+ * them, and patch every queued load so it addresses its slot.  The
+ * pool just written is remembered in _jitc->consts.data/size, which is
+ * what load_const() scans when looking for reusable constants.
+ *   Does nothing when no constants are pending. */
+static void
+_flush_consts(jit_state_t *_jit)
+{
+    jit_word_t          word;
+    jit_int32_t                 offset;
+
+    /* if no forward constants */
+    if (!_jitc->consts.length)
+       return;
+    assert(!jit_thumb_p());
+    /* address of the first constant of the pool */
+    word = _jit->pc.w;
+    _jitc->consts.data = _jit->pc.uc;
+    /* byte size: each value takes 4 bytes */
+    _jitc->consts.size = _jitc->consts.length << 2;
+    /* FIXME check will not overrun, otherwise, need to reallocate
+     * code buffer and start over */
+    jit_memcpy(_jitc->consts.data, _jitc->consts.values, _jitc->consts.size);
+    _jit->pc.w += _jitc->consts.size;
+
+#if DISASSEMBLER
+    /* record the address and byte length of the pool in data_info,
+     * growing the table by 1024 entries when it is full */
+    if (_jitc->data_info.ptr) {
+       if (_jitc->data_info.offset >= _jitc->data_info.length) {
+           jit_realloc((jit_pointer_t *)&_jitc->data_info.ptr,
+                       _jitc->data_info.length * sizeof(jit_data_info_t),
+                       (_jitc->data_info.length + 1024) *
+                       sizeof(jit_data_info_t));
+           _jitc->data_info.length += 1024;
+       }
+       _jitc->data_info.ptr[_jitc->data_info.offset].code = word;
+       _jitc->data_info.ptr[_jitc->data_info.offset].length = _jitc->consts.size;
+       ++_jitc->data_info.offset;
+    }
+#endif
+
+    /* patches holds (instruction address, value index) pairs */
+    for (offset = 0; offset < _jitc->consts.offset; offset += 2)
+       patch_at(arm_patch_load, _jitc->consts.patches[offset],
+                word + (_jitc->consts.patches[offset + 1] << 2));
+    _jitc->consts.length = _jitc->consts.offset = 0;
+}
+
+/*   To be called when a function must be emitted again from its
+ * start: drops the constants and load patches queued so far without
+ * writing them to the code buffer. */
+static void
+_invalidate_consts(jit_state_t *_jit)
+{
+    /* nothing is queued, leave the state untouched */
+    if (!_jitc->consts.length)
+       return;
+    _jitc->consts.offset = 0;
+    _jitc->consts.length = 0;
+}
+
+/*   Queue a fixup for the instruction at instr, whose target is the
+ * node referenced by node (node->v.n for movi, node->u.n otherwise).
+ * The target must not be flagged jit_flag_patch yet (asserted).
+ *   The recorded kind is arm_patch_word for movi and calli and
+ * arm_patch_jump for the remaining codes, always or'ed with
+ * arm_patch_node.  The patch table grows by 1024 entries when full. */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+    jit_int32_t                 kind;
+
+    assert(node->flag & jit_flag_node);
+    if (node->code == jit_code_movi) {
+       flag = node->v.n->flag;
+       kind = arm_patch_word;
+    }
+    else {
+       flag = node->u.n->flag;
+       /* NOTE(review): the jmpi clause requires jit_flag_node to be
+        * clear, which contradicts the assertion at function entry,
+        * so it looks unreachable in assert-enabled builds -- confirm
+        * the intent */
+       if (node->code == jit_code_calli ||
+           (node->code == jit_code_jmpi && !(node->flag & jit_flag_node)))
+           kind = arm_patch_word;
+       else
+           kind = arm_patch_jump;
+    }
+    assert(!(flag & jit_flag_patch));
+    kind |= arm_patch_node;
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+    _jitc->patches.ptr[_jitc->patches.offset].kind = kind;
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    ++_jitc->patches.offset;
+}
diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c
new file mode 100644 (file)
index 0000000..15b91b9
--- /dev/null
@@ -0,0 +1,429 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+#if DISASSEMBLER
+#  include <dis-asm.h>
+#endif
+
+/*
+ * Prototypes
+ */
+#if DISASSEMBLER
+static int
+disasm_compare_symbols(const void *ap, const void *bp);
+
+static void
+disasm_print_address(bfd_vma addr, struct disassemble_info *info);
+
+#define disassemble(u, v)      _disassemble(_jit, u, v)
+static void
+_disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length);
+#endif
+
+/*
+ * Initialization
+ */
+#if DISASSEMBLER
+/* bfd handle opened by jit_init_debug() and closed by jit_finish_debug() */
+static bfd                      *disasm_bfd;
+/* disassembler configuration and current buffer description */
+static disassemble_info                  disasm_info;
+/* per-instruction print function returned by disassembler() */
+static disassembler_ftype        disasm_print;
+/* symbols kept for annotating addresses, sorted by value */
+static asymbol                 **disasm_symbols;
+/* storage returned by bfd_get_synthetic_symtab() */
+static asymbol                  *disasm_synthetic;
+/* number of entries in disasm_symbols */
+static long                      disasm_num_symbols;
+/* value returned by bfd_get_synthetic_symtab() */
+static long                      disasm_num_synthetic;
+/* jit state being disassembled; set by _disassemble() and read by
+ * disasm_print_address() */
+static jit_state_t              *disasm_jit;
+/* all disassembler output goes to this stream */
+#define disasm_stream            stdout
+#endif
+
+/*
+ * Implementation
+ */
+/*   Set up the disassembler state used by jit_disassemble().
+ *   Opens progname with bfd (falling back to "/proc/self/exe" on linux
+ * when progname is NULL or cannot be opened), selects the bfd
+ * architecture/machine for the host, obtains the disassembler print
+ * function, and loads, filters and sorts the symbols of the binary so
+ * that addresses can be annotated with symbol names.
+ *   Returns silently, leaving disasm_bfd NULL, if no binary can be
+ * opened.  The function body is empty when DISASSEMBLER is not set. */
+void
+jit_init_debug(const char *progname)
+{
+#if DISASSEMBLER
+    bfd_init();
+
+    if (progname)
+       disasm_bfd = bfd_openr(progname, NULL);
+    if (disasm_bfd == NULL) {
+#if defined(__linux__)
+       disasm_bfd = bfd_openr("/proc/self/exe", NULL);
+       if (disasm_bfd == NULL)
+#endif
+           return;
+    }
+    /* NOTE(review): the results of both format checks are ignored */
+    bfd_check_format(disasm_bfd, bfd_object);
+    bfd_check_format(disasm_bfd, bfd_archive);
+    INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
+#  if defined(__i386__) || defined(__x86_64__)
+    disasm_info.arch = bfd_arch_i386;
+#    if defined(__x86_64__)
+#      if __WORDSIZE == 32
+    disasm_info.mach = bfd_mach_x64_32;
+#      else
+    disasm_info.mach = bfd_mach_x86_64;
+#      endif
+#    else
+    disasm_info.mach = bfd_mach_i386_i386;
+#    endif
+#  endif
+#  if defined(__powerpc__)
+    disasm_info.arch = bfd_arch_powerpc;
+    disasm_info.mach = bfd_mach_ppc64;
+    /* NOTE(review): the target init call below is repeated after the
+     * disassembler options are set -- confirm both calls are needed */
+#    if HAVE_DISASSEMBLE_INIT_FOR_TARGET
+    disassemble_init_for_target(&disasm_info);
+#    elif HAVE_DISASSEMBLE_INIT_POWERPC
+    disassemble_init_powerpc(&disasm_info);
+#    endif
+#    if defined(__powerpc64__)
+    disasm_info.disassembler_options = "64";
+#    endif
+#    if HAVE_DISASSEMBLE_INIT_FOR_TARGET
+    disassemble_init_for_target(&disasm_info);
+#    elif HAVE_DISASSEMBLE_INIT_POWERPC
+    disassemble_init_powerpc(&disasm_info);
+#    endif
+#  endif
+#  if defined(__sparc__)
+    disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
+#  endif
+#  if defined(__s390__) || defined(__s390x__)
+    disasm_info.arch = bfd_arch_s390;
+#    if __WORDSIZE == 32
+    disasm_info.mach = bfd_mach_s390_31;
+#    else
+    disasm_info.mach = bfd_mach_s390_64;
+#    endif
+    disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
+    disasm_info.disassembler_options = "zarch";
+#  endif
+#  if defined(__alpha__)
+    disasm_info.arch = bfd_arch_alpha;
+    disasm_info.mach = bfd_mach_alpha_ev6;
+#  endif
+#  if defined(__hppa__)
+    disasm_info.arch = bfd_arch_hppa;
+    disasm_info.mach = bfd_mach_hppa10;
+#  endif
+#  if defined(__riscv)
+    disasm_info.arch = bfd_arch_riscv;
+#  if __WORDSIZE == 32
+    disasm_info.mach = bfd_mach_riscv32;
+#  else
+    disasm_info.mach = bfd_mach_riscv64;
+#  endif
+#  endif
+    disasm_info.print_address_func = disasm_print_address;
+
+    /* disassembler() takes different arguments depending on BINUTILS_2_29 */
+# if BINUTILS_2_29
+    disasm_print = disassembler(disasm_info.arch, __BYTE_ORDER == __BIG_ENDIAN,
+                               disasm_info.mach, disasm_bfd);
+#  else
+    disasm_print = disassembler(disasm_bfd);
+#  endif
+    assert(disasm_print);
+
+    if (bfd_get_file_flags(disasm_bfd) & HAS_SYMS) {
+       asymbol         **in;
+       asymbol         **out;
+       asymbol          *symbol;
+       long              offset;
+       long              sym_count;
+       long              dyn_count;
+       long              sym_storage;
+       long              dyn_storage;
+
+       if ((sym_storage = bfd_get_symtab_upper_bound(disasm_bfd)) >= 0) {
+
+           if (bfd_get_file_flags(disasm_bfd) & DYNAMIC) {
+               dyn_storage = bfd_get_dynamic_symtab_upper_bound(disasm_bfd);
+#  if defined(__alpha__)
+               /* XXX */
+               if (dyn_storage < 0)
+                   dyn_storage = 0;
+#  else
+               assert(dyn_storage >= 0);
+#  endif
+           }
+           else
+               dyn_storage = 0;
+
+           /* one array holds the static symbols followed by the
+            * dynamic ones */
+           jit_alloc((jit_pointer_t *)&disasm_symbols,
+                     (sym_storage + dyn_storage) * sizeof(asymbol *));
+           sym_count = bfd_canonicalize_symtab(disasm_bfd, disasm_symbols);
+           assert(sym_count >= 0);
+           if (dyn_storage) {
+               dyn_count = bfd_canonicalize_dynamic_symtab(disasm_bfd,
+                                                           disasm_symbols +
+                                                           sym_count);
+               assert(dyn_count >= 0);
+           }
+           else
+               dyn_count = 0;
+           disasm_num_symbols = sym_count + dyn_count;
+
+           disasm_num_synthetic = bfd_get_synthetic_symtab(disasm_bfd,
+                                                           sym_count,
+                                                           disasm_symbols,
+                                                           dyn_count,
+                                                           disasm_symbols +
+                                                           sym_count,
+                                                           &disasm_synthetic);
+           /* append pointers to the synthetic symbols, if any */
+           if (disasm_num_synthetic > 0) {
+               jit_realloc((jit_pointer_t *)&disasm_symbols,
+                           (sym_storage + dyn_storage) * sizeof(asymbol *),
+                           (sym_storage + dyn_storage + disasm_num_synthetic) *
+                           sizeof(asymbol *));
+               for (offset = 0; offset < disasm_num_synthetic; offset++)
+                   disasm_symbols[disasm_num_symbols++] =
+                       disasm_synthetic + offset;
+           }
+
+           /* remove symbols not useful for disassemble */
+           /* (the array is compacted in place: out trails in) */
+           in = out = disasm_symbols;
+           for (offset = 0; offset < disasm_num_symbols; offset++) {
+               symbol = *in++;
+               if (symbol->name &&
+                   symbol->name[0] != '\0' &&
+                   !(symbol->flags & (BSF_DEBUGGING | BSF_SECTION_SYM)) &&
+                   !bfd_is_und_section(symbol->section) &&
+                   !bfd_is_com_section(symbol->section))
+                   *out++ = symbol;
+           }
+           disasm_num_symbols = out - disasm_symbols;
+           /* sorted by value, for the search in disasm_print_address() */
+           qsort(disasm_symbols, disasm_num_symbols,
+                 sizeof(asymbol *), disasm_compare_symbols);
+       }
+    }
+#endif
+}
+
+/*   Release everything acquired by jit_init_debug(): the synthetic
+ * symbol storage, the symbol pointer array and the bfd handle.
+ *   The symbol counts and the bfd handle are reset afterwards, so that
+ * a later jit_disassemble() or jit_init_debug() call does not see a
+ * closed bfd or a stale symbol count, and so that calling this function
+ * again is harmless.  The body is empty when DISASSEMBLER is not set. */
+void
+jit_finish_debug(void)
+{
+#if DISASSEMBLER
+    if (disasm_synthetic)
+       jit_free((jit_pointer_t *)&disasm_synthetic);
+    if (disasm_symbols)
+       jit_free((jit_pointer_t *)&disasm_symbols);
+    /* no symbol may be looked up once the arrays are gone */
+    disasm_num_synthetic = 0;
+    disasm_num_symbols = 0;
+    if (disasm_bfd) {
+       bfd_close (disasm_bfd);
+       /* _jit_disassemble() and jit_init_debug() test this pointer */
+       disasm_bfd = NULL;
+    }
+#endif
+}
+
+/*   Disassemble all code generated so far for _jit, that is, the bytes
+ * from _jit->code.ptr up to _jit->pc.uc.  Does nothing when no bfd was
+ * opened by jit_init_debug() or when DISASSEMBLER is not set. */
+void
+_jit_disassemble(jit_state_t *_jit)
+{
+#if DISASSEMBLER
+    if (!disasm_bfd)
+       return;
+#  if defined(__arm__)
+    /* FIXME add mapping for prolog switching to arm and possible jump
+     * before first prolog also in arm mode */
+    if (jit_cpu.thumb)
+       disasm_info.disassembler_options = "force-thumb";
+    else
+       disasm_info.disassembler_options = "";
+#  endif
+
+    disassemble(_jit->code.ptr, _jit->pc.uc - _jit->code.ptr);
+#endif
+}
+
+#if DISASSEMBLER
+/* Based on objdump source */
+/*   qsort() comparison callback: orders two asymbol pointers by
+ * ascending bfd_asymbol_value().  Returns 1, -1 or 0. */
+static int
+disasm_compare_symbols(const void *ap, const void *bp)
+{
+    const asymbol      *lhs = *(const asymbol **)ap;
+    const asymbol      *rhs = *(const asymbol **)bp;
+    bfd_vma             lvalue = bfd_asymbol_value(lhs);
+    bfd_vma             rvalue = bfd_asymbol_value(rhs);
+
+    return ((lvalue > rvalue) - (lvalue < rvalue));
+}
+
+/*   Addresses are always formatted from a (unsigned) long long value
+ * with "%llx".  A 64 bit value needs up to 16 hex digits plus the
+ * terminating NUL, which does not fit in 16 bytes, so a single, larger
+ * buffer size is used regardless of __WORDSIZE. */
+#define address_buffer_length          32
+#define address_buffer_format          "%llx"
+/*   print_address_func callback installed in disasm_info by
+ * jit_init_debug().  Prints addr as "0x<hex>" to info->stream and then,
+ * when possible, an annotation:
+ *   - " name:file:line" if addr is inside the code of the jit state
+ *     being disassembled (disasm_jit) and jit_get_note() succeeds;
+ *   - otherwise " # symbol" if a symbol loaded by jit_init_debug() has
+ *     exactly the value addr. */
+static void
+disasm_print_address(bfd_vma addr, struct disassemble_info *info)
+{
+    char               *name;
+    char               *file;
+    int                         line;
+    char                buffer[address_buffer_length];
+
+    /* bounded formatting, never writes past the end of buffer */
+    snprintf(buffer, sizeof(buffer), address_buffer_format,
+            (unsigned long long)addr);
+    (*info->fprintf_func)(info->stream, "0x%s", buffer);
+
+    /* no jit_state_t argument is available in this callback, so the
+     * jit_* macros below are pointed at disasm_jit through _jit */
+#  define _jit                         disasm_jit
+#  undef jit_pointer_p
+#  define jit_pointer_p(u)                                     \
+       ((u) >= _jit->code.ptr && (u) < _jit->pc.uc)
+    if (jit_pointer_p((jit_uint8_t *)(jit_word_t)addr)) {
+       if (jit_get_note((jit_uint8_t *)(jit_word_t)addr, &name, &file, &line))
+           (*info->fprintf_func)(info->stream, " %s:%s:%d",
+                                 name ? name : "",
+                                 file ? file : "",
+                                 line);
+    }
+#  undef jit_pointer_p
+#  undef _jit
+    else if (disasm_num_symbols) {
+       long             low;
+       long             high;
+       long             offset;
+       asymbol         *symbol;
+
+       /* approximate binary search over the value sorted symbols; the
+        * linear scans below settle on the final candidate */
+       low = 0;
+       high = disasm_num_symbols;
+       do {
+           offset = (low + high) >> 1;
+           symbol = disasm_symbols[offset];
+           if (bfd_asymbol_value(symbol) > addr)
+               high = offset - 1;
+           else if (bfd_asymbol_value(symbol) < addr)
+               low = offset + 1;
+           else
+               break;
+       } while (low < high);
+
+       if (offset >= 0 && offset < disasm_num_symbols) {
+           if (bfd_asymbol_value(symbol) < addr) {
+               /* walk forward to the first symbol not below addr */
+               while (++offset < disasm_num_symbols) {
+                   symbol = disasm_symbols[offset];
+                   if (bfd_asymbol_value(symbol) >= addr)
+                       break;
+               }
+           }
+           else if (bfd_asymbol_value(symbol) > addr) {
+               /* walk backward to the lowest symbol not below addr */
+               while (offset--) {
+                   if (bfd_asymbol_value(disasm_symbols[offset]) < addr)
+                       break;
+                   symbol = disasm_symbols[offset];
+               }
+           }
+           /* only an exact match is printed */
+           if (bfd_asymbol_value(symbol) == addr)
+               (*info->fprintf_func)(info->stream, " # %s", symbol->name);
+       }
+    }
+}
+
+static void
+_disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length)
+{
+    int                         bytes;
+    char               *name, *old_name;
+    char               *file, *old_file;
+    int                         line,  old_line;
+#if __arm__
+    jit_int32_t                 offset;
+    jit_bool_t          data_info;
+    jit_int32_t                 data_offset;
+#endif
+    bfd_vma             pc = (jit_uword_t)code;
+    bfd_vma             end = (jit_uword_t)code + length;
+    char                buffer[address_buffer_length];
+#if DEVEL_DISASSEMBLER
+    jit_node_t         *node;
+    jit_uword_t                 prevw;
+#endif
+
+#if __arm__
+    data_info = _jitc && _jitc->data_info.ptr;
+    data_offset = 0;
+#endif
+    disasm_info.buffer = code;
+    disasm_info.buffer_vma = (jit_uword_t)code;
+    disasm_info.buffer_length = length;
+    old_file = old_name = NULL;
+    old_line = 0;
+    disasm_jit = _jit;
+#if DEVEL_DISASSEMBLER
+    node = _jitc->head;
+    prevw = pc;
+#endif
+    while (pc < end) {
+#if DEVEL_DISASSEMBLER
+       while (node && (jit_uword_t)(prevw + node->offset) < (jit_uword_t)pc) {
+           prevw += node->offset;
+           node = node->next;
+       }
+       while (node && (jit_uword_t)(prevw + node->offset) == (jit_uword_t)pc) {
+           jit_print_node(node);
+           fputc('\n', stdout); 
+           prevw += node->offset;
+           node = node->next;
+       }
+#endif
+#if __arm__
+    again:
+       if (data_info) {
+           while (_jitc->data_info.ptr[data_offset].code < pc) {
+               if (++data_offset >= _jitc->data_info.length) {
+                   data_info = 0;
+                   goto again;
+               }
+           }
+           if (pc == _jitc->data_info.ptr[data_offset].code) {
+               offset = _jitc->data_info.ptr[data_offset].length;
+               for (; offset >= 4; offset -= 4, pc += 4) {
+                   bytes = sprintf(buffer, address_buffer_format, pc);
+                   (*disasm_info.fprintf_func)(disasm_stream,
+                                               "%*c0x%s\t.data\t0x%08x\n",
+                                               16 - bytes, ' ', buffer,
+                                               *(jit_uint32_t *)
+                                               (jit_uint32_t)pc);
+               }
+               /* reset disassemble information instead of attempting
+                * to hack the arm specific backend data structures to
+                * tell it to forward the required number of bytes. */
+               disasm_info.buffer = (jit_pointer_t)(jit_uint32_t)pc;
+               disasm_info.buffer_vma = (jit_uword_t)pc;
+               if ((disasm_info.buffer_length = end - pc) <= 0)
+                   break;
+           }
+       }
+#endif
+       if (jit_get_note((jit_uint8_t *)(jit_word_t)pc, &name, &file, &line) &&
+           (name != old_name || file != old_file || line != old_line)) {
+           (*disasm_info.fprintf_func)(disasm_stream, "# %s:%s:%d\n",
+                                       name ? name : "",
+                                       file ? file : "",
+                                       line);
+           old_name = name;
+           old_file = file;
+           old_line = line;
+       }
+
+       bytes = sprintf(buffer, address_buffer_format, (long long)pc);
+       (*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t",
+                                   16 - bytes, ' ', buffer);
+       pc += (*disasm_print)(pc, &disasm_info);
+       putc('\n', disasm_stream);
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c
new file mode 100644 (file)
index 0000000..db5a36a
--- /dev/null
@@ -0,0 +1,2796 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+typedef struct idiv {         /* pair of signed ints; presumably a signed division result (confirm at use site) */
+    int                        quo;    /* presumably the quotient */
+    int                        rem;    /* presumably the remainder */
+} idiv_t;
+
+typedef struct udiv {         /* pair of unsigned ints; presumably an unsigned division result (confirm at use site) */
+    unsigned int       quo;    /* presumably the quotient */
+    unsigned int       rem;    /* presumably the remainder */
+} udiv_t;
+
+/* 16 spill bytes; -52 for first actual stack argument */
+#define params_offset          -32
+/* Assume all callee save registers may need to be spilled */
+#define alloca_offset          192
+#define _R0_REGNO              0
+#define _R1_REGNO              1
+#define _RP_REGNO              2
+#define _FP_REGNO              3
+#define _R19_REGNO             19
+#define _R23_REGNO             23
+#define _R24_REGNO             24
+#define _R25_REGNO             25
+#define _R26_REGNO             26
+#define _R28_REGNO             28
+#define _R29_REGNO             29
+#define _SP_REGNO              30
+#define _R31_REGNO             31
+#define _CR11_REGNO            11
+#define ii(v)                  *_jit->pc.ui++ = v      /* store v through _jit->pc.ui, then advance that pointer by one element */
+#define f1(o,b,t,i)                    _f1(_jit,o,b,t,i)
+static void _f1(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f2(o,b,r,i,j)                  _f2(_jit,o,b,r,i,j)
+static void _f2(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f3(o,b,t,i,j)                  _f3(_jit,o,b,t,i,j)
+static void _f3(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f4(o,b,x,s,u,y,c,z,m,t)                _f4(_jit,o,b,x,s,u,y,c,z,m,t)
+static void _f4(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f5(o,b,i,s,a,y,c,z,m,t)                _f5(_jit,o,b,i,s,a,y,c,z,m,t)
+static void _f5(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f6(o,b,r,s,a,x,c,y,m,i)                _f6(_jit,o,b,r,s,a,x,c,y,m,i)
+static void _f6(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f7(o,r,i)                      _f7(_jit,o,r,i)
+static void _f7(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f8(o,r2,r1,cf,e1,x,e2,y,d,t)   _f8(_jit,o,r2,r1,cf,e1,x,e2,y,d,t)
+static void _f8(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f9(o,r,t,cf,e1,im)             _f9(_jit,o,r,t,cf,e1,im)
+static void _f9(jit_state_t*,
+               jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f10(o,r2,r1,u,v,w,x,sa,y,t)    _f10(_jit,o,r2,r1,u,v,w,x,sa,y,t)
+static void _f10(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f11(o,r2,r1,c,x,y,z,u,t)       _f11(_jit,o,r2,r1,c,x,y,z,u,t)
+static void _f11(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f12(o,r,t,c,x,se,y,c1,z,clen)  _f12(_jit,o,r,t,c,x,se,y,c1,z,clen)
+static void _f12(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f13(o,t,r,c,x,nz,c1,clen)      _f13(_jit,o,t,r,c,x,nz,c1,clen)
+static void _f13(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t);
+#define f13x(o,t,i,c,x,nz,c1,clen)     _f13x(_jit,o,t,i,c,x,nz,c1,clen)
+static void _f13x(jit_state_t*,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f14(o,r2,r1,c,x,cp,y,cpos,t)   _f14(_jit,o,r2,r1,c,x,cp,y,cpos,t)
+static void _f14(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f15(o,r,t,c,c1,p,se,pos,clen)  _f15(_jit,o,r,t,c,c1,p,se,pos,clen)
+static void _f15(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f16(o,t,r,c,c1,cp,nz,cpos,clen)        _f16(_jit,o,t,r,c,c1,cp,nz,cpos,clen)
+static void _f16(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f16x(o,t,i,c,c1,cp,nz,cpos,clen) _f16x(_jit,o,t,i,c,c1,cp,nz,cpos,clen)
+static void _f16x(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f17(o,r2,r1,c,i,n)             _f17(_jit,o,r2,r1,c,i,n)
+static void _f17(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f17x(o,r2,r1,c,i,n)            _f17x(_jit,o,r2,r1,c,i,n)
+static void _f17x(jit_state_t*,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f18(o,p,r,c,i,n)               _f18(_jit,o,p,r,c,i,n)
+static void _f18(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f19(o,b,s,i,n)                 _f19(_jit,o,b,s,i,n)
+static void _f19(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f20(o,t,i,g,n)                 _f20(_jit,o,t,i,g,n)
+static void _f20(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f21(o,t,x,y,n)                 _f21(_jit,o,t,x,y,n)
+static void _f21(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#define f22(o,b,x,r,n,p)               _f22(_jit,o,b,x,r,n,p)
+static void _f22(jit_state_t*,jit_int32_t,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f23(o,a,b,c,d,e,f,g,h)         _f23(_jit,o,a,b,c,d,e,f,g,h)
+static void _f23(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f24(o,b,x,s,y,m,r)             _f24(_jit,o,b,x,s,y,m,r)
+static void _f24(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f25(o,b,i,s,y,m,r)             _f25(_jit,o,b,i,s,y,m,r)
+static void _f25(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f26(o,b,x,s,y,m,r)             _f26(_jit,o,b,x,s,y,m,r)
+static void _f26(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f27(o,i,j)                     _f27(_jit,o,i,j)
+static void _f27(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f28(o,i)                       _f28(_jit,o,i)
+static void _f28(jit_state_t*,jit_int32_t,jit_int32_t) maybe_unused;
+#define f29(o,r,x,s,y,t)               _f29(_jit,o,r,x,s,y,t)
+static void _f29(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f30(o,b,r,s,x,y,t)             _f30(_jit,o,b,r,s,x,y,t)
+static void _f30(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f31(o,t,r,v,x,y)               _f31(_jit,o,t,r,v,x,y)
+static void _f31(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f33(o,x,r,y,z,u)               _f33(_jit,o,x,r,y,z,u)
+static void _f33(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f34(o,o1,x,sf,n,o2)            _f34(_jit,o,o1,x,sf,n,o2)
+static void _f34(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f35(o,op,x,sf,n,t)             _f35(_jit,o,op,x,sf,n,t)
+static void _f35(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f36(o,r,o1,x,sf,n,o2)          _f36(_jit,o,r,o1,x,sf,n,o2)
+static void _f36(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f37(o,r2,r1,o1,x,sf,n,o2)      _f37(_jit,o,r2,r1,o1,x,sf,n,o2)
+static void _f37(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f38(o,s,u,n)                   _f38(_jit,o,s,u,n)
+static void _f38(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+/* nullify next instruction if condition is met with addition */
+#define ADD_CF_NV              0       /* never */
+#define ADD_CF_EQ              2       /* O1 == -O2 (word) */
+#define ADD_CF_LT              4       /* O1 <  -O2 (signed)*/
+#define ADD_CF_LE              6       /* O1 <= -O2 (signed) */
+#define ADD_CF_NUV             8       /* O1 +   O2 does not overflow (unsigned) */
+#define ADD_CF_ZNV             10      /* O1 +   O2 is zero or no overflow (unsigned) */
+#define ADD_CF_SV              12      /* O1 +   O2 overflows (signed) */
+#define ADD_CF_OD              14      /* O1 +   O2 is odd */
+#define ADD_CF_TR              1       /* always */
+#define ADD_CF_NE              3       /* O1 != -O2 */
+#define ADD_CF_GE              5       /* O1 >= -O2 (signed) */
+#define ADD_CF_GT              7       /* O1 >  -O2 (signed) */
+#define ADD_CF_UV              9       /* O1 +   O2 overflows (unsigned) */
+#define ADD_CF_VNZ             11      /* O1 +   O2 is nonzero and overflows (unsigned) */
+#define ADD_CF_NSV             13      /* O1 +   O2 does not overflow (signed) */
+#define ADD_CF_EV              15      /* O1 +   O2 is even */
+#define ADD_EN_NONE            6       /* none */
+#define ADD_EN_C               7       /* with carry */
+#define ADD_EN_L               10      /* logical */
+#define ADD_EN_TSV             12      /* trap on signed overflow */
+#define ADD_EN_C_TSV           13      /* with carry and trap on signed overflow */
+#define ADDI_OE_TC             0       /* trap on condition */
+#define ADDI_OE_TSV_TC         1       /* trap on signed overflow or condition */
+#define ADDI_OE_NONE           2       /* none */
+#define ADDI_OE_TSV            3       /* trap on signed overflow */
+#define ADD_(en,cf,r1,r2,t)    f8(0x2,r2,r1,cf,(en)>>2,1,(en)&1,0,0,t) /* en is an ADD_EN_* value: en>>2 and en&1 go to separate f8 fields */
+#define ADD(r1,r2,t)           ADD_(ADD_EN_NONE,ADD_CF_NV,r1,r2,t)
+#define ADD_C(r1,r2,t)         ADD_(ADD_EN_C,ADD_CF_NV,r1,r2,t)
+#define ADD_L(r1,r2,t)         ADD_(ADD_EN_L,ADD_CF_NV,r1,r2,t)
+#define ADDB_(cc,r1,r2,t)      f17(0x28|(((cc)&1)<<1),r2,r1,(cc)>>1,t,0)       /* low bit of cc picks opcode 0x28/0x2a, cc>>1 is the c field; n=0 */
+#define ADDB(r1,r2,t)          ADDB_(ADD_CF_NV,r1,r2,t)
+#define ADDB_EQ(r1,r2,t)       ADDB_(ADD_CF_EQ,r1,r2,t)
+#define ADDB_LT(r1,r2,t)       ADDB_(ADD_CF_LT,r1,r2,t)
+#define ADDB_LE(r1,r2,t)       ADDB_(ADD_CF_LE,r1,r2,t)
+#define ADDB_NUV(r1,r2,t)      ADDB_(ADD_CF_NUV,r1,r2,t)
+#define ADDB_ZNV(r1,r2,t)      ADDB_(ADD_CF_ZNV,r1,r2,t)
+#define ADDB_SV(r1,r2,t)       ADDB_(ADD_CF_SV,r1,r2,t)
+#define ADDB_OD(r1,r2,t)       ADDB_(ADD_CF_OD,r1,r2,t)
+#define ADDB_TR(r1,r2,t)       ADDB_(ADD_CF_TR,r1,r2,t)
+#define ADDB_NE(r1,r2,t)       ADDB_(ADD_CF_NE,r1,r2,t)
+#define ADDB_GE(r1,r2,t)       ADDB_(ADD_CF_GE,r1,r2,t)
+#define ADDB_GT(r1,r2,t)       ADDB_(ADD_CF_GT,r1,r2,t)
+#define ADDB_UV(r1,r2,t)       ADDB_(ADD_CF_UV,r1,r2,t)
+#define ADDB_VNZ(r1,r2,t)      ADDB_(ADD_CF_VNZ,r1,r2,t)
+#define ADDB_NSV(r1,r2,t)      ADDB_(ADD_CF_NSV,r1,r2,t)
+#define ADDB_EV(r1,r2,t)       ADDB_(ADD_CF_EV,r1,r2,t)        /* operands forwarded in the same order as every other ADDB_* wrapper */
+#define ADDB_N_(cc,r1,r2,t)    f17(0x28|(((cc)&1)<<1),r2,r1,(cc)>>1,t,1)       /* same as ADDB_ but passes n=1 */
+#define ADDB_N(r1,r2,t)                ADDB_N_(ADD_CF_NV,r1,r2,t)
+#define ADDB_N_EQ(r1,r2,t)     ADDB_N_(ADD_CF_EQ,r1,r2,t)
+#define ADDB_N_LT(r1,r2,t)     ADDB_N_(ADD_CF_LT,r1,r2,t)
+#define ADDB_N_LE(r1,r2,t)     ADDB_N_(ADD_CF_LE,r1,r2,t)
+#define ADDB_N_NUV(r1,r2,t)    ADDB_N_(ADD_CF_NUV,r1,r2,t)
+#define ADDB_N_ZNV(r1,r2,t)    ADDB_N_(ADD_CF_ZNV,r1,r2,t)
+#define ADDB_N_SV(r1,r2,t)     ADDB_N_(ADD_CF_SV,r1,r2,t)
+#define ADDB_N_OD(r1,r2,t)     ADDB_N_(ADD_CF_OD,r1,r2,t)
+#define ADDB_N_TR(r1,r2,t)     ADDB_N_(ADD_CF_TR,r1,r2,t)
+#define ADDB_N_NE(r1,r2,t)     ADDB_N_(ADD_CF_NE,r1,r2,t)
+#define ADDB_N_GE(r1,r2,t)     ADDB_N_(ADD_CF_GE,r1,r2,t)
+#define ADDB_N_GT(r1,r2,t)     ADDB_N_(ADD_CF_GT,r1,r2,t)
+#define ADDB_N_UV(r1,r2,t)     ADDB_N_(ADD_CF_UV,r1,r2,t)
+#define ADDB_N_VNZ(r1,r2,t)    ADDB_N_(ADD_CF_VNZ,r1,r2,t)
+#define ADDB_N_NSV(r1,r2,t)    ADDB_N_(ADD_CF_NSV,r1,r2,t)
+#define ADDB_N_EV(r1,r2,t)     ADDB_N_(ADD_CF_EV,r1,r2,t)
+#define ADDI_(ec,cf,i,r,t)     f9(0x2c|((ec)>>1),r,t,cf,(ec)&1,i)      /* ec is an ADDI_OE_* value: ec>>1 is OR-ed into the opcode, ec&1 is the e1 field */
+#define ADDI(i,r,t)            ADDI_(ADDI_OE_NONE,ADD_CF_NV,i,r,t)
+#define ADDIB_(cc,i,r,t)       f17x(0x29|(((cc)&1)<<1),r,i,(cc)>>1,t,0)        /* low bit of cc picks opcode 0x29/0x2b, cc>>1 is the c field; n=0 */
+#define ADDIB(i,r,t)           ADDIB_(ADD_CF_NV,i,r,t)
+#define ADDIB_EQ(i,r,t)                ADDIB_(ADD_CF_EQ,i,r,t)
+#define ADDIB_LT(i,r,t)                ADDIB_(ADD_CF_LT,i,r,t)
+#define ADDIB_LE(i,r,t)                ADDIB_(ADD_CF_LE,i,r,t)
+#define ADDIB_NUV(i,r,t)       ADDIB_(ADD_CF_NUV,i,r,t)
+#define ADDIB_ZNV(i,r,t)       ADDIB_(ADD_CF_ZNV,i,r,t)
+#define ADDIB_SV(i,r,t)                ADDIB_(ADD_CF_SV,i,r,t)
+#define ADDIB_OD(i,r,t)                ADDIB_(ADD_CF_OD,i,r,t)
+#define ADDIB_TR(i,r,t)                ADDIB_(ADD_CF_TR,i,r,t)
+#define ADDIB_NE(i,r,t)                ADDIB_(ADD_CF_NE,i,r,t)
+#define ADDIB_GE(i,r,t)                ADDIB_(ADD_CF_GE,i,r,t)
+#define ADDIB_GT(i,r,t)                ADDIB_(ADD_CF_GT,i,r,t)
+#define ADDIB_UV(i,r,t)                ADDIB_(ADD_CF_UV,i,r,t)
+#define ADDIB_VNZ(i,r,t)       ADDIB_(ADD_CF_VNZ,i,r,t)
+#define ADDIB_NSV(i,r,t)       ADDIB_(ADD_CF_NSV,i,r,t)
+#define ADDIB_EV(i,r,t)                ADDIB_(ADD_CF_EV,i,r,t)
+#define ADDIB_N_(cc,i,r,t)     f17x(0x29|(((cc)&1)<<1),r,i,(cc)>>1,t,1)        /* same as ADDIB_ but passes n=1 */
+#define ADDIB_N(i,r,t)         ADDIB_N_(ADD_CF_NV,i,r,t)
+#define ADDIB_N_EQ(i,r,t)      ADDIB_N_(ADD_CF_EQ,i,r,t)
+#define ADDIB_N_LT(i,r,t)      ADDIB_N_(ADD_CF_LT,i,r,t)
+#define ADDIB_N_LE(i,r,t)      ADDIB_N_(ADD_CF_LE,i,r,t)
+#define ADDIB_N_NUV(i,r,t)     ADDIB_N_(ADD_CF_NUV,i,r,t)
+#define ADDIB_N_ZNV(i,r,t)     ADDIB_N_(ADD_CF_ZNV,i,r,t)
+#define ADDIB_N_SV(i,r,t)      ADDIB_N_(ADD_CF_SV,i,r,t)
+#define ADDIB_N_OD(i,r,t)      ADDIB_N_(ADD_CF_OD,i,r,t)
+#define ADDIB_N_TR(i,r,t)      ADDIB_N_(ADD_CF_TR,i,r,t)
+#define ADDIB_N_NE(i,r,t)      ADDIB_N_(ADD_CF_NE,i,r,t)
+#define ADDIB_N_GE(i,r,t)      ADDIB_N_(ADD_CF_GE,i,r,t)
+#define ADDIB_N_GT(i,r,t)      ADDIB_N_(ADD_CF_GT,i,r,t)
+#define ADDIB_N_UV(i,r,t)      ADDIB_N_(ADD_CF_UV,i,r,t)
+#define ADDIB_N_VNZ(i,r,t)     ADDIB_N_(ADD_CF_VNZ,i,r,t)
+#define ADDIB_N_NSV(i,r,t)     ADDIB_N_(ADD_CF_NSV,i,r,t)
+#define ADDIB_N_EV(i,r,t)      ADDIB_N_(ADD_CF_EV,i,r,t)       /* ADDIB_N_ takes (cc,i,r,t); a fifth argument would not preprocess */
+#define ADDIL(i,r)             f7(0xa,r,i)
+#define LOG_CC_NV              0       /* never */
+#define LOG_CC_EQ              1       /* all bits are 0 */
+#define LOG_CC_LT              2       /* leftmost bit is 1 */
+#define LOG_CC_LE              3       /* leftmost bit is 1 or all bits are 0 */
+#define LOG_CC_OD              7       /* rightmost bit is 1 */
+#define LOG_CC_TR              8       /* always */
+#define LOG_CC_NE              9       /* some bits are 1 */
+#define LOG_CC_GE              10      /* leftmost bit is 0 */
+#define LOG_CC_GT              11      /* leftmost bit is 0 and some bits are 1 */
+#define LOG_CC_EV              15      /* rightmost bit is 0 */
+#define AND_(cc,r1,r2,t)       f8(0x2,r2,r1,cc,0,1,0,0,0,t)
+#define AND(r1,r2,t)           AND_(LOG_CC_NV,r1,r2,t)
+#define ANDCM_(cc,r1,r2,t)     f8(0x2,r2,r1,cc,0,0,0,0,0,t)
+#define ANDCM(r1,r2,t)         ANDCM_(LOG_CC_NV,r1,r2,t)
+#define B_(n,i,t)              f20(0x3a,t,i,0,n)
+#define B(i,t)                 B_(0,i,t)
+#define B_N(i,t)               B_(1,i,t)
+#define B_L(i)                 B_(0,i,_RP_REGNO)
+#define B_L_N(i)               B_(1,i,_RP_REGNO)
+#define BB_CC_LT               0       /* leftmost bit in word is 1 */
+#define BB_CC_GE               1       /* leftmost bit in word is 0 */
+#define BB_(c,r,i)             f18(0x30,0,r,c,i,0)
+#define BB_N_(c,r,i)           f18(0x30,0,r,c,i,1)
+#define BBI_(c,r,p,i)          f18(0x31,p,r,c,i,0)
+#define BBI_N_(c,r,p,i)                f18(0x31,p,r,c,i,1)
+#define BB(c,r,i)              BB_(c,r,i)
+#define BBI_LT(r,p,i)          BBI_(BB_CC_LT,r,p,i)
+#define BBI_GE(r,p,i)          BBI_(BB_CC_GE,r,p,i)
+#define BB_N(c,r,i)            BB_N_(c,r,i)    /* n=1 form of BB; BB_ would pass n=0 */
+#define BBI_N_LT(r,p,i)                BBI_N_(BB_CC_LT,r,p,i)
+#define BBI_N_GE(r,p,i)                BBI_N_(BB_CC_GE,r,p,i)
+#define BE(i,s,b)              f19(0x38,b,s,i,0)
+#define BE_L(i,s,b)            f19(0x39,b,s,i,0)
+#define BE_L_N(i,s,b)          f19(0x39,b,s,i,1)
+#define BLR(x,t)               f21(0x3a,t,x,2,0)
+#define BLR_N(x,t)             f21(0x3a,t,x,2,1)
+#define BREAK(i,j)             f27(0,j,i)
+#define BV(x,b)                        f21(0x3a,b,x,6,0)
+#define BV_N(x,b)              f21(0x3a,b,x,6,1)
+#define BVE(b)                 f22(0x3a,b,6,0,0,0)
+#define BVE_N(b)               f22(0x3a,b,6,0,1,0)
+#define BVE_L(b)               f22(0x3a,b,7,0,0,0)
+#define BVE_L_N(b)             f22(0x3a,b,7,0,1,0)     /* BVE_L with n=1 (fifth f22 argument), mirroring BVE_N */
+#define II_C_NONE              0
+#define II_C_M                 (1<<5)
+#define II_C_S                 (1<<13)
+#define II_C_SM                        (II_C_S|II_C_M)
+#define II_AU_NONE             0
+#define II_AU_PRE              ((1<<13)|II_C_M)
+#define II_AU_POS              II_C_M
+#define LD_CC_H_NONE           0       /* No hint */
+#define LD_CC_H_SPL            2       /* Spatial Locality */
+#define CLRBTS()               f23(0x3a,0,0,2,0,0,1,0,1)
+#define CS_CC_NV               0       /* never */
+#define CS_CC_EQ               2       /* O1 =  O2 */
+#define CS_CC_LT               4       /* O1 <  O2 (signed) */
+#define CS_CC_LE               6       /* O1 <= O2 (signed) */
+#define CS_CC_ULT              8       /* O1 <  O2 (unsigned) */
+#define CS_CC_ULE              10      /* O1 <= O2 (unsigned) */
+#define CS_CC_SV               12      /* O1 -  O2 overflows (signed) */
+#define CS_CC_OD               14      /* O1 -  O2 is odd */
+#define CS_CC_TR               1       /* always */
+#define CS_CC_NE               3       /* O1 != O2 */
+#define CS_CC_GE               5       /* O1 >= O2 (signed) */
+#define CS_CC_GT               7       /* O1 >  O2 (signed) */
+#define CS_CC_UGE              9       /* O1 >= O2 (unsigned) */
+#define CS_CC_UGT              11      /* O1 >  O2 (unsigned) */
+#define CS_CC_NSV              13      /* O1 -  O2 does not overflow (signed) */
+#define CS_CC_EV               15      /* O1 -  O2 is even */
+#define CMPB_(c,r1,r2,i)       f17((c)&1?0x22:0x20,r2,r1,(c)>>1,i,0)
+#define CMPB(r1,r2,i)          CMPB_(CS_CC_NV,r1,r2,i)
+#define CMPB_EQ(r1,r2,i)       CMPB_(CS_CC_EQ,r1,r2,i)
+#define CMPB_LT(r1,r2,i)       CMPB_(CS_CC_LT,r1,r2,i)
+#define CMPB_LE(r1,r2,i)       CMPB_(CS_CC_LE,r1,r2,i)
+#define CMPB_ULT(r1,r2,i)      CMPB_(CS_CC_ULT,r1,r2,i)
+#define CMPB_ULE(r1,r2,i)      CMPB_(CS_CC_ULE,r1,r2,i)
+#define CMPB_SV(r1,r2,i)       CMPB_(CS_CC_SV,r1,r2,i)
+#define CMPB_OD(r1,r2,i)       CMPB_(CS_CC_OD,r1,r2,i)
+#define CMPB_TR(r1,r2,i)       CMPB_(CS_CC_TR,r1,r2,i)
+#define CMPB_NE(r1,r2,i)       CMPB_(CS_CC_NE,r1,r2,i)
+#define CMPB_GE(r1,r2,i)       CMPB_(CS_CC_GE,r1,r2,i)
+#define CMPB_GT(r1,r2,i)       CMPB_(CS_CC_GT,r1,r2,i)
+#define CMPB_UGE(r1,r2,i)      CMPB_(CS_CC_UGE,r1,r2,i)
+#define CMPB_UGT(r1,r2,i)      CMPB_(CS_CC_UGT,r1,r2,i)
+#define CMPB_NSV(r1,r2,i)      CMPB_(CS_CC_NSV,r1,r2,i)
+#define CMPB_EV(r1,r2,i)       CMPB_(CS_CC_EV,r1,r2,i)
+#define CMPB_N_(c,r1,r2,i)     f17((c)&1?0x22:0x20,r2,r1,(c)>>1,i,1)
+#define CMPB_N(r1,r2,i)                CMPB_N_(CS_CC_NV,r1,r2,i)
+#define CMPB_EQ_N(r1,r2,i)     CMPB_N_(CS_CC_EQ,r1,r2,i)
+#define CMPB_LT_N(r1,r2,i)     CMPB_N_(CS_CC_LT,r1,r2,i)
+#define CMPB_LE_N(r1,r2,i)     CMPB_N_(CS_CC_LE,r1,r2,i)
+#define CMPB_ULT_N(r1,r2,i)    CMPB_N_(CS_CC_ULT,r1,r2,i)
+#define CMPB_ULE_N(r1,r2,i)    CMPB_N_(CS_CC_ULE,r1,r2,i)
+#define CMPB_SV_N(r1,r2,i)     CMPB_N_(CS_CC_SV,r1,r2,i)
+#define CMPB_OD_N(r1,r2,i)     CMPB_N_(CS_CC_OD,r1,r2,i)
+#define CMPB_TR_N(r1,r2,i)     CMPB_N_(CS_CC_TR,r1,r2,i)
+#define CMPB_NE_N(r1,r2,i)     CMPB_N_(CS_CC_NE,r1,r2,i)
+#define CMPB_GE_N(r1,r2,i)     CMPB_N_(CS_CC_GE,r1,r2,i)
+#define CMPB_GT_N(r1,r2,i)     CMPB_N_(CS_CC_GT,r1,r2,i)
+#define CMPB_UGE_N(r1,r2,i)    CMPB_N_(CS_CC_UGE,r1,r2,i)
+#define CMPB_UGT_N(r1,r2,i)    CMPB_N_(CS_CC_UGT,r1,r2,i)
+#define CMPB_NSV_N(r1,r2,i)    CMPB_N_(CS_CC_NSV,r1,r2,i)
+#define CMPB_EV_N(r1,r2,i)     CMPB_N_(CS_CC_EV,r1,r2,i)
+#define CMPCLR_(c,r1,r2,i)     f8(0x2,r2,r1,c,2,0,0,2,0,i)
+#define CMPCLR(r1,r2,i)                CMPCLR_(CS_CC_NV,r1,r2,i)
+#define CMPCLR_EQ(r1,r2,i)     CMPCLR_(CS_CC_EQ,r1,r2,i)
+#define CMPCLR_LT(r1,r2,i)     CMPCLR_(CS_CC_LT,r1,r2,i)
+#define CMPCLR_LE(r1,r2,i)     CMPCLR_(CS_CC_LE,r1,r2,i)
+#define CMPCLR_ULT(r1,r2,i)    CMPCLR_(CS_CC_ULT,r1,r2,i)
+#define CMPCLR_ULE(r1,r2,i)    CMPCLR_(CS_CC_ULE,r1,r2,i)
+#define CMPCLR_SV(r1,r2,i)     CMPCLR_(CS_CC_SV,r1,r2,i)
+#define CMPCLR_OD(r1,r2,i)     CMPCLR_(CS_CC_OD,r1,r2,i)
+#define CMPCLR_TR(r1,r2,i)     CMPCLR_(CS_CC_TR,r1,r2,i)
+#define CMPCLR_NE(r1,r2,i)     CMPCLR_(CS_CC_NE,r1,r2,i)
+#define CMPCLR_GE(r1,r2,i)     CMPCLR_(CS_CC_GE,r1,r2,i)
+#define CMPCLR_GT(r1,r2,i)     CMPCLR_(CS_CC_GT,r1,r2,i)
+#define CMPCLR_UGE(r1,r2,i)    CMPCLR_(CS_CC_UGE,r1,r2,i)
+#define CMPCLR_UGT(r1,r2,i)    CMPCLR_(CS_CC_UGT,r1,r2,i)
+#define CMPCLR_NSV(r1,r2,i)    CMPCLR_(CS_CC_NSV,r1,r2,i)
+#define CMPCLR_EV(r1,r2,i)     CMPCLR_(CS_CC_EV,r1,r2,i)
+#define CMPIB_(c,i,r,t)                f17x((c)&1?0x23:0x21,r,i,(c)>>1,t,0)
+#define CMPIB_NONE(i,r,t)      CMPIB_(CS_CC_NV,i,r,t)
+#define CMPIB_EQ(i,r,t)                CMPIB_(CS_CC_EQ,i,r,t)
+#define CMPIB_LT(i,r,t)                CMPIB_(CS_CC_LT,i,r,t)
+#define CMPIB_LE(i,r,t)                CMPIB_(CS_CC_LE,i,r,t)
+#define CMPIB_ULT(i,r,t)       CMPIB_(CS_CC_ULT,i,r,t)
+#define CMPIB_ULE(i,r,t)       CMPIB_(CS_CC_ULE,i,r,t)
+#define CMPIB_SV(i,r,t)                CMPIB_(CS_CC_SV,i,r,t)
+#define CMPIB_OD(i,r,t)                CMPIB_(CS_CC_OD,i,r,t)
+#define CMPIB(i,r,t)           CMPIB_(CS_CC_TR,i,r,t)
+#define CMPIB_NE(i,r,t)                CMPIB_(CS_CC_NE,i,r,t)
+#define CMPIB_GE(i,r,t)                CMPIB_(CS_CC_GE,i,r,t)
+#define CMPIB_GT(i,r,t)                CMPIB_(CS_CC_GT,i,r,t)
+#define CMPIB_UGE(i,r,t)       CMPIB_(CS_CC_UGE,i,r,t)
+#define CMPIB_UGT(i,r,t)       CMPIB_(CS_CC_UGT,i,r,t)
+#define CMPIB_NSV(i,r,t)       CMPIB_(CS_CC_NSV,i,r,t)
+#define CMPIB_EV(i,r,t)                CMPIB_(CS_CC_EV,i,r,t)
+#define CMPIB_N_(c,i,r,t)      f17x((c)&1?0x23:0x21,r,i,(c)>>1,t,1)
+#define CMPIB_NONE_N(i,r,t)    CMPIB_N_(CS_CC_NV,i,r,t)
+#define CMPIB_EQ_N(i,r,t)      CMPIB_N_(CS_CC_EQ,i,r,t)
+#define CMPIB_LT_N(i,r,t)      CMPIB_N_(CS_CC_LT,i,r,t)
+#define CMPIB_LE_N(i,r,t)      CMPIB_N_(CS_CC_LE,i,r,t)
+#define CMPIB_ULT_N(i,r,t)     CMPIB_N_(CS_CC_ULT,i,r,t)
+#define CMPIB_ULE_N(i,r,t)     CMPIB_N_(CS_CC_ULE,i,r,t)
+#define CMPIB_SV_N(i,r,t)      CMPIB_N_(CS_CC_SV,i,r,t)
+#define CMPIB_OD_N(i,r,t)      CMPIB_N_(CS_CC_OD,i,r,t)
+#define CMPIB_N(i,r,t)         CMPIB_N_(CS_CC_TR,i,r,t)
+#define CMPIB_NE_N(i,r,t)      CMPIB_N_(CS_CC_NE,i,r,t)
+#define CMPIB_GE_N(i,r,t)      CMPIB_N_(CS_CC_GE,i,r,t)
+#define CMPIB_GT_N(i,r,t)      CMPIB_N_(CS_CC_GT,i,r,t)
+#define CMPIB_UGE_N(i,r,t)     CMPIB_N_(CS_CC_UGE,i,r,t)
+#define CMPIB_UGT_N(i,r,t)     CMPIB_N_(CS_CC_UGT,i,r,t)
+#define CMPIB_NSV_N(i,r,t)     CMPIB_N_(CS_CC_NSV,i,r,t)
+#define CMPIB_EV_N(i,r,t)      CMPIB_N_(CS_CC_EV,i,r,t)
+#define CMPICLR_(c,i,r,t)      f9(0x24,r,t,c,0,i)
+#define CMPICLR(i,r,t)         CMPICLR_(CS_CC_NV,i,r,t)
+#define CMPICLR_EQ(i,r,t)      CMPICLR_(CS_CC_EQ,i,r,t)
+#define CMPICLR_LT(i,r,t)      CMPICLR_(CS_CC_LT,i,r,t)
+#define CMPICLR_LE(i,r,t)      CMPICLR_(CS_CC_LE,i,r,t)
+#define CMPICLR_ULT(i,r,t)     CMPICLR_(CS_CC_ULT,i,r,t)
+#define CMPICLR_ULE(i,r,t)     CMPICLR_(CS_CC_ULE,i,r,t)
+#define CMPICLR_SV(i,r,t)      CMPICLR_(CS_CC_SV,i,r,t)
+#define CMPICLR_OD(i,r,t)      CMPICLR_(CS_CC_OD,i,r,t)
+#define CMPICLR_TR(i,r,t)      CMPICLR_(CS_CC_TR,i,r,t)
+#define CMPICLR_NE(i,r,t)      CMPICLR_(CS_CC_NE,i,r,t)
+#define CMPICLR_GE(i,r,t)      CMPICLR_(CS_CC_GE,i,r,t)
+#define CMPICLR_GT(i,r,t)      CMPICLR_(CS_CC_GT,i,r,t)
+#define CMPICLR_UGE(i,r,t)     CMPICLR_(CS_CC_UGE,i,r,t)
+#define CMPICLR_UGT(i,r,t)     CMPICLR_(CS_CC_UGT,i,r,t)
+#define CMPICLR_NSV(i,r,t)     CMPICLR_(CS_CC_NSV,i,r,t)
+#define CMPICLR_EV(i,r,t)      CMPICLR_(CS_CC_EV,i,r,t)
+#define COPR(u,s)              f38(0x0c,s,u,0)
+#define UI_CF_NONE             0       /* never */
+#define UI_CF_SBZ              2       /* some byte zero */
+#define UI_CF_SHZ              3       /* some halfword zero */
+#define UI_CF_SDC              4       /* some digit carry */
+#define UI_CF_SBC              6       /* some byte carry */
+#define UI_CF_SHC              7       /* some halfword carry */
+#define UI_TR_SHC              8       /* always; NOTE(review): name departs from the UI_CF_* pattern used by its siblings -- confirm before renaming */
+#define UI_CF_NBZ              10      /* no byte zero */
+#define UI_CF_NHZ              11      /* no halfword zero */
+#define UI_CF_NDC              12      /* no digit carry */
+#define UI_CF_NBC              14      /* no byte carry */
+#define UI_CF_NHC              15      /* no halfword carry */
+#define DCOR_(e1,cf,r,t)       f8(0x2,r,0,cf,2,1,1,e1,0,t)
+#define DCOR(r,t)              DCOR_(2,UI_CF_NONE,r,t)
+#define DCOR_I(r,t)            DCOR_(3,UI_CF_NONE,r,t)
+#define SED_C_NEVER            0       /* never */
+#define SED_C_EQ               1       /* all bits 0 */
+#define SED_C_LT               2       /* leftmost bit 1 */
+#define SED_C_OD               3       /* rightmost bit 1 */
+#define SED_C_TR               4       /* always */
+#define SED_C_NE               5       /* some bit 1 */
+#define SED_C_GE               6       /* leftmost bit 0 */
+#define SED_C_EV               7       /* rightmost bit 0 */
+#define DEPW(r,len,t)          f13(0x35,t,r,SED_C_NEVER,0,1,0,len)
+#define DEPW_Z(r,len,t)                f13(0x35,t,r,SED_C_NEVER,0,0,0,len)
+#define DEPWR(r,pos,len,t)     f16(0x35,t,r,SED_C_NEVER,0,1,1,31-(pos),len)
+#define DEPWR_Z(r,pos,len,t)   f16(0x35,t,r,SED_C_NEVER,0,1,0,31-(pos),len)
+#define SHLWI(r,sa,t)          DEPWR_Z(r,31-(sa),32-(sa),t)
+#define DEPWI(i,len,t)         f13x(0x35,t,i,SED_C_NEVER,2,1,0,len)
+#define DEPWI_Z(i,len,t)       f13x(0x35,t,i,SED_C_NEVER,2,0,0,len)
+#define DEPWRI(i,pos,len,t)    f16x(0x35,t,i,SED_C_NEVER,1,1,1,31-(pos),len)
+#define DEPWRI_Z(i,pos,len,t)  f16x(0x35,t,i,SED_C_NEVER,1,1,0,31-(pos),len)
+#define DIAG(i)                        f28(0x5,i)
+#define DS(r1,r2,t)            f8(0x2,r2,r1,ADD_CF_NV,1,0,0,1,0,t)
+#define EXTRW(r,len,t)         f12(0x34,r,t,SED_C_NEVER,2,1,0,0,0,len)
+#define EXTRW_U(r,len,t)       f12(0x34,r,t,SED_C_NEVER,2,0,0,0,0,len)
+#define EXTRWR(r,pos,len,t)    f15(0x34,r,t,SED_C_NEVER,1,1,1,pos,len)
+#define SHRWI(r,sa,t)          EXTRWR(r,31-(sa),32-(sa),t)
+#define EXTRWR_U(r,pos,len,t)  f15(0x34,r,t,SED_C_NEVER,1,1,0,pos,len)
+#define SHRWI_U(r,sa,t)                EXTRWR_U(r,31-(sa),32-(sa),t)
+#define FDC(x,s,b)             f24(0x1,b,x,s,0x4a,0,0)
+#define FDC_M(x,s,b)           f24(0x1,b,x,s,0x4a,1,0)
+#define FDCI(i,s,b)            f25(0x1,b,i,s,0xca,0,0)
+#define FDCE(x,s,b)            f24(0x1,b,x,s,0x4b,0,0)
+#define FDCE_M(x,s,b)          f24(0x1,b,x,s,0x4b,1,0)
+#define FIC(x,s,b)             f26(0x1,b,x,s,0xa,0,0)
+#define FIC_M(x,s,b)           f26(0x1,b,x,s,0xa,1,0)
+#define FICI(x,s,b)            f25(0x1,b,x,s,0x4f,0,0)
+#define FICI_M(x,s,b)          f25(0x1,b,x,s,0x4f,1,0)
+#define FICE(x,s,b)            f26(0x1,b,x,s,0xb,0,0)
+#define FICE_M(x,s,b)          f26(0x1,b,x,s,0xb,1,0)
+#define HADD_(c,r1,r2,t)       f8(0x2,r2,r1,0,0,1,1,c,0,t)
+#define HADD(r1,r2,t)          HADD_(3,r1,r2,t)
+#define HADD_SS(r1,r2,t)       HADD_(1,r1,r2,t)
+#define HADD_US(r1,r2,t)       HADD_(0,r1,r2,t)
+#define HAVG(r1,r2,t)          f8(0x2,r2,r1,0,0,1,0,3,0,t)
+#define HSHL(r,sa,t)           f10(0x3e,0,r,1,0,0,2,sa,0,t)
+#define HSHLADD(r1,sa,r2,t)    f8(0x2,r2,r1,0,1,1,1,sa,0,t)
+/* Halfword shift and subtract encodings.  HSHR and HSHR_U differ only in
+ * the seventh f10 argument (3 vs 2); HSUB, HSUB_SS and HSUB_US differ only
+ * in the code handed to HSUB_ as c (3, 1, 0).  */
+#define HSHR(r,sa,t)           f10(0x3e,r,0,1,2,0,3,sa,0,t)
+#define HSHR_U(r,sa,t)         f10(0x3e,r,0,1,2,0,2,sa,0,t)
+#define HSHRADD(r1,sa,r2,t)    f8(0x2,r2,r1,0,1,0,1,sa,0,t)
+#define HSUB_(c,r1,r2,t)       f8(0x2,r2,r1,0,0,0,1,c,0,t)
+#define HSUB(r1,r2,t)          HSUB_(3,r1,r2,t)
+#define HSUB_SS(r1,r2,t)       HSUB_(1,r1,r2,t)
+#define HSUB_US(r1,r2,t)       HSUB_(0,r1,r2,t)
+/* Encodings on major opcode 0x1, emitted through f26 and f24.  */
+#define IDTLBT(r1,r2)          f26(0x1,r2,r1,0,0x60,0,0)
+#define IITLBT(r1,r2)          f26(0x1,r2,r1,0,0x20,0,0)
+#define LCI(x,s,b,t)           f24(0x1,b,x,s,0x4c,0,t)
+/* Loads.  Three addressing forms appear below:
+ *   xxL(i,b,t)  displacement i from base b, via f1 (LDDL via f3);
+ *   xx(x,b,t)   index register x and base b, via f4;
+ *   xxI(i,b,t)  short displacement i and base b, via f5.
+ * In the f4/f5 forms the eighth argument selects the variant:
+ * 0=B, 1=H, 2=W, 3=D, 4=DA, 5=CD, 6=WA, 7=CW.
+ * LDIL, LDO/LDI, LDSID, LPA and MFSP are interleaved alphabetically and
+ * use other formats.  */
+#define LDBL(i,b,t)            f1(0x10,b,t,i)
+#define LDB(x,b,t)             f4(0x3,b,x,0,0,0,LD_CC_H_NONE,0,0,t)
+#define LDBI(i,b,t)            f5(0x3,b,i,0,0,1,LD_CC_H_NONE,0,0,t)
+#define LDCD(x,b,t)            f4(0x3,b,x,0,0,0,LD_CC_H_NONE,5,0,t)
+#define LDCDI(i,b,t)           f5(0x3,b,i,0,0,1,LD_CC_H_NONE,5,0,t)
+#define LDCW(x,b,t)            f4(0x3,b,x,0,0,0,LD_CC_H_NONE,7,0,t)
+#define LDCWI(i,b,t)           f5(0x3,b,i,0,0,1,LD_CC_H_NONE,7,0,t)
+#define LDDL(i,b,t)            f3(0x14,b,t,i,0)
+#define LDD(x,b,t)             f4(0x3,b,x,0,0,0,LD_CC_H_NONE,3,0,t)
+#define LDDI(i,b,t)            f5(0x3,b,i,0,0,1,LD_CC_H_NONE,3,0,t)
+#define LDDA(x,b,t)            f4(0x3,b,x,0,0,0,LD_CC_H_NONE,4,0,t)
+#define LDDAI(i,b,t)           f5(0x3,b,i,0,0,1,LD_CC_H_NONE,4,0,t)
+#define LDHL(i,b,t)            f1(0x11,b,t,i)
+#define LDH(x,b,t)             f4(0x3,b,x,0,0,0,LD_CC_H_NONE,1,0,t)
+#define LDHI(i,b,t)            f5(0x3,b,i,0,0,1,LD_CC_H_NONE,1,0,t)
+#define LDIL(i,t)              f7(0x8,t,i)
+#define LDO(i,b,t)             f1(0xd,b,t,i)
+/* LDI is LDO with 0 as the base register.  */
+#define LDI(i,t)               LDO(i,0,t)
+#define LDSID(s,b,t)           f30(0x0,b,0,s,0,0x85,t)
+#define LDWL(i,b,t)            f1(0x12,b,t,i)
+#define LDWL_MB(i,b,t)         f1(0x13,b,t,i)  /* pre-dec or post-inc */
+#define LDWL_MA(i,b,t)         f2(0x17,b,t,i,2)/* post-dec or pre-inc */
+#define LDW(x,b,t)             f4(0x3,b,x,0,0,0,LD_CC_H_NONE,2,0,t)
+#define LDWI(i,b,t)            f5(0x3,b,i,0,0,1,LD_CC_H_NONE,2,0,t)
+#define LDWA(x,b,t)            f4(0x3,b,x,0,0,0,LD_CC_H_NONE,6,0,t)
+#define LDWAI(i,b,t)           f5(0x3,b,i,0,0,1,LD_CC_H_NONE,6,0,t)
+#define LPA(x,s,b,t)           f24(0x1,b,x,s,0x4d,0,t)
+#define MFSP(s,t)              f29(0x0,0,0,s,0x25,t)
+/* MIX*: the _L and _R forms differ in the fifth f10 argument (0 vs 2),
+ * the H and W forms in the seventh (1 vs 0).  */
+#define MIXH_L(r1,r2,t)                f10(0x3e,r2,r1,1,0,0,1,0,0,t)
+#define MIXH_R(r1,r2,t)                f10(0x3e,r2,r1,1,2,0,1,0,0,t)
+#define MIXW_L(r1,r2,t)                f10(0x3e,r2,r1,1,0,0,0,0,0,t)
+#define MIXW_R(r1,r2,t)                f10(0x3e,r2,r1,1,2,0,0,0,0,t)
+/* MOVB_ takes the condition explicitly; the wrappers below pin it to one
+ * of the SED_C_* codes.  The final f17 argument is always 0.  */
+#define MOVB_(c,r1,r2,i)       f17(0x32,r2,r1,c,i,0)
+#define MOVB(r1,r2,i)          MOVB_(SED_C_NEVER,r1,r2,i)
+#define MOVB_EQ(r1,r2,i)       MOVB_(SED_C_EQ,r1,r2,i)
+#define MOVB_LT(r1,r2,i)       MOVB_(SED_C_LT,r1,r2,i)
+#define MOVB_OD(r1,r2,i)       MOVB_(SED_C_OD,r1,r2,i)
+#define MOVB_TR(r1,r2,i)       MOVB_(SED_C_TR,r1,r2,i)
+#define MOVB_NE(r1,r2,i)       MOVB_(SED_C_NE,r1,r2,i)
+#define MOVB_GE(r1,r2,i)       MOVB_(SED_C_GE,r1,r2,i)
+#define MOVB_EV(r1,r2,i)       MOVB_(SED_C_EV,r1,r2,i)
+/* MOVIB family.  Every public wrapper takes (i,r,t): a short signed
+ * immediate i, a register r and a branch displacement t, and forwards them
+ * to MOVIB_ in that order after the condition code.  f17x expects the
+ * register first and the immediate second (it asserts r fits 5 unsigned
+ * bits and -16 <= i <= 15), so MOVIB_ has to accept (c,i,r,t) and swap them
+ * on the way down.  With the former (c,r,i,t) parameter list the immediate
+ * was emitted in the register field and the register in the immediate
+ * field.  */
+#define MOVIB_(c,i,r,t)                f17x(0x33,r,i,c,t,0)
+#define MOVIB(i,r,t)           MOVIB_(SED_C_NEVER,i,r,t)
+#define MOVIB_EQ(i,r,t)                MOVIB_(SED_C_EQ,i,r,t)
+#define MOVIB_LT(i,r,t)                MOVIB_(SED_C_LT,i,r,t)
+#define MOVIB_OD(i,r,t)                MOVIB_(SED_C_OD,i,r,t)
+#define MOVIB_TR(i,r,t)                MOVIB_(SED_C_TR,i,r,t)
+#define MOVIB_NE(i,r,t)                MOVIB_(SED_C_NE,i,r,t)
+#define MOVIB_GE(i,r,t)                MOVIB_(SED_C_GE,i,r,t)
+#define MOVIB_EV(i,r,t)                MOVIB_(SED_C_EV,i,r,t)
+/* MTSAR is MTCTL targeting control register _CR11_REGNO.  */
+#define MTCTL(r,t)             f31(0x0,t,r,0,0xc2,0)
+#define MTSAR(r)               MTCTL(r,_CR11_REGNO)
+#define MTSARCM(r)             f31(0x0,0xb,r,0,0xc6,0)
+#define MTSM(r)                        f33(0x0,0,r,0,0xc3,0)
+#define MTSP(r,s)              f29(0x0,0,r,s,0xc1,0)
+/* OR_ takes the condition explicitly; OR pins it to LOG_CC_NV.  NOP is an
+ * OR of _R0_REGNO with itself into _R0_REGNO, and COPY is OR(r,0,t).  */
+#define OR_(c,r1,r2,t)         f8(0x2,r2,r1,c,0,1,0,1,0,t)
+#define OR(r1,r2,t)            OR_(LOG_CC_NV,r1,r2,t)
+#define NOP()                  OR(_R0_REGNO,_R0_REGNO,_R0_REGNO)
+#define COPY(r,t)              OR(r,0,t)
+#define PDC(x,s,b)             f24(0x1,b,x,s,0x4e,0,0)
+#define PDTLB(x,s,b)           f24(0x1,b,x,s,0x48,0,0)
+#define PDTLB_L(x,s,b)         f24(0x1,b,x,s,0x58,0,0)
+#define PDTLBE(x,s,b)          f24(0x1,b,x,s,0x49,0,0)
+/* PERMH splits the selector c over three f10 arguments.
+ * NOTE(review): in the eighth argument the masks &6 and &3 overlap in
+ * bit 1 -- confirm the intended packing of c against the PERMH encoding.  */
+#define PERMH(c,r,t)           f10(0x3e,r,r,0,(c)&3,0,((c)>>2)&3,(((c)>>2)&6)|(((c)>>6)&3),0,t)
+#define PITBL(x,s,b)           f26(0x1,b,x,s,0x08,0,0)
+#define PITBL_L(x,s,b)         f26(0x1,b,x,s,0x18,0,0)
+#define PITBLE(x,s,b)          f26(0x1,b,x,s,0x09,0,0)
+#define POPBTS(i)              f23(0x3a,0,0,2,0,i,1,0,1)
+/* NOTE(review): PROBEI_R/PROBEI_W expand to exactly the same f24 call as
+ * PROBE_R/PROBE_W (sub-opcodes 0x46/0x47), only with i in place of r --
+ * confirm that the immediate forms should not use a distinct sub-opcode.  */
+#define PROBE_R(s,b,r,t)       f24(0x1,b,r,s,0x46,0,t)
+#define PROBE_W(s,b,r,t)       f24(0x1,b,r,s,0x47,0,t)
+#define PROBEI_R(s,b,i,t)      f24(0x1,b,i,s,0x46,0,t)
+#define PROBEI_W(s,b,i,t)      f24(0x1,b,i,s,0x47,0,t)
+/* PUSHNOM is the PUSHBTS encoding with 0 in the register slot.  */
+#define PUSHBTS(r)             f23(0x3a,0,r,2,0,0,0,0,1)
+#define PUSHNOM()              f23(0x3a,0,0,2,0,0,0,0,1)
+#define RFI()                  f33(0x0,0,0,0,0x60,0)
+#define RFI_R()                        f33(0x0,0,0,0,0x65,0)
+/* RSM (and SSM further down) split the immediate i into bits 5 and up and
+ * the low five bits; RSM additionally masks the upper part with 0x3e0.  */
+#define RSM(i,t)               f33(0x0,((i)&0x3e0)>>5,(i)&0x1f,0,0x73,t)
+/* SHLADD_ takes the e field and the condition explicitly; the wrappers
+ * use e = 1, 2, 3 with ADD_CF_NV.  */
+#define SHLADD_(e,cf,r1,sa,r2,t) f8(0x2,r2,r1,cf,e,1,0,sa,0,t)
+#define SHLADD(r1,sa,r2,t)     SHLADD_(1,ADD_CF_NV,r1,sa,r2,t)
+#define SHLADD_L(r1,sa,r2,t)   SHLADD_(2,ADD_CF_NV,r1,sa,r2,t)
+#define SHLADD_TSV(r1,sa,r2,t) SHLADD_(3,ADD_CF_NV,r1,sa,r2,t)
+/* Shift-pair forms.  The immediate variants encode the shift amount
+ * complemented: 63-sa for SHRPDI (bit 5 and the low five bits passed
+ * separately) and 31-sa for SHRPWI.  */
+#define SHRPD(r1,r2,t)         f11(0x34,r2,r1,SED_C_NEVER,0,0,1,0,t)
+#define SHRPDI(r1,r2,sa,t)     f14(0x34,r2,r1,SED_C_NEVER,0,(63-(sa))>>5,1,(63-(sa))&0x1f,t)
+#define SHRPW(r1,r2,t)         f11(0x34,r2,r1,SED_C_NEVER,0,0,0,0,t)
+#define SHRPWI(r1,r2,sa,t)     f14(0x34,r2,r1,SED_C_NEVER,0,1,0,31-(sa),t)
+/* SPOP0..SPOP3: the third-from-last fixed argument (0..3) matches the
+ * macro number; all but SPOP1 split so into (so)>>5 and (so)&0x1f.  */
+#define SPOP0(sf,so)           f34(0x4,(so)>>5,0,sf,0,(so)&0x1f)
+#define SPOP1(sf,so,t)         f35(0x4,so,1,sf,0,t)
+#define SPOP2(sf,so,r)         f36(0x4,r,(so)>>5,2,sf,0,(so)&0x1f)
+#define SPOP3(sf,so,r1,r2)     f37(0x4,r2,r1,(so)>>5,3,sf,0,(so)&0x1f)
+#define SSM(i,t)               f33(0x00,(i)>>5,(i)&0x1f,0,0x6b,t)
+/* Stores.  xxL(r,i,b) use a displacement i from base b via f1 (STDL via
+ * f3, STWL_MB via f2); the remaining forms go through f6 with a short
+ * displacement i, the eighth argument selecting the variant:
+ * 0x8=B, 0x9=H, 0xa=W, 0xc=D, 0xe=WA, 0xf=DA.  */
+#define STBL(r,i,b)            f1(0x18,b,r,i)
+#define STBI(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0x8,0,i)
+#define STDL(r,i,b)            f3(0x1c,b,r,i,0)
+#define STDI(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0xc,0,i)
+#define STDA(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0xf,0,i)
+#define STHL(r,i,b)            f1(0x19,b,r,i)
+#define STHI(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0x9,0,i)
+#define STWL(r,i,b)            f1(0x1a,b,r,i)
+#define STWL_MA(r,i,b)         f1(0x1b,b,r,i)  /* pre-dec or post-inc */
+#define STWL_MB(r,i,b)         f2(0x1f,b,r,i,2)/* post-dec or pre-inc */
+#define STWI(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0xa,0,i)
+#define STWA(r,i,b)            f6(0x3,b,r,0,0,1,LD_CC_H_NONE,0xe,0,i)
+/* SUB_ takes the e1/e2 fields and the condition explicitly.  SUB_B sets
+ * e2 to 1; SUB_SV and SUB_NSV only change the condition code.  */
+#define SUB_(e1,e2,cf,r1,r2,t) f8(0x2,r2,r1,cf,e1,0,e2,0,0,t)
+#define SUB(r1,r2,t)           SUB_(1,0,CS_CC_NV,r1,r2,t)
+#define SUB_B(r1,r2,t)         SUB_(1,1,CS_CC_NV,r1,r2,t)
+#define SUB_SV(r1,r2,t)                SUB_(1,0,CS_CC_SV,r1,r2,t)
+#define SUB_NSV(r1,r2,t)       SUB_(1,0,CS_CC_NSV,r1,r2,t)
+/* actually, rsbi */
+#define SUBI_(e1,cf,i,r,t)     f9(0x25,r,t,cf,e1,i)
+#define SUBI(i,r,t)            SUBI_(0,ADD_CF_NV,i,r,t)
+/* SYNCDMA is the SYNC encoding with 1<<4 in the third f33 argument.  */
+#define SYNC()                 f33(0x0,0,0,0,0x20,0)
+#define SYNCDMA()              f33(0x0,0,1<<4,0,0x20,0)
+#define UADDCM(r1,r2,t)                f8(0x2,r2,r1,ADD_CF_NV,2,0,1,2,0,t)
+#define UXOR(r1,r2,t)          f8(0x2,r2,r1,LOG_CC_NV,0,1,1,2,0,t)
+#define XOR(r1,r2,t)           f8(0x2,r2,r1,LOG_CC_NV,0,1,0,2,0,t)
+/* Front-end operations.  Each lower-case macro either expands directly to
+ * one of the upper-case instruction macros, or forwards to a static helper
+ * of the same name with a leading underscore, passing the ambient _jit as
+ * first argument.  Operand order is destination first (r0), whereas the
+ * instruction macros take the target last.  */
+#  define nop(c)                       _nop(_jit,c)
+static void _nop(jit_state_t*,jit_int32_t);
+#define movr(r0,r1)            _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movi(r0,i0)            _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+/* movi_p returns a jit_word_t -- presumably the address of the emitted
+ * sequence for later patching; confirm in _movi_p.  */
+#define movi_p(r0,i0)          _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+/* comr and negr use _R0_REGNO as the first operand of UADDCM and SUB.  */
+#define comr(r0,r1)            UADDCM(_R0_REGNO,r1,r0)
+#define negr(r0,r1)            SUB(_R0_REGNO,r1,r0)
+/* Extensions extract a field ending at position 31 of width 8 or 16,
+ * through EXTRWR for the signed forms and EXTRWR_U for the unsigned.  */
+#define extr_c(r0,r1)          EXTRWR(r1,31,8,r0)
+#define extr_uc(r0,r1)         EXTRWR_U(r1,31,8,r0)
+#define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
+#define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
+/* On a big-endian host htonr_us reduces to extr_us and htonr_ui to a
+ * plain movr; any other byte order is rejected at compile time.  */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#  define htonr_us(r0,r1)      extr_us(r0,r1)
+#  define htonr_ui(r0,r1)      movr(r0,r1)
+#else
+#  error need htonr implementation
+#endif
+/* Addition and subtraction.  The register forms map onto one instruction
+ * macro; the immediate forms go through helpers.  addcr/subcr are the
+ * plain addr/subr, while addxr/subxr use ADD_C/SUB_B.  */
+#define addr(r0,r1,r2)         ADD(r1,r2,r0)
+#define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addcr(r0,r1,r2)                addr(r0,r1,r2)
+#define addci(r0,r1,i0)                _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addxr(r0,r1,r2)                ADD_C(r1,r2,r0)
+#define addxi(r0,r1,i0)                _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subr(r0,r1,r2)         SUB(r1,r2,r0)
+#define subi(r0,r1,i0)         _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subcr(r0,r1,r2)                subr(r0,r1,r2)
+#define subci(r0,r1,i0)                _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subxr(r0,r1,r2)                SUB_B(r1,r2,r0)
+#define subxi(r0,r1,i0)                _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define rsbi(r0, r1, i0)       _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Multiplication, division and remainder are all implemented by helpers.
+ * The double-underscore functions (__llmul, __idiv, __udiv, __irem,
+ * __urem, __idivrem, __udivrem) take and return plain C integers rather
+ * than a jit_state_t -- presumably host routines called from generated
+ * code; confirm in their definitions.  The q* forms take two result
+ * registers (r0, r1).  */
+#define mulr(r0,r1,r2)         _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define muli(r0,r1,i0)         _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static long long __llmul(int, int);
+#define qmulr(r0,r1,r2,r3)     _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define qmuli(r0,r1,r2,i0)     _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#define qmulr_u(r0,r1,r2,r3)   _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define qmuli_u(r0,r1,r2,i0)   _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+static int __idiv(int, int);
+#define divr(r0,r1,r2)         _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi(r0,r1,i0)         _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static unsigned int __udiv(unsigned int, unsigned int);
+#define divr_u(r0,r1,r2)       _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_u(r0,r1,i0)       _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static int __irem(int, int);
+#define remr(r0,r1,r2)         _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi(r0,r1,i0)         _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static unsigned int __urem(unsigned int, unsigned int);
+#define remr_u(r0,r1,r2)       _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi_u(r0,r1,i0)       _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static idiv_t __idivrem(int, int);
+#define qdivr(r0,r1,r2,r3)     _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define qdivi(r0,r1,r2,i0)     _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+static udiv_t __udivrem(unsigned int, unsigned int);
+#define qdivr_u(r0,r1,r2,r3)   _qdivr_u(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define qdivi_u(r0,r1,r2,i0)   _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,
+                    jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+/* Bitwise operations: register forms map to AND/OR/XOR, immediate forms
+ * go through helpers.  */
+#define andr(r0,r1,r2)         AND(r1,r2,r0)
+#define andi(r0,r1,i0)         _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define orr(r0,r1,r2)          OR(r1,r2,r0)
+#define ori(r0,r1,i0)          _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define xorr(r0,r1,r2)         XOR(r1,r2,r0)
+#define xori(r0,r1,i0)         _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Shifts: here it is the other way round -- immediate counts map to one
+ * instruction macro (SHLWI, SHRWI, SHRWI_U) and register counts need a
+ * helper.  */
+#define lshr(r0,r1,r2)         _lshr(_jit,r0,r1,r2)
+static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lshi(r0,r1,i0)         SHLWI(r1,i0,r0)
+#define rshr(r0,r1,r2)         _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define rshi(r0,r1,i0)         SHRWI(r1,i0,r0)
+#define rshr_u(r0,r1,r2)       _rshr_u(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define rshi_u(r0,r1,i0)       SHRWI_U(r1,i0,r0)
+/* Set-on-condition.  Each macro passes cmpr the OPPOSITE of the condition
+ * it is named after (ltr passes CS_CC_GE, eqr passes CS_CC_NE, ...).  The
+ * immediate forms pass cmpi that same code as c plus, as ci, the code with
+ * its operands swapped (GE/LE, GT/LT, ...) -- presumably for the case where
+ * the immediate is used as the first comparison operand; confirm in
+ * _cmpi.  */
+#define cmpr(c,r0,r1,r2)       _cmpr(_jit,c,r0,r1,r2)
+static void _cmpr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define cmpi(c,ci,r0,r1,i0)    _cmpi(_jit,c,ci,r0,r1,i0)
+static void _cmpi(jit_state_t*,jit_word_t,jit_word_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define ltr(r0,r1,r2)          cmpr(CS_CC_GE,r0,r1,r2)
+#define lti(r0,r1,i0)          cmpi(CS_CC_GE,CS_CC_LE,r0,r1,i0)
+#define ltr_u(r0,r1,r2)                cmpr(CS_CC_UGE,r0,r1,r2)
+#define lti_u(r0,r1,i0)                cmpi(CS_CC_UGE,CS_CC_ULE,r0,r1,i0)
+#define ler(r0,r1,r2)          cmpr(CS_CC_GT,r0,r1,r2)
+#define lei(r0,r1,i0)          cmpi(CS_CC_GT,CS_CC_LT,r0,r1,i0)
+#define ler_u(r0,r1,r2)                cmpr(CS_CC_UGT,r0,r1,r2)
+#define lei_u(r0,r1,i0)                cmpi(CS_CC_UGT,CS_CC_ULT,r0,r1,i0)
+#define eqr(r0,r1,r2)          cmpr(CS_CC_NE,r0,r1,r2)
+#define eqi(r0,r1,i0)          cmpi(CS_CC_NE,CS_CC_NE,r0,r1,i0)
+#define ger(r0,r1,r2)          cmpr(CS_CC_LT,r0,r1,r2)
+#define gei(r0,r1,i0)          cmpi(CS_CC_LT,CS_CC_GT,r0,r1,i0)
+#define ger_u(r0,r1,r2)                cmpr(CS_CC_ULT,r0,r1,r2)
+#define gei_u(r0,r1,i0)                cmpi(CS_CC_ULT,CS_CC_UGT,r0,r1,i0)
+#define gtr(r0,r1,r2)          cmpr(CS_CC_LE,r0,r1,r2)
+#define gti(r0,r1,i0)          cmpi(CS_CC_LE,CS_CC_GE,r0,r1,i0)
+#define gtr_u(r0,r1,r2)                cmpr(CS_CC_ULE,r0,r1,r2)
+#define gti_u(r0,r1,i0)                cmpi(CS_CC_ULE,CS_CC_UGE,r0,r1,i0)
+#define ner(r0,r1,r2)          cmpr(CS_CC_EQ,r0,r1,r2)
+#define nei(r0,r1,i0)          cmpi(CS_CC_EQ,CS_CC_EQ,r0,r1,i0)
+/* Loads.  The unsigned register forms map straight onto one instruction:
+ * ldr_uc/ldr_us/ldr_ui use LDBI/LDHI/LDWI with _R0_REGNO passed as the
+ * displacement argument (presumably 0 -- confirm), and ldxr_uc/ldxr_us/
+ * ldxr_ui use the indexed LDB/LDH/LDW.  All signed byte/halfword forms and
+ * all forms taking an immediate address or offset go through helpers.
+ * The unsuffixed and _i word forms alias the _ui ones.  */
+#define ldr_c(r0,r1)           _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#define ldi_c(r0,i0)           _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_c(r0,r1,r2)       _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_c(r0,r1,i0)       _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_uc(r0,r1)          LDBI(_R0_REGNO,r1,r0)
+#define ldi_uc(r0,i0)          _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_uc(r0,r1,r2)      LDB(r2,r1,r0)
+#define ldxi_uc(r0,r1,i0)      _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_s(r0,r1)           _ldr_s(_jit,r0,r1)
+static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#define ldi_s(r0,i0)           _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_s(r0,r1,r2)       _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_s(r0,r1,i0)       _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_us(r0,r1)          LDHI(_R0_REGNO,r1,r0)
+#define ldi_us(r0,i0)          _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_us(r0,r1,r2)      LDH(r2,r1,r0)
+#define ldxi_us(r0,r1,i0)      _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr(r0,r1)             ldr_ui(r0,r1)
+#define ldr_i(r0,r1)           ldr_ui(r0,r1)
+#define ldr_ui(r0,r1)          LDWI(_R0_REGNO,r1,r0)
+#define ldi_i(r0,i0)           ldi_ui(r0,i0)
+#define ldi_ui(r0,i0)          _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_i(r0,r1,r2)       ldxr_ui(r0,r1,r2)
+#define ldxr_ui(r0,r1,r2)      LDW(r2,r1,r0)
+#define ldxi(r0,r1,i0)         ldxi_ui(r0,r1,i0)
+#define ldxi_i(r0,r1,i0)       ldxi_ui(r0,r1,i0)
+#define ldxi_ui(r0,r1,i0)      _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Stores.  str_c/str_s/str_i(r0,r1) map onto STBI/STHI/STWI with r1 as
+ * the value, _R0_REGNO as the displacement argument and r0 as the base;
+ * every other form goes through a helper.  Immediate forms take the
+ * address/offset i0 first.  stxi aliases stxi_i.  */
+#define str_c(r0,r1)           STBI(r1,_R0_REGNO,r0)
+#define sti_c(i0,r0)           _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_c(r0,r1,r2)       _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_c(i0,r0,r1)       _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define str_s(r0,r1)           STHI(r1,_R0_REGNO,r0)
+#define sti_s(i0,r0)           _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_s(r0,r1,r2)       _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_s(i0,r0,r1)       _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define str_i(r0,r1)           STWI(r1,_R0_REGNO,r0)
+#define sti_i(i0,r0)           _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_i(r0,r1,r2)       _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi(i0,r0,r1)         stxi_i(i0,r0,r1)
+#define stxi_i(i0,r0,r1)       _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* Compare-and-branch.  Unlike the set-on-condition macros, each branch
+ * macro passes bcmpr the condition it is named after (bltr passes
+ * CS_CC_LT).  The immediate forms pass bcmpi that code as c and the
+ * operand-swapped code as ci (LT/GT, LE/GE, ...).  In the immediate forms
+ * the macro parameter named r1 is really the immediate (bcmpi's i1).
+ * All branch helpers return a jit_word_t -- presumably the address of the
+ * branch for later patching; confirm in the definitions.  */
+#define bcmpr(c,i0,r0,r1)      _bcmpr(_jit,c,i0,r0,r1)
+static jit_word_t _bcmpr(jit_state_t*,jit_word_t,
+                        jit_word_t,jit_int32_t,jit_int32_t);
+#define bcmpi(c,ci,i0,r0,i1)   _bcmpi(_jit,c,ci,i0,r0,i1)
+static jit_word_t _bcmpi(jit_state_t*,jit_word_t,jit_word_t,
+                        jit_word_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1)         bcmpr(CS_CC_LT,i0,r0,r1)
+#define blti(i0,r0,r1)         bcmpi(CS_CC_LT,CS_CC_GT,i0,r0,r1)
+#define bltr_u(i0,r0,r1)       bcmpr(CS_CC_ULT,i0,r0,r1)
+#define blti_u(i0,r0,r1)       bcmpi(CS_CC_ULT,CS_CC_UGT,i0,r0,r1)
+#define bler(i0,r0,r1)         bcmpr(CS_CC_LE,i0,r0,r1)
+#define blei(i0,r0,r1)         bcmpi(CS_CC_LE,CS_CC_GE,i0,r0,r1)
+#define bler_u(i0,r0,r1)       bcmpr(CS_CC_ULE,i0,r0,r1)
+#define blei_u(i0,r0,r1)       bcmpi(CS_CC_ULE,CS_CC_UGE,i0,r0,r1)
+#define beqr(i0,r0,r1)         bcmpr(CS_CC_EQ,i0,r0,r1)
+#define beqi(i0,r0,r1)         bcmpi(CS_CC_EQ,CS_CC_EQ,i0,r0,r1)
+#define bger(i0,r0,r1)         bcmpr(CS_CC_GE,i0,r0,r1)
+#define bgei(i0,r0,r1)         bcmpi(CS_CC_GE,CS_CC_LE,i0,r0,r1)
+#define bger_u(i0,r0,r1)       bcmpr(CS_CC_UGE,i0,r0,r1)
+#define bgei_u(i0,r0,r1)       bcmpi(CS_CC_UGE,CS_CC_ULE,i0,r0,r1)
+#define bgtr(i0,r0,r1)         bcmpr(CS_CC_GT,i0,r0,r1)
+#define bgti(i0,r0,r1)         bcmpi(CS_CC_GT,CS_CC_LT,i0,r0,r1)
+#define bgtr_u(i0,r0,r1)       bcmpr(CS_CC_UGT,i0,r0,r1)
+#define bgti_u(i0,r0,r1)       bcmpi(CS_CC_UGT,CS_CC_ULT,i0,r0,r1)
+#define bner(i0,r0,r1)         bcmpr(CS_CC_NE,i0,r0,r1)
+#define bnei(i0,r0,r1)         bcmpi(CS_CC_NE,CS_CC_NE,i0,r0,r1)
+/* Branch on mask.  bmxr/bmxi take a boolean c first: the bmc* macros pass
+ * 0 and the bms* macros pass 1.  In the wrappers the parameters are named
+ * (r0,r1,r2) but are forwarded positionally as (i0,r0,r1) / (i0,r0,i1).  */
+#define bmxr(c,i0,r0,r1)       _bmxr(_jit,c,i0,r0,r1)
+static jit_word_t _bmxr(jit_state_t*,jit_bool_t,
+                       jit_word_t,jit_int32_t,jit_int32_t);
+#define bmxi(c,i0,r0,i1)       _bmxi(_jit,c,i0,r0,i1)
+static jit_word_t _bmxi(jit_state_t*,jit_bool_t,
+                       jit_word_t,jit_int32_t,jit_word_t);
+#define bmcr(r0,r1,r2)         bmxr(0,r0,r1,r2)
+#define bmci(r0,r1,r2)         bmxi(0,r0,r1,r2)
+#define bmsr(r0,r1,r2)         bmxr(1,r0,r1,r2)
+#define bmsi(r0,r1,r2)         bmxi(1,r0,r1,r2)
+/* Arithmetic-and-branch helpers: bo*/bx* prefixes, add/sub, register or
+ * immediate second operand, signed or _u.  Each has its own helper taking
+ * (target i0, register r0, register r1 or immediate i1).  */
+#define boaddr(i0,r0,r1)       _boaddr(_jit,i0,r0,r1)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi(i0,r0,i1)       _boaddi(_jit,i0,r0,i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define boaddr_u(i0,r0,r1)     _boaddr_u(_jit,i0,r0,r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi_u(i0,r0,i1)     _boaddi_u(_jit,i0,r0,i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr(i0,r0,r1)       _bxaddr(_jit,i0,r0,r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi(i0,r0,i1)       _bxaddi(_jit,i0,r0,i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr_u(i0,r0,r1)     _bxaddr_u(_jit,i0,r0,r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi_u(i0,r0,i1)     _bxaddi_u(_jit,i0,r0,i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr(i0,r0,r1)       _bosubr(_jit,i0,r0,r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi(i0,r0,i1)       _bosubi(_jit,i0,r0,i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr_u(i0,r0,r1)     _bosubr_u(_jit,i0,r0,r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi_u(i0,r0,i1)     _bosubi_u(_jit,i0,r0,i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr(i0,r0,r1)       _bxsubr(_jit,i0,r0,r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi(i0,r0,i1)       _bxsubi(_jit,i0,r0,i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr_u(i0,r0,r1)     _bxsubr_u(_jit,i0,r0,r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi_u(i0,r0,i1)     _bxsubi_u(_jit,i0,r0,i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+/* Jumps and calls: register target (r), immediate target (i), and a _p
+ * variant of the immediate form that returns a jit_word_t -- presumably
+ * the location to hand to patch_at later; confirm in the definitions.  */
+#define jmpr(r0)               _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#define jmpi(i0)               _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#define jmpi_p(i0)             _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#define callr(r0)              _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#define calli(i0)              _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#define calli_p(i0)            _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* Function frame helpers; both take the jit_node_t of the function.  */
+#define prolog(node)           _prolog(_jit, node)
+static void _prolog(jit_state_t*, jit_node_t*);
+#define epilog(node)           _epilog(_jit, node)
+static void _epilog(jit_state_t*, jit_node_t*);
+/* Variadic-argument helpers operating on registers.  */
+#define vastart(r0)            _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#define vaarg(r0, r1)          _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+/* patch_at(i,l) takes two jit_word_t values -- presumably the instruction
+ * address and the label address; confirm in _patch_at.  */
+#define patch_at(i,l)          _patch_at(_jit,i,l)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* from binutils bfd/libhppa.h */
+/* Sign-extend the low LEN bits of X.
+ *
+ *   x    value whose low LEN bits hold a two's-complement field; higher
+ *        bits are ignored
+ *   len  field width in bits, 1..32
+ *
+ * Returns the field as a signed int.  The work is done on unsigned
+ * operands so that widths of 31 and 32 do not shift a 1 into or past the
+ * sign bit of an int, which was undefined behavior.  */
+static inline int
+sign_extend (int x, int len)
+{
+  unsigned int signbit = 1u << (len - 1);
+  unsigned int mask = (signbit << 1) - 1u;	/* wraps to all ones for len 32 */
+  unsigned int field = (unsigned int) x & mask;
+
+  if (field & signbit)
+    /* field - 2^len, written so that no intermediate leaves int range.  */
+    return -(int) (mask - field) - 1;
+  return (int) field;
+}
+
+/* Keep only the low LEN bits of X (the inverse of sign extension).
+ *
+ *   x    value to truncate
+ *   len  number of low bits to keep, 0..32
+ *
+ * Returns X with every bit from LEN upward cleared.  The mask is built in
+ * unsigned arithmetic, and a width of 32 (assumed to be the width of int
+ * on this target) returns X unchanged, so widths of 31 and 32 no longer
+ * rely on overflowing or over-wide signed shifts.  */
+static inline int
+sign_unext (int x, int len)
+{
+  unsigned int len_ones;
+
+  if (len >= 32)
+    return x;
+
+  len_ones = (1u << len) - 1u;
+
+  return (int) ((unsigned int) x & len_ones);
+}
+
+/* Convert the LEN-bit two's-complement field in X to "low sign" layout:
+ * the low LEN-1 bits of X moved up by one position, with bit LEN-1 of X
+ * (the sign) placed in bit 0.  */
+static inline int
+low_sign_unext (int x, int len)
+{
+  const int sign_pos = len - 1;
+  const int magnitude = sign_unext (x, sign_pos);
+  const int sign = (x >> sign_pos) & 1;
+
+  return sign | (magnitude << 1);
+}
+
+/* Scatter a 3-bit value into instruction position: bit 2 of AS3 goes to
+ * bit 13, bits 1..0 go to bits 15..14.  */
+static inline int
+re_assemble_3 (int as3)
+{
+  const int top = as3 & 4;
+  const int low = as3 & 3;
+
+  return (low << (13+1)) | (top << (13-2));
+}
+
+/* Scatter a 12-bit value into instruction position: bit 11 of AS12 goes
+ * to bit 0, bit 10 to bit 2, and bits 9..0 to bits 12..3.  Bits above 11
+ * are ignored.  */
+static inline int
+re_assemble_12 (int as12)
+{
+  int word;
+
+  word  = (as12 & 0x3ff) << (1 + 2);
+  word |= (as12 & 0x400) >> (10 - 2);
+  word |= (as12 & 0x800) >> 11;
+  return word;
+}
+
+/* Encode a 16-bit signed displacement AS16 into the low 16 bits of an
+ * instruction word: the value is moved up one bit, bit 15 of AS16 (the
+ * sign) lands in bit 0, and bits 15 and 14 of the result are each XORed
+ * with that sign.
+ *
+ * The doubling is done on an unsigned copy: callers pass negative
+ * displacements, and left-shifting a negative int is undefined.  */
+static inline int
+re_assemble_16 (int as16)
+{
+  unsigned int bits = (unsigned int) as16;
+  int s, t;
+
+  /* Unusual 16-bit encoding, for wide mode only.  */
+  t = (int) ((bits << 1) & 0xffff);
+  s = (as16 & 0x8000);
+  return (t ^ s ^ (s >> 1)) | (s >> 15);
+}
+
+/* Scatter a 17-bit value into instruction position: bit 16 of AS17 goes
+ * to bit 0, bits 15..11 to bits 20..16, bit 10 to bit 2, and bits 9..0 to
+ * bits 12..3.  Bits above 16 are ignored.  */
+static inline int
+re_assemble_17 (int as17)
+{
+  int word;
+
+  word  = (as17 & 0x003ff) << (1 + 2);
+  word |= (as17 & 0x00400) >> (10 - 2);
+  word |= (as17 & 0x0f800) << (16 - 11);
+  word |= (as17 & 0x10000) >> 16;
+  return word;
+}
+
+/* Scatter a 21-bit value into instruction position: bit 20 of AS21 goes
+ * to bit 0, bits 19..9 to bits 11..1, bits 8..7 to bits 15..14, bits 6..2
+ * to bits 20..16, and bits 1..0 to bits 13..12.  Bits above 20 are
+ * ignored.  */
+static inline int
+re_assemble_21 (int as21)
+{
+  int word;
+
+  word  = (as21 & 0x000003) << 12;
+  word |= (as21 & 0x00007c) << 14;
+  word |= (as21 & 0x000180) << 7;
+  word |= (as21 & 0x0ffe00) >> 8;
+  word |= (as21 & 0x100000) >> 20;
+  return word;
+}
+
+/* Scatter a 22-bit value into instruction position: bit 21 of AS22 goes
+ * to bit 0, bits 20..16 to bits 25..21, bits 15..11 to bits 20..16, bit 10
+ * to bit 2, and bits 9..0 to bits 12..3.  Bits above 21 are ignored.  */
+static inline int
+re_assemble_22 (int as22)
+{
+  int word;
+
+  word  = (as22 & 0x0003ff) << (1 + 2);
+  word |= (as22 & 0x000400) >> (10 - 2);
+  word |= (as22 & 0x00f800) << (16 - 11);
+  word |= (as22 & 0x1f0000) << (21 - 16);
+  word |= (as22 & 0x200000) >> 21;
+  return word;
+}
+
+/* Format 1: emit opcode o (6 bits, at bit 26), b (5 bits, at 21), t
+ * (5 bits, at 16) and a displacement i in -8192..8191 encoded in the low
+ * bits by re_assemble_16().  Out-of-range fields trip an assertion.  */
+static void
+_f1(jit_state_t *_jit, jit_int32_t o,
+    jit_int32_t b, jit_int32_t t, jit_int32_t i)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(t  & ~0x1f));
+    assert(i >= -8192 && i <= 8191);
+
+    word  = (o << 26) | (b << 21) | (t << 16);
+    word |= re_assemble_16(i);
+    ii(word);
+}
+
+/* Format 2: like format 1 but with a 16-bit displacement range
+ * (-32768..32767) and a 2-bit field j ORed in at bit 1.
+ * NOTE(review): j shares bits with the re_assemble_16() output; nothing
+ * here checks that those bits of the encoded displacement are clear --
+ * presumably callers pass suitably aligned displacements; confirm.  */
+static void
+_f2(jit_state_t *_jit, jit_int32_t o,
+    jit_int32_t b, jit_int32_t t, jit_int32_t i, jit_int32_t j)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(t  & ~0x1f));
+    assert(i >= -32768 && i <= 32767);
+    assert(!(j  &  ~0x3));
+
+    word  = (o << 26) | (b << 21) | (t << 16);
+    word |= j << 1;
+    word |= re_assemble_16(i);
+    ii(word);
+}
+
+/* Format 3: same layout as format 2, except that j is a single bit
+ * (ORed in at bit 1).  The displacement i must lie in -32768..32767 and
+ * is encoded by re_assemble_16().  */
+static void
+_f3(jit_state_t *_jit, jit_int32_t o,
+    jit_int32_t b, jit_int32_t t, jit_int32_t i, jit_int32_t j)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(t  & ~0x1f));
+    assert(i >= -32768 && i <= 32767);
+    assert(!(j  &  ~0x1));
+
+    word  = (o << 26) | (b << 21) | (t << 16);
+    word |= j << 1;
+    word |= re_assemble_16(i);
+    ii(word);
+}
+
+/* Format 4: emit a word from ten bit fields, most significant first:
+ * o(6)@26 b(5)@21 x(5)@16 s(2)@14 u(1)@13 y(1)@12 c(2)@10 z(4)@6 m(1)@5
+ * t(5)@0.  Every field is range-checked by an assertion.  */
+static void
+_f4(jit_state_t *_jit, jit_int32_t o, jit_int32_t b,
+    jit_int32_t x, jit_int32_t s, jit_int32_t u, jit_int32_t y,
+    jit_int32_t c, jit_int32_t z, jit_int32_t m, jit_int32_t t)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(x  & ~0x1f));
+    assert(!(s  &  ~0x3));
+    assert(!(u  &  ~0x1));
+    assert(!(y  &  ~0x1));
+    assert(!(c  &  ~0x3));
+    assert(!(z  &  ~0xf));
+    assert(!(m  &  ~0x1));
+    assert(!(t  & ~0x1f));
+
+    word  = (o << 26) | (b << 21) | (x << 16);
+    word |= (s << 14) | (u << 13) | (y << 12);
+    word |= (c << 10) | (z << 6) | (m << 5) | t;
+    ii(word);
+}
+
+/* Format 5: the layout of format 4 with the 5-bit x field replaced by a
+ * short signed immediate i (-16..15), stored in low-sign form at bit 16:
+ * o(6)@26 b(5)@21 i(5)@16 s(2)@14 a(1)@13 y(1)@12 c(2)@10 z(4)@6 m(1)@5
+ * t(5)@0.
+ *
+ * t is now range-checked like every other field (as _f4 already does);
+ * an oversized t would otherwise spill silently into the m/z fields.  */
+static void
+_f5(jit_state_t *_jit, jit_int32_t o, jit_int32_t b,
+    jit_int32_t i, jit_int32_t s, jit_int32_t a, jit_int32_t y,
+    jit_int32_t c, jit_int32_t z, jit_int32_t m, jit_int32_t t)
+{
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(s  &  ~0x3));
+    assert(!(a  &  ~0x1));
+    assert(!(y  &  ~0x1));
+    assert(!(c  &  ~0x3));
+    assert(!(z  &  ~0xf));
+    assert(!(m  &  ~0x1));
+    assert(!(t  & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    ii((o<<26)|(b<<21)|(low_sign_unext(i,5)<<16)|
+       (s<<14)|(a<<13)|(y<<12)|(c<<10)|(z<<6)|(m<<5)|t);
+}
+
+/* Format 6: o(6)@26 b(5)@21 r(5)@16 s(2)@14 a(1)@13 x(1)@12 c(2)@10
+ * y(4)@6 m(1)@5, with a short signed immediate i (-16..15) in low-sign
+ * form in the low five bits.
+ *
+ * r is now range-checked like every other field; an oversized r would
+ * otherwise spill silently into the b field.  */
+static void
+_f6(jit_state_t *_jit, jit_int32_t o, jit_int32_t b,
+    jit_int32_t r, jit_int32_t s, jit_int32_t a, jit_int32_t x,
+    jit_int32_t c, jit_int32_t y, jit_int32_t m, jit_int32_t i)
+{
+    assert(!(o  & ~0x3f));
+    assert(!(b  & ~0x1f));
+    assert(!(r  & ~0x1f));
+    assert(!(s  &  ~0x3));
+    assert(!(a  &  ~0x1));
+    assert(!(x  &  ~0x1));
+    assert(!(c  &  ~0x3));
+    assert(!(y  &  ~0xf));
+    assert(!(m  &  ~0x1));
+    assert(i >= -16 && i <= 15);
+    ii((o<<26)|(b<<21)|(r<<16)|(s<<14)|(a<<13)|
+       (x<<12)|(c<<10)|(y<<6)|(m<<5)|low_sign_unext(i,5));
+}
+
+/* Format 7: opcode o (6 bits, at 26), register r (5 bits, at 21) and the
+ * upper 21 bits of i scattered by re_assemble_21().  The low 11 bits of i
+ * must be zero.  */
+static void
+_f7(jit_state_t *_jit, jit_int32_t o, jit_int32_t r, jit_int32_t i)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(r  & ~0x1f));
+    assert(!(i  &  0x7ff));
+
+    word  = (o << 26) | (r << 21);
+    word |= re_assemble_21(i >> 11);
+    ii(word);
+}
+
+/* Format 8 (three-register form): o(6)@26 r2(5)@21 r1(5)@16 cf(4)@12
+ * e1(2)@10 x(1)@9 e2(2)@8 e3(2)@6 d(1)@5 t(5)@0.  Every field is
+ * range-checked by an assertion.
+ * NOTE(review): e2 is asserted as two bits but placed at bit 8, directly
+ * below x at bit 9 -- a value of 2 or 3 would overlap x; confirm whether
+ * e2 is meant to be a single bit.  */
+static void
+_f8(jit_state_t *_jit, jit_int32_t o,
+    jit_int32_t r2, jit_int32_t r1, jit_int32_t cf,
+    jit_int32_t e1, jit_int32_t x, jit_int32_t e2,
+    jit_int32_t e3, jit_int32_t d, jit_int32_t t)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(r2 & ~0x1f));
+    assert(!(r1 & ~0x1f));
+    assert(!(cf &  ~0xf));
+    assert(!(e1 &  ~0x3));
+    assert(!(x  &  ~0x1));
+    assert(!(e2 &  ~0x3));
+    assert(!(e3 &  ~0x3));
+    assert(!(d  &  ~0x1));
+    assert(!(t  & ~0x1f));
+
+    word  = (o << 26) | (r2 << 21) | (r1 << 16) | (cf << 12);
+    word |= (e1 << 10) | (x << 9) | (e2 << 8);
+    word |= (e3 << 6) | (d << 5) | t;
+    ii(word);
+}
+
+/* Format 9: o(6)@26 r(5)@21 t(5)@16 cf(4)@12 e1(1)@11 and an 11-bit
+ * signed immediate i (-2048..2047... see assertion) stored in low-sign
+ * form in the low bits.  */
+static void
+_f9(jit_state_t *_jit,
+    jit_int32_t o, jit_int32_t r, jit_int32_t t,
+    jit_int32_t cf, jit_int32_t e1, jit_int32_t i)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(r  & ~0x1f));
+    assert(!(t  & ~0x1f));
+    assert(!(cf &  ~0xf));
+    assert(!(e1 &  ~0x1));
+    assert(i >= -2048 && i <= 2047);
+
+    word  = (o << 26) | (r << 21) | (t << 16);
+    word |= (cf << 12) | (e1 << 11);
+    word |= low_sign_unext(i, 11);
+    ii(word);
+}
+
+/* Format 10: o(6)@26 r2(5)@21 r1(5)@16 u(1)@15 v(2)@13 w(1)@12 x(2)@10
+ * sa(4)@6 y(1)@5 t(5)@0.  Every field is range-checked by an assertion.  */
+static void
+_f10(jit_state_t *_jit, jit_int32_t o, jit_int32_t r2,
+     jit_int32_t r1, jit_int32_t u, jit_int32_t v, jit_int32_t w,
+     jit_int32_t x, jit_int32_t sa, jit_int32_t y, jit_int32_t t)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(r2 & ~0x1f));
+    assert(!(r1 & ~0x1f));
+    assert(!(u  &  ~0x1));
+    assert(!(v  &  ~0x3));
+    assert(!(w  &  ~0x1));
+    assert(!(x  &  ~0x3));
+    assert(!(sa &  ~0xf));
+    assert(!(y  &  ~0x1));
+    assert(!(t  & ~0x1f));
+
+    word  = (o << 26) | (r2 << 21) | (r1 << 16) | (u << 15);
+    word |= (v << 13) | (w << 12) | (x << 10);
+    word |= (sa << 6) | (y << 5) | t;
+    ii(word);
+}
+
+/* Format 11: o(6)@26 r2(5)@21 r1(5)@16 c(3)@13 x(2)@11 y(1)@10 z(1)@9
+ * u(4)@5 t(5)@0.  Every field is range-checked by an assertion.  */
+static void
+_f11(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t r2, jit_int32_t r1, jit_int32_t c, jit_int32_t x,
+     jit_int32_t y, jit_int32_t z, jit_int32_t u, jit_int32_t t)
+{
+    jit_int32_t word;
+
+    assert(!(o  & ~0x3f));
+    assert(!(r1 & ~0x1f));
+    assert(!(r2 & ~0x1f));
+    assert(!(c  &  ~0x7));
+    assert(!(x  &  ~0x3));
+    assert(!(y  &  ~0x1));
+    assert(!(z  &  ~0x1));
+    assert(!(u  &  ~0xf));
+    assert(!(t  & ~0x1f));
+
+    word  = (o << 26) | (r2 << 21) | (r1 << 16);
+    word |= (c << 13) | (x << 11) | (y << 10) | (z << 9);
+    word |= (u << 5) | t;
+    ii(word);
+}
+
+/* Format 12: o(6)@26 r(5)@21 t(5)@16 c(3)@13 x(2)@11 se(1)@10 y(1)@9
+ * c1(1)@8 z(3)@5, with the length stored as 32-clen in the low five
+ * bits.  clen must therefore satisfy 0 <= 32-clen <= 31.  */
+static void
+_f12(jit_state_t *_jit, jit_int32_t o, jit_int32_t r,
+     jit_int32_t t, jit_int32_t c, jit_int32_t x, jit_int32_t se,
+     jit_int32_t y, jit_int32_t c1, jit_int32_t z, jit_int32_t clen)
+{
+    jit_int32_t len_field = 32 - clen;
+    jit_int32_t word;
+
+    assert(!(o    & ~0x3f));
+    assert(!(r    & ~0x1f));
+    assert(!(t    & ~0x1f));
+    assert(!(c    &  ~0x7));
+    assert(!(x    &  ~0x3));
+    assert(!(se   &  ~0x1));
+    assert(!(y    &  ~0x1));
+    assert(!(c1   &  ~0x1));
+    assert(!(z    &  ~0x7));
+    assert(!(len_field & ~0x1f));
+
+    word  = (o << 26) | (r << 21) | (t << 16);
+    word |= (c << 13) | (x << 11) | (se << 10) | (y << 9);
+    word |= (c1 << 8) | (z << 5) | len_field;
+    ii(word);
+}
+
+/* Format 13: o(6)@26 t(5)@21 r(5)@16 c(3)@13 x(2)@11 nz(1)@10 c1(1)@8,
+ * with the length stored as 32-clen in the low five bits.  */
+static void
+_f13(jit_state_t *_jit, jit_int32_t o, jit_int32_t t,
+     jit_int32_t r, jit_int32_t c, jit_int32_t x,
+     jit_int32_t nz, jit_int32_t c1, jit_int32_t clen)
+{
+    jit_int32_t len_field = 32 - clen;
+    jit_int32_t word;
+
+    assert(!(o    & ~0x3f));
+    assert(!(t    & ~0x1f));
+    assert(!(r    & ~0x1f));
+    assert(!(c    &  ~0x7));
+    assert(!(x    &  ~0x3));
+    assert(!(nz   &  ~0x1));
+    assert(!(c1   &  ~0x1));
+    assert(!(len_field & ~0x1f));
+
+    word  = (o << 26) | (t << 21) | (r << 16);
+    word |= (c << 13) | (x << 11) | (nz << 10) | (c1 << 8);
+    word |= len_field;
+    ii(word);
+}
+
+/* Format 13 with an immediate: identical to _f13 except that the r field
+ * at bit 16 is replaced by a short signed immediate i (-16..15) stored in
+ * low-sign form.  Layout: o(6)@26 t(5)@21 i(5)@16 c(3)@13 x(2)@11
+ * nz(1)@10 c1(1)@8, and 32-clen in the low five bits.
+ *
+ * c1 is now range-checked, matching _f13; an oversized c1 would otherwise
+ * spill silently into the nz/x fields.  */
+static void
+_f13x(jit_state_t *_jit, jit_int32_t o, jit_int32_t t,
+      jit_int32_t i, jit_int32_t c, jit_int32_t x,
+      jit_int32_t nz, jit_int32_t c1, jit_int32_t clen)
+{
+    assert(!(o    & ~0x3f));
+    assert(!(t    & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    assert(!(c    &  ~0x7));
+    assert(!(x    &  ~0x3));
+    assert(!(nz   &  ~0x1));
+    assert(!(c1   &  ~0x1));
+    assert(!((32-clen) & ~0x1f));
+    ii((o<<26)|(t<<21)|(low_sign_unext(i,5)<<16)|
+       (c<<13)|(x<<11)|(nz<<10)|(c1<<8)|(32-clen));
+}
+
+/* Format 14: o(6)@26 r2(5)@21 r1(5)@16 c(3)@13 x@12 cp(1)@11 y(1)@10
+ * cpos(5)@5 t(5)@0.
+ * NOTE(review): x is asserted as two bits but placed at bit 12, directly
+ * below c at bit 13 -- a value of 2 or 3 would overlap c; confirm whether
+ * x is meant to be a single bit.  */
+static void
+_f14(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t r2, jit_int32_t r1, jit_int32_t c, jit_int32_t x,
+     jit_int32_t cp, jit_int32_t y, jit_int32_t cpos, jit_int32_t t)
+{
+    jit_int32_t word;
+
+    assert(!(o    & ~0x3f));
+    assert(!(r1   & ~0x1f));
+    assert(!(r2   & ~0x1f));
+    assert(!(c    &  ~0x7));
+    assert(!(x    &  ~0x3));
+    assert(!(cp   &  ~0x1));
+    assert(!(y    &  ~0x1));
+    assert(!(cpos & ~0x1f));
+    assert(!(t    & ~0x1f));
+
+    word  = (o << 26) | (r2 << 21) | (r1 << 16);
+    word |= (c << 13) | (x << 12) | (cp << 11) | (y << 10);
+    word |= (cpos << 5) | t;
+    ii(word);
+}
+
+/* Format 15: o(6)@26 r(5)@21 t(5)@16 c(3)@13 c1(1)@12 p(1)@11 se(1)@10
+ * pos(5)@5, with the length stored as 32-clen in the low five bits.  */
+static void
+_f15(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t r, jit_int32_t t, jit_int32_t c, jit_int32_t c1,
+     jit_int32_t p, jit_int32_t se, jit_int32_t pos, jit_int32_t clen)
+{
+    jit_int32_t len_field = 32 - clen;
+    jit_int32_t word;
+
+    assert(!(o    & ~0x3f));
+    assert(!(r    & ~0x1f));
+    assert(!(t    & ~0x1f));
+    assert(!(c    &  ~0x7));
+    assert(!(c1   &  ~0x1));
+    assert(!(p    &  ~0x1));
+    assert(!(se   &  ~0x1));
+    assert(!(pos  & ~0x1f));
+    assert(!(len_field & ~0x1f));
+
+    word  = (o << 26) | (r << 21) | (t << 16);
+    word |= (c << 13) | (c1 << 12) | (p << 11) | (se << 10);
+    word |= (pos << 5) | len_field;
+    ii(word);
+}
+
+/* Format 16: o(6)@26 t(5)@21 r(5)@16 c(3)@13 c1(1)@12 cp(1)@11 nz(1)@10
+ * cpos(5)@5, with the length stored as 32-clen in the low bits.
+ * NOTE(review): 32-clen is allowed six bits here (mask 0x3f) although
+ * cpos starts at bit 5, so a value of 32..63 would overlap cpos; _f15
+ * uses 0x1f for the same field -- confirm which bound is intended.  */
+static void
+_f16(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t t, jit_int32_t r, jit_int32_t c, jit_int32_t c1,
+     jit_int32_t cp, jit_int32_t nz, jit_int32_t cpos, jit_int32_t clen)
+{
+    jit_int32_t len_field = 32 - clen;
+    jit_int32_t word;
+
+    assert(!(o    & ~0x3f));
+    assert(!(t    & ~0x1f));
+    assert(!(r    & ~0x1f));
+    assert(!(c    &  ~0x7));
+    assert(!(c1   &  ~0x1));
+    assert(!(cp   &  ~0x1));
+    assert(!(nz   &  ~0x1));
+    assert(!(cpos & ~0x1f));
+    assert(!(len_field & ~0x3f));
+
+    word  = (o << 26) | (t << 21) | (r << 16);
+    word |= (c << 13) | (c1 << 12) | (cp << 11) | (nz << 10);
+    word |= (cpos << 5) | len_field;
+    ii(word);
+}
+
+/*
+ * Encoder helper: same layout as _f16 but with a signed 5-bit immediate i
+ * (run through low_sign_unext()) in place of the register at bit 16.
+ * NOTE(review): 32 - clen is allowed six bits here, but cpos starts at
+ * bit 5, so a value of 32 or more would overlap cpos -- confirm.
+ */
+static void
+_f16x(jit_state_t *_jit, jit_int32_t o,
+      jit_int32_t t, jit_int32_t i, jit_int32_t c, jit_int32_t c1,
+      jit_int32_t cp, jit_int32_t nz, jit_int32_t cpos, jit_int32_t clen)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(t & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    assert(!(c & ~0x7));
+    assert(!(c1 & ~0x1));
+    assert(!(cp & ~0x1));
+    assert(!(nz & ~0x1));
+    assert(!(cpos & ~0x1f));
+    assert(!((32-clen) & ~0x3f));
+    insn  = o << 26;
+    insn |= t << 21;
+    insn |= low_sign_unext(i,5) << 16;
+    insn |= c << 13;
+    insn |= c1 << 12;
+    insn |= cp << 11;
+    insn |= nz << 10;
+    insn |= cpos << 5;
+    insn |= 32 - clen;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: two registers, a 3-bit c field, a signed 12-bit
+ * immediate i placed by re_assemble_12(), and the n bit at position 1.
+ * The assembled word is passed to ii().
+ */
+static void
+_f17(jit_state_t *_jit, jit_int32_t o, jit_int32_t r2,
+     jit_int32_t r1, jit_int32_t c, jit_int32_t i, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(r2 & ~0x1f));
+    assert(!(r1 & ~0x1f));
+    assert(!(c & ~0x7));
+    assert(i >= -2048 && i <= 2047);
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= r2 << 21;
+    insn |= r1 << 16;
+    insn |= c << 13;
+    insn |= re_assemble_12(i);
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: like _f17, but the field at bit 16 is a signed 5-bit
+ * immediate i (via low_sign_unext()) and the 12-bit value placed by
+ * re_assemble_12() is t.  The assembled word is passed to ii().
+ */
+static void
+_f17x(jit_state_t *_jit, jit_int32_t o, jit_int32_t r,
+      jit_int32_t i, jit_int32_t c, jit_int32_t t, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(r & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    assert(!(c & ~0x7));
+    assert(t >= -2048 && t <= 2047);
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= r << 21;
+    insn |= low_sign_unext(i,5) << 16;
+    insn |= c << 13;
+    insn |= re_assemble_12(t);
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: p and r register fields, a 1-bit c at bit 15, a
+ * constant 1 at bit 14, a signed 12-bit immediate placed by
+ * re_assemble_12(), and the n bit at position 1.  Passed to ii().
+ */
+static void
+_f18(jit_state_t *_jit, jit_int32_t o, jit_int32_t p,
+     jit_int32_t r, jit_int32_t c, jit_int32_t i, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(p & ~0x1f));
+    assert(!(r & ~0x1f));
+    assert(!(c & ~0x1));
+    assert(i >= -2048 && i <= 2047);
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= p << 21;
+    insn |= r << 16;
+    insn |= c << 15;
+    insn |= 1 << 14;
+    insn |= re_assemble_12(i);
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: base register b, a 3-bit s placed by re_assemble_3(),
+ * a signed 17-bit immediate placed by re_assemble_17(), and the n bit at
+ * position 1.  The assembled word is passed to ii().
+ */
+static void
+_f19(jit_state_t *_jit, jit_int32_t o, jit_int32_t b,
+     jit_int32_t s, jit_int32_t i, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(!(s & ~0x7));
+    assert(i >= -65536 && i <= 65535);
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= re_assemble_3(s);
+    insn |= re_assemble_17(i);
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: register t, a 3-bit g at bit 13, an immediate limited
+ * to [-32768, 32767] placed by re_assemble_17(), and the n bit at
+ * position 1.  The assembled word is passed to ii().
+ */
+static void
+_f20(jit_state_t *_jit, jit_int32_t o, jit_int32_t t,
+     jit_int32_t i, jit_int32_t g, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(t & ~0x1f));
+    assert(i >= -32768 && i <= 32767);
+    assert(!(g & ~0x7));
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= t << 21;
+    insn |= g << 13;
+    insn |= re_assemble_17(i);
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: two 5-bit fields (t, x), a 3-bit y at bit 13 and the
+ * n bit at position 1.  The assembled word is passed to ii().
+ */
+static void
+_f21(jit_state_t *_jit, jit_int32_t o, jit_int32_t t,
+     jit_int32_t x, jit_int32_t y, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(t & ~0x1f));
+    assert(!(x & ~0x1f));
+    assert(!(y & ~0x7));
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= t << 21;
+    insn |= x << 16;
+    insn |= y << 13;
+    insn |= n << 1;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: register b, a 3-bit x at bit 13, a constant 1 at
+ * bit 12, a 6-bit r at bit 2, then the n and p bits.  The assembled word
+ * is passed to ii().
+ */
+static void
+_f22(jit_state_t *_jit, jit_int32_t o, jit_int32_t b,
+     jit_int32_t x, jit_int32_t r, jit_int32_t n, jit_int32_t p)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(!(x & ~0x7));
+    assert(!(r & ~0x3f));
+    assert(!(n & ~0x1));
+    assert(!(p & ~0x1));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= x << 13;
+    insn |= 1 << 12;
+    insn |= r << 2;
+    insn |= n << 1;
+    insn |= p;
+    ii(insn);
+}
+
+/*
+ * Encoder helper taking eight generic fields a..h after the opcode; each
+ * is range-checked against its width and ORed in at a fixed position.
+ * The assembled word is passed to ii().
+ */
+static void
+_f23(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t a, jit_int32_t b, jit_int32_t c, jit_int32_t d,
+     jit_int32_t e, jit_int32_t f, jit_int32_t g, jit_int32_t h)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(a & ~0x1f));
+    assert(!(b & ~0x1f));
+    assert(!(c & ~0x7));
+    assert(!(d & ~0x1));
+    assert(!(e & ~0x1ff));
+    assert(!(f & ~0x1));
+    assert(!(g & ~0x1));
+    assert(!(h & ~0x1));
+    insn  = o << 26;
+    insn |= a << 21;
+    insn |= b << 16;
+    insn |= c << 13;
+    insn |= d << 12;
+    insn |= e << 3;
+    insn |= f << 2;
+    insn |= g << 1;
+    insn |= h;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers b and x, a 2-bit s at bit 14, an 8-bit y at
+ * bit 6, the m bit at position 5 and a 5-bit r.  Passed to ii().
+ */
+static void
+_f24(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t x, jit_int32_t s,
+     jit_int32_t y, jit_int32_t m,jit_int32_t r)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(!(x & ~0x1f));
+    assert(!(s & ~0x3));
+    assert(!(y & ~0xff));
+    assert(!(m & ~0x1));
+    assert(!(r & ~0x1f));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= x << 16;
+    insn |= s << 14;
+    insn |= y << 6;
+    insn |= m << 5;
+    insn |= r;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: same layout as _f24 but with a signed 5-bit immediate i
+ * (via low_sign_unext()) in the field at bit 16.  Passed to ii().
+ */
+static void
+_f25(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t i, jit_int32_t s,
+     jit_int32_t y, jit_int32_t m, jit_int32_t r)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    assert(!(s & ~0x3));
+    assert(!(y & ~0xff));
+    assert(!(m & ~0x1));
+    assert(!(r & ~0x1f));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= low_sign_unext(i,5) << 16;
+    insn |= s << 14;
+    insn |= y << 6;
+    insn |= m << 5;
+    insn |= r;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers b and x, a 3-bit s at bit 13, a 7-bit y at
+ * bit 6, the m bit at position 5 and a 5-bit r.  Passed to ii().
+ */
+static void
+_f26(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t x, jit_int32_t s,
+     jit_int32_t y, jit_int32_t m,jit_int32_t r)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(!(x & ~0x1f));
+    assert(!(s & ~0x7));
+    assert(!(y & ~0x7f));
+    assert(!(m & ~0x1));
+    assert(!(r & ~0x1f));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= x << 16;
+    insn |= s << 13;
+    insn |= y << 6;
+    insn |= m << 5;
+    insn |= r;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: checks o (6 bits), i and j, then passes
+ * (o<<26)|(i<<13)|j to ii().
+ * NOTE(review): the asserts admit negative i and j, but both are OR'ed in
+ * unmasked, so a negative value would set bits outside its own field
+ * (including the opcode bits) -- confirm whether masking was intended.
+ * NOTE(review): the upper bounds are exclusive (i < 4095, j < 15), unlike
+ * the inclusive "<=" range checks in the sibling encoders -- confirm.
+ */
+static void
+_f27(jit_state_t *_jit, jit_int32_t o, jit_int32_t i, jit_int32_t j)
+{
+    assert(!(o & ~0x3f));
+    assert(i >= -4096 && i < 4095);
+    assert(j >=   -16 && j <   15);
+    ii((o<<26)|(i<<13)|j);
+}
+
+/*
+ * Encoder helper: a 6-bit opcode followed by a single immediate that must
+ * fit in 25 bits.  The assembled word is passed to ii().
+ */
+static void
+_f28(jit_state_t *_jit, jit_int32_t o, jit_int32_t i)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(i & ~0x1ffffff));
+    insn  = o << 26;
+    insn |= i;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers r and x, a 3-bit s placed by re_assemble_3(),
+ * an 8-bit y at bit 5 and a 5-bit t.  Passed to ii().
+ */
+static void
+_f29(jit_state_t *_jit, jit_int32_t o, jit_int32_t r,
+     jit_int32_t x, jit_int32_t s, jit_int32_t y, jit_int32_t t)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(r & ~0x1f));
+    assert(!(x & ~0x1f));
+    assert(!(s & ~0x7));
+    assert(!(y & ~0xff));
+    assert(!(t & ~0x1f));
+    insn  = o << 26;
+    insn |= r << 21;
+    insn |= x << 16;
+    insn |= re_assemble_3(s);
+    insn |= y << 5;
+    insn |= t;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers b and r, a 2-bit s at bit 14, a 1-bit x at
+ * bit 13, an 8-bit y at bit 5 and a 5-bit t.  Passed to ii().
+ */
+static void
+_f30(jit_state_t *_jit, jit_int32_t o, jit_int32_t b, jit_int32_t r,
+     jit_int32_t s, jit_int32_t x, jit_int32_t y, jit_int32_t t)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(!(r & ~0x1f));
+    assert(!(s & ~0x3));
+    assert(!(x & ~0x1));
+    assert(!(y & ~0xff));
+    assert(!(t & ~0x1f));
+    insn  = o << 26;
+    insn |= b << 21;
+    insn |= r << 16;
+    insn |= s << 14;
+    insn |= x << 13;
+    insn |= y << 5;
+    insn |= t;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers t and r, a v field at bit 14, an 8-bit x at
+ * bit 5 and a 5-bit y.  The assembled word is passed to ii().
+ * NOTE(review): v is allowed five bits but sits at bit 14 with r starting
+ * at bit 16, so any v above 3 would overlap r -- confirm the width.
+ */
+static void
+_f31(jit_state_t *_jit, jit_int32_t o, jit_int32_t t,
+     jit_int32_t r, jit_int32_t v, jit_int32_t x, jit_int32_t y)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(t & ~0x1f));
+    assert(!(r & ~0x1f));
+    assert(!(v & ~0x1f));
+    assert(!(x & ~0xff));
+    assert(!(y & ~0x1f));
+    insn  = o << 26;
+    insn |= t << 21;
+    insn |= r << 16;
+    insn |= v << 14;
+    insn |= x << 5;
+    insn |= y;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: 5-bit x and r, a 3-bit y at bit 13, an 8-bit z at
+ * bit 5 and a 5-bit u.  The assembled word is passed to ii().
+ */
+static void
+_f33(jit_state_t *_jit, jit_int32_t o, jit_int32_t x,
+     jit_int32_t r, jit_int32_t y, jit_int32_t z, jit_int32_t u)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(x & ~0x1f));
+    assert(!(r & ~0x1f));
+    assert(!(y & ~0x7));
+    assert(!(z & ~0xff));
+    assert(!(u & ~0x1f));
+    insn  = o << 26;
+    insn |= x << 21;
+    insn |= r << 16;
+    insn |= y << 13;
+    insn |= z << 5;
+    insn |= u;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: a 15-bit o1 at bit 11, a 2-bit x at bit 9, a 3-bit sf
+ * at bit 6, the n bit at position 5 and a 5-bit o2.  Passed to ii().
+ */
+static void
+_f34(jit_state_t *_jit, jit_int32_t o, jit_int32_t o1,
+     jit_int32_t x, jit_int32_t sf, jit_int32_t n, jit_int32_t o2)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(o1 & ~0x7fff));
+    assert(!(x & ~0x3));
+    assert(!(sf & ~0x7));
+    assert(!(n & ~0x1));
+    assert(!(o2 & ~0x1f));
+    insn  = o << 26;
+    insn |= o1 << 11;
+    insn |= x << 9;
+    insn |= sf << 6;
+    insn |= n << 5;
+    insn |= o2;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: a 15-bit op at bit 11, a 2-bit x at bit 9, a 3-bit sf
+ * at bit 6, the n bit at position 5 and a 5-bit t.  Passed to ii().
+ */
+static void
+_f35(jit_state_t *_jit, jit_int32_t o, jit_int32_t op,
+     jit_int32_t x, jit_int32_t sf, jit_int32_t n, jit_int32_t t)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(op & ~0x7fff));
+    assert(!(x & ~0x3));
+    assert(!(sf & ~0x7));
+    assert(!(n & ~0x1));
+    assert(!(t & ~0x1f));
+    insn  = o << 26;
+    insn |= op << 11;
+    insn |= x << 9;
+    insn |= sf << 6;
+    insn |= n << 5;
+    insn |= t;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: register r, a 10-bit o1 at bit 11, a 2-bit x at bit 9,
+ * a 3-bit sf at bit 6, the n bit at position 5 and a 5-bit o2.
+ * The assembled word is passed to ii().
+ */
+static void
+_f36(jit_state_t *_jit, jit_int32_t o, jit_int32_t r, jit_int32_t o1,
+     jit_int32_t x, jit_int32_t sf, jit_int32_t n, jit_int32_t o2)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(r & ~0x1f));
+    assert(!(o1 & ~0x3ff));
+    assert(!(x & ~0x3));
+    assert(!(sf & ~0x7));
+    assert(!(n & ~0x1));
+    assert(!(o2 & ~0x1f));
+    insn  = o << 26;
+    insn |= r << 21;
+    insn |= o1 << 11;
+    insn |= x << 9;
+    insn |= sf << 6;
+    insn |= n << 5;
+    insn |= o2;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: registers r2 and r1, a 5-bit o1 at bit 11, a 2-bit x
+ * at bit 9, a 3-bit sf at bit 6, the n bit at position 5 and a 5-bit o2.
+ * The assembled word is passed to ii().
+ */
+static void
+_f37(jit_state_t *_jit, jit_int32_t o, jit_int32_t r2,
+     jit_int32_t r1, jit_int32_t o1, jit_int32_t x,
+     jit_int32_t sf, jit_int32_t n, jit_int32_t o2)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(r2 & ~0x1f));
+    assert(!(r1 & ~0x1f));
+    assert(!(o1 & ~0x1f));
+    assert(!(x & ~0x3));
+    assert(!(sf & ~0x7));
+    assert(!(n & ~0x1));
+    assert(!(o2 & ~0x1f));
+    insn  = o << 26;
+    insn |= r2 << 21;
+    insn |= r1 << 16;
+    insn |= o1 << 11;
+    insn |= x << 9;
+    insn |= sf << 6;
+    insn |= n << 5;
+    insn |= o2;
+    ii(insn);
+}
+
+/*
+ * Encoder helper: a 22-bit s split in two (upper 17 bits at bit 9, low
+ * five bits at bit 0), a 3-bit u at bit 6 and the n bit at position 1.
+ * The assembled word is passed to ii().
+ * NOTE(review): n at bit 1 shares a bit position with the low five bits
+ * of s -- confirm the intended layout.
+ */
+static void
+_f38(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t s, jit_int32_t u, jit_int32_t n)
+{
+    jit_int32_t        insn;
+
+    assert(!(o & ~0x3f));
+    assert(!(s & ~0x3fffff));
+    assert(!(u & ~0x7));
+    assert(!(n & ~0x1));
+    insn  = o << 26;
+    insn |= (s >> 5) << 9;
+    insn |= u << 6;
+    insn |= n << 1;
+    insn |= s & 0x1f;
+    ii(insn);
+}
+
+/*
+ * Issue one NOP() for every 4 units of i0.  i0 must end up exactly at
+ * zero, i.e. it has to be a non-negative multiple of 4 (asserted).
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+        NOP();
+        i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/* Register move: COPY(r1, r0), skipped when both are the same register. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    COPY(r1, r0);
+}
+
+/*
+ * Load the constant i0 into r0 using the shortest sequence:
+ *   - LDI alone when i0 is within [-8192, 8191];
+ *   - LDIL alone when the low 11 bits of i0 are zero;
+ *   - otherwise LDIL of i0 with the low 11 bits cleared, followed by an
+ *     LDO that adds those low 11 bits back.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         low = i0 & 0x7ff;
+
+    if (i0 >= -8192 && i0 <= 8191) {
+        LDI(i0, r0);
+        return;
+    }
+    if (!low) {
+        LDIL(i0, r0);
+        return;
+    }
+    LDIL(i0 & ~0x7ff, r0);
+    LDO(low, r0, r0);
+}
+
+/*
+ * Load i0 into r0 with the fixed two-instruction LDIL + LDO sequence,
+ * whatever the value, and return the code address (_jit->pc.w) at which
+ * the sequence starts.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         start = _jit->pc.w;
+
+    LDIL(i0 & ~0x7ff, r0);
+    LDO(i0 & 0x7ff, r0, r0);
+    return (start);
+}
+
+/*
+ * Add-immediate: ADDI(i0, r1, r0) when i0 is within [-1024, 1023];
+ * otherwise i0 is loaded into a scratch GPR and addr() is used.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+
+    if (i0 < -1024 || i0 > 1023) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        addr(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    ADDI(i0, r1, r0);
+}
+
+/*
+ * Immediate form of addcr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    addcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of addxr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Subtract-immediate.  When i0 is within [-1023, 1024] it is rewritten
+ * as addi(r0, r1, -i0); the range is shifted by one so that -i0 stays
+ * within [-1024, 1023].  Otherwise i0 is loaded into a scratch GPR and
+ * subr() is used.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+
+    if (i0 < -1023 || i0 > 1024) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        subr(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    addi(r0, r1, -i0);
+}
+
+/*
+ * Immediate form of subcr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    subcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of subxr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    subxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Reverse subtract with an immediate: SUBI(i0, r1, r0) when i0 is within
+ * [-1024, 1023]; otherwise i0 is loaded into a scratch GPR and
+ * subr(r0, scratch, r1) is used, i.e. the immediate is the first operand.
+ */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+
+    if (i0 < -1024 || i0 > 1023) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        subr(r0, rn(tmp), r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    SUBI(i0, r1, r0);
+}
+
+/*
+ * Multiply r1 by r2 into r0 using XMPYU on two scratch FPRs.  Each
+ * operand is moved GPR -> FPR through the frame slot at
+ * alloca_offset - 8 (stored with stxi, reloaded with ldxi_f).  The
+ * product is stored back as a double into the same slot and r0 is loaded
+ * from the word at alloca_offset - 4.
+ */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0, t1;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    /* r1 -> t0 and r2 -> t1, both through the same scratch slot */
+    stxi(alloca_offset - 8, _FP_REGNO, r1);
+    ldxi_f(rn(t0), _FP_REGNO, alloca_offset - 8);
+    stxi(alloca_offset - 8, _FP_REGNO, r2);
+    ldxi_f(rn(t1), _FP_REGNO, alloca_offset - 8);
+    XMPYU(rn(t0), rn(t1), rn(t0));
+    /* spill the 8-byte product and pick up the word at offset - 4 */
+    stxi_d(alloca_offset - 8, _FP_REGNO, rn(t0));
+    ldxi(r0, _FP_REGNO, alloca_offset - 4);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Multiply-immediate built on mulr().  When r0 and r1 differ, r0 itself
+ * holds the constant; when they are the same register a scratch GPR is
+ * used so the source operand is not overwritten before the multiply.
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+
+    if (r0 == r1) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        mulr(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    movi(r0, i0);
+    mulr(r0, r1, r0);
+}
+
+/*
+ * Signed 32x32 multiply with a 64-bit result.  Its address is handed to
+ * calli() by _qmulr and _qmuli.
+ */
+static long long
+__llmul(int u, int v)
+{
+    long long  lhs = u;
+    long long  rhs = v;
+
+    return (lhs * rhs);
+}
+
+/*
+ * Signed multiply returning two result registers, implemented as a call
+ * to __llmul(): r2 and r3 are copied to r26 and r25, and after the call
+ * r29 is copied to r0 and r28 to r1.
+ */
+static void
+_qmulr(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    movr(_R26_REGNO, r2);
+    movr(_R25_REGNO, r3);
+    calli((jit_word_t)__llmul);
+    movr(r0, _R29_REGNO);
+    movr(r1, _R28_REGNO);
+}
+
+/*
+ * Immediate form of _qmulr: r2 is copied to r26, the constant i0 is
+ * loaded into r25, __llmul() is called, then r29 is copied to r0 and r28
+ * to r1.
+ */
+static void
+_qmuli(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    movr(_R26_REGNO, r2);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__llmul);
+    movr(r0, _R29_REGNO);
+    movr(r1, _R28_REGNO);
+}
+
+/*
+ * Unsigned multiply returning two result registers, done with XMPYU on
+ * two scratch FPRs.  r2 and r3 are moved GPR -> FPR through the frame
+ * slot at alloca_offset - 8; the product is stored back as a double and
+ * then read as two words: the one at alloca_offset - 4 into r0 and the
+ * one at alloca_offset - 8 into r1.
+ */
+static void
+_qmulr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                t0, t1;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    /* r2 -> t0 and r3 -> t1, both through the same scratch slot */
+    stxi(alloca_offset - 8, _FP_REGNO, r2);
+    ldxi_f(rn(t0), _FP_REGNO, alloca_offset - 8);
+    stxi(alloca_offset - 8, _FP_REGNO, r3);
+    ldxi_f(rn(t1), _FP_REGNO, alloca_offset - 8);
+    XMPYU(rn(t0), rn(t1), rn(t0));
+    /* spill the 8-byte product and read both halves back */
+    stxi_d(alloca_offset - 8, _FP_REGNO, rn(t0));
+    ldxi(r0, _FP_REGNO, alloca_offset - 4);
+    ldxi(r1, _FP_REGNO, alloca_offset - 8);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Immediate form of qmulr_u(): i0 is loaded into a scratch GPR, which is
+ * passed as the second multiplicand and then released.
+ */
+static void
+_qmuli_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    qmulr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Signed integer quotient u / v.  Its address is handed to calli() by
+ * _divr and _divi.
+ */
+static int
+__idiv(int u, int v)
+{
+    int        quotient = u / v;
+
+    return (quotient);
+}
+
+/*
+ * Signed division implemented as a call to __idiv(): r1 and r2 are
+ * copied to r26 and r25, and after the call r28 is copied to r0.
+ */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_R26_REGNO, r1);
+    movr(_R25_REGNO, r2);
+    calli((jit_word_t)__idiv);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Immediate form of _divr: r1 is copied to r26, the constant i0 is
+ * loaded into r25, __idiv() is called and r28 is copied to r0.
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_R26_REGNO, r1);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__idiv);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Unsigned integer quotient u / v.  Its address is handed to calli() by
+ * _divr_u and _divi_u.
+ */
+static unsigned int
+__udiv(unsigned int u, unsigned int v)
+{
+    unsigned int       quotient = u / v;
+
+    return (quotient);
+}
+
+/*
+ * Unsigned division implemented as a call to __udiv(): r1 and r2 are
+ * copied to r26 and r25, and after the call r28 is copied to r0.
+ */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_R26_REGNO, r1);
+    movr(_R25_REGNO, r2);
+    calli((jit_word_t)__udiv);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Immediate form of _divr_u: r1 is copied to r26, the constant i0 is
+ * loaded into r25, __udiv() is called and r28 is copied to r0.
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_R26_REGNO, r1);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__udiv);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Signed integer remainder u % v.  Its address is handed to calli() by
+ * _remr and _remi.
+ */
+static int
+__irem(int u, int v)
+{
+    int        remainder = u % v;
+
+    return (remainder);
+}
+
+/*
+ * Signed remainder implemented as a call to __irem(): r1 and r2 are
+ * copied to r26 and r25, and after the call r28 is copied to r0.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_R26_REGNO, r1);
+    movr(_R25_REGNO, r2);
+    calli((jit_word_t)__irem);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Immediate form of _remr: r1 is copied to r26, the constant i0 is
+ * loaded into r25, __irem() is called and r28 is copied to r0.
+ */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_R26_REGNO, r1);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__irem);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Unsigned integer remainder u % v.  Its address is handed to calli() by
+ * _remr_u and _remi_u.
+ */
+static unsigned int
+__urem(unsigned int u, unsigned int v)
+{
+    unsigned int       remainder = u % v;
+
+    return (remainder);
+}
+
+/*
+ * Unsigned remainder implemented as a call to __urem(): r1 and r2 are
+ * copied to r26 and r25, and after the call r28 is copied to r0.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    movr(_R26_REGNO, r1);
+    movr(_R25_REGNO, r2);
+    calli((jit_word_t)__urem);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Immediate form of _remr_u: r1 is copied to r26, the constant i0 is
+ * loaded into r25, __urem() is called and r28 is copied to r0.
+ */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(_R26_REGNO, r1);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__urem);
+    movr(r0, _R28_REGNO);
+}
+
+/*
+ * Signed quotient and remainder of u and v, returned together in an
+ * idiv_t.  Its address is handed to calli() by _qdivr and _qdivi.
+ */
+static idiv_t
+__idivrem(int u, int v)
+{
+    idiv_t     result;
+
+    result.rem = u % v;
+    result.quo = u / v;
+    return (result);
+}
+
+/*
+ * Signed divide producing two result registers, implemented as a call to
+ * __idivrem(): r2 and r3 are copied to r26 and r25, and after the call
+ * r28 is copied to r0 and r29 to r1.
+ */
+static void
+_qdivr(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    movr(_R26_REGNO, r2);
+    movr(_R25_REGNO, r3);
+    calli((jit_word_t)__idivrem);
+    movr(r0, _R28_REGNO);
+    movr(r1, _R29_REGNO);
+}
+
+/*
+ * Immediate form of _qdivr: r2 is copied to r26, the constant i0 is
+ * loaded into r25, __idivrem() is called, then r28 is copied to r0 and
+ * r29 to r1.
+ */
+static void
+_qdivi(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    movr(_R26_REGNO, r2);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__idivrem);
+    movr(r0, _R28_REGNO);
+    movr(r1, _R29_REGNO);
+}
+
+/*
+ * Unsigned quotient and remainder of u and v, returned together in a
+ * udiv_t.  Its address is handed to calli() by _qdivr_u and _qdivi_u.
+ */
+static udiv_t
+__udivrem(unsigned int u, unsigned int v)
+{
+    udiv_t     result;
+
+    result.rem = u % v;
+    result.quo = u / v;
+    return (result);
+}
+
+/*
+ * Unsigned divide producing two result registers, implemented as a call
+ * to __udivrem(): r2 and r3 are copied to r26 and r25, and after the
+ * call r28 is copied to r0 and r29 to r1.
+ */
+static void
+_qdivr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    movr(_R26_REGNO, r2);
+    movr(_R25_REGNO, r3);
+    calli((jit_word_t)__udivrem);
+    movr(r0, _R28_REGNO);
+    movr(r1, _R29_REGNO);
+}
+
+/*
+ * Immediate form of _qdivr_u: r2 is copied to r26, the constant i0 is
+ * loaded into r25, __udivrem() is called, then r28 is copied to r0 and
+ * r29 to r1.
+ */
+static void
+_qdivi_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    movr(_R26_REGNO, r2);
+    movi(_R25_REGNO, i0);
+    calli((jit_word_t)__udivrem);
+    movr(r0, _R28_REGNO);
+    movr(r1, _R29_REGNO);
+}
+
+/*
+ * Immediate form of andr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    andr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of orr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    orr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of xorr(): i0 is loaded into a scratch GPR, which is
+ * passed as the second source operand and then released.
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Left shift of r1 by the amount in r2, result in r0.  SUBI writes a
+ * value derived from 0x1f and r2 into r1 (presumably 0x1f - r2), MTSAR
+ * consumes it, and DEPW_Z(r1, 32, r0) produces the result.
+ * Overwrites _R1_REGNO.
+ */
+static void
+_lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    SUBI(0x1f, r2, _R1_REGNO);
+    MTSAR(_R1_REGNO);
+    DEPW_Z(r1, 32, r0);
+}
+
+/*
+ * Right shift of r1 by the amount in r2, result in r0, using EXTRW.
+ * SUBI writes a value derived from 0x1f and r2 into r1 (presumably
+ * 0x1f - r2) and MTSAR consumes it before the extract.
+ * Overwrites _R1_REGNO.
+ */
+static void
+_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    SUBI(0x1f, r2, _R1_REGNO);
+    MTSAR(_R1_REGNO);
+    EXTRW(r1, 32, r0);
+}
+
+/*
+ * Unsigned counterpart of _rshr: same SUBI/MTSAR setup, but the extract
+ * is done with EXTRW_U.  Overwrites _R1_REGNO.
+ */
+static void
+_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    SUBI(0x1f, r2, _R1_REGNO);
+    MTSAR(_R1_REGNO);
+    EXTRW_U(r1, 32, r0);
+}
+
+/*
+ * Set r0 from comparing r1 with r2 under condition code c: a CMPCLR_
+ * targeting r0 followed by LDI(1, r0).
+ * NOTE(review): presumably CMPCLR_ clears r0 and conditionally nullifies
+ * the LDI, leaving 0 or 1 -- confirm against the CMPCLR_ definition.
+ */
+static void
+_cmpr(jit_state_t *_jit, jit_word_t c,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPCLR_(c, r1, r2, r0);
+    LDI(1, r0);
+}
+
+/*
+ * Immediate form of the compare-and-set sequence.  When i0 is within
+ * [-16, 15] the immediate compare CMPICLR_ is used with condition ci;
+ * otherwise i0 is loaded into a scratch GPR and the register compare
+ * CMPCLR_ is used with condition c.  Either way LDI(1, r0) follows.
+ */
+static void
+_cmpi(jit_state_t *_jit, jit_word_t c, jit_word_t ci,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+
+    if (i0 < -16 || i0 > 15) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPCLR_(c, r1, rn(tmp), r0);
+        jit_unget_reg(tmp);
+    }
+    else
+        CMPICLR_(ci, i0, r1, r0);
+    LDI(1, r0);
+}
+
+/* Signed-byte load: ldr_uc() into r0, then extr_c() applied to r0 in place. */
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    ldr_uc(r0, r1);
+    extr_c(r0, r0);
+}
+
+/* Signed-byte load from address i0: ldi_uc() into r0, then extr_c() in place. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    ldi_uc(r0, i0);
+    extr_c(r0, r0);
+}
+
+/* Signed-byte indexed load: ldxr_uc(r0, r1, r2), then extr_c() on r0 in place. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    ldxr_uc(r0, r1, r2);
+    extr_c(r0, r0);
+}
+
+/* Signed-byte load at r1 + i0: ldxi_uc(r0, r1, i0), then extr_c() on r0 in place. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_uc(r0, r1, i0);
+    extr_c(r0, r0);
+}
+
+/*
+ * Unsigned-byte load from the absolute address i0 into r0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an LDBL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and LDBL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       LDBL(i0, _R0_REGNO, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       LDBL(sign_extend(i0, 11), rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Unsigned-byte load from r1 + i0 into r0.  The encoding is picked by
+ * offset range: LDBI for [-16, 15], LDBL for the rest of the signed
+ * 14-bit range [-8192, 8191]; a larger offset is loaded into a scratch
+ * GPR and handled by ldxr_uc().
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       LDBI(i0, r1, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       LDBL(i0, r1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_uc(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* Signed-halfword load: ldr_us() into r0, then extr_s() applied to r0 in place. */
+static void
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    ldr_us(r0, r1);
+    extr_s(r0, r0);
+}
+
+/* Signed-halfword load from address i0: ldi_us() into r0, then extr_s() in place. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    ldi_us(r0, i0);
+    extr_s(r0, r0);
+}
+
+/* Signed-halfword indexed load: ldxr_us(r0, r1, r2), then extr_s() on r0 in place. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    ldxr_us(r0, r1, r2);
+    extr_s(r0, r0);
+}
+
+/* Signed-halfword load at r1 + i0: ldxi_us(r0, r1, i0), then extr_s() on r0 in place. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_us(r0, r1, i0);
+    extr_s(r0, r0);
+}
+
+/*
+ * Unsigned-halfword load from the absolute address i0 into r0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an LDHL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and LDHL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       LDHL(i0, _R0_REGNO, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       LDHL(sign_extend(i0, 11), rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Unsigned-halfword load from r1 + i0 into r0.  The encoding is picked
+ * by offset range: LDHI for [-16, 15], LDHL for the rest of the signed
+ * 14-bit range [-8192, 8191]; a larger offset is loaded into a scratch
+ * GPR and handled by ldxr_us().
+ */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       LDHI(i0, r1, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       LDHL(i0, r1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_us(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Word load from the absolute address i0 into r0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an LDWL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and LDWL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       LDWL(i0, _R0_REGNO, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       LDWL(sign_extend(i0, 11), rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Word load from r1 + i0 into r0.  The encoding is picked by offset
+ * range: LDWI for [-16, 15], LDWL for the rest of the signed 14-bit
+ * range [-8192, 8191]; a larger offset is loaded into a scratch GPR and
+ * handled by ldxr_ui().
+ */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       LDWI(i0, r1, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       LDWL(i0, r1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_ui(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Byte store of r0 to the absolute address i0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an STBL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and STBL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       STBL(r0, i0, _R0_REGNO);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       STBL(r0, sign_extend(i0, 11), rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Byte store of r2 at address r0 + r1: the sum is formed in a scratch
+ * GPR with addr() and the store itself is delegated to str_c().
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_c(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Byte store of r1 at address r0 + i0.  The encoding is picked by offset
+ * range: STBI for [-16, 15], STBL for the rest of the signed 14-bit
+ * range [-8192, 8191]; for a larger offset the address is computed into
+ * a scratch GPR with addi() and the store is delegated to str_c().
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       STBI(r1, i0, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       STBL(r1, i0, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_c(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Halfword store of r0 to the absolute address i0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an STHL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and STHL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       STHL(r0, i0, _R0_REGNO);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       STHL(r0, sign_extend(i0, 11), rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Halfword store of r2 at address r0 + r1: the sum is formed in a
+ * scratch GPR with addr() and the store itself is delegated to str_s().
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_s(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Halfword store of r1 at address r0 + i0.  The encoding is picked by
+ * offset range: STHI for [-16, 15], STHL for the rest of the signed
+ * 14-bit range [-8192, 8191]; for a larger offset the address is
+ * computed into a scratch GPR with addi() and the store is delegated to
+ * str_s().
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       STHI(r1, i0, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       STHL(r1, i0, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_s(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Word store of r0 to the absolute address i0.
+ * An address within the signed 14-bit range [-8192, 8191] is encoded
+ * directly as an STWL displacement off _R0_REGNO.  Any other address is
+ * split: LDIL loads i0 with its low 11 bits cleared into a scratch GPR
+ * and STWL supplies the remainder as displacement.
+ * NOTE(review): _movi pairs LDIL(i0 & ~0x7ff) with the plain low bits
+ * (i0 & 0x7ff), whereas this path uses sign_extend(i0, 11) -- confirm
+ * both halves still add up to i0 when bit 10 of i0 is set.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -8192 && i0 <= 8191)
+       STWL(r0, i0, _R0_REGNO);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       LDIL(i0 & ~0x7ff, rn(reg));
+       STWL(r0, sign_extend(i0, 11), rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Word store of r2 at address r0 + r1: the sum is formed in a scratch
+ * GPR with addr() and the store itself is delegated to str_i().
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_i(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Word store of r1 at address r0 + i0.  The encoding is picked by offset
+ * range: STWI for [-16, 15], STWL for the rest of the signed 14-bit
+ * range [-8192, 8191]; for a larger offset the address is computed into
+ * a scratch GPR with addi() and the store is delegated to str_i().
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 >= -16 && i0 <= 15)
+       STWI(r1, i0, r0);
+    else if (i0 >= -8192 && i0 <= 8191)
+       STWL(r1, i0, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_i(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Compare-and-branch on two registers under condition c.  The branch
+ * displacement is the word distance from the branch to i0, minus 2.
+ * A NOP() follows the branch.  Returns the code address of the branch.
+ */
+static jit_word_t
+_bcmpr(jit_state_t *_jit, jit_word_t c,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at = _jit->pc.w;
+    jit_word_t         disp = ((i0 - at) >> 2) - 2;
+
+    CMPB_N_(c, r0, r1, disp);
+    NOP();
+    return (at);
+}
+
+/*
+ * Compare-and-branch of r0 against the constant i1.  A constant within
+ * [-16, 15] uses the immediate form CMPIB_N_ with condition ci; any other
+ * constant is loaded into a non-spillable scratch GPR and CMPB_N_ is used
+ * with condition c.  The returned address is that of the branch itself,
+ * so it is sampled after the constant load.  A NOP() follows the branch.
+ */
+static jit_word_t
+_bcmpi(jit_state_t *_jit, jit_word_t c, jit_word_t ci,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp;
+
+    if (i1 < -16 || i1 > 15) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        at = _jit->pc.w;
+        CMPB_N_(c, r0, rn(tmp), ((i0 - at) >> 2) - 2);
+        jit_unget_reg(tmp);
+    }
+    else {
+        at = _jit->pc.w;
+        CMPIB_N_(ci, i1, r0, ((i0 - at) >> 2) - 2);
+    }
+    NOP();
+    return (at);
+}
+
+/*
+ * Branch on the mask r0 & r1.  The AND is formed in a non-spillable
+ * scratch GPR; when c is true the branch is bnei(..., 0), otherwise
+ * beqi(..., 0).  Returns whatever that branch helper returns.
+ */
+static jit_word_t
+_bmxr(jit_state_t *_jit, jit_bool_t c,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    andr(rn(tmp), r0, r1);
+    if (c)
+        at = bnei(i0, rn(tmp), 0);
+    else
+        at = beqi(i0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Branch on the mask r0 & i1.  The constant is loaded into a
+ * non-spillable scratch GPR and ANDed with r0 in place; when c is true
+ * the branch is bnei(..., 0), otherwise beqi(..., 0).  Returns whatever
+ * that branch helper returns.
+ */
+static jit_word_t
+_bmxi(jit_state_t *_jit, jit_bool_t c,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(tmp), i1);
+    andr(rn(tmp), r0, rn(tmp));
+    if (c)
+        at = bnei(i0, rn(tmp), 0);
+    else
+        at = beqi(i0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Add-and-branch using ADDB_N_SV on r1 and r0, targeting i0.  The
+ * displacement is the word distance from the branch to i0, minus 2.
+ * A NOP() follows.  Returns the code address of the branch.
+ */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at = _jit->pc.w;
+    jit_word_t         disp = ((i0 - at) >> 2) - 2;
+
+    ADDB_N_SV(r1, r0, disp);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of boaddr().  A constant within [-16, 15] uses
+ * ADDIB_N_SV followed by a NOP(); any other constant is loaded into a
+ * non-spillable scratch GPR and boaddr() is used.  Returns the code
+ * address of the branch.
+ */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp;
+
+    if (i1 < -16 || i1 > 15) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        at = boaddr(i0, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return (at);
+    }
+    at = _jit->pc.w;
+    ADDIB_N_SV(i1, r0, ((i0 - at) >> 2) - 2);
+    NOP();
+    return (at);
+}
+
+/*
+ * Add-and-branch using ADDB_N_UV on r1 and r0, targeting i0.  The
+ * displacement is the word distance from the branch to i0, minus 2.
+ * A NOP() follows.  Returns the code address of the branch.
+ */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at = _jit->pc.w;
+    jit_word_t         disp = ((i0 - at) >> 2) - 2;
+
+    ADDB_N_UV(r1, r0, disp);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of boaddr_u().  A constant within [-16, 15] uses
+ * ADDIB_N_UV followed by a NOP(); any other constant is loaded into a
+ * non-spillable scratch GPR and boaddr_u() is used.  Returns the code
+ * address of the branch.
+ */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp;
+
+    if (i1 < -16 || i1 > 15) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        at = boaddr_u(i0, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return (at);
+    }
+    at = _jit->pc.w;
+    ADDIB_N_UV(i1, r0, ((i0 - at) >> 2) - 2);
+    NOP();
+    return (at);
+}
+
+/*
+ * Add-and-branch using ADDB_N_NSV on r1 and r0, targeting i0.  The
+ * displacement is the word distance from the branch to i0, minus 2.
+ * A NOP() follows.  Returns the code address of the branch.
+ */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at = _jit->pc.w;
+    jit_word_t         disp = ((i0 - at) >> 2) - 2;
+
+    ADDB_N_NSV(r1, r0, disp);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of bxaddr().  A constant within [-16, 15] uses
+ * ADDIB_N_NSV followed by a NOP(); any other constant is loaded into a
+ * non-spillable scratch GPR and bxaddr() is used.  Returns the code
+ * address of the branch.
+ */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp;
+
+    if (i1 < -16 || i1 > 15) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        at = bxaddr(i0, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return (at);
+    }
+    at = _jit->pc.w;
+    ADDIB_N_NSV(i1, r0, ((i0 - at) >> 2) - 2);
+    NOP();
+    return (at);
+}
+
+/*
+ * Add-and-branch using ADDB_N_NUV on r1 and r0, targeting i0.  The
+ * displacement is the word distance from the branch to i0, minus 2.
+ * A NOP() follows.  Returns the code address of the branch.
+ */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at = _jit->pc.w;
+    jit_word_t         disp = ((i0 - at) >> 2) - 2;
+
+    ADDB_N_NUV(r1, r0, disp);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of bxaddr_u().  A constant within [-16, 15] uses
+ * ADDIB_N_NUV followed by a NOP(); any other constant is loaded into a
+ * non-spillable scratch GPR and bxaddr_u() is used.  Returns the code
+ * address of the branch.
+ */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t        tmp;
+
+    if (i1 < -16 || i1 > 15) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        at = bxaddr_u(i0, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return (at);
+    }
+    at = _jit->pc.w;
+    ADDIB_N_NUV(i1, r0, ((i0 - at) >> 2) - 2);
+    NOP();
+    return (at);
+}
+
+/*
+ * Subtract r1 from r0 in place with SUB_NSV, then branch to i0.  The
+ * branch is nullified when there was no signed overflow, so it is taken
+ * only on overflow.  Returns the code address of the branch, which
+ * comes after the SUB.
+ */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_word_t         disp;
+
+    SUB_NSV(r0, r1, r0);
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 2) - 2;
+    /* null'ed if no signed overflow */
+    B_N(disp, _R0_REGNO);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of bosubr(): the constant i1 is loaded into _R1_REGNO
+ * (overwriting it) and passed as the second operand.
+ */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    movi(_R1_REGNO, i1);
+    return (bosubr(i0, r0, _R1_REGNO));
+}
+
+/*
+ * Subtract r1 from r0 in place with SUB, run SUB_B on r0/r0 with result
+ * in r1, then branch to i0 when r1 is non-zero (bnei).
+ * NOTE(review): presumably SUB_B captures the borrow of the preceding
+ * SUB so that r1 != 0 means unsigned overflow -- confirm.
+ * Overwrites _R1_REGNO.
+ */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    SUB(r0, r1, r0);
+    SUB_B(_R0_REGNO, _R0_REGNO, _R1_REGNO);
+    return (bnei(i0, _R1_REGNO, 0));
+}
+
+/*
+ * Immediate form of _bosubr_u: i1 is loaded into _R1_REGNO, subtracted
+ * from r0 in place with SUB, then SUB_B on r0/r0 writes r1 and the
+ * branch to i0 is taken when r1 is non-zero (bnei).  r1 serves first as
+ * the operand and then as the SUB_B result.
+ */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    movi(_R1_REGNO, i1);
+    SUB(r0, _R1_REGNO, r0);
+    SUB_B(_R0_REGNO, _R0_REGNO, _R1_REGNO);
+    return (bnei(i0, _R1_REGNO, 0));
+}
+
+/*
+ * Subtract r1 from r0 in place with SUB_SV, then branch to i0.  The
+ * branch is nullified when there was a signed overflow, so it is taken
+ * only when the subtraction did not overflow.  Returns the code address
+ * of the branch, which comes after the SUB.
+ */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_word_t         disp;
+
+    SUB_SV(r0, r1, r0);
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 2) - 2;
+    /* null'ed if signed overflow */
+    B_N(disp, _R0_REGNO);
+    NOP();
+    return (at);
+}
+
+/*
+ * Immediate form of bxsubr(): the constant i1 is loaded into _R1_REGNO
+ * (overwriting it) and passed as the second operand.
+ */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    movi(_R1_REGNO, i1);
+    return (bxsubr(i0, r0, _R1_REGNO));
+}
+
+/*
+ * Subtract r1 from r0 in place with SUB, run SUB_B on r0/r0 with result
+ * in r1, then branch to i0 when r1 is zero (beqi).
+ * NOTE(review): presumably SUB_B captures the borrow of the preceding
+ * SUB so that r1 == 0 means no unsigned overflow -- confirm.
+ * Overwrites _R1_REGNO.
+ */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    SUB(r0, r1, r0);
+    SUB_B(_R0_REGNO, _R0_REGNO, _R1_REGNO);
+    return (beqi(i0, _R1_REGNO, 0));
+}
+
+/*
+ * Immediate form of _bxsubr_u: i1 is loaded into _R1_REGNO, subtracted
+ * from r0 in place with SUB, then SUB_B on r0/r0 writes r1 and the
+ * branch to i0 is taken when r1 is zero (beqi).  r1 serves first as the
+ * operand and then as the SUB_B result.
+ */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    movi(_R1_REGNO, i1);
+    SUB(r0, _R1_REGNO, r0);
+    SUB_B(_R0_REGNO, _R0_REGNO, _R1_REGNO);
+    return (beqi(i0, _R1_REGNO, 0));
+}
+
+/* Jump through register r0, emitted as BV_N(_R0_REGNO, r0). */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    BV_N(_R0_REGNO, r0);
+}
+
+/* Emit an unconditional jump to the absolute address i0.
+ *
+ * When the word displacement from the current pc fits in
+ * [-32768, 32767] a pc-relative B_N is emitted.  Otherwise the target is
+ * loaded into _R1_REGNO and reached with the same register-indirect BV_N
+ * that _jmpr uses. */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    /* word displacement, measured from 8 bytes past the branch */
+    w = ((i0 - _jit->pc.w) >> 2) - 2;
+    if (w >= -32768 && w <= 32767)
+       B_N(w, _R0_REGNO);
+    else {
+       /* BV_N jumps to the address held in the register, so it must be
+        * given the absolute target i0 and not the displacement w */
+       movi(_R1_REGNO, i0);
+       BV_N(_R0_REGNO, _R1_REGNO);
+    }
+}
+
+/* Emit a patchable jump: the target i0 is loaded into _R1_REGNO with
+ * movi_p() and then jumped to through that register.  Returns the value
+ * produced by movi_p(), which callers can later hand to the patcher. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    patch = movi_p(_R1_REGNO, i0);
+    jmpr(_R1_REGNO);
+    return (patch);
+}
+
+/* Emit a call through register r0.
+ *
+ * The target is first copied to _R1_REGNO.  As the inline comments show,
+ * the sequence is an inlined $$dyncall: when bit 2 of the value is set,
+ * the value (with its two low bits cleared) is treated as a pointer to a
+ * two-word descriptor from which _R19_REGNO and the real target are
+ * loaded; otherwise the value is called directly.  Two forward branches
+ * are emitted with placeholder targets and fixed up with patch_at() once
+ * their destinations are known. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_word_t         dyn;
+    jit_word_t         imm;
+    if (r0 != _R1_REGNO)
+       COPY(r0, _R1_REGNO);
+    /* inline $$dyncall */
+    /* imm: address of the bit test, patched below to reach the direct call */
+    imm = _jit->pc.w;
+    BBI_N_GE(_R1_REGNO, 30, 0);                /*   if (r1 & 2) {              */
+    DEPWRI(0, 31, 2, _R1_REGNO);       /*      r1 &= ~2;               */
+    LDWI(4, _R1_REGNO, _R19_REGNO);    /*      r19 = r1[1];            */
+    LDWI(0, _R1_REGNO, _R1_REGNO);     /*      r1  = r1[0];            */
+                                       /*   }                          */
+    BVE_L(_R1_REGNO);
+    /* NOTE(review): presumably executed in the delay slot of the BVE_L
+     * above, saving the return pointer at sp-24 -- confirm */
+    STWL(_RP_REGNO, -24, _SP_REGNO);
+    /* dyn: address of the jump over the direct call; its target is a
+     * placeholder (its own address) until patched at the end */
+    dyn = _jit->pc.w;
+    jmpi(_jit->pc.w);
+    /* direct call path: the bit test at imm lands here */
+    patch_at(imm, _jit->pc.w);
+    BVE_L_N(_R1_REGNO);
+    NOP();
+    /* both paths continue here */
+    patch_at(dyn, _jit->pc.w);
+}
+
+/* Emit a call to the constant address i0.
+ *
+ * If bit 2 of i0 is set, i0 (with its two low bits cleared) is used as a
+ * pointer to a two-word descriptor: word 1 is loaded into _R19_REGNO and
+ * word 0 is the address actually called.  Otherwise i0 is called
+ * directly, pc-relative when the word displacement fits in
+ * [-32768, 32767] and through _R1_REGNO when it does not. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         disp;
+    if (i0 & 2) {
+       /* descriptor call */
+       i0 &= -4;
+       movi(_R1_REGNO, i0);
+       LDWI(4, _R1_REGNO, _R19_REGNO);
+       LDWI(0, _R1_REGNO, _R1_REGNO);
+       BVE_L(_R1_REGNO);
+       STWL(_RP_REGNO, -24, _SP_REGNO);
+       return;
+    }
+    /* direct call; displacement is in words from 8 bytes past the branch */
+    disp = ((i0 - _jit->pc.w) >> 2) - 2;
+    if (disp < -32768 || disp > 32767) {
+       movi(_R1_REGNO, i0);
+       BVE_L_N(_R1_REGNO);
+    }
+    else
+       B_L_N(disp);
+    NOP();
+}
+
+/* Emit a patchable call: the target i0 is loaded into _R1_REGNO with
+ * movi_p() and then called through that register.  Returns the value
+ * produced by movi_p(), which callers can later hand to the patcher. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    patch = movi_p(_R1_REGNO, i0);
+    callr(_R1_REGNO);
+    return (patch);
+}
+
+/* General purpose registers that _prolog saves and _epilog restores when
+ * they are marked in the function's regset.  The array order fixes the
+ * order of the 4-byte save slots. */
+static jit_int32_t gr[] = {
+     _R4,  _R5,  _R6,  _R7,  _R8,
+     _R9, _R10, _R11, _R12, _R13,
+    _R14, _R15, _R16, _R17, _R18
+};
+
+/* Floating point registers that _prolog saves and _epilog restores when
+ * they are marked in the function's regset.  The array order fixes the
+ * order of the 8-byte save slots, which follow the slots used for gr[]. */
+static jit_int32_t fr[] = {
+    _F12, _F13, _F14, _F15, _F16,
+    _F17, _F18, _F19, _F20, _F21
+};
+
+/* Emit the function prolog: compute the frame size, save the return
+ * pointer, set up the frame and stack pointers, and save the registers
+ * from gr[] and fr[] that are marked in the function's regset.  Nothing
+ * is emitted when the function is flagged assume_frame.  The node
+ * argument is not used by this function. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                regno;
+    jit_word_t         offset;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       /* hppa stack grows up */
+       assert(_jitc->function->self.aoff <= _jitc->function->frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = _jitc->function->frame;
+    }
+    if (_jitc->function->allocar) {
+       /* round aoff up to a multiple of 64 */
+       _jitc->function->self.aoff += 63;
+       _jitc->function->self.aoff &= -64;
+    }
+    /* total frame size, rounded up to a multiple of 64 */
+    _jitc->function->stack = ((_jitc->function->self.aoff -
+                              _jitc->function->self.alen -
+                              _jitc->function->self.size) + 63) & -64;
+
+    /* Save stack frame (FIXME Only required if non leaf) */
+    STWL(_RP_REGNO, -20, _SP_REGNO);
+
+    /* Create stack frame */
+    /* the old frame pointer travels through _R1_REGNO and is stored by
+     * the same instruction that advances sp by the frame size */
+    COPY(_FP_REGNO, _R1_REGNO);
+    COPY(_SP_REGNO, _FP_REGNO);
+    STWL_MA(_R1_REGNO, _jitc->function->stack, _SP_REGNO);
+
+    /* Save any modified callee save registers */
+    /* 140 = 15 * 4 bytes for gr[] + 10 * 8 bytes for fr[]; the slot
+     * offset advances even for registers that are not saved, so each
+     * register always has the same slot */
+    offset = alloca_offset - 140;
+    for (regno = 0; regno < jit_size(gr); regno++, offset += 4) {
+       if (jit_regset_tstbit(&_jitc->function->regset, gr[regno]))
+           stxi(offset, _FP_REGNO, rn(gr[regno]));
+    }
+    for (regno = 0; regno < jit_size(fr); regno++, offset += 8) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fr[regno]))
+           stxi_d(offset, _FP_REGNO, rn(fr[regno]));
+    }
+
+    if (_jitc->function->allocar) {
+       /* store the current aoff in the frame slot at aoffoff */
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* store registers _R26 - regno, for regno from 3 down to vagp,
+        * at params_offset - regno * 4 - 4 from the frame pointer;
+        * presumably so that va_arg can read them from memory */
+       for (regno = 3; regno >= _jitc->function->vagp; --regno)
+           stxi(params_offset - regno * 4 - 4, _FP_REGNO, rn(_R26 - regno));
+    }
+}
+
+/* Emit the function epilog: reload the registers from gr[] and fr[] that
+ * are marked in the function's regset, reload the return pointer,
+ * restore the stack and frame pointers and return.  Nothing is emitted
+ * when the function is flagged assume_frame.  The node argument is not
+ * used by this function. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                regno;
+    jit_word_t         offset;
+
+    if (_jitc->function->assume_frame)
+       return;
+    /* Restore any modified callee save registers */
+    /* same slot layout as in _prolog: 15 * 4 + 10 * 8 = 140 bytes */
+    offset = alloca_offset - 140;
+    for (regno = 0; regno < jit_size(gr); regno++, offset += 4) {
+       if (jit_regset_tstbit(&_jitc->function->regset, gr[regno]))
+           ldxi(rn(gr[regno]), _FP_REGNO, offset);
+    }
+    for (regno = 0; regno < jit_size(fr); regno++, offset += 8) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fr[regno]))
+           ldxi_d(rn(fr[regno]), _FP_REGNO, offset);
+    }
+
+    /* Restore stack frame (FIXME Only required if non leaf) */
+    LDWL(-20, _FP_REGNO, _RP_REGNO);
+    /* NOTE(review): the next two instructions appear to undo the
+     * STWL_MA of the prolog (sp = fp + stack, then step sp back by stack
+     * while reloading the saved fp) -- confirm the _MB semantics */
+    LDO(_jitc->function->stack, _FP_REGNO, _SP_REGNO);
+    LDWL_MB(-_jitc->function->stack, _SP_REGNO, _FP_REGNO);
+    /* return through the return pointer; HP-UX uses BVE_N instead of BV_N */
+#if defined(__hpux)
+    BVE_N(_RP_REGNO);
+#else
+    BV_N(_R0_REGNO, _RP_REGNO);
+#endif
+}
+
+/* Emit code for va_start: set r0 to the frame pointer plus the offset
+ * of the first variable argument. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_word_t         offset;
+    /* When the first variable argument was passed in a register, start
+     * inside the parameter area addressed by params_offset; otherwise
+     * start at the first stack argument. */
+    if (jit_arg_reg_p(_jitc->function->vagp))
+       offset = params_offset - _jitc->function->vagp * 4;
+    else
+       offset = _jitc->function->self.size;
+    addi(r0, _FP_REGNO, offset);
+}
+
+/* Emit code for va_arg of a word-sized value: r1 holds the va_list
+ * pointer, which is moved down by 4 bytes before the argument is loaded
+ * from it into r0.  Only valid inside a function declared varargs. */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Update vararg stack pointer. */
+    subi(r1, r1, 4);
+
+    /* Load argument. */
+    ldr(r0, r1);
+}
+
+/* Rewrite the instruction(s) at address instr so that they refer to
+ * label.  The kind of instruction is recognized from the top six bits of
+ * its first word; any opcode not listed below calls abort(). */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          w;
+    /* view the integer address as a pointer to 32-bit instruction words */
+    union {
+       jit_uint32_t    *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    switch (u.i[0] >> 26) {
+       case 0x8:                               /* LDIL */
+           /* absolute address split over an LDIL/LDO pair: the bits
+            * above the low 11 go into the LDIL ... */
+           u.i[0] &= ~0x1fffff;
+           u.i[0] |= re_assemble_21((label & ~0x7ff) >> 11);
+           assert((u.i[1] >> 26) == 0xd);      /* LDO */
+           /* ... and the low 11 bits into the LDO that must follow */
+           u.i[1] &= ~0xffff;
+           u.i[1] |= re_assemble_16(label & 0x7ff);
+           break;
+       case 0x20:      case 0x22:              /* CMPB */
+       case 0x21:      case 0x23:              /* CMPIB */
+       case 0x28:      case 0x2a:              /* ADDB */
+       case 0x29:      case 0x2b:              /* ADDIB */
+       case 0x31:                              /* BBI */
+           /* word displacement from 8 bytes past the instruction; it
+            * must lie in [-2048, 2047] */
+           w = ((label - instr) >> 2) - 2;
+           assert(w >= -2048 && w <= 2047);
+           u.i[0] &= ~0x1ffd;
+           u.i[0] |= re_assemble_12(w);
+           break;
+       case 0x3a:                              /* B */
+           /* word displacement from 8 bytes past the instruction; it
+            * must lie in [-32768, 32767] */
+           w = ((label - instr) >> 2) - 2;
+           assert(w >= -32768 && w <= 32767);
+           u.i[0] &= ~0x1f1ffd;
+           u.i[0] |= re_assemble_17(w);
+           break;
+       default:
+           abort();
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_hppa-fpu.c b/deps/lightning/lib/jit_hppa-fpu.c
new file mode 100644 (file)
index 0000000..5fa6856
--- /dev/null
@@ -0,0 +1,1070 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+
+/* FIXME should actually be hw model/version/etc or other constraint
+ * that causes a SIGSEGV/SIGILL if using these instructions */
+/* FSTXR and FLDXR select, further below, whether stxr_f/stxr_d and
+ * ldxr_f/ldxr_d map directly onto the FSTW/FSTD and FLDW/FLDD macros (1)
+ * or onto the _stxr_* and _ldxr_* helper functions (0).  The condition
+ * is hard-wired to 1, so both are currently always 0. */
+#if 1 //defined(__hpux)
+#  define FSTXR                                0
+#  define FLDXR                                0
+#else
+#  define FSTXR                                1
+#  define FLDXR                                1
+#endif
+
+/* Emitters for the instruction formats used by the floating point
+ * instructions.  Each fNN macro passes the implicit _jit state to the
+ * _fNN function of the same number.  f43 reuses f1, f47 and f48 share
+ * _f47_48, and f49, f51 and f52 share _f49_52.  Emitters marked
+ * maybe_unused may have no caller in this file. */
+#define f39(o,b,x,t)                   _f39(_jit,o,b,x,t)
+static void _f39(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f40(o,b,x,r)                   _f40(_jit,o,b,x,r)
+static void _f40(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
+#define f41(o,b,x,t)                   _f41(_jit,o,b,x,t)
+static void _f41(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f42(o,b,i,r)                   _f42(_jit,o,b,i,r)
+static void _f42(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f43(o,b,t,i)                   f1(o,b,t,i)
+#define f45(o,r,a,b,fmt,c,d,e,t)       _f45(_jit,o,r,a,b,fmt,c,d,e,t)
+static void _f45(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f46(o,r,a,s,df,sf,b,c,d,t)     _f46(_jit,o,r,a,s,df,sf,b,c,d,t)
+static void _f46(jit_state_t*,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f47(o,r1,r2,a,fmt,b,c,d,t)     f47_48(o,r1,r2,a,fmt,b,c,d,t)
+#define f48(o,r1,r2,a,fmt,b,c,d,t)     f47_48(o,r1,r2,a,fmt,b,c,d,t)
+#define f47_48(o,r1,r2,y,fmt,b,c,d,t)  _f47_48(_jit,o,r1,r2,y,fmt,b,c,d,t)
+static void _f47_48(jit_state_t*,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f49(o,r,a,b,c,f,d,e,g,h,i,t)   f49_52(o,r,a,b,c,f,d,e,g,h,i,t)
+#define f51(o,r1,r2,y,a,f,b,d,e,g,h,c) f49_52(o,r1,r2,y,a,f,b,d,e,g,h,c)
+#define f52(o,r1,r2,a,b,f,c,d,e,g,h,t) f49_52(o,r1,r2,a,b,f,c,d,e,g,h,t)
+#define f49_52(o,r1,r2,y,v,f,a,b,u,c,d,t) _f49_52(_jit,o,r1,r2,y,v,f,a,b,u,c,d,t)
+static void _f49_52(jit_state_t*,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define f53(o,r1,r2,ta,ra,f,tm)                _f53(_jit,o,r1,r2,ta,ra,f,tm)
+static void _f53(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+#define f54(o,r1,r2,a,b,f,c,d,e,g,t)   _f54(_jit,o,r1,r2,a,b,f,c,d,e,g,t)
+static void _f54(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused;
+/* FABS and FADD.  In the generic NAME_(f,...) form f is the format
+ * argument; the _S and _D variants pass 0 and 1 for it, and the _Q
+ * variants use a different emitter with format 3.
+ * NOTE(review): S/D/Q presumably mean single/double/quad precision. */
+#define FABS_(f,r,t)                   f49(0xe,r,0,3,0,f,0,0,0,0,0,t)
+#define FABS_S(r,t)                    FABS_(0,r,t)
+#define FABS_D(r,t)                    FABS_(1,r,t)
+#define FABS_Q(r,t)                    f45(0xc,r,0,3,3,0,0,0,t)
+#define FADD_(f,r1,r2,t)               f52(0xe,r1,r2,0,0,f,3,0,0,0,0,t)
+#define FADD_S(r1,r2,t)                        FADD_(0,r1,r2,t)
+#define FADD_D(r1,r2,t)                        FADD_(1,r1,r2,t)
+#define FADD_Q(r1,r2,t)                        f48(0xc,r1,r2,0,3,3,0,0,t)
+/* Single-bit masks named after the four possible comparison outcomes
+ * (greater, less, equal, unordered). */
+#define FPSR_GT                                1
+#define FPSR_LT                                2
+#define FPSR_EQ                                4
+#define FPSR_UN                                8
+/*
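+    Actually these are reversed: each FCMP_* constant defined below is the
+    code of the negated condition (e.g. FCMP_LT is 23, "!<"), which is
+    easier for the FTEST after the FCMP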
+    fcmp,dbl,false? fr4,fr12           0
+    fcmp,dbl,false fr4,fr12            1
+    fcmp,dbl,? fr4,fr12                        2
+    fcmp,dbl,!<=> fr4,fr12             3               ORD
+    fcmp,dbl,= fr4,fr12                        4               NE
+    fcmp,dbl,=t fr4,fr12               5
+    fcmp,dbl,?= fr4,fr12               6
+    fcmp,dbl,!<> fr4,fr12              7               LTGT
+    fcmp,dbl,!?>= fr4,fr12             8
+    fcmp,dbl,< fr4,fr12                        9               UNGE
+    fcmp,dbl,?< fr4,fr12               10
+    fcmp,dbl,!>= fr4,fr12              11              GE
+    fcmp,dbl,!?> fr4,fr12              12
+    fcmp,dbl,<= fr4,fr12               13              UNGT
+    fcmp,dbl,?<= fr4,fr12              14
+    fcmp,dbl,!> fr4,fr12               15              GT
+    fcmp,dbl,!?<= fr4,fr12             16
+    fcmp,dbl,> fr4,fr12                        17              UNLE
+    fcmp,dbl,?> fr4,fr12               18
+    fcmp,dbl,!<= fr4,fr12              19              LE
+    fcmp,dbl,!?< fr4,fr12              20
+    fcmp,dbl,>= fr4,fr12               21              UNLT
+    fcmp,dbl,?>= fr4,fr12              22
+    fcmp,dbl,!< fr4,fr12               23              LT
+    fcmp,dbl,!?= fr4,fr12              24
+    fcmp,dbl,<> fr4,fr12               25              UNEQ
+    fcmp,dbl,!= fr4,fr12               26              EQ
+    fcmp,dbl,!=t fr4,fr12              27
+    fcmp,dbl,!? fr4,fr12               28
+    fcmp,dbl,<=> fr4,fr12              29              UNORD
+    fcmp,dbl,true? fr4,fr12            30
+    fcmp,dbl,true fr4,fr12             31
+ */
+/* Condition codes for FCMP, taken from the table in the comment above:
+ * each name is paired with the code listed next to it there. */
+#define FCMP_LT                                23
+#define FCMP_LE                                19
+#define FCMP_EQ                                26
+#define FCMP_GE                                11
+#define FCMP_GT                                15
+#define FCMP_NE                                4
+#define FCMP_UNLT                      21
+#define FCMP_UNLE                      17
+#define FCMP_UNEQ                      25
+#define FCMP_UNGE                      9
+#define FCMP_UNGT                      13
+#define FCMP_LTGT                      7
+#define FCMP_ORD                       3
+#define FCMP_UNORD                     29
+/* FCMP with an explicit condition code c; the _S_ and _D_ forms pass
+ * format 0 and 1, the _Q_ form uses a different emitter with format 3. */
+#define FCMP_(f,r1,r2,c)               f51(0xe,r1,r2,0,0,f,2,0,0,0,0,c)
+#define FCMP_S_(r1,r2,c)               FCMP_(0,r1,r2,c)
+#define FCMP_D_(r1,r2,c)               FCMP_(1,r1,r2,c)
+#define FCMP_Q_(r1,r2,c)               f47(0xc,r1,r2,0,3,2,0,0,c)
+/* One convenience macro per format and condition code. */
+#define FCMP_S_LT(r1,r2)               FCMP_S_(r1,r2,FCMP_LT)
+#define FCMP_D_LT(r1,r2)               FCMP_D_(r1,r2,FCMP_LT)
+#define FCMP_Q_LT(r1,r2)               FCMP_Q_(r1,r2,FCMP_LT)
+#define FCMP_S_LE(r1,r2)               FCMP_S_(r1,r2,FCMP_LE)
+#define FCMP_D_LE(r1,r2)               FCMP_D_(r1,r2,FCMP_LE)
+#define FCMP_Q_LE(r1,r2)               FCMP_Q_(r1,r2,FCMP_LE)
+#define FCMP_S_EQ(r1,r2)               FCMP_S_(r1,r2,FCMP_EQ)
+#define FCMP_D_EQ(r1,r2)               FCMP_D_(r1,r2,FCMP_EQ)
+#define FCMP_Q_EQ(r1,r2)               FCMP_Q_(r1,r2,FCMP_EQ)
+#define FCMP_S_GE(r1,r2)               FCMP_S_(r1,r2,FCMP_GE)
+#define FCMP_D_GE(r1,r2)               FCMP_D_(r1,r2,FCMP_GE)
+#define FCMP_Q_GE(r1,r2)               FCMP_Q_(r1,r2,FCMP_GE)
+#define FCMP_S_GT(r1,r2)               FCMP_S_(r1,r2,FCMP_GT)
+#define FCMP_D_GT(r1,r2)               FCMP_D_(r1,r2,FCMP_GT)
+#define FCMP_Q_GT(r1,r2)               FCMP_Q_(r1,r2,FCMP_GT)
+#define FCMP_S_NE(r1,r2)               FCMP_S_(r1,r2,FCMP_NE)
+#define FCMP_D_NE(r1,r2)               FCMP_D_(r1,r2,FCMP_NE)
+#define FCMP_Q_NE(r1,r2)               FCMP_Q_(r1,r2,FCMP_NE)
+#define FCMP_S_UNLT(r1,r2)             FCMP_S_(r1,r2,FCMP_UNLT)
+#define FCMP_D_UNLT(r1,r2)             FCMP_D_(r1,r2,FCMP_UNLT)
+#define FCMP_Q_UNLT(r1,r2)             FCMP_Q_(r1,r2,FCMP_UNLT)
+#define FCMP_S_UNLE(r1,r2)             FCMP_S_(r1,r2,FCMP_UNLE)
+#define FCMP_D_UNLE(r1,r2)             FCMP_D_(r1,r2,FCMP_UNLE)
+#define FCMP_Q_UNLE(r1,r2)             FCMP_Q_(r1,r2,FCMP_UNLE)
+#define FCMP_S_UNEQ(r1,r2)             FCMP_S_(r1,r2,FCMP_UNEQ)
+#define FCMP_D_UNEQ(r1,r2)             FCMP_D_(r1,r2,FCMP_UNEQ)
+#define FCMP_Q_UNEQ(r1,r2)             FCMP_Q_(r1,r2,FCMP_UNEQ)
+#define FCMP_S_UNGE(r1,r2)             FCMP_S_(r1,r2,FCMP_UNGE)
+#define FCMP_D_UNGE(r1,r2)             FCMP_D_(r1,r2,FCMP_UNGE)
+#define FCMP_Q_UNGE(r1,r2)             FCMP_Q_(r1,r2,FCMP_UNGE)
+#define FCMP_S_UNGT(r1,r2)             FCMP_S_(r1,r2,FCMP_UNGT)
+#define FCMP_D_UNGT(r1,r2)             FCMP_D_(r1,r2,FCMP_UNGT)
+#define FCMP_Q_UNGT(r1,r2)             FCMP_Q_(r1,r2,FCMP_UNGT)
+#define FCMP_S_LTGT(r1,r2)             FCMP_S_(r1,r2,FCMP_LTGT)
+#define FCMP_D_LTGT(r1,r2)             FCMP_D_(r1,r2,FCMP_LTGT)
+#define FCMP_Q_LTGT(r1,r2)             FCMP_Q_(r1,r2,FCMP_LTGT)
+#define FCMP_S_ORD(r1,r2)              FCMP_S_(r1,r2,FCMP_ORD)
+#define FCMP_D_ORD(r1,r2)              FCMP_D_(r1,r2,FCMP_ORD)
+#define FCMP_Q_ORD(r1,r2)              FCMP_Q_(r1,r2,FCMP_ORD)
+#define FCMP_S_UNORD(r1,r2)            FCMP_S_(r1,r2,FCMP_UNORD)
+#define FCMP_D_UNORD(r1,r2)            FCMP_D_(r1,r2,FCMP_UNORD)
+#define FCMP_Q_UNORD(r1,r2)            FCMP_Q_(r1,r2,FCMP_UNORD)
+/* Format conversions.  In the generic forms s and d are the source and
+ * destination formats (0, 1 or 3 as used by the wrappers below).  XFNVFF
+ * and FCNVFF_ expand to the same f46() call; FCNVXF_ and FCNVXT_ differ
+ * from them only in the fourth f46() argument (1 and 3 instead of 0). */
+#define XFNVFF(s,d,r,t)                        f46(0xc,r,0,0,d,s,1,0,0,t)
+#define FCNVFF_Q_S(r,t)                        XFNVFF(3,0,r,t)
+#define FCNVFF_Q_D(r,t)                        XFNVFF(3,1,r,t)
+#define FCNVFF_S_Q(r,t)                        XFNVFF(0,3,r,t)
+#define FCNVFF_D_Q(r,t)                        XFNVFF(1,3,r,t)
+#define FCNVFF_(s,d,r,t)               f46(0xc,r,0,0,d,s,1,0,0,t)
+#define FCNVFF_S_D(r,t)                        FCNVFF_(0,1,r,t)
+#define FCNVFF_D_S(r,t)                        FCNVFF_(1,0,r,t)
+#define FCNVXF_(s,d,r,t)               f46(0xc,r,0,1,d,s,1,0,0,t)
+#define FCNVXF_S_S(r,t)                        FCNVXF_(0,0,r,t)
+#define FCNVXF_S_D(r,t)                        FCNVXF_(0,1,r,t)
+#define FCNVXT_(s,d,r,t)               f46(0xc,r,0,3,d,s,1,0,0,t)
+#define FCNVXT_S_S(r,t)                        FCNVXT_(0,0,r,t)
+#define FCNVXT_D_S(r,t)                        FCNVXT_(1,0,r,t)
+/* Register copy and division, with the same _S/_D/_Q scheme. */
+#define FCPY_(f,r,t)                   f49(0xe,r,0,2,0,f,0,0,0,0,0,t)
+#define FCPY_S(r,t)                    FCPY_(0,r,t)
+#define FCPY_D(r,t)                    FCPY_(1,r,t)
+#define FCPY_Q(r,t)                    f45(0xc,r,0,2,2,0,0,0,t)
+#define FDIV_(f,r1,r2,t)               f52(0xe,r1,r2,3,0,f,3,0,0,0,0,t)
+#define FDIV_S(r1,r2,t)                        FDIV_(0,r1,r2,t)
+#define FDIV_D(r1,r2,t)                        FDIV_(1,r1,r2,t)
+#define FDIV_Q(r1,r2,t)                        f48(0xc,r1,r2,3,3,3,0,0,t)
+#define FID()                          f45(0xc,0,0,0,2,0,0,0,0)
+/* Floating point loads: the plain name takes an index register x, the
+ * I suffix an immediate i, and the L suffix uses a different emitter. */
+#define FLDDL(i,b,t)                   f3(0x14,b,t,i,1)
+#define FLDD(x,b,t)                    f39(0xb,b,x,t)
+#define FLDDI(i,b,t)                   f41(0xb,b,i,t)
+#define FLDWL(i,b,t)                   f43(0x17,b,t,i)
+#define FLDW(x,b,t)                    f39(0x9,b,x,t)
+#define FLDWI(i,b,t)                   f41(0x9,b,i,t)
+/* Multiplication, with the same _S/_D/_Q scheme. */
+#define FMPY_(f,r1,r2,t)               f52(0xe,r1,r2,2,0,f,3,0,0,0,0,t)
+#define FMPY_S(r1,r2,t)                        FMPY_(0,r1,r2,t)
+#define FMPY_D(r1,r2,t)                        FMPY_(1,r1,r2,t)
+#define FMPY_Q(r1,r2,t)                        f48(0xc,r1,r2,2,3,3,0,0,t)
+/* FIXME not disassembled */
+#define FMPYADD_(f,r1,r2,ta,ra,tm)     f53(0x6,r1,r2,ta,ra,f,tm)
+#define FMPYADD_S(r1,r2,ta,ra,tm)      FMPYADD_(0,r1,r2,ta,ra,tm)
+#define FMPYADD_D(r1,r2,ta,ra,tm)      FMPYADD_(1,r1,r2,ta,ra,tm)
+/* The ra operand is split over two f54() arguments: the bits above the
+ * low three and the low three bits.  FMPYNFADD_ differs from FMPYFADD_
+ * only in the second-to-last f54() argument (1 instead of 0).  ra is
+ * parenthesized in both places so any expression may be passed. */
+#define FMPYFADD_(f,r1,r2,ra,t)                f54(0x2e,r1,r2,(ra)>>3,0,f,(ra)&7,0,0,0,t)
+#define FMPYFADD_S(r1,r2,ra,t)         FMPYFADD_(0,r1,r2,ra,t)
+#define FMPYFADD_D(r1,r2,ra,t)         FMPYFADD_(1,r1,r2,ra,t)
+#define FMPYNFADD_(f,r1,r2,ra,t)       f54(0x2e,r1,r2,(ra)>>3,0,f,(ra)&7,0,0,1,t)
+#define FMPYNFADD_S(r1,r2,ra,t)                FMPYNFADD_(0,r1,r2,ra,t)
+#define FMPYNFADD_D(r1,r2,ra,t)                FMPYNFADD_(1,r1,r2,ra,t)
+/* Remaining instruction macros.  The one-operand operations (FNEG,
+ * FNEGABS, FRND, FSQRT) differ from each other only in the fourth
+ * argument passed to f49()/f45(). */
+#define FMPYSUB_(f,r1,r2,ta,ra,tm)     f53(0x26,r1,r2,ta,ra,f,tm)
+#define FMPYSUB_S(r1,r2,ta,ra,tm)      FMPYSUB_(0,r1,r2,ta,ra,tm)
+#define FMPYSUB_D(r1,r2,ta,ra,tm)      FMPYSUB_(1,r1,r2,ta,ra,tm)
+#define FNEG_(f,r,t)                   f49(0xe,r,0,6,0,f,0,0,0,0,0,t)
+#define FNEG_S(r,t)                    FNEG_(0,r,t)
+#define FNEG_D(r,t)                    FNEG_(1,r,t)
+/* FIXME not disassembled */
+#define FNEG_Q(r,t)                    f45(0xc,r,0,6,3,0,0,0,t)
+#define FNEGABS_(f,r,t)                        f49(0xe,r,0,7,0,f,0,0,0,0,0,t)
+#define FNEGABS_S(r,t)                 FNEGABS_(0,r,t)
+#define FNEGABS_D(r,t)                 FNEGABS_(1,r,t)
+#define FNEGABS_Q(r,t)                 f45(0xc,r,0,7,3,0,0,0,t)
+#define FRND_(f,r,t)                   f49(0xe,r,0,5,0,f,0,0,0,0,0,t)
+#define FRND_S(r,t)                    FRND_(0,r,t)
+#define FRND_D(r,t)                    FRND_(1,r,t)
+#define FRND_Q(r,t)                    f45(0xc,r,0,5,3,0,0,0,t)
+#define FSQRT_(f,r,t)                  f49(0xe,r,0,4,0,f,0,0,0,0,0,t)
+#define FSQRT_S(r,t)                   FSQRT_(0,r,t)
+#define FSQRT_D(r,t)                   FSQRT_(1,r,t)
+#define FSQRT_Q(r,t)                   f45(0xc,r,0,4,3,0,0,0,t)
+/* Floating point stores: the plain name takes an index register x, the
+ * I suffix an immediate i, and the L suffix uses a different emitter. */
+#define FSTDL(r,i,b)                   f3(0x1c,b,r,i,1)
+#define FSTD(r,x,b)                    f40(0xb,b,x,r)
+#define FSTDI(r,i,b)                   f42(0xb,b,i,r)
+#define FSTWL(r,i,b)                   f43(0x1f,b,r,i)
+#define FSTW(r,x,b)                    f40(0x9,b,x,r)
+#define FSTWI(r,i,b)                   f42(0x9,b,i,r)
+#define FSUB_(f,r1,r2,t)               f52(0xe,r1,r2,1,0,f,3,0,0,0,0,t)
+#define FSUB_S(r1,r2,t)                        FSUB_(0,r1,r2,t)
+#define FSUB_D(r1,r2,t)                        FSUB_(1,r1,r2,t)
+#define FSUB_Q(r1,r2,t)                        f48(0xc,r1,r2,1,3,3,0,0,t)
+/* FTEST_(c) takes one of the FCMP_* condition codes; the plain FTEST()
+ * passes no condition and differs in the fourth f47() argument. */
+#define FTEST_(c)                      f47(0xc,0,0,0,0,2,0,1,c)
+#define FTEST()                                f47(0xc,0,0,1,0,2,0,1,0)
+#define FTEST_LT()                     FTEST_(FCMP_LT)
+#define FTEST_LE()                     FTEST_(FCMP_LE)
+#define FTEST_EQ()                     FTEST_(FCMP_EQ)
+#define FTEST_GE()                     FTEST_(FCMP_GE)
+#define FTEST_GT()                     FTEST_(FCMP_GT)
+#define FTEST_NE()                     FTEST_(FCMP_NE)
+#define FTEST_UNLT()                   FTEST_(FCMP_UNLT)
+#define FTEST_UNLE()                   FTEST_(FCMP_UNLE)
+#define FTEST_UNEQ()                   FTEST_(FCMP_UNEQ)
+#define FTEST_UNGE()                   FTEST_(FCMP_UNGE)
+#define FTEST_UNGT()                   FTEST_(FCMP_UNGT)
+#define FTEST_LTGT()                   FTEST_(FCMP_LTGT)
+#define FTEST_ORD()                    FTEST_(FCMP_ORD)
+#define FTEST_UNORD()                  FTEST_(FCMP_UNORD)
+/* XMPYU and its _L_R, _R_L and _R_R variants differ only in two of the
+ * f52() arguments, which follow the two suffix letters.
+ * NOTE(review): the suffixes presumably select the left or right half
+ * of each source register -- confirm. */
+#define XMPYU(r1,r2,t)                 f52(0xe,r1,r2,2,0,0,3,1,0,0,0,t)
+#define XMPYU_L_R(r1,r2,t)             f52(0xe,r1,r2,2,1,0,3,1,0,0,0,t)
+#define XMPYU_R_L(r1,r2,t)             f52(0xe,r1,r2,2,0,0,3,1,1,0,0,t)
+#define XMPYU_R_R(r1,r2,t)             f52(0xe,r1,r2,2,1,0,3,1,1,0,0,t)
+/* Mapping of the _f (float) and _d (double) operations onto the
+ * instruction macros above or onto helper functions.  The operation
+ * macros take the destination r0 first, while the instruction macros
+ * receive it as their last argument.  The immediate forms take a
+ * pointer to the constant. */
+#define negr_f(r0,r1)                  FNEG_S(r1,r0)
+#define negr_d(r0,r1)                  FNEG_D(r1,r0)
+#define sqrtr_f(r0,r1)                 FSQRT_S(r1,r0)
+#define sqrtr_d(r0,r1)                 FSQRT_D(r1,r0)
+#define extr_f(r0,r1)                  _extr_f(_jit,r0,r1)
+static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define extr_d(r0,r1)                  _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define extr_f_d(r0,r1)                        FCNVFF_S_D(r1,r0)
+#define extr_d_f(r0,r1)                        FCNVFF_D_S(r1,r0)
+#define truncr_f(r0,r1)                        truncr_f_i(r0,r1)
+#define truncr_f_i(r0,r1)              _truncr_f_i(_jit,r0,r1)
+static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#define truncr_d(r0,r1)                        truncr_d_i(r0,r1)
+#define truncr_d_i(r0,r1)              _truncr_d_i(_jit,r0,r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movr_f(r0,r1)                  FCPY_S(r1,r0)
+#define movi_f(r0,i0)                  _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#define movr_d(r0,r1)                  FCPY_D(r1,r0)
+#define movi_d(r0,i0)                  _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#define absr_f(r0,r1)                  FABS_S(r1,r0)
+#define absr_d(r0,r1)                  FABS_D(r1,r0)
+#define addr_f(r0,r1,r2)               FADD_S(r1,r2,r0)
+#define addi_f(r0,r1,i0)               _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define addr_d(r0,r1,r2)               FADD_D(r1,r2,r0)
+#define addi_d(r0,r1,i0)               _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define subr_f(r0,r1,r2)               FSUB_S(r1,r2,r0)
+#define subi_f(r0,r1,i0)               _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define subr_d(r0,r1,r2)               FSUB_D(r1,r2,r0)
+#define subi_d(r0,r1,i0)               _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* reverse subtract is a subtract with the two sources swapped */
+#define rsbr_f(r0,r1,r2)               subr_f(r0,r2,r1)
+#define rsbi_f(r0,r1,i0)               _rsbi_f(_jit,r0,r1,i0)
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define rsbr_d(r0,r1,r2)               subr_d(r0,r2,r1)
+#define rsbi_d(r0,r1,i0)               _rsbi_d(_jit,r0,r1,i0)
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define mulr_f(r0,r1,r2)               FMPY_S(r1,r2,r0)
+#define muli_f(r0,r1,i0)               _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define mulr_d(r0,r1,r2)               FMPY_D(r1,r2,r0)
+#define muli_d(r0,r1,i0)               _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define divr_f(r0,r1,r2)               FDIV_S(r1,r2,r0)
+#define divi_f(r0,r1,i0)               _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define divr_d(r0,r1,r2)               FDIV_D(r1,r2,r0)
+#define divi_d(r0,r1,i0)               _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Comparisons that produce a result in r0.  All of them go through four
+ * helpers (register or immediate second operand, float or double) that
+ * take the FCMP_* condition code as their first argument c. */
+#define cmpr_f(c,r0,r1,r2)             _cmpr_f(_jit,c,r0,r1,r2)
+static void _cmpr_f(jit_state_t*,jit_word_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t);
+#define cmpi_f(c,r0,r1,i0)             _cmpi_f(_jit,c,r0,r1,i0)
+static void _cmpi_f(jit_state_t*,jit_word_t,
+                   jit_int32_t,jit_int32_t,jit_float32_t*);
+#define cmpr_d(c,r0,r1,r2)             _cmpr_d(_jit,c,r0,r1,r2)
+static void _cmpr_d(jit_state_t*,jit_word_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t);
+#define cmpi_d(c,r0,r1,i0)             _cmpi_d(_jit,c,r0,r1,i0)
+static void _cmpi_d(jit_state_t*,jit_word_t,
+                   jit_int32_t,jit_int32_t,jit_float64_t*);
+/* One macro per condition, operand kind and format. */
+#define ltr_f(r0,r1,r2)                        cmpr_f(FCMP_LT,r0,r1,r2)
+#define lti_f(r0,r1,i0)                        cmpi_f(FCMP_LT,r0,r1,i0)
+#define ltr_d(r0,r1,r2)                        cmpr_d(FCMP_LT,r0,r1,r2)
+#define lti_d(r0,r1,i0)                        cmpi_d(FCMP_LT,r0,r1,i0)
+#define ler_f(r0,r1,r2)                        cmpr_f(FCMP_LE,r0,r1,r2)
+#define lei_f(r0,r1,i0)                        cmpi_f(FCMP_LE,r0,r1,i0)
+#define ler_d(r0,r1,r2)                        cmpr_d(FCMP_LE,r0,r1,r2)
+#define lei_d(r0,r1,i0)                        cmpi_d(FCMP_LE,r0,r1,i0)
+#define eqr_f(r0,r1,r2)                        cmpr_f(FCMP_EQ,r0,r1,r2)
+#define eqi_f(r0,r1,i0)                        cmpi_f(FCMP_EQ,r0,r1,i0)
+#define eqr_d(r0,r1,r2)                        cmpr_d(FCMP_EQ,r0,r1,r2)
+#define eqi_d(r0,r1,i0)                        cmpi_d(FCMP_EQ,r0,r1,i0)
+#define ger_f(r0,r1,r2)                        cmpr_f(FCMP_GE,r0,r1,r2)
+#define gei_f(r0,r1,i0)                        cmpi_f(FCMP_GE,r0,r1,i0)
+#define ger_d(r0,r1,r2)                        cmpr_d(FCMP_GE,r0,r1,r2)
+#define gei_d(r0,r1,i0)                        cmpi_d(FCMP_GE,r0,r1,i0)
+#define gtr_f(r0,r1,r2)                        cmpr_f(FCMP_GT,r0,r1,r2)
+#define gti_f(r0,r1,i0)                        cmpi_f(FCMP_GT,r0,r1,i0)
+#define gtr_d(r0,r1,r2)                        cmpr_d(FCMP_GT,r0,r1,r2)
+#define gti_d(r0,r1,i0)                        cmpi_d(FCMP_GT,r0,r1,i0)
+#define ner_f(r0,r1,r2)                        cmpr_f(FCMP_NE,r0,r1,r2)
+#define nei_f(r0,r1,i0)                        cmpi_f(FCMP_NE,r0,r1,i0)
+#define ner_d(r0,r1,r2)                        cmpr_d(FCMP_NE,r0,r1,r2)
+#define nei_d(r0,r1,i0)                        cmpi_d(FCMP_NE,r0,r1,i0)
+#define unltr_f(r0,r1,r2)              cmpr_f(FCMP_UNLT,r0,r1,r2)
+#define unlti_f(r0,r1,i0)              cmpi_f(FCMP_UNLT,r0,r1,i0)
+#define unltr_d(r0,r1,r2)              cmpr_d(FCMP_UNLT,r0,r1,r2)
+#define unlti_d(r0,r1,i0)              cmpi_d(FCMP_UNLT,r0,r1,i0)
+#define unler_f(r0,r1,r2)              cmpr_f(FCMP_UNLE,r0,r1,r2)
+#define unlei_f(r0,r1,i0)              cmpi_f(FCMP_UNLE,r0,r1,i0)
+#define unler_d(r0,r1,r2)              cmpr_d(FCMP_UNLE,r0,r1,r2)
+#define unlei_d(r0,r1,i0)              cmpi_d(FCMP_UNLE,r0,r1,i0)
+#define uneqr_f(r0,r1,r2)              cmpr_f(FCMP_UNEQ,r0,r1,r2)
+#define uneqi_f(r0,r1,i0)              cmpi_f(FCMP_UNEQ,r0,r1,i0)
+#define uneqr_d(r0,r1,r2)              cmpr_d(FCMP_UNEQ,r0,r1,r2)
+#define uneqi_d(r0,r1,i0)              cmpi_d(FCMP_UNEQ,r0,r1,i0)
+#define unger_f(r0,r1,r2)              cmpr_f(FCMP_UNGE,r0,r1,r2)
+#define ungei_f(r0,r1,i0)              cmpi_f(FCMP_UNGE,r0,r1,i0)
+#define unger_d(r0,r1,r2)              cmpr_d(FCMP_UNGE,r0,r1,r2)
+#define ungei_d(r0,r1,i0)              cmpi_d(FCMP_UNGE,r0,r1,i0)
+#define ungtr_f(r0,r1,r2)              cmpr_f(FCMP_UNGT,r0,r1,r2)
+#define ungti_f(r0,r1,i0)              cmpi_f(FCMP_UNGT,r0,r1,i0)
+#define ungtr_d(r0,r1,r2)              cmpr_d(FCMP_UNGT,r0,r1,r2)
+#define ungti_d(r0,r1,i0)              cmpi_d(FCMP_UNGT,r0,r1,i0)
+#define ltgtr_f(r0,r1,r2)              cmpr_f(FCMP_LTGT,r0,r1,r2)
+#define ltgti_f(r0,r1,i0)              cmpi_f(FCMP_LTGT,r0,r1,i0)
+#define ltgtr_d(r0,r1,r2)              cmpr_d(FCMP_LTGT,r0,r1,r2)
+#define ltgti_d(r0,r1,i0)              cmpi_d(FCMP_LTGT,r0,r1,i0)
+#define ordr_f(r0,r1,r2)               cmpr_f(FCMP_ORD,r0,r1,r2)
+#define ordi_f(r0,r1,i0)               cmpi_f(FCMP_ORD,r0,r1,i0)
+#define ordr_d(r0,r1,r2)               cmpr_d(FCMP_ORD,r0,r1,r2)
+#define ordi_d(r0,r1,i0)               cmpi_d(FCMP_ORD,r0,r1,i0)
+#define unordr_f(r0,r1,r2)             cmpr_f(FCMP_UNORD,r0,r1,r2)
+#define unordi_f(r0,r1,i0)             cmpi_f(FCMP_UNORD,r0,r1,i0)
+#define unordr_d(r0,r1,r2)             cmpr_d(FCMP_UNORD,r0,r1,r2)
+#define unordi_d(r0,r1,i0)             cmpi_d(FCMP_UNORD,r0,r1,i0)
+/* Float and double loads and stores.  The register-indirect forms use
+ * the immediate-offset instructions with an offset of 0.  The
+ * register-indexed forms (ldxr_*, stxr_*) map straight onto FLDW/FLDD
+ * and FSTW/FSTD only when FLDXR/FSTXR are nonzero; otherwise they are
+ * helper functions. */
+#define ldr_f(r0,r1)                   FLDWI(0,r1,r0)
+#define ldi_f(r0,i0)                   _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#if FLDXR
+#  define ldxr_f(r0,r1,r2)             FLDW(r2,r1,r0)
+#  define ldxr_d(r0,r1,r2)             FLDD(r2,r1,r0)
+#else
+#define ldxr_f(r0,r1,r2)               _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxr_d(r0,r1,r2)               _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#endif
+#define ldxi_f(r0,r1,i0)               _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_d(r0,r1)                   FLDDI(0,r1,r0)
+#define ldi_d(r0,i0)                   _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxi_d(r0,r1,i0)               _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* for stores r0 is the address register and r1 the value stored */
+#define str_f(r0,r1)                   FSTWI(r1,0,r0)
+#define sti_f(i0,r0)                   _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#if FSTXR
+#  define stxr_f(r0,r1,r2)             FSTW(r2,r1,r0)
+#  define stxr_d(r0,r1,r2)             FSTD(r2,r1,r0)
+#else
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#endif
+#define stxi_f(i0,r0,r1)               _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define str_d(r0,r1)                   FSTDI(r1,0,r0)
+#define sti_d(i0,r0)                   _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxi_d(i0,r0,r1)               _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bcmpr_f(c,i0,r0,r1)            _bcmpr_f(_jit,c,i0,r0,r1)
+static jit_word_t _bcmpr_f(jit_state_t*,jit_word_t,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#define bcmpi_f(c,i0,r0,i1)            _bcmpi_f(_jit,c,i0,r0,i1)
+static jit_word_t _bcmpi_f(jit_state_t*,jit_word_t,
+                          jit_word_t,jit_int32_t,jit_float32_t*);
+#define bcmpr_d(c,i0,r0,r1)            _bcmpr_d(_jit,c,i0,r0,r1)
+static jit_word_t _bcmpr_d(jit_state_t*,jit_word_t,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#define bcmpi_d(c,i0,r0,i1)            _bcmpi_d(_jit,c,i0,r0,i1)
+static jit_word_t _bcmpi_d(jit_state_t*,jit_word_t,
+                          jit_word_t,jit_int32_t,jit_float64_t*);
+#define bltr_f(i0,r0,r1)               bcmpr_f(FCMP_LT,i0,r0,r1)
+#define blti_f(i0,r0,i1)               bcmpi_f(FCMP_LT,i0,r0,i1)
+#define bltr_d(i0,r0,r1)               bcmpr_d(FCMP_LT,i0,r0,r1)
+#define blti_d(i0,r0,i1)               bcmpi_d(FCMP_LT,i0,r0,i1)
+#define bler_f(i0,r0,r1)               bcmpr_f(FCMP_LE,i0,r0,r1)
+#define blei_f(i0,r0,i1)               bcmpi_f(FCMP_LE,i0,r0,i1)
+#define bler_d(i0,r0,r1)               bcmpr_d(FCMP_LE,i0,r0,r1)
+#define blei_d(i0,r0,i1)               bcmpi_d(FCMP_LE,i0,r0,i1)
+#define beqr_f(i0,r0,r1)               bcmpr_f(FCMP_EQ,i0,r0,r1)
+#define beqi_f(i0,r0,i1)               bcmpi_f(FCMP_EQ,i0,r0,i1)
+#define beqr_d(i0,r0,r1)               bcmpr_d(FCMP_EQ,i0,r0,r1)
+#define beqi_d(i0,r0,i1)               bcmpi_d(FCMP_EQ,i0,r0,i1)
+#define bger_f(i0,r0,r1)               bcmpr_f(FCMP_GE,i0,r0,r1)
+#define bgei_f(i0,r0,i1)               bcmpi_f(FCMP_GE,i0,r0,i1)
+#define bger_d(i0,r0,r1)               bcmpr_d(FCMP_GE,i0,r0,r1)
+#define bgei_d(i0,r0,i1)               bcmpi_d(FCMP_GE,i0,r0,i1)
+#define bgtr_f(i0,r0,r1)               bcmpr_f(FCMP_GT,i0,r0,r1)
+#define bgti_f(i0,r0,i1)               bcmpi_f(FCMP_GT,i0,r0,i1)
+#define bgtr_d(i0,r0,r1)               bcmpr_d(FCMP_GT,i0,r0,r1)
+#define bgti_d(i0,r0,i1)               bcmpi_d(FCMP_GT,i0,r0,i1)
+#define bner_f(i0,r0,r1)               bcmpr_f(FCMP_NE,i0,r0,r1)
+#define bnei_f(i0,r0,i1)               bcmpi_f(FCMP_NE,i0,r0,i1)
+#define bner_d(i0,r0,r1)               bcmpr_d(FCMP_NE,i0,r0,r1)
+#define bnei_d(i0,r0,i1)               bcmpi_d(FCMP_NE,i0,r0,i1)
+#define bunltr_f(i0,r0,r1)             bcmpr_f(FCMP_UNLT,i0,r0,r1)
+#define bunlti_f(i0,r0,i1)             bcmpi_f(FCMP_UNLT,i0,r0,i1)
+#define bunltr_d(i0,r0,r1)             bcmpr_d(FCMP_UNLT,i0,r0,r1)
+#define bunlti_d(i0,r0,i1)             bcmpi_d(FCMP_UNLT,i0,r0,i1)
+#define bunler_f(i0,r0,r1)             bcmpr_f(FCMP_UNLE,i0,r0,r1)
+#define bunlei_f(i0,r0,i1)             bcmpi_f(FCMP_UNLE,i0,r0,i1)
+#define bunler_d(i0,r0,r1)             bcmpr_d(FCMP_UNLE,i0,r0,r1)
+#define bunlei_d(i0,r0,i1)             bcmpi_d(FCMP_UNLE,i0,r0,i1)
+#define buneqr_f(i0,r0,r1)             bcmpr_f(FCMP_UNEQ,i0,r0,r1)
+#define buneqi_f(i0,r0,i1)             bcmpi_f(FCMP_UNEQ,i0,r0,i1)
+#define buneqr_d(i0,r0,r1)             bcmpr_d(FCMP_UNEQ,i0,r0,r1)
+#define buneqi_d(i0,r0,i1)             bcmpi_d(FCMP_UNEQ,i0,r0,i1)
+#define bunger_f(i0,r0,r1)             bcmpr_f(FCMP_UNGE,i0,r0,r1)
+#define bungei_f(i0,r0,i1)             bcmpi_f(FCMP_UNGE,i0,r0,i1)
+#define bunger_d(i0,r0,r1)             bcmpr_d(FCMP_UNGE,i0,r0,r1)
+#define bungei_d(i0,r0,i1)             bcmpi_d(FCMP_UNGE,i0,r0,i1)
+#define bungtr_f(i0,r0,r1)             bcmpr_f(FCMP_UNGT,i0,r0,r1)
+#define bungti_f(i0,r0,i1)             bcmpi_f(FCMP_UNGT,i0,r0,i1)
+#define bungtr_d(i0,r0,r1)             bcmpr_d(FCMP_UNGT,i0,r0,r1)
+#define bungti_d(i0,r0,i1)             bcmpi_d(FCMP_UNGT,i0,r0,i1)
+#define bltgtr_f(i0,r0,r1)             bcmpr_f(FCMP_LTGT,i0,r0,r1)
+#define bltgti_f(i0,r0,i1)             bcmpi_f(FCMP_LTGT,i0,r0,i1)
+#define bltgtr_d(i0,r0,r1)             bcmpr_d(FCMP_LTGT,i0,r0,r1)
+#define bltgti_d(i0,r0,i1)             bcmpi_d(FCMP_LTGT,i0,r0,i1)
+#define bordr_f(i0,r0,r1)              bcmpr_f(FCMP_ORD,i0,r0,r1)
+#define bordi_f(i0,r0,i1)              bcmpi_f(FCMP_ORD,i0,r0,i1)
+#define bordr_d(i0,r0,r1)              bcmpr_d(FCMP_ORD,i0,r0,r1)
+#define bordi_d(i0,r0,i1)              bcmpi_d(FCMP_ORD,i0,r0,i1)
+#define bunordr_f(i0,r0,r1)            bcmpr_f(FCMP_UNORD,i0,r0,r1)
+#define bunordi_f(i0,r0,i1)            bcmpi_f(FCMP_UNORD,i0,r0,i1)
+#define bunordr_d(i0,r0,r1)            bcmpr_d(FCMP_UNORD,i0,r0,r1)
+#define bunordi_d(i0,r0,i1)            bcmpi_d(FCMP_UNORD,i0,r0,i1)
+#define vaarg_d(r0, r1)                        _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/* Build an instruction word from a 6-bit `o' field (bit 26 up) and three
+ * 5-bit fields: `b' (bit 21 up), `x' (bit 16 up) and `t' (bit 0 up), and
+ * hand it to ii().  Every operand is asserted to fit its field. */
+static void
+_f39(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t x, jit_int32_t t)
+{
+    jit_int32_t         word;
+
+    assert((o & ~0x3f) == 0);
+    assert((b & ~0x1f) == 0);
+    assert((x & ~0x1f) == 0);
+    assert((t & ~0x1f) == 0);
+
+    word = (o << 26) | (b << 21) | (x << 16) | t;
+    ii(word);
+}
+
+/* Same field layout as _f39 (`o' at bit 26, `b' at bit 21, `x' at bit 16,
+ * `r' in the low five bits) with bit 9 additionally set in the word that
+ * is handed to ii(). */
+static void
+_f40(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t x, jit_int32_t r)
+{
+    jit_int32_t         word;
+
+    assert((o & ~0x3f) == 0);
+    assert((b & ~0x1f) == 0);
+    assert((x & ~0x1f) == 0);
+    assert((r & ~0x1f) == 0);
+
+    word = (o << 26) | (b << 21) | (x << 16) | (1 << 9) | r;
+    ii(word);
+}
+
+/* Build an instruction word with a 6-bit `o' field (bit 26 up), a 5-bit
+ * `b' field (bit 21 up), a signed 5-bit immediate `x' (encoded with
+ * low_sign_unext(x, 5) at bit 16 up), bit 12 set, and a 5-bit `t' field
+ * in the low bits; the word is handed to ii().
+ *
+ * `x' must lie in [-16, 15].  The upper bound is inclusive: the
+ * short-displacement paths in this file accept i0 <= 15, so 15 must not
+ * trip the assertion. */
+static void
+_f41(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t x, jit_int32_t t)
+{
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(x >= -16 && x <= 15);
+    assert(!(t & ~0x1f));
+    ii((o<<26)|(b<<21)|(low_sign_unext(x,5)<<16)|(1<<12)|t);
+}
+
+/* Build an instruction word with a 6-bit `o' field (bit 26 up), a 5-bit
+ * `b' field (bit 21 up), a signed 5-bit immediate `i' (encoded with
+ * low_sign_unext(i, 5) at bit 16 up), bits 12 and 9 set, and a 5-bit `r'
+ * field in the low bits; the word is handed to ii().
+ *
+ * `i' must lie in [-16, 15].  The upper bound is inclusive: the
+ * short-displacement paths in this file accept i0 <= 15, so 15 must not
+ * trip the assertion. */
+static void
+_f42(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t b, jit_int32_t i, jit_int32_t r)
+{
+    assert(!(o & ~0x3f));
+    assert(!(b & ~0x1f));
+    assert(i >= -16 && i <= 15);
+    assert(!(r & ~0x1f));
+    ii((o<<26)|(b<<21)|(low_sign_unext(i,5)<<16)|(1<<12)|(1<<9)|r);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o   6 bits at bit 26      r   5 bits at bit 21
+ *   a   5 bits at bit 16      fmt 2 bits at bit 13
+ *   b   3 bits at bit 11      c   2 bits at bit 9
+ *   d   3 bits at bit 6       e   1 bit  at bit 5
+ *   t   5 bits at bit 0
+ * NOTE(review): `b' is asserted to be 3 bits wide but is placed at bit 11,
+ * directly below `fmt' at bit 13, so its top bit would land on the low bit
+ * of `fmt' -- confirm the intended positions against the callers. */
+static void
+_f45(jit_state_t *_jit, jit_int32_t o,
+     jit_int32_t r, jit_int32_t a, jit_int32_t b, jit_int32_t fmt,
+     jit_int32_t c, jit_int32_t d, jit_int32_t e, jit_int32_t t)
+{
+    jit_int32_t         word;
+
+    assert((o   & ~0x3f) == 0);
+    assert((r   & ~0x1f) == 0);
+    assert((a   & ~0x1f) == 0);
+    assert((b   &  ~0x7) == 0);
+    assert((fmt &  ~0x3) == 0);
+    assert((c   &  ~0x3) == 0);
+    assert((d   &  ~0x7) == 0);
+    assert((e   &  ~0x1) == 0);
+    assert((t   & ~0x1f) == 0);
+
+    word  = (o << 26) | (r << 21) | (a << 16);
+    word |= (fmt << 13) | (b << 11) | (c << 9);
+    word |= (d << 6) | (e << 5) | t;
+    ii(word);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o  6 bits at bit 26       r  5 bits at bit 21
+ *   a  3 bits at bit 18       s  3 bits at bit 15
+ *   df 2 bits at bit 13       sf 2 bits at bit 11
+ *   b  2 bits at bit 9        c  3 bits at bit 6
+ *   d  1 bit  at bit 5        t  5 bits at bit 0
+ * Every operand is asserted to fit its field. */
+static void
+_f46(jit_state_t *_jit, jit_int32_t o, jit_int32_t r,
+     jit_int32_t a, jit_int32_t s, jit_int32_t df, jit_int32_t sf,
+     jit_int32_t b, jit_int32_t c, jit_int32_t d, jit_int32_t t)
+{
+    jit_int32_t         word;
+
+    assert((o  & ~0x3f) == 0);
+    assert((r  & ~0x1f) == 0);
+    assert((a  &  ~0x7) == 0);
+    assert((s  &  ~0x7) == 0);
+    assert((df &  ~0x3) == 0);
+    assert((sf &  ~0x3) == 0);
+    assert((b  &  ~0x3) == 0);
+    assert((c  &  ~0x7) == 0);
+    assert((d  &  ~0x1) == 0);
+    assert((t  & ~0x1f) == 0);
+
+    word  = (o << 26) | (r << 21) | (a << 18) | (s << 15);
+    word |= (df << 13) | (sf << 11) | (b << 9);
+    word |= (c << 6) | (d << 5) | t;
+    ii(word);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o   6 bits at bit 26      r2  5 bits at bit 21
+ *   r1  5 bits at bit 16      y   3 bits at bit 13
+ *   fmt 2 bits at bit 11      a   2 bits at bit 9
+ *   b   3 bits at bit 6       c   1 bit  at bit 5
+ *   t   5 bits at bit 0
+ * Every operand is asserted to fit its field. */
+static void
+_f47_48(jit_state_t *_jit, jit_int32_t o,
+       jit_int32_t r2, jit_int32_t r1, jit_int32_t y, jit_int32_t fmt,
+       jit_int32_t a, jit_int32_t b, jit_int32_t c, jit_int32_t t)
+{
+    jit_int32_t         word;
+
+    assert((o   & ~0x3f) == 0);
+    assert((r2  & ~0x1f) == 0);
+    assert((r1  & ~0x1f) == 0);
+    assert((y   &  ~0x7) == 0);
+    assert((fmt &  ~0x3) == 0);
+    assert((a   &  ~0x3) == 0);
+    assert((b   &  ~0x7) == 0);
+    assert((c   &  ~0x1) == 0);
+    assert((t   & ~0x1f) == 0);
+
+    word  = (o << 26) | (r2 << 21) | (r1 << 16);
+    word |= (y << 13) | (fmt << 11) | (a << 9);
+    word |= (b << 6) | (c << 5) | t;
+    ii(word);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o  6 bits at bit 26       r1 5 bits at bit 21
+ *   r2 5 bits at bit 16       y  3 bits at bit 13
+ *   v  1 bit  at bit 12       f  1 bit  at bit 11
+ *   a  2 bits at bit 9        b  1 bit  at bit 8
+ *   u  1 bit  at bit 7        c  1 bit  at bit 6
+ *   d  1 bit  at bit 5        t  5 bits at bit 0
+ * `r2' sits in bits 16-20 directly below `r1', so it is limited to five
+ * bits; a sixth bit would be OR-ed into the low bit of `r1'. */
+static void
+_f49_52(jit_state_t *_jit, jit_int32_t o,
+       jit_int32_t r1, jit_int32_t r2, jit_int32_t y,
+       jit_int32_t v, jit_int32_t f, jit_int32_t a, jit_int32_t b,
+       jit_int32_t u, jit_int32_t c, jit_int32_t d, jit_int32_t t)
+{
+    assert(!(o  & ~0x3f));
+    assert(!(r1 & ~0x1f));
+    assert(!(r2 & ~0x1f));
+    assert(!(y  &  ~0x7));
+    assert(!(v  &  ~0x1));
+    assert(!(f  &  ~0x1));
+    assert(!(a  &  ~0x3));
+    assert(!(b  &  ~0x1));
+    assert(!(u  &  ~0x1));
+    assert(!(c  &  ~0x1));
+    assert(!(d  &  ~0x1));
+    assert(!(t  & ~0x1f));
+    ii((o<<26)|(r1<<21)|(r2<<16)|(y<<13)|(v<<12)|
+       (f<<11)|(a<<9)|(b<<8)|(u<<7)|(c<<6)|(d<<5)|t);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o  6 bits at bit 26       r1 5 bits at bit 21
+ *   r2 5 bits at bit 16       ta 5 bits at bit 11
+ *   ra 5 bits at bit 6        f  1 bit  at bit 5
+ *   tm 5 bits at bit 0
+ * Besides the width checks, one extra operand constraint relating `ra',
+ * `tm', `ta', `r1', `r2' and `f' is asserted before emission. */
+static void
+_f53(jit_state_t *_jit, jit_int32_t o, jit_int32_t r1, jit_int32_t r2,
+     jit_int32_t ta, jit_int32_t ra, jit_int32_t f, jit_int32_t tm)
+{
+    jit_int32_t         word;
+
+    assert((o  & ~0x3f) == 0);
+    assert((r1 & ~0x1f) == 0);
+    assert((r2 & ~0x1f) == 0);
+    assert((ta & ~0x1f) == 0);
+    assert((ra & ~0x1f) == 0);
+    assert((f  &  ~0x1) == 0);
+    assert((tm & ~0x1f) == 0);
+    assert(ra != tm ||
+           (ta == r1 || ta == r2 || ta == tm) ||
+           (f && ra == 1) || (!f && !ra));
+
+    word  = (o << 26) | (r1 << 21) | (r2 << 16);
+    word |= (ta << 11) | (ra << 6) | (f << 5) | tm;
+    ii(word);
+}
+
+/* Build an instruction word from the fields below and hand it to ii():
+ *   o  6 bits at bit 26       r1 5 bits at bit 21
+ *   r2 5 bits at bit 16       a  3 bits at bit 13
+ *   b  1 bit  at bit 12       f  1 bit  at bit 11
+ *   c  3 bits at bit 8        d  1 bit  at bit 7
+ *   e  1 bit  at bit 6        g  1 bit  at bit 5
+ *   t  5 bits at bit 0
+ * `f' is shifted into bit 11 (a `<' comparison here would always yield 1
+ * and corrupt bit 0 of `t'), and every single-bit operand, `d' included,
+ * is range checked exactly once. */
+static void
+_f54(jit_state_t *_jit, jit_int32_t o, jit_int32_t r1, jit_int32_t r2,
+     jit_int32_t a, jit_int32_t b, jit_int32_t f, jit_int32_t c,
+     jit_int32_t d, jit_int32_t e, jit_int32_t g, jit_int32_t t)
+{
+    assert(!(o  & ~0x3f));
+    assert(!(r1 & ~0x1f));
+    assert(!(r2 & ~0x1f));
+    assert(!(a  &  ~0x7));
+    assert(!(b  &  ~0x1));
+    assert(!(f  &  ~0x1));
+    assert(!(c  &  ~0x7));
+    assert(!(d  &  ~0x1));
+    assert(!(e  &  ~0x1));
+    assert(!(g  &  ~0x1));
+    assert(!(t  & ~0x1f));
+    ii((o<<26)|(r1<<21)|(r2<<16)|(a<<13)|
+       (b<<12)|(f<<11)|(c<<8)|(d<<7)|(e<<6)|(g<<5)|t);
+}
+
+/* Transfer `r1' to `r0' through the frame slot at alloca_offset - 8
+ * (stxi followed by ldxi_f), then apply FCNVXF_S_S to `r0' in place.
+ * NOTE(review): presumably an integer to single-precision conversion,
+ * going through memory to cross register files -- confirm. */
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_word_t    slot = alloca_offset - 8;
+
+    stxi(slot, _FP_REGNO, r1);
+    ldxi_f(r0, _FP_REGNO, slot);
+    FCNVXF_S_S(r0, r0);
+}
+
+/* Transfer `r1' to `r0' through the frame slot at alloca_offset - 8
+ * (stxi followed by a single-word ldxi_f), then apply FCNVXF_S_D to `r0'
+ * in place.
+ * NOTE(review): presumably an integer to double-precision conversion,
+ * going through memory to cross register files -- confirm. */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_word_t    slot = alloca_offset - 8;
+
+    stxi(slot, _FP_REGNO, r1);
+    ldxi_f(r0, _FP_REGNO, slot);
+    FCNVXF_S_D(r0, r0);
+}
+
+/* Apply FCNVXT_S_S to `r1' with a temporary FPR as destination, spill the
+ * temporary with stxi_f to the frame slot at alloca_offset - 8 and reload
+ * that slot into `r0' with ldxi.  The temporary is released afterwards. */
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_word_t    slot = alloca_offset - 8;
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr);
+
+    FCNVXT_S_S(r1, rn(tmp));
+    stxi_f(slot, _FP_REGNO, rn(tmp));
+    ldxi(r0, _FP_REGNO, slot);
+    jit_unget_reg(tmp);
+}
+
+/* Apply FCNVXT_D_S to `r1' with a temporary FPR as destination, spill the
+ * temporary with stxi_d to the frame slot at alloca_offset - 8 and reload
+ * the word at that offset into `r0' with ldxi.  The temporary is released
+ * afterwards. */
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_word_t    slot = alloca_offset - 8;
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr);
+
+    FCNVXT_D_S(r1, rn(tmp));
+    stxi_d(slot, _FP_REGNO, rn(tmp));
+    ldxi(r0, _FP_REGNO, slot);
+    jit_unget_reg(tmp);
+}
+
+/* Load the single-precision constant *i0 into `r0'.
+ * When _jitc->no_data is clear, the constant is loaded straight from its
+ * address with ldi_f.  Otherwise its bit pattern is moved into a scratch
+ * GPR, stored to the frame slot at alloca_offset - 8 and reloaded from
+ * there with ldxi_f. */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+        jit_int32_t      i;
+        jit_float32_t    f;
+    } bits;
+    jit_int32_t          tmp;
+
+    if (!_jitc->no_data) {
+        ldi_f(r0, (jit_word_t)i0);
+        return;
+    }
+
+    bits.f = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i);
+    stxi_i(alloca_offset - 8, _FP_REGNO, rn(tmp));
+    jit_unget_reg(tmp);
+    ldxi_f(r0, _FP_REGNO, alloca_offset - 8);
+}
+
+/* Load the double-precision constant *i0 into `r0'.
+ * When _jitc->no_data is set, the two 32-bit halves of the constant are
+ * written through a scratch GPR to the frame slots at alloca_offset - 8
+ * and alloca_offset - 4, and the double is reloaded from
+ * alloca_offset - 8.  Otherwise it is loaded straight from its address
+ * with ldi_d; the constant's value is only read on the no_data path. */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_int32_t      ii[2];
+       jit_word_t       w;
+       jit_float64_t    d;
+    } data;
+    jit_int32_t                 reg;
+
+    if (_jitc->no_data) {
+       data.d = *i0;
+       reg = jit_get_reg(jit_class_gpr);
+       /* First word of the constant goes to the lower address. */
+       movi(rn(reg), data.ii[0]);
+       stxi_i(alloca_offset - 8, _FP_REGNO, rn(reg));
+       movi(rn(reg), data.ii[1]);
+       stxi_i(alloca_offset - 4, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+       ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
+    }
+    else
+       ldi_d(r0, (jit_word_t)i0);
+}
+
+/* fpr_opi(name, type, size) defines _<name>i_<type>(): the immediate form
+ * of a binary floating-point operation.  The constant *i0 is loaded into a
+ * temporary FPR with movi_<type>, the register form <name>r_<type> is
+ * applied to (r0, r1, temporary), and the temporary is released.
+ * fopi()/dopi() instantiate it for the f/32 and d/64 variants. */
+#define fpr_opi(name, type, size) \
+static void \
+_##name##i_##type(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, \
+                 jit_float##size##_t *i0) \
+{ \
+    jit_int32_t tmp; \
+    tmp = jit_get_reg(jit_class_fpr); \
+    movi_##type(rn(tmp), i0); \
+    name##r_##type(r0, r1, rn(tmp)); \
+    jit_unget_reg(tmp); \
+}
+#define fopi(name)                     fpr_opi(name, f, 32)
+#define dopi(name)                     fpr_opi(name, d, 64)
+
+/* Immediate variants of add, sub, rsb, mul and div. */
+fopi(add)
+dopi(add)
+fopi(sub)
+dopi(sub)
+fopi(rsb)
+dopi(rsb)
+fopi(mul)
+dopi(mul)
+fopi(div)
+dopi(div)
+
+/* Emit the sequence for a single-precision register/register comparison
+ * with condition `c': LDI(0, r0), FCMP_S_(r1, r2, c), FTEST(), LDI(1, r0).
+ * The statement order is the emission order and must not change.
+ * NOTE(review): presumably FTEST conditionally nullifies the trailing
+ * LDI(1, r0) according to the FCMP_S_ outcome, leaving 0 or 1 in `r0' --
+ * confirm against the macro definitions. */
+static void
+_cmpr_f(jit_state_t *_jit, jit_word_t c,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDI(0, r0);
+    FCMP_S_(r1, r2, c);
+    FTEST();
+    LDI(1, r0);
+}
+
+/* Immediate form of cmpr_f: load the constant *i0 into a temporary FPR
+ * with movi_f, run cmpr_f(c, r0, r1, temporary) and release the
+ * temporary. */
+static void
+_cmpi_f(jit_state_t *_jit, jit_word_t c,
+       jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t         tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_f(rn(tmp), i0);
+    cmpr_f(c, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit the sequence for a double-precision register/register comparison
+ * with condition `c': LDI(0, r0), FCMP_D_(r1, r2, c), FTEST(), LDI(1, r0).
+ * The statement order is the emission order and must not change.
+ * NOTE(review): presumably FTEST conditionally nullifies the trailing
+ * LDI(1, r0) according to the FCMP_D_ outcome, leaving 0 or 1 in `r0' --
+ * confirm against the macro definitions. */
+static void
+_cmpr_d(jit_state_t *_jit, jit_word_t c,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDI(0, r0);
+    FCMP_D_(r1, r2, c);
+    FTEST();
+    LDI(1, r0);
+}
+
+/* Immediate form of cmpr_d: load the constant *i0 into a temporary FPR
+ * with movi_d, run cmpr_d(c, r0, r1, temporary) and release the
+ * temporary. */
+static void
+_cmpi_d(jit_state_t *_jit, jit_word_t c,
+       jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t         tmp;
+
+    tmp = jit_get_reg(jit_class_fpr);
+    movi_d(rn(tmp), i0);
+    cmpr_d(c, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a single-precision value from the absolute address `i0' into `r0'.
+ * `i0' must be a multiple of 4.  Addresses in [-8192, 8191] whose
+ * re_assemble_16() form has none of the bits in mask 6 set use FLDWL
+ * relative to _R0_REGNO; anything else is first moved into a scratch GPR
+ * and loaded with ldr_f. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    assert((i0 & 3) == 0);
+    if (i0 < -8192 || i0 > 8191 || (re_assemble_16(i0) & 6)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldr_f(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else
+        FLDWL(i0, _R0_REGNO, r0);
+}
+
+#if !FLDXR
+/* Fallback used when FLDXR is 0: compute r1 + r2 into a scratch GPR with
+ * addr and load the single-precision value from there into `r0'. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Fallback used when FLDXR is 0: compute r1 + r2 into a scratch GPR with
+ * addr and load the double-precision value from there into `r0'. */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Load a single-precision value from `r1' + `i0' into `r0', picking the
+ * first form that applies:
+ *   1. FLDWI when i0 is in [-16, 15];
+ *   2. FLDWL when FLDXR is enabled, i0 is in [-8192, 8191] and its
+ *      re_assemble_16() form has none of the bits in mask 6 set;
+ *   3. otherwise i0 is moved into a scratch GPR and ldxr_f is used. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (i0 >= -16 && i0 <= 15) {
+        FLDWI(i0, r1, r0);
+        return;
+    }
+    /* |im11a|0|t|i| */
+    if (FLDXR && i0 >= -8192 && i0 <= 8191 && !(re_assemble_16(i0) & 6)) {
+        FLDWL(i0, r1, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldxr_f(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a double-precision value from the absolute address `i0' into `r0'.
+ * `i0' must be a multiple of 8.  Addresses in [-8192, 8191] whose
+ * re_assemble_16() form has none of the bits in mask 14 set use FLDDL
+ * relative to _R0_REGNO; anything else is first moved into a scratch GPR
+ * and loaded with ldr_d. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    assert((i0 & 7) == 0);
+    if (i0 < -8192 || i0 > 8191 || (re_assemble_16(i0) & 14)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldr_d(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else
+        FLDDL(i0, _R0_REGNO, r0);
+}
+
+/* Load a double-precision value from `r1' + `i0' into `r0', picking the
+ * first form that applies:
+ *   1. FLDDI when i0 is in [-16, 15];
+ *   2. FLDDL when FLDXR is enabled, i0 is in [-8192, 8191] and its
+ *      re_assemble_16() form has none of the bits in mask 14 set;
+ *   3. otherwise i0 is moved into a scratch GPR and ldxr_d is used. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (i0 >= -16 && i0 <= 15) {
+        FLDDI(i0, r1, r0);
+        return;
+    }
+    /* |im10a|m|a|1|i| */
+    if (FLDXR && i0 >= -8192 && i0 <= 8191 && !(re_assemble_16(i0) & 14)) {
+        FLDDL(i0, r1, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldxr_d(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Store the single-precision register `r0' at the absolute address `i0'.
+ * `i0' must be a multiple of 4.  Addresses in [-8192, 8191] whose
+ * re_assemble_16() form has none of the bits in mask 6 set use FSTWL
+ * relative to _R0_REGNO; anything else is first moved into a scratch GPR
+ * and stored with str_f. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         tmp;
+
+    assert((i0 & 3) == 0);
+    if (i0 < -8192 || i0 > 8191 || (re_assemble_16(i0) & 6)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_f(rn(tmp), r0);
+        jit_unget_reg(tmp);
+    }
+    else
+        FSTWL(r0, i0, _R0_REGNO);
+}
+
+#if !FSTXR
+/* Fallback used when FSTXR is 0: compute r0 + r1 into a scratch GPR with
+ * addr and store the single-precision register `r2' there. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_f(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Fallback used when FSTXR is 0: compute r0 + r1 into a scratch GPR with
+ * addr and store the double-precision register `r2' there. */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_d(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Store the single-precision register `r1' at `r0' + `i0', picking the
+ * first form that applies:
+ *   1. FSTWI when i0 is in [-16, 15];
+ *   2. FSTWL when FSTXR is enabled, i0 is in [-8192, 8191] and its
+ *      re_assemble_16() form has none of the bits in mask 6 set;
+ *   3. otherwise a scratch GPR is used: with FSTXR it receives i0 and
+ *      stxr_f does the store, without FSTXR it receives r0 + i0 and
+ *      str_f does the store. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (i0 >= -16 && i0 <= 15) {
+        FSTWI(r1, i0, r0);
+        return;
+    }
+    /* |im11a|0|t|i| */
+    if (FSTXR && i0 >= -8192 && i0 <= 8191 && !(re_assemble_16(i0) & 6)) {
+        FSTWL(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+#if FSTXR
+    movi(rn(tmp), i0);
+    stxr_f(rn(tmp), r0, r1);
+#else
+    addi(rn(tmp), r0, i0);
+    str_f(rn(tmp), r1);
+#endif
+    jit_unget_reg(tmp);
+}
+
+/* Store the double-precision register `r0' at the absolute address `i0'.
+ * `i0' must be a multiple of 8.  Addresses in [-8192, 8191] whose
+ * re_assemble_16() form has none of the bits in mask 14 set use FSTDL
+ * relative to _R0_REGNO; anything else is first moved into a scratch GPR
+ * and stored with str_d. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         tmp;
+
+    assert((i0 & 7) == 0);
+    if (i0 < -8192 || i0 > 8191 || (re_assemble_16(i0) & 14)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_d(rn(tmp), r0);
+        jit_unget_reg(tmp);
+    }
+    else
+        FSTDL(r0, i0, _R0_REGNO);
+}
+
+/* Store the double-precision register `r1' at `r0' + `i0', picking the
+ * first form that applies:
+ *   1. FSTDI when i0 is in [-16, 15];
+ *   2. FSTDL when FSTXR is enabled, i0 is in [-8192, 8191] and its
+ *      re_assemble_16() form has none of the bits in mask 14 set;
+ *   3. otherwise a scratch GPR is used: with FSTXR it receives i0 and
+ *      stxr_d does the store, without FSTXR it receives r0 + i0 and
+ *      str_d does the store. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (i0 >= -16 && i0 <= 15) {
+        FSTDI(r1, i0, r0);
+        return;
+    }
+    /* |im10a|m|a|1|i| */
+    if (FSTXR && i0 >= -8192 && i0 <= 8191 && !(re_assemble_16(i0) & 14)) {
+        FSTDL(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+#if FSTXR
+    movi(rn(tmp), i0);
+    stxr_d(rn(tmp), r0, r1);
+#else
+    addi(rn(tmp), r0, i0);
+    str_d(rn(tmp), r1);
+#endif
+    jit_unget_reg(tmp);
+}
+
+/* Emit a single-precision compare of `r0' and `r1' with condition `c',
+ * followed by FTEST, a B_N towards `i0' and a NOP.  The branch operand is
+ * the word distance from the branch to `i0', minus 2.
+ * Returns the value of _jit->pc.w sampled just before the branch is
+ * emitted (presumably so the caller can patch the branch later). */
+static jit_word_t
+_bcmpr_f(jit_state_t *_jit, jit_word_t c,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here;
+    jit_word_t          disp;
+
+    FCMP_S_(r0, r1, c);
+    FTEST();
+    here = _jit->pc.w;
+    disp = ((i0 - here) >> 2) - 2;
+    B_N(disp, _R0_REGNO);
+    NOP();
+    return here;
+}
+
+/* Immediate form of the single-precision compare-and-branch: the constant
+ * *i1 is loaded into a temporary FPR (requested with jit_class_nospill),
+ * compared against `r0' with condition `c', and followed by FTEST, a B_N
+ * towards `i0' and a NOP.  The branch operand is the word distance from
+ * the branch to `i0', minus 2.
+ * Returns the value of _jit->pc.w sampled just before the branch is
+ * emitted (presumably so the caller can patch the branch later). */
+static jit_word_t
+_bcmpi_f(jit_state_t *_jit, jit_word_t c,
+        jit_word_t i0, jit_int32_t r0, jit_float32_t *i1)
+{
+    jit_word_t          here;
+    jit_word_t          disp;
+    jit_int32_t         tmp;
+
+    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_f(rn(tmp), i1);
+    FCMP_S_(r0, rn(tmp), c);
+    FTEST();
+    here = _jit->pc.w;
+    disp = ((i0 - here) >> 2) - 2;
+    B_N(disp, _R0_REGNO);
+    NOP();
+    jit_unget_reg(tmp);
+    return here;
+}
+
+/* Emit a double-precision compare of `r0' and `r1' with condition `c',
+ * followed by FTEST, a B_N towards `i0' and a NOP.  The branch operand is
+ * the word distance from the branch to `i0', minus 2.
+ * Returns the value of _jit->pc.w sampled just before the branch is
+ * emitted (presumably so the caller can patch the branch later). */
+static jit_word_t
+_bcmpr_d(jit_state_t *_jit, jit_word_t c,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here;
+    jit_word_t          disp;
+
+    FCMP_D_(r0, r1, c);
+    FTEST();
+    here = _jit->pc.w;
+    disp = ((i0 - here) >> 2) - 2;
+    B_N(disp, _R0_REGNO);
+    NOP();
+    return here;
+}
+
+/* Immediate form of the double-precision compare-and-branch: the constant
+ * *i1 is loaded into a temporary FPR (requested with jit_class_nospill),
+ * compared against `r0' with condition `c', and followed by FTEST, a B_N
+ * towards `i0' and a NOP.  The branch operand is the word distance from
+ * the branch to `i0', minus 2.
+ * Returns the value of _jit->pc.w sampled just before the branch is
+ * emitted (presumably so the caller can patch the branch later). */
+static jit_word_t
+_bcmpi_d(jit_state_t *_jit, jit_word_t c,
+        jit_word_t i0, jit_int32_t r0, jit_float64_t *i1)
+{
+    jit_word_t          here;
+    jit_word_t          disp;
+    jit_int32_t         tmp;
+
+    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_d(rn(tmp), i1);
+    FCMP_D_(r0, rn(tmp), c);
+    FTEST();
+    here = _jit->pc.w;
+    disp = ((i0 - here) >> 2) - 2;
+    B_N(disp, _R0_REGNO);
+    NOP();
+    jit_unget_reg(tmp);
+    return here;
+}
+
+/* Fetch the next double vararg: `r1' holds the vararg pointer and is
+ * updated in place, `r0' receives the loaded double.  Only valid inside a
+ * function flagged with jit_call_varargs. */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Drop the low three bits of the pointer: r1 -= (r1 & 7). */
+    tmp = jit_get_reg(jit_class_gpr);
+    andi(rn(tmp), r1, 7);
+    subr(r1, r1, rn(tmp));
+    jit_unget_reg(tmp);
+
+    /* Move the pointer down by the size of one double. */
+    subi(r1, r1, 8);
+
+    /* Read the argument at the updated pointer. */
+    ldr_d(r0, r1);
+}
+#endif
diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c
new file mode 100644 (file)
index 0000000..3c04f63
--- /dev/null
@@ -0,0 +1,402 @@
+
+#if __WORDSIZE == 32
+#define JIT_INSTR_MAX 64
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    64,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    0, /* va_start */
+    0, /* va_arg */
+    0, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    16,        /* rsbi */
+    28,        /* mulr */
+    36,        /* muli */
+    40,        /* qmulr */
+    44,        /* qmuli */
+    32,        /* qmulr_u */
+    40,        /* qmuli_u */
+    36,        /* divr */
+    40,        /* divi */
+    36,        /* divr_u */
+    40,        /* divi_u */
+    40,        /* qdivr */
+    40,        /* qdivi */
+    40,        /* qdivr_u */
+    40,        /* qdivi_u */
+    36,        /* remr */
+    40,        /* remi */
+    36,        /* remr_u */
+    40,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    12,        /* lshr */
+    4, /* lshi */
+    12,        /* rshr */
+    4, /* rshi */
+    12,        /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    8, /* ltr */
+    8, /* lti */
+    8, /* ltr_u */
+    8, /* lti_u */
+    8, /* ler */
+    8, /* lei */
+    8, /* ler_u */
+    8, /* lei_u */
+    8, /* eqr */
+    12,        /* eqi */
+    8, /* ger */
+    8, /* gei */
+    8, /* ger_u */
+    8, /* gei_u */
+    8, /* gtr */
+    8, /* gti */
+    8, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    8, /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_l */
+    8, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    8, /* ldi_uc */
+    8, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    8, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    8, /* ldxr_c */
+    8, /* ldxi_c */
+    4, /* ldxr_uc */
+    4, /* ldxi_uc */
+    8, /* ldxr_s */
+    8, /* ldxi_s */
+    4, /* ldxr_us */
+    4, /* ldxi_us */
+    4, /* ldxr_i */
+    4, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    8, /* sti_c */
+    4, /* str_s */
+    8, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    8, /* stxr_c */
+    4, /* stxi_c */
+    8, /* stxr_s */
+    4, /* stxi_s */
+    8, /* stxr_i */
+    4, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    12,        /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    12,        /* bmsr */
+    16,        /* bmsi */
+    12,        /* bmcr */
+    16,        /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    12,        /* bosubr */
+    16,        /* bosubi */
+    16,        /* bosubr_u */
+    20,        /* bosubi_u */
+    12,        /* bxsubr */
+    16,        /* bxsubi */
+    16,        /* bxsubr_u */
+    20,        /* bxsubi_u */
+    0, /* jmpr */
+    12,        /* jmpi */
+    40,        /* callr */
+    44,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    64,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    28,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    16,        /* eqr_f */
+    28,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    16,        /* gtr_f */
+    28,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    16,        /* unordr_f */
+    28,        /* unordi_f */
+    12,        /* truncr_f_i */
+    0, /* truncr_f_l */
+    12,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    4, /* ldxr_f */
+    4, /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    4, /* stxi_f */
+    16,        /* bltr_f */
+    28,        /* blti_f */
+    16,        /* bler_f */
+    28,        /* blei_f */
+    16,        /* beqr_f */
+    28,        /* beqi_f */
+    16,        /* bger_f */
+    28,        /* bgei_f */
+    16,        /* bgtr_f */
+    28,        /* bgti_f */
+    16,        /* bner_f */
+    28,        /* bnei_f */
+    16,        /* bunltr_f */
+    28,        /* bunlti_f */
+    16,        /* bunler_f */
+    28,        /* bunlei_f */
+    16,        /* buneqr_f */
+    28,        /* buneqi_f */
+    16,        /* bunger_f */
+    28,        /* bungei_f */
+    16,        /* bungtr_f */
+    28,        /* bungti_f */
+    16,        /* bltgtr_f */
+    28,        /* bltgti_f */
+    16,        /* bordr_f */
+    28,        /* bordi_f */
+    16,        /* bunordr_f */
+    28,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    36,        /* lti_d */
+    16,        /* ler_d */
+    36,        /* lei_d */
+    16,        /* eqr_d */
+    36,        /* eqi_d */
+    16,        /* ger_d */
+    36,        /* gei_d */
+    16,        /* gtr_d */
+    36,        /* gti_d */
+    16,        /* ner_d */
+    36,        /* nei_d */
+    16,        /* unltr_d */
+    36,        /* unlti_d */
+    16,        /* unler_d */
+    36,        /* unlei_d */
+    16,        /* uneqr_d */
+    36,        /* uneqi_d */
+    16,        /* unger_d */
+    36,        /* ungei_d */
+    16,        /* ungtr_d */
+    36,        /* ungti_d */
+    16,        /* ltgtr_d */
+    36,        /* ltgti_d */
+    16,        /* ordr_d */
+    36,        /* ordi_d */
+    16,        /* unordr_d */
+    36,        /* unordi_d */
+    12,        /* truncr_d_i */
+    0, /* truncr_d_l */
+    12,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    20,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    4, /* ldxr_d */
+    4, /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    4, /* stxi_d */
+    16,        /* bltr_d */
+    36,        /* blti_d */
+    16,        /* bler_d */
+    36,        /* blei_d */
+    16,        /* beqr_d */
+    36,        /* beqi_d */
+    16,        /* bger_d */
+    36,        /* bgei_d */
+    16,        /* bgtr_d */
+    36,        /* bgti_d */
+    16,        /* bner_d */
+    36,        /* bnei_d */
+    16,        /* bunltr_d */
+    36,        /* bunlti_d */
+    16,        /* bunler_d */
+    36,        /* bunlei_d */
+    16,        /* buneqr_d */
+    36,        /* buneqi_d */
+    16,        /* bunger_d */
+    36,        /* bungei_d */
+    16,        /* bungtr_d */
+    36,        /* bungti_d */
+    16,        /* bltgtr_d */
+    36,        /* bltgti_d */
+    16,        /* bordr_d */
+    36,        /* bordi_d */
+    16,        /* bunordr_d */
+    36,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c
new file mode 100644 (file)
index 0000000..21fe20c
--- /dev/null
@@ -0,0 +1,1585 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+/* True when argument slot index `i' is one of the first four, i.e. one of
+ * the slots this file maps onto argument registers instead of the stack */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 4)
+
+#define PROTO                          1
+#  include "jit_hppa-cpu.c"
+#  include "jit_hppa-fpu.c"
+#undef PROTO
+
+/*
+ * Types
+ */
+typedef jit_pointer_t  jit_va_list;
+
+/*
+ * Prototypes
+ */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+/*
+ * Initialization
+ */
+/* Register descriptor table, indexed by the port's register enumeration.
+ * Each entry pairs a spec (rc() class bits or'ed with the hardware register
+ * number) with the register name.  The closing _NOREG entry is a sentinel
+ * that _jit_init leaves out of _jitc->reglen. */
+jit_register_t         _rvs[] = {
+    { 0,                       "r0" },         /* Zero */
+    /* Not register starved, so avoid allocating r1 and rp,
+     * as they are implicit targets of ADDIL and B,L */
+    { 1,                       "r1" },         /* Scratch */
+    { 2,                       "rp" },         /* Return Pointer and scratch */
+    { rc(sav) | 3,             "r3" },
+    { 19,                      "r19" },        /* Linkage Table */
+    { rc(gpr) | 20,            "r20" },
+    { rc(gpr) | 21,            "r21" },
+    { rc(gpr) | 22,            "r22" },
+    { rc(gpr) | 29,            "ret1" },
+    { rc(gpr) | 28,            "ret0" },
+    /* JIT_Rx in callee save registers due to need to call
+     * functions to implement some instructions */
+    /* JIT_R0- JIT_R2 */
+    { rc(gpr) | rc(sav) | 4,   "r4" },
+    { rc(gpr) | rc(sav) | 5,   "r5" },
+    { rc(gpr) | rc(sav) | 6,   "r6" },
+    /* JIT_V0- JIT_V2 */
+    { rc(gpr) | rc(sav) | 7,   "r7" },
+    { rc(gpr) | rc(sav) | 8,   "r8" },
+    { rc(gpr) | rc(sav) | 9,   "r9" },
+    /* JIT_R3 */
+    { rc(gpr) | rc(sav) | 10,  "r10" },
+    /* JIT_V3+ */
+    { rc(gpr) | rc(sav) | 11,  "r11" },
+    { rc(gpr) | rc(sav) | 12,  "r12" },
+    { rc(gpr) | rc(sav) | 13,  "r13" },
+    { rc(gpr) | rc(sav) | 14,  "r14" },
+    { rc(gpr) | rc(sav) | 15,  "r15" },
+    { rc(gpr) | rc(sav) | 16,  "r16" },
+    { rc(gpr) | rc(sav) | 17,  "r17" },
+    { rc(gpr) | rc(sav) | 18,  "r18" },
+    /* Arguments; argument slot n is passed in _R26 - n, so the
+     * first argument is the last entry of this group */
+    { rc(gpr) | rc(arg) | 23,  "r23" },
+    { rc(gpr) | rc(arg) | 24,  "r24" },
+    { rc(gpr) | rc(arg) | 25,  "r25" },
+    { rc(gpr) | rc(arg) | 26,  "r26" },
+    { 27,                      "dp" },         /* Data Pointer */
+    { 30,                      "sp" },
+    { 31,                      "r31" },        /* Link Register */
+    { rc(fpr) | 31,            "fr31" },
+    { rc(fpr) | 30,            "fr30" },
+    { rc(fpr) | 29,            "fr29" },
+    { rc(fpr) | 28,            "fr28" },
+    { rc(fpr) | 27,            "fr27" },
+    { rc(fpr) | 26,            "fr26" },
+    { rc(fpr) | 25,            "fr25" },
+    { rc(fpr) | 24,            "fr24" },
+    { rc(fpr) | 23,            "fr23" },
+    { rc(fpr) | 22,            "fr22" },
+    { rc(fpr) | 11,            "fr11" },
+    { rc(fpr) | 10,            "fr10" },
+    { rc(fpr) | 9,             "fr9" },
+    { rc(fpr) | 8,             "fr8" },
+    /* Arguments */
+    { rc(fpr) | rc(arg) | 7,   "fr7" },
+    { rc(fpr) | rc(arg) | 6,   "fr6" },
+    { rc(fpr) | rc(arg) | 5,   "fr5" },
+    { rc(fpr) | rc(arg) | 4,   "fr4" },
+    /* Callee Saves */
+    { rc(fpr) | rc(sav) | 21,  "fr21" },
+    { rc(fpr) | rc(sav) | 20,  "fr20" },
+    { rc(fpr) | rc(sav) | 19,  "fr19" },
+    { rc(fpr) | rc(sav) | 18,  "fr18" },
+    { rc(fpr) | rc(sav) | 17,  "fr17" },
+    { rc(fpr) | rc(sav) | 16,  "fr16" },
+    { rc(fpr) | rc(sav) | 15,  "fr15" },
+    { rc(fpr) | rc(sav) | 14,  "fr14" },
+    { rc(fpr) | rc(sav) | 13,  "fr13" },
+    { rc(fpr) | rc(sav) | 12,  "fr12" },
+    { 0,                       "fpsr" },
+    { 1,                       "fpe2" },
+    { 2,                       "fpe4" },
+    { 3,                       "fpe6" },
+    { _NOREG,                  "<none>" },
+};
+
+/*
+ * Implementation
+ */
+void
+jit_get_cpu(void)
+{
+    /* FIXME Expecting PARISC 2.0, for PARISC 1.0 should not use fr16-fr31 */
+}
+
+void
+_jit_init(jit_state_t *_jit)
+{
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                offset;
+
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = params_offset;
+    _jitc->function->self.argi = _jitc->function->self.alen = 0;
+    /* float conversion */
+    _jitc->function->self.aoff = alloca_offset;
+    _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t                offset;
+    assert(_jitc->function);
+    switch (length) {
+       case 0: case 1:
+           break;
+       case 2:
+           _jitc->function->self.aoff = (_jitc->function->self.aoff + 1) & -2;
+           break;
+       case 3: case 4:
+           _jitc->function->self.aoff = (_jitc->function->self.aoff + 3) & -4;
+           break;
+       default:
+           _jitc->function->self.aoff = (_jitc->function->self.aoff + 7) & -8;
+           break;
+    }
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    offset = _jitc->function->self.aoff;
+    _jitc->function->self.aoff += length;
+    return (offset);
+}
+
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_addi(reg, v, 63);
+    jit_andi(reg, reg, -64);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+    jit_dec_synth();
+}
+
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *instr;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    /* jump to epilog */
+    instr = jit_jmpi();
+    jit_patch_at(instr, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    jit_movr(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    assert(u->code == jit_code_arg ||
+          u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+    return (jit_arg_reg_p(u->u.w));
+}
+
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare) {
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    else {
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+
+       _jitc->function->vagp = _jitc->function->self.argi;
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    _jitc->function->self.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else
+       offset = _jitc->function->self.size;
+    node = jit_new_node_ww(jit_code_arg, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    _jitc->function->self.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else
+       offset = _jitc->function->self.size;
+    node = jit_new_node_ww(jit_code_arg_f, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    if (_jitc->function->self.argi & 1) {
+       ++_jitc->function->self.argi;
+       _jitc->function->self.size -= sizeof(jit_word_t);
+    }
+    _jitc->function->self.size -= sizeof(jit_float64_t);
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       offset = _jitc->function->self.argi + 1;
+       _jitc->function->self.argi += 2;
+    }
+    else {
+       /* "Standard" initial value (-52) is unaligned */
+       if (_jitc->function->self.size & 7)
+           _jitc->function->self.size -= sizeof(jit_word_t);
+       offset = _jitc->function->self.size;
+    }
+    node = jit_new_node_ww(jit_code_arg_d, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (v->u.w >= 0)
+       jit_extr_c(u, _R26 - v->u.w);
+    else
+       jit_ldxi_c(u, JIT_FP, v->u.w + 3);
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (v->u.w >= 0)
+       jit_extr_uc(u, _R26 - v->u.w);
+    else
+       jit_ldxi_uc(u, JIT_FP, v->u.w + 3);
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (v->u.w >= 0)
+       jit_extr_s(u, _R26 - v->u.w);
+    else
+       jit_ldxi_s(u, JIT_FP, v->u.w + 2);
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (v->u.w >= 0)
+       jit_extr_us(u, _R26 - v->u.w);
+    else
+       jit_ldxi_us(u, JIT_FP, v->u.w + 2);
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (v->u.w >= 0)
+       jit_movr(u, _R26 - v->u.w);
+    else
+       jit_ldxi_i(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (v->u.w >= 0)
+       jit_movr(_R26 - v->u.w, u);
+    else
+       jit_stxi(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (v->u.w >= 0)
+       jit_movi(_R26 - v->u.w, u);
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    if (v->u.w >= 0)
+       jit_movr_f(u, _F4 - v->u.w);
+    else
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (v->u.w >= 0)
+       jit_movr_f(_F4 - v->u.w, u);
+    else
+       jit_stxi_f(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/*
+ * Store the immediate float `u' into the incoming argument described by
+ * node `v', which must be a jit_code_arg_f node.
+ * A non-negative v->u.w selects a float argument register, addressed as
+ * _F4 - v->u.w just like _jit_getarg_f and _jit_putargr_f do; a negative
+ * v->u.w is a JIT_FP relative offset, and the value is then materialized
+ * in a temporary fpr and stored to that stack slot.
+ */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (v->u.w >= 0)
+       /* float arguments are indexed from _F4, not from the gpr _R26 */
+       jit_movi_f(_F4 - v->u.w, u);
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (v->u.w >= 0)
+       jit_movr_d(u, _F4 - v->u.w);
+    else
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    if (v->u.w >= 0)
+       jit_movr_d(_F4 - v->u.w, u);
+    else
+       jit_stxi_d(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/*
+ * Store the immediate double `u' into the incoming argument described by
+ * node `v', which must be a jit_code_arg_d node.
+ * A non-negative v->u.w selects a float argument register, addressed as
+ * _F4 - v->u.w just like _jit_getarg_d and _jit_putargr_d do; a negative
+ * v->u.w is a JIT_FP relative offset, and the value is then materialized
+ * in a temporary fpr and stored to that stack slot.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    if (v->u.w >= 0)
+       /* double arguments are indexed from _F4, not from the gpr _R26 */
+       jit_movi_d(_F4 - v->u.w, u);
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr(_R26 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else
+       jit_stxi(_jitc->function->call.size + params_offset, JIT_SP, u);
+    jit_dec_synth();
+}
+
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(_R26 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size + params_offset, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr_f(_F4 - _jitc->function->call.argi, u);
+#if !defined(__hpux)
+       /* HP-UX appears to always pass float arguments in gpr registers */
+       if (_jitc->function->call.call & jit_call_varargs)
+#endif
+       {
+           jit_stxi_f(alloca_offset - 8, JIT_FP, u);
+           jit_ldxi(_R26 - _jitc->function->call.argi, JIT_FP,
+                    alloca_offset - 8);
+       }
+       ++_jitc->function->call.argi;
+    }
+    else
+       jit_stxi_f(_jitc->function->call.size + params_offset, JIT_SP, u);
+    jit_dec_synth();
+}
+
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_word_t);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi_f(_F4 - _jitc->function->call.argi, u);
+#if !defined(__hpux)
+       /* HP-UX appears to always pass float arguments in gpr registers */
+       if (_jitc->function->call.call & jit_call_varargs)
+#endif
+       {
+           jit_stxi_f(alloca_offset - 8, JIT_FP,
+                      _F4 - _jitc->function->call.argi);
+           jit_ldxi(_R26 - _jitc->function->call.argi,
+                    JIT_FP, alloca_offset - 8);
+       }
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(_jitc->function->call.size + params_offset, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_float64_t);
+    if (_jitc->function->call.argi & 1) {
+       ++_jitc->function->call.argi;
+       _jitc->function->call.size -= sizeof(jit_word_t);
+    }
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr_d(_F4 - (_jitc->function->call.argi + 1), u);
+#if !defined(__hpux)
+       /* HP-UX appears to always pass float arguments in gpr registers */
+       if (_jitc->function->call.call & jit_call_varargs)
+#endif
+       {
+           jit_stxi_d(alloca_offset - 8, JIT_FP, u);
+           jit_ldxi(_R26 - _jitc->function->call.argi,
+                    JIT_FP, alloca_offset - 4);
+           jit_ldxi(_R25 - _jitc->function->call.argi,
+                    JIT_FP, alloca_offset - 8);
+       }
+       _jitc->function->call.argi += 2;
+    }
+    else {
+       /* "Standard" initial value (-52) is unaligned */
+       if ((_jitc->function->call.size + params_offset) & 7)
+           _jitc->function->call.size -= sizeof(jit_word_t);
+       jit_stxi_d(_jitc->function->call.size + params_offset, JIT_SP, u);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    _jitc->function->call.size -= sizeof(jit_float64_t);
+    if (_jitc->function->call.argi & 1) {
+       ++_jitc->function->call.argi;
+       _jitc->function->call.size -= sizeof(jit_word_t);
+    }
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi_d(_F4 - (_jitc->function->call.argi + 1), u);
+#if !defined(__hpux)
+       /* HP-UX appears to always pass float arguments in gpr registers */
+       if (_jitc->function->call.call & jit_call_varargs)
+#endif
+       {
+           jit_stxi_d(alloca_offset - 8, JIT_FP,
+                      _F4 - (_jitc->function->call.argi + 1));
+           jit_ldxi(_R26 - _jitc->function->call.argi,
+                    JIT_FP, alloca_offset - 4);
+           jit_ldxi(_R25 - _jitc->function->call.argi,
+                    JIT_FP, alloca_offset - 8);
+       }
+       _jitc->function->call.argi += 2;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       if ((_jitc->function->call.size + params_offset) & 7)
+           _jitc->function->call.size -= sizeof(jit_word_t);
+       jit_stxi_d(_jitc->function->call.size + params_offset, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Tell whether register `regno' (an index into _rvs) is an argument
+ * register used by the call described by `node'.
+ * node->v.w bounds the gpr argument slots and node->w.w the fpr ones;
+ * _jit_finishr and _jit_finishi set both to call.argi.  Integer slot n is
+ * passed in _R26 - n and float slots count down from _F4 (see the
+ * pusharg* functions), so the slot number is recovered by subtracting
+ * regno from the first argument register of its class.
+ * Returns 1 when regno is such an argument register, 0 otherwise.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+       if (spec & jit_class_gpr) {
+           /* slots descend from _R26; counting up from _R23 would
+            * report r23 instead of r26 for a one argument call */
+           regno = _R26 - regno;
+           if (regno >= 0 && regno < node->v.w)
+               return (1);
+       }
+       else if (spec & jit_class_fpr) {
+           regno = _F4 - regno;
+           if (regno >= 0 && regno < node->w.w)
+               return (1);
+       }
+    }
+    return (0);
+}
+
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen > _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_callr(r0);
+    call->v.w = call->w.w = _jitc->function->call.argi;
+    _jitc->function->call.argi = _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->self.alen > _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_calli(i0);
+    node->v.w = node->w.w = _jitc->function->call.argi;
+    _jitc->function->call.argi = _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    jit_movr(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    jit_movr_f(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    jit_movr_d(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w),rn(node->v.w), node->w.w);  \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w,rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(ext, _f);
+               case_rr(ext, _d);
+               case_rr(ext, _d_f);
+               case_rr(ext, _f_d);
+               case_rr(abs, _f);
+               case_rr(abs, _d);
+               case_rr(neg, _f);
+               case_rr(neg, _d);
+               case_rr(sqrt, _f);
+               case_rr(sqrt, _d);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (!(temp->flag & jit_flag_patch)) {
+                       word = calli_p(temp->u.w);
+                       patch(word, node);
+                   }
+                   else
+                       calli(temp->u.w);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:
+           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrrw
+#undef case_rrw
+#undef case_rrrr
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_hppa-cpu.c"
+#  include "jit_hppa-fpu.c"
+#undef CODE
+
+/*
+ * Flush the caches for the code written between fptr and tptr.
+ *
+ * The range is first widened to page boundaries, using the page size
+ * reported by sysconf(_SC_PAGE_SIZE).  When __hppa is defined the data
+ * and instruction cache lines are flushed by hand; otherwise the work is
+ * delegated to __clear_cache().
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+    jit_word_t         f, t, s;
+    s = sysconf(_SC_PAGE_SIZE);
+    f = (jit_word_t)fptr & -s;
+    t = (((jit_word_t)tptr) + s - 1) & -s;
+#if defined(__hppa)
+/* --- parisc2.0.pdf ---
+               Programming Note
+
+The minimum spacing that is guaranteed to work for "self-modifying code" is
+shown in the code segment below. Since instruction prefetching is permitted,
+any data cache flushes must be separated from any instruction cache flushes
+by a SYNC. This will ensure that the "new" instruction will be written to
+memory prior to any attempts at prefetching it as an instruction.
+
+       LDIL    l%newinstr,rnew
+       LDW     r%newinstr(0,rnew),temp
+       LDIL    l%instr,rinstr
+       STW     temp,r%instr(0,rinstr)
+       FDC     r%instr(0,rinstr)
+       SYNC
+       FIC     r%instr(rinstr)
+       SYNC
+       instr   ...
+       (at least seven instructions)
+
+This sequence assumes a uniprocessor system. In a multiprocessor system,
+software must ensure no processor is executing code which is in the process
+of being modified.
+*/
+
+/*
+  Adapted from ffcall/trampoline/cache-hppa.c:__TR_clear_cache to
+loop over addresses as it is unlikely from and to addresses would fit in
+at most two cachelines.
+  FIXME A cache line can be 16, 32, or 64 bytes.
+ */
+    /*
+     * Copyright 1995-1997 Bruno Haible, <bruno@clisp.org>
+     *
+     * This is free software distributed under the GNU General Public Licence
+     * described in the file COPYING. Contact the author if you don't have this
+     * or can't live with it. There is ABSOLUTELY NO WARRANTY, explicit or implied,
+     * on this software.
+     */
+    {
+       jit_word_t      n = f + 32;
+       register int    u, v;
+       /* Two addresses, f and n, are flushed per pass.  In the loop
+        * update n is computed from f before f is advanced, so from the
+        * second pass on n is f - 32; every 32 byte step from the start
+        * up to and including t is still visited. */
+       for (; f <= t; n = f + 32, f += 64) {
+           /* The "memory" clobber keeps the compiler from moving stores
+            * to the code buffer across the flush. */
+           asm volatile ("fdc 0(0,%0)"
+                         "\n\t" "fdc 0(0,%1)"
+                         "\n\t" "sync"
+                         :
+                         : "r" (f), "r" (n)
+                         : "memory"
+                         );
+           /* %1 and %0 are written by mfsp/ldsid before the inputs
+            * %2, %3 and %4 are read, so both outputs must be marked
+            * early-clobber or they could share a register with an
+            * input. */
+           asm volatile ("mfsp %%sr0,%1"
+                         "\n\t" "ldsid (0,%4),%0"
+                         "\n\t" "mtsp %0,%%sr0"
+                         "\n\t" "fic 0(%%sr0,%2)"
+                         "\n\t" "fic 0(%%sr0,%3)"
+                         "\n\t" "sync"
+                         "\n\t" "mtsp %1,%%sr0"
+                         "\n\t" "nop"
+                         "\n\t" "nop"
+                         "\n\t" "nop"
+                         "\n\t" "nop"
+                         "\n\t" "nop"
+                         "\n\t" "nop"
+                         : "=&r" (u), "=&r" (v)
+                         : "r" (f), "r" (n), "r" (f)
+                         : "memory"
+                         );
+       }
+    }
+#else
+    /* This is supposed to work but appears to fail on multiprocessor systems */
+    __clear_cache((void *)f, (void *)t);
+#endif
+}
+
+/*
+ * Externally visible wrapper around ldxi(): the two register arguments are
+ * translated with rn() and i0 is passed through unchanged.
+ * NOTE(review): presumably loads r0 from the address r1 + i0 -- confirm
+ * against the ldxi definition.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Externally visible wrapper around stxi(): i0 is passed through unchanged
+ * and the two register arguments are translated with rn().
+ * NOTE(review): presumably stores r1 at the address r0 + i0 -- confirm
+ * against the stxi definition.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Externally visible wrapper around ldxi_d(); r0 is a floating point
+ * register (jit_fpr_t) and r1 a general register, both translated with
+ * rn(), while i0 is passed through unchanged.
+ * NOTE(review): presumably loads a double from r1 + i0 -- confirm against
+ * the ldxi_d definition.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Externally visible wrapper around stxi_d(); r0 is a general register and
+ * r1 a floating point register (jit_fpr_t), both translated with rn(),
+ * while i0 is passed through unchanged.
+ * NOTE(review): presumably stores a double at r0 + i0 -- confirm against
+ * the stxi_d definition.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Append an (instruction, node) pair to the pending patch table.
+ *
+ * instr is stored as the `inst' member of the new entry and node as its
+ * `node' member.  The node must carry jit_flag_node and the node it refers
+ * to must not have jit_flag_patch set yet.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_patch_t                *slot;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps the referenced node in v.n, any other code in u.n */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+
+    /* table full: make room for another 1024 entries */
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    slot = _jitc->patches.ptr + _jitc->patches.offset++;
+    slot->inst = instr;
+    slot->node = node;
+}
diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c
new file mode 100644 (file)
index 0000000..dec1465
--- /dev/null
@@ -0,0 +1,5402 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* Frame layout constants.
+ * NOTE(review): presumably byte counts -- confirm against the prolog code.
+ */
+#define stack_framesize                        144
+#define params_offset                  16
+/* Instruction type codes.  Every `s' suffixed code is the code just above
+ * it with INST_STOP (bit 0) or'ed in.
+ */
+#define INST_NONE                      0       /* should never be generated */
+#define INST_STOP                      1       /* or'ed if stop is required */
+#define INST_A                         2       /* M- or I- unit */
+#define INST_As                                3
+#define INST_I                         4
+#define INST_Is                                5
+#define INST_M                         6
+#define INST_Ms                                7
+#define INST_F                         8
+#define INST_Fs                                9
+#define INST_B                         10
+#define INST_Bs                                11
+#define INST_L                         12
+#define INST_Ls                                13
+#define INST_X                         14
+#define INST_Xs                                15
+
+/* Data and instructions are referenced by 64-bit addresses. Instructions
+ * are stored in memory in little endian byte order, in which the least
+ * significant byte appears in the lowest addressed byte of a memory
+ * location. For data, modes for both big and little endian byte order are
+ * supported and can be controlled by a bit in the User Mask Register.
+ */
+/* il() stores one value through _jit->pc.ul and advances that pointer.
+ *
+ * set_bundle()/get_bundle() pack and unpack the two words p[0] (l) and
+ * p[1] (h) of a bundle, as given by the shifts and masks below:
+ *     l bits  0..4    tm
+ *     l bits  5..45   s0 (41 bits)
+ *     l bits 46..63   low 18 bits of s1
+ *     h bits  0..22   high 23 bits of s1
+ *     h bits 23..63   s2 (41 bits)
+ * Both words go through byte_swap_if_big_endian() on the way to and from
+ * memory.  The macro arguments are not parenthesized in the expansion, so
+ * callers should pass plain variables.
+ */
+#define il(ii)                         *_jit->pc.ul++ = ii
+#define set_bundle(p, l, h, tm, s0, s1, s2)                            \
+    do {                                                               \
+       l = tm | ((s0 & 0x1ffffffffffL) << 5L) | (s1 << 46L);           \
+       h = ((s1 >> 18L) & 0x7fffffLL) | (s2 << 23L);                   \
+       p[0] = byte_swap_if_big_endian(l);                              \
+       p[1] = byte_swap_if_big_endian(h);                              \
+    } while (0)
+#define get_bundle(p, l, h, tm, s0, s1, s2)                            \
+    do {                                                               \
+       l = byte_swap_if_big_endian(p[0]);                              \
+       h = byte_swap_if_big_endian(p[1]);                              \
+       tm = l & 0x1f;                                                  \
+       s0 = (l >> 5L) & 0x1ffffffffffL;                                \
+       s1 = ((l >> 46L) & 0x3ffffL) | ((h & 0x7fffffL) << 18L);        \
+       s2 = (h >> 23L) & 0x1ffffffffffL;                               \
+    } while (0)
+
+/*  Need to insert a stop if a modified register would (or could)
+ *  be read in the same cycle.
+ */
+/* TSTREG1/TSTREG2 call stop() when any of the given registers has its bit
+ * set in _jitc->regs.  TSTPRED does the same for bit p0 of _jitc->pred,
+ * and never stops for p0 == 0.
+ */
+#define TSTREG1(r0)                                                    \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->regs, r0))                        \
+           stop();                                                     \
+    } while (0)
+#define TSTREG2(r0, r1)                                                        \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->regs, r0) ||                      \
+           jit_regset_tstbit(&_jitc->regs, r1))                        \
+           stop();                                                     \
+    } while (0)
+#define TSTPRED(p0)                                                    \
+    do {                                                               \
+       if (p0 && (_jitc->pred & (1 << p0)))                            \
+           stop();                                                     \
+    } while (0)
+/* Record register was modified */
+#define SETREG(r0)             jit_regset_setbit(&_jitc->regs, r0)
+
+/* Avoid using constants in macros and code */
+/* Names for general registers 0 to 43; the value of each enumerator is the
+ * register number.
+ */
+typedef enum {
+    GR_0,              GR_1,           GR_2,           GR_3,
+    GR_4,              GR_5,           GR_6,           GR_7,
+    GR_8,              GR_9,           GR_10,          GR_11,
+    GR_12,             GR_13,          GR_14,          GR_15,
+    GR_16,             GR_17,          GR_18,          GR_19,
+    GR_20,             GR_21,          GR_22,          GR_23,
+    GR_24,             GR_25,          GR_26,          GR_27,
+    GR_28,             GR_29,          GR_30,          GR_31,
+    GR_32,             GR_33,          GR_34,          GR_35,
+    GR_36,             GR_37,          GR_38,          GR_39,
+    GR_40,             GR_41,          GR_42,          GR_43,
+    /* GR_44...GR_127 */
+} gr_reg_t;
+
+/* Names for predicate registers 0 to 15; the value of each enumerator is
+ * the register number.
+ */
+typedef enum {
+    PR_0,              /* constant - always 1 */
+    /* p1-p5            -  preserved */
+    PR_1,              PR_2,           PR_3,           PR_4,
+    PR_5,
+    /* p6-p15           - scratch */
+    PR_6,              PR_7,           PR_8,           PR_9,
+    PR_10,             PR_11,          PR_12,          PR_13,
+    PR_14,             PR_15,
+    /* p16-...          - preserved - rotating */
+} pr_reg_t;
+
+/* Names for branch registers 0 to 7; the value of each enumerator is the
+ * register number.
+ */
+typedef enum {
+    BR_0,              /* scratch - Return link */
+    /* b1-b5            - preserved */
+    BR_1,              BR_2,           BR_3,           BR_4,
+    BR_5,
+    /* b6-b7           - scratch */
+    BR_6,              BR_7
+} br_reg_t;
+
+/* Names for application registers 0 to 66; the value of each enumerator is
+ * the register number (four per row, so AR_RSC is 16, AR_CCV is 32 and
+ * AR_PFS is 64).  Registers without a mnemonic are named AR_<number> to
+ * keep the numbering contiguous.
+ */
+typedef enum {
+    AR_KR0,            AR_KR1,         AR_KR2,         AR_KR3,
+    AR_KR4,            AR_KR5,         AR_KR6,         AR_KR7,
+    AR_8,              AR_9,           AR_10,          AR_11,
+    AR_12,             AR_13,          AR_14,          AR_15,
+    AR_RSC,            AR_BSP,         AR_BSPSTORE,    AR_RNAT,
+    AR_20,             AR_FCR,         AR_22,          AR_23,
+    AR_EFLAG,          AR_CSD,         AR_SSD,         AR_CFLG,
+    AR_FSR,            AR_FIR,         AR_FDR,         AR_31,
+    AR_CCV,            AR_33,          AR_34,          AR_35,
+    AR_UNAT,           AR_37,          AR_38,          AR_39,
+    AR_FPSR,           AR_41,          AR_42,          AR_43,
+    AR_ITC,            AR_RUC,         AR_46,          AR_47,
+    AR_48,             AR_49,          AR_50,          AR_51,
+    AR_52,             AR_53,          AR_54,          AR_55,
+    AR_56,             AR_57,          AR_58,          AR_59,
+    AR_60,             AR_61,          AR_62,          AR_63,
+    AR_PFS,            AR_LC,          AR_EC,
+    /* AR_67 ... AR_127 */
+} ar_reg_t;
+
+/* The 32 bundle template codes, 0x00 to 0x1f, in order.  TM_ILL_xx entries
+ * are placeholders named after their hexadecimal value.
+ * NOTE(review): the three letters presumably name the unit of each slot
+ * and a trailing `s' (instead of `_') a stop after that slot -- confirm
+ * against the architecture manual.
+ */
+typedef enum {
+    TM_M_I_I_, TM_M_I_Is,      TM_M_IsI_,      TM_M_IsIs,
+    TM_M_L_X_, TM_M_L_Xs,      TM_ILL_06,      TM_ILL_07,
+    TM_M_M_I_, TM_M_M_Is,      TM_MsM_I_,      TM_MsM_Is,
+    TM_M_F_I_, TM_M_F_Is,      TM_M_M_F_,      TM_M_M_Fs,
+    TM_M_I_B_, TM_M_I_Bs,      TM_M_B_B_,      TM_M_B_Bs,
+    TM_ILL_14, TM_ILL_15,      TM_B_B_B_,      TM_B_B_Bs,
+    TM_M_M_B_, TM_M_M_Bs,      TM_ILL_1A,      TM_ILL_1B,
+    TM_M_F_B_, TM_M_F_Bs,      TM_ILL_1E,      TM_ILL_1F,
+} template_t;
+
+/* Hint and completer encodings used as instruction field values.
+ * NOTE(review): the per-group descriptions below are inferred from the
+ * names only -- confirm against the encoders that consume them.
+ */
+/* presumably the "whether" hint of moves to branch registers */
+#define MWH_SPTK                       0
+#define MWH_NONE                       1
+#define MWH_DPTK                       2
+
+/* presumably the importance hint */
+#define IH_NONE                                0
+#define IH_IMP                         1
+
+/* presumably load locality hints */
+#define LD_NONE                                0
+#define LD_NT1                         1
+#define LD_NTA                         3
+
+/* presumably store locality hints */
+#define ST_NONE                                0
+#define ST_NTA                         3
+
+/* presumably line prefetch locality hints */
+#define LF_NONE                                0
+#define LF_NT1                         1
+#define LF_NT2                         2
+#define LF_NTA                         3
+
+/* presumably branch prefetch hints */
+#define BR_PH_FEW                      0
+#define BR_PH_MANY                     1
+
+/* presumably branch "whether" hints */
+#define BR_BWH_SPTK                    0
+#define BR_BWH_SPNT                    1
+#define BR_BWH_DPTK                    2
+#define BR_BWH_DPNT                    3
+
+/* each value is the matching BR_BWH_* value shifted left by one, or'ed
+ * with 1 */
+#define BRI_BWH_SPTK                   1
+#define BRI_BWH_SPNT                   3
+#define BRI_BWH_DPTK                   5
+#define BRI_BWH_DPNT                   7
+
+/* presumably branch cache deallocation hints */
+#define BR_DH_NONE                     0
+#define BR_DH_CLR                      1
+
+/* presumably branch importance hints */
+#define BR_IH_NONE                     0
+#define BR_IH_IMP                      1
+
+/* presumably "whether" hints of IP relative branch predict */
+#define BR_IPWH_SPTK                   0
+#define BR_IPWH_LOOP                   1
+#define BR_IPWH_DPTK                   2
+#define BR_IPWH_EXIT                   3
+
+/* presumably "whether" hints of indirect branch predict */
+#define BR_INDWH_SPTK                  0
+#define BR_INDWH_DPTK                  2
+
+/* presumably permutation selectors of the mux instruction; note the
+ * values are not listed in numeric order */
+#define MUX_BRCST                      0
+#define MUX_REV                                11
+#define MUX_MIX                                8
+#define MUX_SHUF                       9
+#define MUX_ALT                                10
+
+/* The unsuffixed load/store names are aliases of the `_l' variants.
+ * sti() takes (immediate, register) like ldi() takes (register, immediate),
+ * so it maps to sti_l() and not to the register-register str_l().
+ */
+#define ldr(r0,r1)                     ldr_l(r0,r1)
+#define ldi(r0,i0)                     ldi_l(r0,i0)
+#define str(r0,r1)                     str_l(r0,r1)
+#define sti(i0,r0)                     sti_l(i0,r0)
+#define ldxr(r0,r1,r2)                 ldxr_l(r0,r1,r2)
+#define ldxi(r0,r1,i0)                 ldxi_l(r0,r1,i0)
+#define stxr(r0,r1,r2)                 stxr_l(r0,r1,r2)
+#define stxi(i0,r0,r1)                 stxi_l(i0,r0,r1)
+
+/* Fall back to the compiler builtin when HAVE_FFSL is not set. */
+#if !HAVE_FFSL
+#  define ffsl(l)                      __builtin_ffsl(l)
+#endif
+
+/* libgcc */
+/* Signed/unsigned division and remainder helpers.  With __GNUC__ they are
+ * declared extern; otherwise they are declared static.
+ * NOTE(review): the static case presumably relies on definitions later in
+ * this file -- confirm.
+ */
+#if defined(__GNUC__)
+extern long __divdi3(long,long);
+extern unsigned long __udivdi3(unsigned long,unsigned long);
+extern long __moddi3(long,long);
+extern unsigned long __umoddi3(unsigned long,unsigned long);
+#else
+static long __divdi3(long,long);
+static unsigned long __udivdi3(unsigned long,unsigned long);
+static long __moddi3(long,long);
+static unsigned long __umoddi3(unsigned long,unsigned long);
+#endif
+/* Low-level emitter entry points.  Each lowercase macro forwards to the
+ * `_`-prefixed function and supplies the `_jit` state pointer, which must
+ * therefore be in scope wherever the macro is used. */
+#define out(n,tm,s0,s1,s2)             _out(_jit,n,tm,s0,s1,s2)
+static void _out(jit_state_t*,int,int,jit_word_t,jit_word_t,jit_word_t);
+#define stop()                         _stop(_jit)
+static void _stop(jit_state_t*);
+#define sync()                         _sync(_jit)
+static void _sync(jit_state_t*);
+#define flush()                                _flush(_jit)
+static void _flush(jit_state_t*);
+#define inst(w, t)                     _inst(_jit, w, t)
+static void _inst(jit_state_t*, jit_word_t, jit_uint8_t);
+/* Instruction-format encoders.  Each format macro (A1, I1, M1, B1, X1, ...)
+ * calls its `_`-prefixed function with the in-scope `_jit` and 0 as the
+ * second argument; the `*_p` opcode macros call the function directly to
+ * pass another value in that slot (presumably the qualifying predicate --
+ * TODO confirm against the function definitions). */
+#define A1(x4,x2,r3,r2,r1)             _A1(_jit,0,x4,x2,r3,r2,r1)
+static void _A1(jit_state_t*, jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A2(x4,ct,r3,r2,r1)             A1(x4,ct,r3,r2,r1)
+#define A3(x4,x2,r3,im,r1)             _A3(_jit,0,x4,x2,r3,im,r1)
+static void _A3(jit_state_t*, jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A4(x2a,r3,im,r1)               _A4(_jit,0,x2a,r3,im,r1)
+static void _A4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A5(r3,im,r1)                   _A5(_jit,0,r3,im,r1)
+static void _A5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define A6(o,x2,ta,p2,r3,r2,c,p1)      _A6(_jit,0,o,x2,ta,p2,r3,r2,c,p1)
+static void _A6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A7(o,x2,ta,p2,r3,c,p1)         _A7(_jit,0,o,x2,ta,p2,r3,c,p1)
+static void _A7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define A8(o,x2,ta,p2,r3,im,c,p1)      _A8(_jit,0,o,x2,ta,p2,r3,im,c,p1)
+static void _A8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A9(za,zb,x4,x2,r3,r2,r1)       _A9(_jit,0,za,zb,x4,x2,r3,r2,r1)
+static void _A9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define A10(x4,ct,r3,r2,r1)            A9(0,1,x4,ct,r3,r2,r1)
+#define I1(ct,x2,r3,r2,r1)             _I1(_jit,0,ct,x2,r3,r2,r1)
+static void _I1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I2(za,x2a,zb,x2c,x2b,r3,r2,r1) _I2(_jit,0,za,x2a,zb,x2c,x2b,r3,r2,r1)
+static void _I2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I3(mbt,r2,r1)                  _I3(_jit,0,mbt,r2,r1)
+static void _I3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t)
+#if __BYTE_ORDER == __BIG_ENDIAN
+    maybe_unused
+#endif
+    ;
+#define I4(mht,r2,r1)                  _I4(_jit,0,mht,r2,r1)
+static void _I4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I5(za,zb,x2b,r3,r2,r1)         _I5(_jit,0,za,zb,x2b,r3,r2,r1)
+static void _I5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I6(za,zb,x2b,r3,ct,r1)         _I6(_jit,0,za,zb,x2b,r3,ct,r1)
+static void _I6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I7(za,zb,r3,r2,r1)             _I7(_jit,0,za,zb,r3,r2,r1)
+static void _I7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I8(za,zb,ct,r2,r1)             _I8(_jit,0,za,zb,ct,r2,r1)
+static void _I8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I9(x2c,r3,r1)                  _I9(_jit,0,x2c,r3,r1)
+static void _I9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I10(ct,r3,r2,r1)               _I10(_jit,0,ct,r3,r2,r1)
+static void _I10(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I11(len,r3,pos,y,r1)           _I11(_jit,0,len,r3,pos,y,r1)
+static void _I11(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I12(len,pos,r2,r1)             _I12(_jit,0,len,pos,r2,r1)
+static void _I12(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I13(len,pos,im,r1)             _I13(_jit,0,len,pos,im,r1)
+static void _I13(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I14(s,len,r3,pos,r1)           _I14(_jit,0,s,len,r3,pos,r1)
+static void _I14(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I15(pos,len,r3,r2,r1)          _I15(_jit,0,pos,len,r3,r2,r1)
+static void _I15(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I16(tb,ta,p2,r3,pos,c,p1)      _I16(_jit,0,tb,ta,p2,r3,pos,c,p1)
+static void _I16(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I17(tb,ta,p2,r3,c,p1)          _I17(_jit,0,tb,ta,p2,r3,c,p1)
+static void _I17(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define I18(im,y)                      _I18(_jit,0,im,y)
+static void _I18(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I19(im)                                _I19(_jit,0,im)
+static void _I19(jit_state_t*,jit_word_t,
+                jit_word_t)
+    maybe_unused;
+#define I20(r2,im)                     _I20(_jit,0,r2,im)
+static void _I20(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I21(im,ih,x,wh,r2,b1)          _I21(_jit,0,im,ih,x,wh,r2,b1)
+static void _I21(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I22(b2,r1)                     _I22(_jit,0,b2,r1)
+static void _I22(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I23(r2,im)                     _I23(_jit,0,r2,im)
+static void _I23(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I24(im)                                _I24(_jit,0,im)
+static void _I24(jit_state_t*,jit_word_t,
+                jit_word_t)
+    maybe_unused;
+#define I25(x6,r1)                     _I25(_jit,0,x6,r1)
+static void _I25(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I26(ar,r2)                     _I26(_jit,0,ar,r2)
+static void _I26(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I27(ar,im)                     _I27(_jit,0,ar,im)
+static void _I27(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I28(ar,r1)                     _I28(_jit,0,ar,r1)
+static void _I28(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define I29(x6,r3,r1)                  _I29(_jit,0,x6,r3,r1)
+static void _I29(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+/* NOTE(review): the macro takes (tb,ta,...) but forwards (ta,tb,...), whereas
+ * I16 and I17 forward tb,ta in the order they are received -- confirm this
+ * matches the parameter order of _I30's definition. */
+#define I30(tb,ta,p2,im,c,p1)          _I30(_jit,0,ta,tb,p2,im,c,p1)
+static void _I30(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M1(x6,ht,x,r3,r1)              _M1(_jit,0,x6,ht,x,r3,r1)
+static void _M1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M2(x6,ht,r3,r2,r1)             _M2(_jit,0,x6,ht,r3,r2,r1)
+static void _M2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M3(x6,ht,r3,im,r1)             _M3(_jit,0,x6,ht,r3,im,r1)
+static void _M3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M5(x6,ht,r3,r2,im)             _M5(_jit,0,x6,ht,r3,r2,im)
+static void _M5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M6(x6,ht,x,r3,r2)              _M6(_jit,0,x6,ht,x,r3,r2)
+static void _M6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M13(x6,ht,r3,f2)               _M13(_jit,0,x6,ht,r3,f2)
+static void _M13(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M14(x6,ht,r3,r2)               _M14(_jit,0,x6,ht,r3,r2)
+static void _M14(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M15(x6,ht,r3,im)               _M15(_jit,0,x6,ht,r3,im)
+static void _M15(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M16(x6,ht,r3,r2,r1)            _M16(_jit,0,x6,ht,r3,r2,r1)
+static void _M16(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M17(x6,ht,r3,im,r1)            _M17(_jit,0,x6,ht,r3,im,r1)
+static void _M17(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M20(r2,im)                     M20x(0x1,r2,im)
+#define M20x(x3,r2,im)                 _M20x(_jit,0,x3,r2,im)
+static void _M20x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M22(x3,im,r1)                  M22x(x3,im,r1)
+#define M22x(x3,im,r1)                 _M22x(_jit,0,x3,im,r1)
+static void _M22x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M24(x2,x4)                     _M24(_jit,0,x2,x4)
+#define M25(x4)                                M24(0,x4)
+static void _M24(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M26(r1)                                M26x(2,r1)
+#define M26x(x4,r1)                    _M26x(_jit,0,x4,r1)
+static void _M26x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t)
+    maybe_unused;
+#define M28(x,r3)                      _M28(_jit,0,x,r3)
+static void _M28(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M29(ar,r2)                     _M29(_jit,0,ar,r2)
+static void _M29(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M30(ar,im)                     _M30(_jit,0,ar,im)
+static void _M30(jit_state_t*,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M31(ar,r1)                     _M31(_jit,0,ar,r1)
+static void _M31(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M32(cr,r2)                     _M32(_jit,0,cr,r2)
+static void _M32(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M33(cr,r1)                     _M33(_jit,0,cr,r1)
+static void _M33(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M34(sor,sol,sof,r1)            _M34(_jit,0,sor,sol,sof,r1)
+static void _M34(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M35(x6,r2)                     _M35(_jit,0,x6,r2)
+static void _M35(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M36(x6,r1)                     _M36(_jit,0,x6,r1)
+static void _M36(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M37(im)                                _M37(_jit,0,im)
+static void _M37(jit_state_t*,jit_word_t,
+                jit_word_t)
+    maybe_unused;
+#define M38(x6,r3,r2,r1)               _M38(_jit,0,x6,r3,r2,r1)
+static void _M38(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M39(x6,r3,im,r1)               _M39(_jit,0,x6,r3,im,r1)
+static void _M39(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M40(x6,r3,im)                  _M40(_jit,0,x6,r3,im)
+static void _M40(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M41(x6,r2)                     _M41(_jit,0,x6,r2)
+static void _M41(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M42(x6,r3,r2)                  _M42(_jit,0,x6,r3,r2)
+static void _M42(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M43(x6,r3,r1)                  _M43(_jit,0,x6,r3,r1)
+static void _M43(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M44(x4,im)                     _M44(_jit,0,x4,im)
+static void _M44(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define M45(x6,r3,r2)                  _M45(_jit,0,x6,r3,r2)
+static void _M45(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M46(x6,r3,r1)                  _M46(_jit,0,x6,r3,r1)
+#define M47(x6,r3)                     M46(x6,r3,0)
+static void _M46(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M48(y,im)                      _M48(_jit,0,y,im)
+static void _M48(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)
+    maybe_unused;
+#define B1(d,wh,im,p,tp)               _B1(_jit,0,d,wh,im,p,tp)
+#define B2(d,wh,im,p,tp)               B1(d,wh,im,p,tp)
+static void _B1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B3(d,wh,im,p,b)                        _B3(_jit,0,d,wh,im,p,b)
+static void _B3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define B4(d,wh,x6,b,p,tp)             _B4(_jit,0,d,wh,x6,b,p,tp)
+static void _B4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define B5(d,wh,b2,p,b1)               _B5(_jit,0,d,wh,b2,p,b1)
+static void _B5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B6(ih,im,tag,wh)               _B6(_jit,0,ih,im,tag,wh)
+static void _B6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define B7(ih,x6,b2,tag,wh)            _B7(_jit,0,ih,x6,b2,tag,wh)
+static void _B7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define B8(x6)                         _B8(_jit,0,x6)
+static void _B8(jit_state_t*,jit_word_t,
+               jit_word_t)
+    maybe_unused;
+#define B9(op,x6,im)                   _B9(_jit,0,op,x6,im)
+static void _B9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define X1(im)                         _X1(_jit,0,im)
+static void _X1(jit_state_t*,jit_word_t,
+               jit_word_t)
+    maybe_unused;
+#define X2(r1,im)                      _X2(_jit,0,r1,im)
+static void _X2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t);
+#define X3x(o,d,wh,p,tp,im)            _X3x(_jit,0,o,d,wh,p,tp,im)
+#define X3(d,wh,p,tp,im)               X3x(0xc,d,wh,p,tp,im)
+#define X4(d,wh,p,tp,im)               X3x(0xd,d,wh,p,tp,im)
+static void _X3x(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define X5(y,im)                       _X5(_jit,0,y,im)
+static void _X5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t)
+    maybe_unused;
+
+/* add */
+#define ADD(r1,r2,r3)                  A1(0,0,r3,r2,r1)
+#define ADD1(r1,r2,r3)                 A1(0,1,r3,r2,r1)
+#define ADDS(r1,im,r3)                 A4(2,r3,im,r1)
+#define ADDS_p(r1,im,r3,_p)            _A4(_jit,_p,2,r3,im,r1)
+#define ADDL(r1,im,r3)                 A5(r3,im,r1)
+#define ADDL_p(r1,im,r3,_p)            _A5(_jit,_p,r3,im,r1)
+/* addp4 */
+#define ADDP4(r1,r2,r3)                        A1(2,0,r3,r2,r1)
+#define ADDIP4(r1,im,r3)               A4(3,r3,im,r1)
+/* alloc */
+#define ALLOCR(r1,i,l,o,r)             M34((r)>>3,(i)+(l),(i)+(l)+(o),r1)
+#define ALLOC(r1,i,o)                  ALLOCR(r1,i,0,o,0)
+/* and */
+#define AND(r1,r2,r3)                  A1(3,0,r3,r2,r1)
+#define ANDI(r1,im,r3)                 A3(0xb,0,r3,im,r1)
+/* andcm */
+#define ANDCM(r1,r2,r3)                        A1(3,1,r3,r2,r1)
+#define ANDCMI(r1,im,r3)               A3(0xb,1,r3,im,r1)
+/* br */
+#define BRI(im)                                B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_FEW,0)
+#define BRI_COND(im,_p)                        _B1(_jit,_p,BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_FEW,0)
+#define BRI_WEXIT(im)                  B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,2)
+#define BRI_WTOP(im)                   B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,3)
+#define BRI_CALL(b,im)                 B3(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,b)
+#define BRI_CLOOP(im)                  B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,5)
+#define BRI_CEXIT(im)                  B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,6)
+#define BRI_CTOP(im)                   B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,7)
+#define BR_COND(b,_p)                  _B4(_jit,_p,BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_FEW,0)
+#define BR(b)                          B4(BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_FEW,0)
+#define BR_IA(b)                       B4(BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_MANY,1)
+#define BR_RET(b)                      B4(BR_DH_NONE,BR_BWH_SPTK,0x21,b,BR_PH_MANY,4)
+#define BR_CALL(b1,b2)                 B5(BR_DH_NONE,BRI_BWH_SPTK,b2,BR_PH_MANY,b1)
+/* break */
+#define BREAK_I(im)                    I19(im)
+#define BREAK_M(im)                    M37(im)
+#define BREAK_B(im)                    B9(0,0,im)
+#define BREAK_X(im)                    X1(im)
+/* brl */
+/* Long branches go through _X3x: X3 selects opcode 0xc and X4 opcode 0xd.
+ * BRL_COND has to call _X3x itself (there is no _X3 function) so that _p
+ * replaces the 0 the X3 macro passes as the second argument. */
+#define BRL(im)                                X3(BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,0,im)
+#define BRL_COND(im,_p)                        _X3x(_jit,_p,0xc,BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,0,im)
+#define BRL_CALL(b1,im)                        X4(BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,b1,im)
+/* brp */
+#define BRP(im,tag)                    B6(BR_IH_NONE,im,tag,BR_IPWH_SPTK)
+#define BRPI(b2,tag)                   B7(BR_IH_NONE,0x10,b2,tag,BR_INDWH_SPTK)
+#define BRPI_RET(b2,tag)               B7(BR_IH_NONE,0x11,b2,tag,BR_INDWH_SPTK)
+/* bsw */
+#define BSW_0()                                B8(0x0c)
+#define BSW_1()                                B8(0x0d)
+/* chk */
+#define CHK_S_I(r2,im)                 I20(r2,im)
+#define CHK_S_M(r2,im)                 M20(r2,im)
+#define CHK_A_NC(r1,im)                        M22(0x4,im,r1)
+#define CHK_A_CLR(r1,im)               M22(0x5,im,r1)
+/* clrrrb */
+#define CLRRRB()                       B8(0x04)
+#define CLRRRB_PR()                    B8(0x05)
+/* clz */
+#define CLZ(r1,r3)                     I9(3,r3,r1)
+/* cmp */
+#define CMP_LT(p1,p2,r2,r3)            A6(0xc,0,0,p2,r3,r2,0,p1)
+/* Same encoding as CMP_LT, but calls _A6 directly so that _p replaces the 0
+ * the A6 macro passes as the second argument (A6 itself takes 8 arguments). */
+#define CMP_LT_p(p1,p2,r2,r3,_p)       _A6(_jit,_p,0xc,0,0,p2,r3,r2,0,p1)
+#define CMP_LTU(p1,p2,r2,r3)           A6(0xd,0,0,p2,r3,r2,0,p1)
+#define CMP_EQ(p1,p2,r2,r3)            A6(0xe,0,0,p2,r3,r2,0,p1)
+#define CMP_LT_UNC(p1,p2,r2,r3)                A6(0xc,0,0,p2,r3,r2,1,p1)
+#define CMP_LTU_UNC(p1,p2,r2,r3)       A6(0xd,0,0,p2,r3,r2,1,p1)
+#define CMP_EQ_UNC(p1,p2,r2,r3)                A6(0xe,0,0,p2,r3,r2,1,p1)
+#define CMP_EQ_AND(p1,p2,r2,r3)                A6(0xc,0,1,p2,r3,r2,0,p1)
+#define CMP_EQ_OR(p1,p2,r2,r3)         A6(0xd,0,1,p2,r3,r2,0,p1)
+#define CMP_EQ_OR_ANDCM(p1,p2,r2,r3)   A6(0xe,0,1,p2,r3,r2,0,p1)
+#define CMP_NE_AND(p1,p2,r2,r3)                A6(0xc,0,1,p2,r3,r2,1,p1)
+#define CMP_NE_OR(p1,p2,r2,r3)         A6(0xd,0,1,p2,r3,r2,1,p1)
+#define CMP_NE_OR_ANDCM(p1,p2,r2,r3)   A6(0xe,0,1,p2,r3,r2,1,p1)
+#define CMPI_LT(p1,p2,im,r3)           A8(0xc,2,0,p2,r3,im,0,p1)
+#define CMPI_LTU(p1,p2,im,r3)          A8(0xd,2,0,p2,r3,im,0,p1)
+#define CMPI_EQ(p1,p2,im,r3)           A8(0xe,2,0,p2,r3,im,0,p1)
+#define CMPI_EQ_p(p1,p2,im,r3,_p)      _A8(_jit,_p,0xe,2,0,p2,r3,im,0,p1)
+#define CMPI_LT_UNC(p1,p2,im,r3)       A8(0xc,2,0,p2,r3,im,1,p1)
+#define CMPI_LTU_UNC(p1,p2,im,r3)      A8(0xd,2,0,p2,r3,im,1,p1)
+#define CMPI_EQ_UNC(p1,p2,im,r3)       A8(0xe,2,0,p2,r3,im,1,p1)
+#define CMPI_EQ_AND(p1,p2,im,r3)       A8(0xc,2,1,p2,r3,im,0,p1)
+#define CMPI_EQ_OR(p1,p2,im,r3)                A8(0xd,2,1,p2,r3,im,0,p1)
+#define CMPI_EQ_ANDCM(p1,p2,im,r3)     A8(0xe,2,1,p2,r3,im,0,p1)
+#define CMPI_NE_AND(p1,p2,im,r3)       A8(0xc,2,1,p2,r3,im,1,p1)
+#define CMPI_NE_OR(p1,p2,im,r3)                A8(0xd,2,1,p2,r3,im,1,p1)
+#define CMPI_NE_ANDCM(p1,p2,im,r3)     A8(0xe,2,1,p2,r3,im,1,p1)
+#define ZCMP_GT_AND(p1,p2,r3)          A7(0xc,0,0,p2,r3,0,p1)
+#define ZCMP_GT_OR(p1,p2,r3)           A7(0xd,0,0,p2,r3,0,p1)
+#define ZCMP_GT_ANDCM(p1,p2,r3)                A7(0xe,0,0,p2,r3,0,p1)
+#define ZCMP_LE_AND(p1,p2,r3)          A7(0xc,0,0,p2,r3,1,p1)
+#define ZCMP_LE_OR(p1,p2,r3)           A7(0xd,0,0,p2,r3,1,p1)
+#define ZCMP_LE_ANDCM(p1,p2,r3)                A7(0xe,0,0,p2,r3,1,p1)
+#define ZCMP_GE_AND(p1,p2,r3)          A7(0xc,0,1,p2,r3,0,p1)
+#define ZCMP_GE_OR(p1,p2,r3)           A7(0xd,0,1,p2,r3,0,p1)
+#define ZCMP_GE_ANDCM(p1,p2,r3)                A7(0xe,0,1,p2,r3,0,p1)
+#define ZCMP_LT_AND(p1,p2,r3)          A7(0xc,0,1,p2,r3,1,p1)
+#define ZCMP_LT_OR(p1,p2,r3)           A7(0xd,0,1,p2,r3,1,p1)
+#define ZCMP_LT_ANDCM(p1,p2,r3)                A7(0xe,0,1,p2,r3,1,p1)
+/* cmp4 */
+#define CMP4_LT(p1,p2,r2,r3)           A6(0xc,1,0,p2,r3,r2,0,p1)
+#define CMP4_LTU(p1,p2,r2,r3)          A6(0xd,1,0,p2,r3,r2,0,p1)
+#define CMP4_EQ(p1,p2,r2,r3)           A6(0xe,1,0,p2,r3,r2,0,p1)
+#define CMP4_LT_UNC(p1,p2,r2,r3)       A6(0xc,1,0,p2,r3,r2,1,p1)
+#define CMP4_LTU_UNC(p1,p2,r2,r3)      A6(0xd,1,0,p2,r3,r2,1,p1)
+#define CMP4_EQ_UNC(p1,p2,r2,r3)       A6(0xe,1,0,p2,r3,r2,1,p1)
+#define CMP4_EQ_AND(p1,p2,r2,r3)       A6(0xc,1,1,p2,r3,r2,0,p1)
+#define CMP4_EQ_OR(p1,p2,r2,r3)                A6(0xd,1,1,p2,r3,r2,0,p1)
+#define CMP4_EQ_XOR(p1,p2,r2,r3)       A6(0xe,1,1,p2,r3,r2,0,p1)
+#define CMP4_NE_AND(p1,p2,r2,r3)       A6(0xc,1,1,p2,r3,r2,1,p1)
+#define CMP4_NE_OR(p1,p2,r2,r3)                A6(0xd,1,1,p2,r3,r2,1,p1)
+#define CMP4_NE_XOR(p1,p2,r2,r3)       A6(0xe,1,1,p2,r3,r2,1,p1)
+#define CMP4I_LT(p1,p2,im,r3)          A8(0xc,3,0,p2,r3,im,0,p1)
+#define CMP4I_LTU(p1,p2,im,r3)         A8(0xd,3,0,p2,r3,im,0,p1)
+#define CMP4I_EQ(p1,p2,im,r3)          A8(0xe,3,0,p2,r3,im,0,p1)
+#define CMP4I_LT_UNC(p1,p2,im,r3)      A8(0xc,3,0,p2,r3,im,1,p1)
+#define CMP4I_LTU_UNC(p1,p2,im,r3)     A8(0xd,3,0,p2,r3,im,1,p1)
+#define CMP4I_EQ_UNC(p1,p2,im,r3)      A8(0xe,3,0,p2,r3,im,1,p1)
+#define CMP4I_EQ_AND(p1,p2,im,r3)      A8(0xc,3,1,p2,r3,im,0,p1)
+#define CMP4I_EQ_OR(p1,p2,im,r3)       A8(0xd,3,1,p2,r3,im,0,p1)
+#define CMP4I_EQ_ANDCM(p1,p2,im,r3)    A8(0xe,3,1,p2,r3,im,0,p1)
+#define CMP4I_NE_AND(p1,p2,im,r3)      A8(0xc,3,1,p2,r3,im,1,p1)
+#define CMP4I_NE_OR(p1,p2,im,r3)       A8(0xd,3,1,p2,r3,im,1,p1)
+#define CMP4I_NE_ANDCM(p1,p2,im,r3)    A8(0xe,3,1,p2,r3,im,1,p1)
+#define ZCMP4_GT_AND(p1,p2,r3)         A7(0xc,1,0,p2,r3,0,p1)
+#define ZCMP4_GT_OR(p1,p2,r3)          A7(0xd,1,0,p2,r3,0,p1)
+#define ZCMP4_GT_ANDCM(p1,p2,r3)       A7(0xe,1,0,p2,r3,0,p1)
+#define ZCMP4_LE_AND(p1,p2,r3)         A7(0xc,1,0,p2,r3,1,p1)
+#define ZCMP4_LE_OR(p1,p2,r3)          A7(0xd,1,0,p2,r3,1,p1)
+#define ZCMP4_LE_ANDCM(p1,p2,r3)       A7(0xe,1,0,p2,r3,1,p1)
+#define ZCMP4_GE_AND(p1,p2,r3)         A7(0xc,1,1,p2,r3,0,p1)
+#define ZCMP4_GE_OR(p1,p2,r3)          A7(0xd,1,1,p2,r3,0,p1)
+#define ZCMP4_GE_ANDCM(p1,p2,r3)       A7(0xe,1,1,p2,r3,0,p1)
+#define ZCMP4_LT_AND(p1,p2,r3)         A7(0xc,1,1,p2,r3,1,p1)
+#define ZCMP4_LT_OR(p1,p2,r3)          A7(0xd,1,1,p2,r3,1,p1)
+#define ZCMP4_LT_ANDCM(p1,p2,r3)       A7(0xe,1,1,p2,r3,1,p1)
+/* cmpxchg */
+#define CMPXCHG1_ACQ(r1,r3,r2)         M16(0x00,LD_NONE,r3,r2,r1)
+#define CMPXCHG2_ACQ(r1,r3,r2)         M16(0x01,LD_NONE,r3,r2,r1)
+#define CMPXCHG4_ACQ(r1,r3,r2)         M16(0x02,LD_NONE,r3,r2,r1)
+#define CMPXCHG8_ACQ(r1,r3,r2)         M16(0x03,LD_NONE,r3,r2,r1)
+#define CMPXCHG1_REL(r1,r3,r2)         M16(0x04,LD_NONE,r3,r2,r1)
+#define CMPXCHG2_REL(r1,r3,r2)         M16(0x05,LD_NONE,r3,r2,r1)
+#define CMPXCHG4_REL(r1,r3,r2)         M16(0x06,LD_NONE,r3,r2,r1)
+#define CMPXCHG8_REL(r1,r3,r2)         M16(0x07,LD_NONE,r3,r2,r1)
+#define CMP8XCHG16_ACQ(r1,r3,r2)       M16(0x20,LD_NONE,r3,r2,r1)
+#define CMP8XCHG16_REL(r1,r3,r2)       M16(0x24,LD_NONE,r3,r2,r1)
+/* cover */
+#define COVER()                                B8(0x02)
+/* czx */
+#define CZX1_L(r1,r3)                  I29(0x18,r3,r1)
+#define CZX2_L(r1,r3)                  I29(0x19,r3,r1)
+#define CZX1_R(r1,r3)                  I29(0x1c,r3,r1)
+#define CZX2_R(r1,r3)                  I29(0x1d,r3,r1)
+/* dep */
+/* DEP_Z/DEPI_Z use formats I12/I13, DEPs/DEPu use I14 and DEP uses I15.
+ * DEPs and DEPu accept an r2 argument but never expand it; they differ only
+ * in the first I14 argument (1 vs 0). */
+#define DEP_Z(r1,r2,pos,len)           I12(len,pos,r2,r1)
+#define DEPI_Z(r1,im,pos,len)          I13(len,pos,im,r1)
+#define DEPs(r1,r2,r3,pos,len)         I14(1,len,r3,pos,r1)
+#define DEPu(r1,r2,r3,pos,len)         I14(0,len,r3,pos,r1)
+#define DEP(r1,r2,r3,pos,len)          I15(pos,len,r3,r2,r1)
+/* epc */
+#define EPC()                          B8(0x10)
+/* extr */
+#define EXTR(r1,r3,pos,len)            I11(len,r3,pos,1,r1)
+#define EXTR_U(r1,r3,pos,len)          I11(len,r3,pos,0,r1)
+/* fc */
+#define FC(r3)                         M28(0,r3)
+#define FC_I(r3)                       M28(1,r3)
+/* fetchadd */
+#define FETCHADD4_ACQ(r1,r3,im)                M17(0x12,LD_NONE,r3,im,r1)
+#define FETCHADD8_ACQ(r1,r3,im)                M17(0x13,LD_NONE,r3,im,r1)
+#define FETCHADD4_REL(r1,r3,im)                M17(0x16,LD_NONE,r3,im,r1)
+#define FETCHADD8_REL(r1,r3,im)                M17(0x17,LD_NONE,r3,im,r1)
+/* flushrs */
+#define FLUSHRS()                      M25(0xc)
+/* fwb */
+#define FWB()                          M24(2,0)
+/* hint */
+#define HINT_I(im)                     I18(im,1)
+#define HINT_M(im)                     M48(1,im)
+#define HINT_B(im)                     B9(2,1,im)
+#define HINT_X(im)                     X5(1,im)
+/* invala */
+#define INVALA()                       M24(1,0)
+#define INVALA_E(r1)                   M26(r1)
+/* itc */
+#define ITC_I(r2)                      M41(0x2f,r2)
+#define ITC_D(r2)                      M41(0x2e,r2)
+/* itr */
+#define ITR_I(r3,r2)                   M42(0x0f,r3,r2)
+#define ITR_D(r3,r2)                   M42(0x0e,r3,r2)
+/* ld */
+#define LD1(r1,r3)                     M1(0x00,LD_NONE,0,r3,r1)
+#define LD2(r1,r3)                     M1(0x01,LD_NONE,0,r3,r1)
+#define LD4(r1,r3)                     M1(0x02,LD_NONE,0,r3,r1)
+#define LD8(r1,r3)                     M1(0x03,LD_NONE,0,r3,r1)
+#define LD1_S(r1,r3)                   M1(0x04,LD_NONE,0,r3,r1)
+#define LD2_S(r1,r3)                   M1(0x05,LD_NONE,0,r3,r1)
+#define LD4_S(r1,r3)                   M1(0x06,LD_NONE,0,r3,r1)
+#define LD8_S(r1,r3)                   M1(0x07,LD_NONE,0,r3,r1)
+#define LD1_A(r1,r3)                   M1(0x08,LD_NONE,0,r3,r1)
+#define LD2_A(r1,r3)                   M1(0x09,LD_NONE,0,r3,r1)
+#define LD4_A(r1,r3)                   M1(0x0a,LD_NONE,0,r3,r1)
+#define LD8_A(r1,r3)                   M1(0x0b,LD_NONE,0,r3,r1)
+#define LD1_SA(r1,r3)                  M1(0x0c,LD_NONE,0,r3,r1)
+#define LD2_SA(r1,r3)                  M1(0x0d,LD_NONE,0,r3,r1)
+#define LD4_SA(r1,r3)                  M1(0x0e,LD_NONE,0,r3,r1)
+#define LD8_SA(r1,r3)                  M1(0x0f,LD_NONE,0,r3,r1)
+#define LD1_BIAS(r1,r3)                        M1(0x10,LD_NONE,0,r3,r1)
+#define LD2_BIAS(r1,r3)                        M1(0x11,LD_NONE,0,r3,r1)
+#define LD4_BIAS(r1,r3)                        M1(0x12,LD_NONE,0,r3,r1)
+#define LD8_BIAS(r1,r3)                        M1(0x13,LD_NONE,0,r3,r1)
+#define LD1_ACQ(r1,r3)                 M1(0x14,LD_NONE,0,r3,r1)
+#define LD2_ACQ(r1,r3)                 M1(0x15,LD_NONE,0,r3,r1)
+#define LD4_ACQ(r1,r3)                 M1(0x16,LD_NONE,0,r3,r1)
+#define LD8_ACQ(r1,r3)                 M1(0x17,LD_NONE,0,r3,r1)
+#define LD8_FILL(r1,r3)                        M1(0x1b,LD_NONE,0,r3,r1)
+#define LD1_C_CLR(r1,r3)               M1(0x20,LD_NONE,0,r3,r1)
+#define LD2_C_CLR(r1,r3)               M1(0x21,LD_NONE,0,r3,r1)
+#define LD4_C_CLR(r1,r3)               M1(0x22,LD_NONE,0,r3,r1)
+#define LD8_C_CLR(r1,r3)               M1(0x23,LD_NONE,0,r3,r1)
+#define LD1_C_NC(r1,r3)                        M1(0x24,LD_NONE,0,r3,r1)
+#define LD2_C_NC(r1,r3)                        M1(0x25,LD_NONE,0,r3,r1)
+#define LD4_C_NC(r1,r3)                        M1(0x26,LD_NONE,0,r3,r1)
+#define LD8_C_NC(r1,r3)                        M1(0x27,LD_NONE,0,r3,r1)
+#define LD1_C_CLR_ACQ(r1,r3)           M1(0x28,LD_NONE,0,r3,r1)
+#define LD2_C_CLR_ACQ(r1,r3)           M1(0x29,LD_NONE,0,r3,r1)
+#define LD4_C_CLR_ACQ(r1,r3)           M1(0x2a,LD_NONE,0,r3,r1)
+#define LD8_C_CLR_ACQ(r1,r3)           M1(0x2b,LD_NONE,0,r3,r1)
+#define LD16(r1,r3)                    M1(0x28,LD_NONE,1,r3,r1)
+#define LD16_ACQ(r1,r3)                        M1(0x2c,LD_NONE,1,r3,r1)
+#define LD1_inc(r1,r3,im)              M3(0x00,LD_NONE,r3,im,r1)
+#define LD2_inc(r1,r3,im)              M3(0x01,LD_NONE,r3,im,r1)
+#define LD4_inc(r1,r3,im)              M3(0x02,LD_NONE,r3,im,r1)
+#define LD8_inc(r1,r3,im)              M3(0x03,LD_NONE,r3,im,r1)
+#define LD1_S_inc(r1,r3,im)            M3(0x04,LD_NONE,r3,im,r1)
+#define LD2_S_inc(r1,r3,im)            M3(0x05,LD_NONE,r3,im,r1)
+#define LD4_S_inc(r1,r3,im)            M3(0x06,LD_NONE,r3,im,r1)
+#define LD8_S_inc(r1,r3,im)            M3(0x07,LD_NONE,r3,im,r1)
+#define LD1_A_inc(r1,r3,im)            M3(0x08,LD_NONE,r3,im,r1)
+#define LD2_A_inc(r1,r3,im)            M3(0x09,LD_NONE,r3,im,r1)
+#define LD4_A_inc(r1,r3,im)            M3(0x0a,LD_NONE,r3,im,r1)
+#define LD8_A_inc(r1,r3,im)            M3(0x0b,LD_NONE,r3,im,r1)
+#define LD1_SA_inc(r1,r3,im)           M3(0x0c,LD_NONE,r3,im,r1)
+#define LD2_SA_inc(r1,r3,im)           M3(0x0d,LD_NONE,r3,im,r1)
+#define LD4_SA_inc(r1,r3,im)           M3(0x0e,LD_NONE,r3,im,r1)
+#define LD8_SA_inc(r1,r3,im)           M3(0x0f,LD_NONE,r3,im,r1)
+#define LD1_BIAS_inc(r1,r3,im)         M3(0x10,LD_NONE,r3,im,r1)
+#define LD2_BIAS_inc(r1,r3,im)         M3(0x11,LD_NONE,r3,im,r1)
+#define LD4_BIAS_inc(r1,r3,im)         M3(0x12,LD_NONE,r3,im,r1)
+#define LD8_BIAS_inc(r1,r3,im)         M3(0x13,LD_NONE,r3,im,r1)
+#define LD1_ACQ_inc(r1,r3,im)          M3(0x14,LD_NONE,r3,im,r1)
+#define LD2_ACQ_inc(r1,r3,im)          M3(0x15,LD_NONE,r3,im,r1)
+#define LD4_ACQ_inc(r1,r3,im)          M3(0x16,LD_NONE,r3,im,r1)
+#define LD8_ACQ_inc(r1,r3,im)          M3(0x17,LD_NONE,r3,im,r1)
+/* Misspelled name of LD8_ACQ_inc, kept as an alias so existing users build. */
+#define LD8_AVQ_inc(r1,r3,im)          LD8_ACQ_inc(r1,r3,im)
+#define LD8_FILL_inc(r1,r3,im)         M3(0x1b,LD_NONE,r3,im,r1) /* only an 8-byte _FILL form is defined here (code 0x1b) */
+#define LD1_C_CLR_inc(r1,r3,im)                M3(0x20,LD_NONE,r3,im,r1) /* _C_CLR group: codes 0x20-0x23 for sizes 1, 2, 4, 8 */
+#define LD2_C_CLR_inc(r1,r3,im)                M3(0x21,LD_NONE,r3,im,r1)
+#define LD4_C_CLR_inc(r1,r3,im)                M3(0x22,LD_NONE,r3,im,r1)
+#define LD8_C_CLR_inc(r1,r3,im)                M3(0x23,LD_NONE,r3,im,r1)
+#define LD1_C_NC_inc(r1,r3,im)         M3(0x24,LD_NONE,r3,im,r1) /* _C_NC group: codes 0x24-0x27 */
+#define LD2_C_NC_inc(r1,r3,im)         M3(0x25,LD_NONE,r3,im,r1)
+#define LD4_C_NC_inc(r1,r3,im)         M3(0x26,LD_NONE,r3,im,r1)
+#define LD8_C_NC_inc(r1,r3,im)         M3(0x27,LD_NONE,r3,im,r1)
+#define LD1_C_CLR_ACQ_inc(r1,r3,im)    M3(0x28,LD_NONE,r3,im,r1) /* _C_CLR_ACQ group: codes 0x28-0x2b */
+#define LD2_C_CLR_ACQ_inc(r1,r3,im)    M3(0x29,LD_NONE,r3,im,r1)
+#define LD4_C_CLR_ACQ_inc(r1,r3,im)    M3(0x2a,LD_NONE,r3,im,r1)
+#define LD8_C_CLR_ACQ_inc(r1,r3,im)    M3(0x2b,LD_NONE,r3,im,r1)
+#define LDX1(r1,r3,r2)                 M2(0x00,LD_NONE,r3,r2,r1) /* LDXn*(r1,r3,r2): each forwards to M2(code,LD_NONE,r3,r2,r1); a register r2 sits where the *_inc macros pass an immediate */
+#define LDX2(r1,r3,r2)                 M2(0x01,LD_NONE,r3,r2,r1)
+#define LDX4(r1,r3,r2)                 M2(0x02,LD_NONE,r3,r2,r1)
+#define LDX8(r1,r3,r2)                 M2(0x03,LD_NONE,r3,r2,r1)
+#define LDX1_S(r1,r3,r2)               M2(0x04,LD_NONE,r3,r2,r1) /* _S group: codes 0x04-0x07 */
+#define LDX2_S(r1,r3,r2)               M2(0x05,LD_NONE,r3,r2,r1)
+#define LDX4_S(r1,r3,r2)               M2(0x06,LD_NONE,r3,r2,r1)
+#define LDX8_S(r1,r3,r2)               M2(0x07,LD_NONE,r3,r2,r1)
+#define LDX1_A(r1,r3,r2)               M2(0x08,LD_NONE,r3,r2,r1) /* _A group: codes 0x08-0x0b */
+#define LDX2_A(r1,r3,r2)               M2(0x09,LD_NONE,r3,r2,r1)
+#define LDX4_A(r1,r3,r2)               M2(0x0a,LD_NONE,r3,r2,r1)
+#define LDX8_A(r1,r3,r2)               M2(0x0b,LD_NONE,r3,r2,r1)
+#define LDX1_SA(r1,r3,r2)              M2(0x0c,LD_NONE,r3,r2,r1) /* _SA group: codes 0x0c-0x0f */
+#define LDX2_SA(r1,r3,r2)              M2(0x0d,LD_NONE,r3,r2,r1)
+#define LDX4_SA(r1,r3,r2)              M2(0x0e,LD_NONE,r3,r2,r1)
+#define LDX8_SA(r1,r3,r2)              M2(0x0f,LD_NONE,r3,r2,r1)
+#define LDX1_BIAS(r1,r3,r2)            M2(0x10,LD_NONE,r3,r2,r1) /* _BIAS group: codes 0x10-0x13 */
+#define LDX2_BIAS(r1,r3,r2)            M2(0x11,LD_NONE,r3,r2,r1)
+#define LDX4_BIAS(r1,r3,r2)            M2(0x12,LD_NONE,r3,r2,r1)
+#define LDX8_BIAS(r1,r3,r2)            M2(0x13,LD_NONE,r3,r2,r1)
+#define LDX1_ACQ(r1,r3,r2)             M2(0x14,LD_NONE,r3,r2,r1) /* _ACQ group: codes 0x14-0x17 */
+#define LDX2_ACQ(r1,r3,r2)             M2(0x15,LD_NONE,r3,r2,r1)
+#define LDX4_ACQ(r1,r3,r2)             M2(0x16,LD_NONE,r3,r2,r1)
+#define LDX8_ACQ(r1,r3,r2)             M2(0x17,LD_NONE,r3,r2,r1)
+#define LDX8_FILL(r1,r3,r2)            M2(0x1b,LD_NONE,r3,r2,r1) /* only an 8-byte _FILL form is defined (code 0x1b) */
+#define LDX1_C_CLR(r1,r3,r2)           M2(0x20,LD_NONE,r3,r2,r1) /* _C_CLR group: codes 0x20-0x23 */
+#define LDX2_C_CLR(r1,r3,r2)           M2(0x21,LD_NONE,r3,r2,r1)
+#define LDX4_C_CLR(r1,r3,r2)           M2(0x22,LD_NONE,r3,r2,r1)
+#define LDX8_C_CLR(r1,r3,r2)           M2(0x23,LD_NONE,r3,r2,r1)
+#define LDX1_C_NC(r1,r3,r2)            M2(0x24,LD_NONE,r3,r2,r1) /* _C_NC group: codes 0x24-0x27 */
+#define LDX2_C_NC(r1,r3,r2)            M2(0x25,LD_NONE,r3,r2,r1)
+#define LDX4_C_NC(r1,r3,r2)            M2(0x26,LD_NONE,r3,r2,r1)
+#define LDX8_C_NC(r1,r3,r2)            M2(0x27,LD_NONE,r3,r2,r1)
+#define LDX1_C_CLR_ACQ(r1,r3,r2)       M2(0x28,LD_NONE,r3,r2,r1) /* _C_CLR_ACQ group: codes 0x28-0x2b */
+#define LDX2_C_CLR_ACQ(r1,r3,r2)       M2(0x29,LD_NONE,r3,r2,r1)
+#define LDX4_C_CLR_ACQ(r1,r3,r2)       M2(0x2a,LD_NONE,r3,r2,r1)
+#define LDX8_C_CLR_ACQ(r1,r3,r2)       M2(0x2b,LD_NONE,r3,r2,r1)
+/* lfetch
+ * Three emitters are used: M13(code,LF_NONE,r3,GR_0) for the one-register
+ * forms, M14(code,LF_NONE,r3,r2) for the LXFETCH* forms that take a second
+ * register, and M15(code,LF_NONE,r3,im) for the LFETCHI* forms that take an
+ * immediate.  Codes 0x2c-0x2f select plain, _EXCL, _FAULT and _FAULT_EXCL.
+ * NOTE(review): this group defines no plain LFETCH(r3) with code 0x2c,
+ * although LXFETCH and LFETCHI both have one - confirm that is intended. */
+#define LFETCH_EXCL(r3)                        M13(0x2d,LF_NONE,r3,GR_0)
+#define LFETCH_FAULT(r3)               M13(0x2e,LF_NONE,r3,GR_0)
+#define LFETCH_FAULT_EXCL(r3)          M13(0x2f,LF_NONE,r3,GR_0)
+#define LXFETCH(r3,r2)                 M14(0x2c,LF_NONE,r3,r2)
+#define LXFETCH_EXCL(r3,r2)            M14(0x2d,LF_NONE,r3,r2)
+#define LXFETCH_FAULT(r3,r2)           M14(0x2e,LF_NONE,r3,r2)
+#define LXFETCH_FAULT_EXCL(r3,r2)      M14(0x2f,LF_NONE,r3,r2)
+#define LFETCHI(r3,im)                 M15(0x2c,LF_NONE,r3,im)
+#define LFETCHI_EXCL(r3,im)            M15(0x2d,LF_NONE,r3,im)
+#define LFETCHI_FAULT(r3,im)           M15(0x2e,LF_NONE,r3,im)
+#define LFETCHI_FAULT_EXCL(r3,im)      M15(0x2f,LF_NONE,r3,im)
+/* loadrs: takes no operands; forwards the fixed code 0xa to M25 */
+#define LOADRS()                       M25(0xa)
+/* mf: both forms call M24 with first argument 2; the second argument is
+ * 2 for MF and 3 for MF_A */
+#define MF()                           M24(2,2)
+#define MF_A()                         M24(2,3)
+/* mix: all forms call I2(a,2,c,2,e,r3,r2,r1).  (a,c) is (0,0), (0,1) or
+ * (1,0) for the 1, 2 and 4 variants; e is 0 for _R and 2 for _L.  Note that
+ * the macro operand order (r1,r2,r3) is reversed when forwarded. */
+#define MIX1_R(r1,r2,r3)               I2(0,2,0,2,0,r3,r2,r1)
+#define MIX2_R(r1,r2,r3)               I2(0,2,1,2,0,r3,r2,r1)
+#define MIX4_R(r1,r2,r3)               I2(1,2,0,2,0,r3,r2,r1)
+#define MIX1_L(r1,r2,r3)               I2(0,2,0,2,2,r3,r2,r1)
+#define MIX2_L(r1,r2,r3)               I2(0,2,1,2,2,r3,r2,r1)
+#define MIX4_L(r1,r2,r3)               I2(1,2,0,2,2,r3,r2,r1)
+/* mov - Move Application Register
+ * The _I_ macros forward to the I26/I27/I28 emitters and the _M_ macros to
+ * M29/M30/M31.  Name suffixes read destination_source: rn_ar reads an
+ * application register, ar_rn / ar_im write one from a register / immediate. */
+#define MOV_I_rn_ar(r1,ar)             I28(ar,r1)
+#define MOV_I_ar_rn(ar,r2)             I26(ar,r2)
+#define MOV_I_ar_im(ar,im)             I27(ar,im)
+/* NOTE(review): MOV_I_rn_ar forwards (ar,r1) but this one forwards (r1,ar);
+ * confirm the parameter order expected by M31. */
+#define MOV_M_rn_ar(r1,ar)             M31(r1,ar)
+/* Truncated legacy spelling of MOV_M_rn_ar, kept for existing users. */
+#define MOV_M_rn_a(r1,ar)              MOV_M_rn_ar(r1,ar)
+#define MOV_M_ar_rn(ar,r2)             M29(ar,r2)
+#define MOV_M_ar_im(ar,im)             M30(ar,im)
+/* mov - Move Branch Register
+ * MOV_rn_br forwards (b2,r1) to I22.  The *_tg forms forward to
+ * I21(tag,IH_NONE,x,MWH_NONE,r2,b1) with x = 0 for the plain form and
+ * x = 1 for the _RET form; MOV_br_rn is the plain form with tag 0. */
+#define MOV_rn_br(r1,b2)               I22(b2,r1)
+#define MOV_br_rn_tg(b1,r2,tag)                I21(tag,IH_NONE,0,MWH_NONE,r2,b1)
+#define MOV_br_rn(b1,r2)               MOV_br_rn_tg(b1,r2,0)
+#define MOV_RET_br_rn_tg(b1,r2,tag)    I21(tag,IH_NONE,1,MWH_NONE,r2,b1)
+/* mov - Move Control Register
+ * MOV_rn_cr forwards (cr,r1) to M33 and MOV_cr_rn forwards (cr,r2) to M32.
+ * NOTE(review): MOV_rn_cr takes the control register first even though its
+ * name puts rn first - confirm callers pass arguments in (cr,r1) order. */
+#define MOV_rn_cr(cr,r1)               M33(cr,r1)
+#define MOV_cr_rn(cr,r2)               M32(cr,r2)
+/* Legacy spelling of MOV_cr_rn ("rr" instead of "rn"), kept for existing users. */
+#define MOV_cr_rr(cr,r2)               MOV_cr_rn(cr,r2)
+/* mov - Move General Register: expressed as ADDS(r0,0,r1); the _p form
+ * passes an extra _p argument through to ADDS_p (presumably the qualifying
+ * predicate - confirm against ADDS_p) */
+#define MOV(r0,r1)                     ADDS(r0,0,r1)
+#define MOV_p(r0,r1,_p)                        ADDS_p(r0,0,r1,_p)
+/* mov - Move Immediate: expressed as ADDL(r1,im,GR_0); the _p form passes
+ * _p through to ADDL_p */
+#define MOVI(r1,im)                    ADDL(r1,im,GR_0)
+#define MOVI_p(r1,im,_p)               ADDL_p(r1,im,GR_0,_p)
+/* mov - Move Indirect Register
+ * MOV_rn_X(r1,r3) forwards to M43(code,r3,r1) with codes 0x10-0x15 and 0x17;
+ * MOV_X_rn(r3,r2) forwards to M42(code,r3,r2) with codes 0x00-0x05.
+ * There is no CPUID form on the M42 side. */
+#define MOV_rn_RR(r1,r3)               M43(0x10,r3,r1)
+#define MOV_rn_DBR(r1,r3)              M43(0x11,r3,r1)
+#define MOV_rn_IBR(r1,r3)              M43(0x12,r3,r1)
+#define MOV_rn_PKR(r1,r3)              M43(0x13,r3,r1)
+#define MOV_rn_PMC(r1,r3)              M43(0x14,r3,r1)
+#define MOV_rn_PMD(r1,r3)              M43(0x15,r3,r1)
+#define MOV_rn_CPUID(r1,r3)            M43(0x17,r3,r1)
+#define MOV_RR_rn(r3,r2)               M42(0x00,r3,r2)
+#define MOV_DBR_rn(r3,r2)              M42(0x01,r3,r2)
+#define MOV_IBR_rn(r3,r2)              M42(0x02,r3,r2)
+#define MOV_PKR_rn(r3,r2)              M42(0x03,r3,r2)
+#define MOV_PMC_rn(r3,r2)              M42(0x04,r3,r2)
+#define MOV_PMD_rn(r3,r2)              M42(0x05,r3,r2)
+/* mov - Move Instruction Pointer: I25 with code 0x30 */
+#define MOV_rn_ip(r1)                  I25(0x30,r1)
+/* mov - Move Predicates: the read form shares I25 (code 0x33); MOV_pr_rn
+ * forwards (r2,im) to I23 and MOVI_pr forwards a lone immediate to I24 */
+#define MOV_rn_pr(r1)                  I25(0x33,r1)
+#define MOV_pr_rn(r2,im)               I23(r2,im)
+#define MOVI_pr(im)                    I24(im)
+/* mov - Move Processor Status Register: M36 with code 0x25 for the rn form,
+ * M35 with code 0x2d for the psr_l form */
+#define MOV_rn_psr(r1)                 M36(0x25,r1)
+#define MOV_psr_l_rn(r2)               M35(0x2d,r2)
+/* mov - Move User Mask: same M36/M35 emitters, codes 0x21 and 0x29 */
+#define MOV_rn_psr_um(r1)              M36(0x21,r1)
+#define MOV_psr_um_rn(r2)              M35(0x29,r2)
+/* movl: forwards (r1,im) unchanged to X2 */
+#define MOVL(r1,im)                    X2(r1,im)
+/* mpy4: I2(1,0,0,3,1,r3,r2,r1) */
+#define MPY4(r1,r2,r3)                 I2(1,0,0,3,1,r3,r2,r1)
+/* mpyshl4: same as MPY4 except the fifth I2 argument is 3 instead of 1 */
+#define MPYSHL4(r1,r2,r3)              I2(1,0,0,3,3,r3,r2,r1)
+/* mux: MUX1 forwards (mbt,r2,r1) to I3, MUX2 forwards (mht,r2,r1) to I4 */
+#define MUX1(r1,r2,mbt)                        I3(mbt,r2,r1)
+#define MUX2(r1,r2,mht)                        I4(mht,r2,r1)
+/* nop: one macro per emitter family (I18, M48, B9, X5); the _I/_M/_B/_X
+ * suffix presumably names the execution-unit type - confirm.  Note the
+ * immediate is the first argument for I18 but the last for the others. */
+#define NOP_I(im)                      I18(im,0)
+#define NOP_M(im)                      M48(0,im)
+#define NOP_B(im)                      B9(2,0,im)
+#define NOP_X(im)                      X5(0,im)
+/* or: register form via A1(3,2,...), immediate form via A3(0xb,2,...) */
+#define OR(r1,r2,r3)                   A1(3,2,r3,r2,r1)
+#define ORI(r1,im,r3)                  A3(0xb,2,r3,im,r1)
+/* pack: I2 with fourth argument 0; fifth argument is 0 for _USS and 2 for
+ * _SSS.  No PACK4_USS is defined here. */
+#define PACK2_USS(r1,r2,r3)            I2(0,2,1,0,0,r3,r2,r1)
+#define PACK2_SSS(r1,r2,r3)            I2(0,2,1,0,2,r3,r2,r1)
+#define PACK4_SSS(r1,r2,r3)            I2(1,2,0,0,2,r3,r2,r1)
+/* padd: A9(a,b,0,v,r3,r2,r1).  (a,b) is (0,0), (0,1) or (1,0) for element
+ * sizes 1, 2 and 4; v is 0 plain, 1 _SSS, 2 _UUU, 3 _UUS.  PADD4 has only
+ * the plain form. */
+#define PADD1(r1,r2,r3)                        A9(0,0,0,0,r3,r2,r1)
+#define PADD1_SSS(r1,r2,r3)            A9(0,0,0,1,r3,r2,r1)
+#define PADD1_UUU(r1,r2,r3)            A9(0,0,0,2,r3,r2,r1)
+#define PADD1_UUS(r1,r2,r3)            A9(0,0,0,3,r3,r2,r1)
+#define PADD2(r1,r2,r3)                        A9(0,1,0,0,r3,r2,r1)
+#define PADD2_SSS(r1,r2,r3)            A9(0,1,0,1,r3,r2,r1)
+#define PADD2_UUU(r1,r2,r3)            A9(0,1,0,2,r3,r2,r1)
+#define PADD2_UUS(r1,r2,r3)            A9(0,1,0,3,r3,r2,r1)
+#define PADD4(r1,r2,r3)                        A9(1,0,0,0,r3,r2,r1)
+/* pavg: A9 with third argument 2; fourth is 2 plain, 3 for _RAZ */
+#define PAVG1(r1,r2,r3)                        A9(0,0,2,2,r3,r2,r1)
+#define PAVG2(r1,r2,r3)                        A9(0,1,2,2,r3,r2,r1)
+#define PAVG1_RAZ(r1,r2,r3)            A9(0,0,2,3,r3,r2,r1)
+#define PAVG2_RAZ(r1,r2,r3)            A9(0,1,2,3,r3,r2,r1)
+/* pavgsub: A9 with third argument 3 and fourth argument 2 */
+#define PAVGSUB1(r1,r2,r3)             A9(0,0,3,2,r3,r2,r1)
+#define PAVGSUB2(r1,r2,r3)             A9(0,1,3,2,r3,r2,r1)
+/* pcmp: A9 with third argument 9; fourth is 0 for _EQ and 1 for _GT */
+#define PCMP1_EQ(r1,r2,r3)             A9(0,0,9,0,r3,r2,r1)
+#define PCMP2_EQ(r1,r2,r3)             A9(0,1,9,0,r3,r2,r1)
+#define PCMP4_EQ(r1,r2,r3)             A9(1,0,9,0,r3,r2,r1)
+#define PCMP1_GT(r1,r2,r3)             A9(0,0,9,1,r3,r2,r1)
+#define PCMP2_GT(r1,r2,r3)             A9(0,1,9,1,r3,r2,r1)
+#define PCMP4_GT(r1,r2,r3)             A9(1,0,9,1,r3,r2,r1)
+/* pmax: I2 with fourth argument 1; only the PMAX1_U and PMAX2 forms exist */
+#define PMAX1_U(r1,r2,r3)              I2(0,2,0,1,1,r3,r2,r1)
+#define PMAX2(r1,r2,r3)                        I2(0,2,1,1,3,r3,r2,r1)
+/* pmin: identical to pmax except the fourth I2 argument is 0 */
+#define PMIN1_U(r1,r2,r3)              I2(0,2,0,0,1,r3,r2,r1)
+#define PMIN2(r1,r2,r3)                        I2(0,2,1,0,3,r3,r2,r1)
+/* pmpy: I2 with fourth argument 3; fifth is 1 for _R and 3 for _L */
+#define PMPY2_R(r1,r2,r3)              I2(0,2,1,3,1,r3,r2,r1)
+#define PMPY2_L(r1,r2,r3)              I2(0,2,1,3,3,r3,r2,r1)
+/* pmpyshr: I1(im,x,r3,r2,r1) with x = 3 for the plain form, 1 for _U */
+#define PMPYSHR2(r1,r2,r3,im)          I1(im,3,r3,r2,r1)
+#define PMPYSHR2_U(r1,r2,r3,im)                I1(im,1,r3,r2,r1)
+/* popcnt: I9 with leading code 2 */
+#define POPCNT(r1,r3)                  I9(2,r3,r1)
+/* probe: register forms via M38 (codes 0x38/0x39), immediate forms via M39
+ * (codes 0x18/0x19), and the *_FAULT forms, which have no r1 operand, via
+ * M40 (codes 0x31-0x33) */
+#define PROBE_R(r1,r3,r2)              M38(0x38,r3,r2,r1)
+#define PROBE_W(r1,r3,r2)              M38(0x39,r3,r2,r1)
+#define PROBEI_R(r1,r3,im)             M39(0x18,r3,im,r1)
+#define PROBEI_W(r1,r3,im)             M39(0x19,r3,im,r1)
+#define PROBE_RW_FAULT(r3,im)          M40(0x31,r3,im)
+#define PROBE_R_FAULT(r3,im)           M40(0x32,r3,im)
+#define PROBE_W_FAULT(r3,im)           M40(0x33,r3,im)
+/* psad: I2(0,2,0,2,3,r3,r2,r1) */
+#define PSAD1(r1,r2,r3)                        I2(0,2,0,2,3,r3,r2,r1)
+/* pshl: register count via I7, immediate count via I8; the leading pair is
+ * (0,1) for the 2 variants and (1,0) for the 4 variants */
+#define PSHL2(r1,r2,r3)                        I7(0,1,r3,r2,r1)
+#define PSHL4(r1,r2,r3)                        I7(1,0,r3,r2,r1)
+#define PSHL2I(r1,r2,im)               I8(0,1,im,r2,r1)
+#define PSHL4I(r1,r2,im)               I8(1,0,im,r2,r1)
+/* pshladd: A10 with leading code 4 */
+#define PSHLADD2(r1,r2,im,r3)          A10(4,im,r3,r2,r1)
+/* pshr: register count via I5, immediate count via I6.  The third argument
+ * is 2 (I5) or 3 (I6) for the plain forms and 0 (I5) or 1 (I6) for the _U
+ * forms.  Unlike pshl, the macro operand order here is (r1,r3,r2). */
+#define PSHR2(r1,r3,r2)                        I5(0,1,2,r3,r2,r1)
+#define PSHR2I(r1,r3,im)               I6(0,1,3,r3,im,r1)
+#define PSHR2_U(r1,r3,r2)              I5(0,1,0,r3,r2,r1)
+#define PSHR2I_U(r1,r3,im)             I6(0,1,1,r3,im,r1)
+#define PSHR4(r1,r3,r2)                        I5(1,0,2,r3,r2,r1)
+#define PSHR4I(r1,r3,im)               I6(1,0,3,r3,im,r1)
+#define PSHR4_U(r1,r3,r2)              I5(1,0,0,r3,r2,r1)
+#define PSHR4I_U(r1,r3,im)             I6(1,0,1,r3,im,r1)
+/* pshradd: A10 with leading code 6 */
+#define PSHRADD2(r1,r2,im,r3)          A10(6,im,r3,r2,r1)
+/* psub: same layout as padd but with third A9 argument 1; the fourth is
+ * 0 plain, 1 _SSS, 2 _UUU, 3 _UUS.  PSUB4 has only the plain form. */
+#define PSUB1(r1,r2,r3)                        A9(0,0,1,0,r3,r2,r1)
+#define PSUB1_SSS(r1,r2,r3)            A9(0,0,1,1,r3,r2,r1)
+#define PSUB1_UUU(r1,r2,r3)            A9(0,0,1,2,r3,r2,r1)
+#define PSUB1_UUS(r1,r2,r3)            A9(0,0,1,3,r3,r2,r1)
+#define PSUB2(r1,r2,r3)                        A9(0,1,1,0,r3,r2,r1)
+#define PSUB2_SSS(r1,r2,r3)            A9(0,1,1,1,r3,r2,r1)
+#define PSUB2_UUU(r1,r2,r3)            A9(0,1,1,2,r3,r2,r1)
+#define PSUB2_UUS(r1,r2,r3)            A9(0,1,1,3,r3,r2,r1)
+#define PSUB4(r1,r2,r3)                        A9(1,0,1,0,r3,r2,r1)
+/* ptc.e: M47 with code 0x34 */
+#define PTC_E(r3)                      M47(0x34,r3)
+/* ptc.g, ptc.ga: M45 with codes 0xa and 0xb */
+#define PTC_G(r3,r2)                   M45(0xa,r3,r2)
+#define PTC_GA(r3,r2)                  M45(0xb,r3,r2)
+/* ptc.l: M45 with code 0x9 */
+#define PTC_L(r3,r2)                   M45(0x9,r3,r2)
+/* ptr: M45 with codes 0xc (_D) and 0xd (_I) */
+#define PTR_D(r3,r2)                   M45(0xc,r3,r2)
+#define PTR_I(r3,r2)                   M45(0xd,r3,r2)
+/* rfi: B8 with code 0x08 */
+#define RFI()                          B8(0x08)
+/* rsm: M44 with leading code 7 */
+#define RSM(im)                                M44(7,im)
+/* rum: M44 with leading code 5 */
+#define RUM(im)                                M44(5,im)
+/* shl: I7 with leading pair (1,1) */
+#define SHL(r1,r2,r3)                  I7(1,1,r3,r2,r1)
+/* shladd: A2 with leading code 4 */
+#define SHLADD(r1,r2,im,r3)            A2(4,im,r3,r2,r1)
+/* shladdp4: A2 with leading code 6 */
+#define SHLADDP4(r1,r2,im,r3)          A2(6,im,r3,r2,r1)
+/* shr: I5 with leading pair (1,1); third argument 2 for SHR, 0 for SHR_U.
+ * Operand order is (r1,r3,r2), unlike SHL's (r1,r2,r3). */
+#define SHR(r1,r3,r2)                  I5(1,1,2,r3,r2,r1)
+#define SHR_U(r1,r3,r2)                        I5(1,1,0,r3,r2,r1)
+/* shrp: forwards (im,r3,r2,r1) to I10 */
+#define SHRP(r1,r2,r3,im)              I10(im,r3,r2,r1)
+/* srlz: M24 with first argument 3; second is 1 for _I and 0 for _D */
+#define SRLZ_I()                       M24(3,1)
+#define SRLZ_D()                       M24(3,0)
+/* ssm: M44 with leading code 6 */
+#define SSM(im)                                M44(6,im)
+/* st
+ * Plain stores forward to M6(code,ST_NONE,x,r3,r2): codes 0x30-0x33 for
+ * sizes 1, 2, 4, 8, 0x34-0x37 for the _REL forms and 0x3b for ST8_SPILL,
+ * all with x = 0.  ST16 and ST16_REL reuse codes 0x30 and 0x34 with x = 1.
+ * The *_inc forms forward to M5(code,ST_NONE,r3,r2,im) with the same codes;
+ * there is no 16-byte _inc form. */
+#define ST1(r3,r2)                     M6(0x30,ST_NONE,0,r3,r2)
+#define ST2(r3,r2)                     M6(0x31,ST_NONE,0,r3,r2)
+#define ST4(r3,r2)                     M6(0x32,ST_NONE,0,r3,r2)
+#define ST8(r3,r2)                     M6(0x33,ST_NONE,0,r3,r2)
+#define ST1_REL(r3,r2)                 M6(0x34,ST_NONE,0,r3,r2)
+#define ST2_REL(r3,r2)                 M6(0x35,ST_NONE,0,r3,r2)
+#define ST4_REL(r3,r2)                 M6(0x36,ST_NONE,0,r3,r2)
+#define ST8_REL(r3,r2)                 M6(0x37,ST_NONE,0,r3,r2)
+#define ST8_SPILL(r3,r2)               M6(0x3b,ST_NONE,0,r3,r2)
+#define ST16(r3,r2)                    M6(0x30,ST_NONE,1,r3,r2)
+#define ST16_REL(r3,r2)                        M6(0x34,ST_NONE,1,r3,r2)
+#define ST1_inc(r3,r2,im)              M5(0x30,ST_NONE,r3,r2,im)
+#define ST2_inc(r3,r2,im)              M5(0x31,ST_NONE,r3,r2,im)
+#define ST4_inc(r3,r2,im)              M5(0x32,ST_NONE,r3,r2,im)
+#define ST8_inc(r3,r2,im)              M5(0x33,ST_NONE,r3,r2,im)
+#define ST1_REL_inc(r3,r2,im)          M5(0x34,ST_NONE,r3,r2,im)
+#define ST2_REL_inc(r3,r2,im)          M5(0x35,ST_NONE,r3,r2,im)
+#define ST4_REL_inc(r3,r2,im)          M5(0x36,ST_NONE,r3,r2,im)
+#define ST8_REL_inc(r3,r2,im)          M5(0x37,ST_NONE,r3,r2,im)
+#define ST8_SPILL_inc(r3,r2,im)                M5(0x3b,ST_NONE,r3,r2,im)
+/* sub: SUB is A1(1,1,...), SUB1 is A1(1,0,...); the immediate form SUBI is
+ * A3(9,1,r3,im,r1) */
+#define SUB(r1,r2,r3)                  A1(1,1,r3,r2,r1)
+#define SUB1(r1,r2,r3)                 A1(1,0,r3,r2,r1)
+#define SUBI(r1,im,r3)                 A3(9,1,r3,im,r1)
+/* sum: M44 with leading code 4 */
+#define SUM(im)                                M44(4,im)
+/* sxt: I29 with codes 0x14, 0x15, 0x16 for widths 1, 2, 4 */
+#define SXT1(r1,r3)                    I29(0x14,r3,r1)
+#define SXT2(r1,r3)                    I29(0x15,r3,r1)
+#define SXT4(r1,r3)                    I29(0x16,r3,r1)
+/* sync: M24(3,3) */
+#define SYNC_I()                       M24(3,3)
+/* tak: M46 with code 0x1f */
+#define TAK(r1,r3)                     M46(0x1f,r3,r1)
+/* tbit: I16(a,b,p2,r3,pos,c,p1).  (a,b) is (0,0) for the plain/_UNC forms,
+ * (1,0) for _AND, (0,1) for _OR and (1,1) for _ANDCM; c is 1 for the _UNC
+ * and _NZ_* names and 0 otherwise. */
+#define TBIT_Z(p1,p2,r3,pos)           I16(0,0,p2,r3,pos,0,p1)
+#define TBIT_Z_UNC(p1,p2,r3,pos)       I16(0,0,p2,r3,pos,1,p1)
+#define TBIT_Z_AND(p1,p2,r3,pos)       I16(1,0,p2,r3,pos,0,p1)
+#define TBIT_NZ_AND(p1,p2,r3,pos)      I16(1,0,p2,r3,pos,1,p1)
+#define TBIT_Z_OR(p1,p2,r3,pos)                I16(0,1,p2,r3,pos,0,p1)
+#define TBIT_NZ_OR(p1,p2,r3,pos)       I16(0,1,p2,r3,pos,1,p1)
+#define TBIT_Z_ANDCM(p1,p2,r3,pos)     I16(1,1,p2,r3,pos,0,p1)
+#define TBIT_NZ_ANDCM(p1,p2,r3,pos)    I16(1,1,p2,r3,pos,1,p1)
+/* tf: same selector layout as tbit, forwarded to I30 with an immediate in
+ * place of the (r3,pos) pair */
+#define TF_Z(p1,p2,im)                 I30(0,0,p2,im,0,p1)
+#define TF_Z_UNC(p1,p2,im)             I30(0,0,p2,im,1,p1)
+#define TF_Z_AND(p1,p2,im)             I30(1,0,p2,im,0,p1)
+#define TF_NZ_AND(p1,p2,im)            I30(1,0,p2,im,1,p1)
+#define TF_Z_OR(p1,p2,im)              I30(0,1,p2,im,0,p1)
+#define TF_NZ_OR(p1,p2,im)             I30(0,1,p2,im,1,p1)
+#define TF_Z_ANDCM(p1,p2,im)           I30(1,1,p2,im,0,p1)
+#define TF_NZ_ANDCM(p1,p2,im)          I30(1,1,p2,im,1,p1)
+/* thash: M46 with code 0x1a */
+#define THASH(r1,r3)                   M46(0x1a,r3,r1)
+/* tnat: same selector layout as tbit, forwarded to I17 with only r3 */
+#define TNAT_Z(p1,p2,r3)               I17(0,0,p2,r3,0,p1)
+#define TNAT_Z_UNC(p1,p2,r3)           I17(0,0,p2,r3,1,p1)
+#define TNAT_Z_AND(p1,p2,r3)           I17(1,0,p2,r3,0,p1)
+#define TNAT_NZ_AND(p1,p2,r3)          I17(1,0,p2,r3,1,p1)
+#define TNAT_Z_OR(p1,p2,r3)            I17(0,1,p2,r3,0,p1)
+#define TNAT_NZ_OR(p1,p2,r3)           I17(0,1,p2,r3,1,p1)
+#define TNAT_Z_ANDCM(p1,p2,r3)         I17(1,1,p2,r3,0,p1)
+#define TNAT_NZ_ANDCM(p1,p2,r3)                I17(1,1,p2,r3,1,p1)
+/* tpa: M46 with code 0x1e */
+#define TPA(r1,r3)                     M46(0x1e,r3,r1)
+/* ttag: M46 with code 0x1b */
+#define TTAG(r1,r3)                    M46(0x1b,r3,r1)
+/* unpack: I2 with fourth argument 1; fifth is 0 for _H and 2 for _L */
+#define UNPACK1_H(r1,r2,r3)            I2(0,2,0,1,0,r3,r2,r1)
+#define UNPACK2_H(r1,r2,r3)            I2(0,2,1,1,0,r3,r2,r1)
+#define UNPACK4_H(r1,r2,r3)            I2(1,2,0,1,0,r3,r2,r1)
+#define UNPACK1_L(r1,r2,r3)            I2(0,2,0,1,2,r3,r2,r1)
+#define UNPACK2_L(r1,r2,r3)            I2(0,2,1,1,2,r3,r2,r1)
+#define UNPACK4_L(r1,r2,r3)            I2(1,2,0,1,2,r3,r2,r1)
+/* vmsw: B8 with codes 0x18 (_0) and 0x19 (_1) */
+#define VMSW_0()                       B8(0x18)
+#define VMSW_1()                       B8(0x19)
+/* xchg: M16 with codes 0x08-0x0b for sizes 1, 2, 4, 8; only _ACQ forms are
+ * defined */
+#define XCHG1_ACQ(r1,r3,r2)            M16(0x08,LD_NONE,r3,r2,r1)
+#define XCHG2_ACQ(r1,r3,r2)            M16(0x09,LD_NONE,r3,r2,r1)
+#define XCHG4_ACQ(r1,r3,r2)            M16(0x0a,LD_NONE,r3,r2,r1)
+#define XCHG8_ACQ(r1,r3,r2)            M16(0x0b,LD_NONE,r3,r2,r1)
+/* xor: register form via A1(3,3,...), immediate form via A3(0xb,3,...) */
+#define XOR(r1,r2,r3)                  A1(3,3,r3,r2,r1)
+#define XORI(r1,im,r3)                 A3(0xb,3,r3,im,r1)
+/* zxt: I29 with codes 0x10, 0x11, 0x12 for widths 1, 2, 4 */
+#define ZXT1(r1,r3)                    I29(0x10,r3,r1)
+#define ZXT2(r1,r3)                    I29(0x11,r3,r1)
+#define ZXT4(r1,r3)                    I29(0x12,r3,r1)
+
+#define addr(r0,r1,r2)                 ADD(r0,r1,r2) /* lower-case ops either alias an instruction macro directly (as here) or call a static _helper, passing a _jit that must be in scope at the call site */
+#define addi(r0,r1,i0)                 _addi(_jit,r0,r1,i0) /* *i helpers take a jit_word_t immediate as the last operand */
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addcr(r0, r1, r2)              _addcr(_jit, r0, r1, r2) /* addc*/addx*: always helpers, there is no direct instruction alias */
+static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
+static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define addxi(r0, r1, i0)            _addxi(_jit, r0, r1, i0)
+static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define subr(r0,r1,r2)                 SUB(r0,r1,r2) /* direct alias of SUB */
+#define subi(r0,r1,i0)                 _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0, r1, r2)            _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0, r1, i0)            _subci(_jit, r0, r1, i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0, r1, r2)            _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0, r1, i0)            _subxi(_jit, r0, r1, i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0) /* only an immediate rsb form is declared here */
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define mulr(r0,r1,r2)                 _mulr(_jit,r0,r1,r2) /* mul/div/rem: every form, register or immediate, signed or _u, goes through a helper */
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define muli(r0,r1,i0)                 _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define divr(r0,r1,r2)                 _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi(r0,r1,i0)                 _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define divr_u(r0,r1,r2)               _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_u(r0,r1,i0)               _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define remr(r0,r1,r2)                 _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi(r0,r1,i0)                 _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define remr_u(r0,r1,r2)               _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi_u(r0,r1,i0)               _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* mulh(r0,r1,r2,sign): forwards three register operands followed by the
+ * sign flag to _mulh.  The prototype lists its parameter types in that same
+ * order (jit_bool_t last), as the _iq* helper prototypes in this file do. */
+#define mulh(r0,r1,r2,sign)            _mulh(_jit,r0,r1,r2,sign)
+static void _mulh(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_bool_t);
+#define qmulr(r0,r1,r2,r3)             iqmulr(r0,r1,r2,r3,1) /* q* ops: the signed name passes sign=1 and the _u name sign=0 to a shared iq* wrapper, which adds _jit and calls the helper */
+#define qmulr_u(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,0)
+#define iqmulr(r0,r1,r2,r3,sign)       _iqmulr(_jit,r0,r1,r2,r3,sign)
+static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#define qmuli(r0,r1,r2,i0)             iqmuli(r0,r1,r2,i0,1) /* immediate variant: fourth operand is a jit_word_t */
+#define qmuli_u(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,0)
+#define iqmuli(r0,r1,r2,i0,sign)       _iqmuli(_jit,r0,r1,r2,i0,sign)
+static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#define qdivr(r0,r1,r2,r3)             iqdivr(r0,r1,r2,r3,1) /* qdiv follows the same signed/unsigned wrapper scheme as qmul */
+#define qdivr_u(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,0)
+#define iqdivr(r0,r1,r2,r3,sign)       _iqdivr(_jit,r0,r1,r2,r3,sign)
+static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#define qdivi(r0,r1,r2,i0)             iqdivi(r0,r1,r2,i0,1)
+#define qdivi_u(r0,r1,r2,i0)           iqdivi(r0,r1,r2,i0,0)
+#define iqdivi(r0,r1,r2,i0,sign)       _iqdivi(_jit,r0,r1,r2,i0,sign)
+static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#define andr(r0,r1,r2)                 AND(r0,r1,r2) /* logic and shift ops: register forms alias the instruction macro directly, immediate forms go through a helper taking a jit_word_t */
+#define andi(r0,r1,i0)                 _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define orr(r0,r1,r2)                  OR(r0,r1,r2)
+#define ori(r0,r1,i0)                  _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define xorr(r0,r1,r2)                 XOR(r0,r1,r2)
+#define xori(r0,r1,i0)                 _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define lshr(r0,r1,r2)                 SHL(r0,r1,r2) /* lshr = left shift, register count: aliases SHL, not a right shift */
+#define lshi(r0,r1,i0)                 _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define rshr(r0,r1,r2)                 SHR(r0,r1,r2) /* right shift aliases SHR; the _u form below aliases SHR_U */
+#define rshi(r0,r1,i0)                 _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define rshr_u(r0,r1,r2)               SHR_U(r0,r1,r2)
+#define rshi_u(r0,r1,i0)               _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ltr(r0,r1,r2)                  _ltr(_jit,r0,r1,r2) /* comparisons: every form is a void helper taking (r0,r1,r2) or (r0,r1,i0); lt/le/ge/gt also have _u variants, eq/ne do not */
+static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti(r0,r1,i0)                  _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ltr_u(r0,r1,r2)                        _ltr_u(_jit,r0,r1,r2)
+static void _ltr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti_u(r0,r1,i0)                        _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler(r0,r1,r2)                  _ler(_jit,r0,r1,r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei(r0,r1,i0)                  _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler_u(r0,r1,r2)                        _ler_u(_jit,r0,r1,r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_u(r0,r1,i0)                        _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define eqr(r0,r1,r2)                  _eqr(_jit,r0,r1,r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi(r0,r1,i0)                  _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger(r0,r1,r2)                  _ger(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei(r0,r1,i0)                  _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger_u(r0,r1,r2)                        _ger_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_u(r0,r1,i0)                        _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define gtr(r0,r1,r2)                  _gtr(_jit,r0,r1,r2)
+static void _gtr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti(r0,r1,i0)                  _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define gtr_u(r0,r1,r2)                        _gtr_u(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti_u(r0,r1,i0)                        _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ner(r0,r1,r2)                  _ner(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define negr(r0,r1)                    subr(r0,0,r1) /* negate as subr with a literal 0 second operand; presumably register number 0 (GR_0) - confirm */
+#define comr(r0,r1)                    ANDCMI(r0,-1,r1) /* complement expressed as ANDCMI with immediate -1 */
+#define movr(r0,r1)                    _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movi(r0,i0)                    _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#define movi_p(r0,i0)                  _movi_p(_jit,r0,i0) /* unlike movi, this helper returns a jit_word_t (presumably an address usable for later patching - confirm) */
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* little-endian host: us/ui conversions use helpers, ul uses MUX1 with MUX_REV */
+#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ul(r0,r1)              MUX1(r0,r1,MUX_REV)
+#else /* otherwise no swap: the conversions reduce to extr_us / extr_ui / movr */
+#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  define htonr_ui(r0,r1)              extr_ui(r0,r1)
+#  define htonr_ul(r0,r1)              movr(r0,r1)
+#endif
+#define extr_c(r0,r1)                  SXT1(r0,r1) /* extr_*: signed names alias SXTn, unsigned (u-prefixed) names alias ZXTn, for widths 1, 2 and 4 */
+#define extr_uc(r0,r1)                 ZXT1(r0,r1)
+#define extr_s(r0,r1)                  SXT2(r0,r1)
+#define extr_us(r0,r1)                 ZXT2(r0,r1)
+#define extr_i(r0,r1)                  SXT4(r0,r1)
+#define extr_ui(r0,r1)                 ZXT4(r0,r1)
+#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1) /* conditional branches: each helper takes a jit_word_t i0 first (presumably the branch target - confirm), then a register and a register or immediate, and returns a jit_word_t */
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmsr(i0,r0,r1)                 _bmsr(_jit,i0,r0,r1) /* bms/bmc: same calling shape as the relational branches above */
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmsi(i0,r0,i1)                 _bmsi(_jit,i0,r0,i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmcr(i0,r0,r1)                 _bmcr(_jit,i0,r0,r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmci(i0,r0,i1)                 _bmci(_jit,i0,r0,i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define baddr(i0,r0,r1,cc)             _baddr(_jit,i0,r0,r1,cc) /* badd*/bsub* helpers take a trailing jit_bool_t cc; the bo* wrappers below pass 1 and the bx* wrappers pass 0 */
+static jit_word_t _baddr(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_int32_t,jit_bool_t);
+#define baddi(i0,r0,i1,cc)             _baddi(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_word_t,jit_bool_t);
+#define baddr_u(i0,r0,r1,cc)           _baddr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _baddr_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_int32_t,jit_bool_t);
+#define baddi_u(i0,r0,i1,cc)           _baddi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_word_t,jit_bool_t);
+#define bsubr(i0,r0,r1,cc)             _bsubr(_jit,i0,r0,r1,cc)
+static jit_word_t _bsubr(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_int32_t,jit_bool_t);
+#define bsubi(i0,r0,i1,cc)             _bsubi(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_word_t,jit_bool_t);
+#define bsubr_u(i0,r0,r1,cc)           _bsubr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _bsubr_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_int32_t,jit_bool_t);
+#define bsubi_u(i0,r0,i1,cc)           _bsubi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_word_t,jit_bool_t);
+#define boaddr(i0,r0,r1)               baddr(i0,r0,r1,1) /* bo*: cc = 1 (presumably "branch on overflow" - confirm in the helper) */
+#define boaddi(i0,r0,i1)               baddi(i0,r0,i1,1)
+#define boaddr_u(i0,r0,r1)             baddr_u(i0,r0,r1,1)
+#define boaddi_u(i0,r0,i1)             baddi_u(i0,r0,i1,1)
+#define bxaddr(i0,r0,r1)               baddr(i0,r0,r1,0) /* bx*: cc = 0 */
+#define bxaddi(i0,r0,i1)               baddi(i0,r0,i1,0)
+#define bxaddr_u(i0,r0,r1)             baddr_u(i0,r0,r1,0)
+#define bxaddi_u(i0,r0,i1)             baddi_u(i0,r0,i1,0)
+#define bosubr(i0,r0,r1)               bsubr(i0,r0,r1,1)
+#define bosubi(i0,r0,i1)               bsubi(i0,r0,i1,1)
+#define bosubr_u(i0,r0,r1)             bsubr_u(i0,r0,r1,1)
+#define bosubi_u(i0,r0,i1)             bsubi_u(i0,r0,i1,1)
+#define bxsubr(i0,r0,r1)               bsubr(i0,r0,r1,0)
+#define bxsubi(i0,r0,i1)               bsubi(i0,r0,i1,0)
+#define bxsubr_u(i0,r0,r1)             bsubr_u(i0,r0,r1,0)
+#define bxsubi_u(i0,r0,i1)             bsubi_u(i0,r0,i1,0)
+/* Load helpers.  ldr_uc, ldr_us, ldr_ui and ldr_l expand directly to the
+ * LD1, LD2, LD4 and LD8 macros; every other form forwards to a static
+ * function taking the implicit _jit state first.  The ldr_* and ldxr_*
+ * forms take only register arguments (jit_int32_t), the ldi_* and
+ * ldxi_* forms take a jit_word_t immediate as last argument. */
+#define ldr_c(r0,r1)                   _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#define ldi_c(r0,i0)                   _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_uc(r0,r1)                  LD1(r0,r1)
+#define ldi_uc(r0,i0)                  _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_s(r0,r1)                   _ldr_s(_jit,r0,r1)
+static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#define ldi_s(r0,i0)                   _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_us(r0,r1)                  LD2(r0,r1)
+#define ldi_us(r0,i0)                  _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_i(r0,r1)                   _ldr_i(_jit,r0,r1)
+static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#define ldi_i(r0,i0)                   _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_ui(r0,r1)                  LD4(r0,r1)
+#define ldi_ui(r0,i0)                  _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_l(r0,r1)                   LD8(r0,r1)
+#define ldi_l(r0,i0)                   _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+/* Indexed loads: three operands, all implemented as functions. */
+#define ldxr_c(r0,r1,r2)               _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_c(r0,r1,i0)               _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_uc(r0,r1,r2)              _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_uc(r0,r1,i0)              _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_s(r0,r1,r2)               _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_s(r0,r1,i0)               _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_us(r0,r1,r2)              _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_us(r0,r1,i0)              _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_i(r0,r1,r2)               _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_i(r0,r1,i0)               _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_ui(r0,r1,r2)              _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_ui(r0,r1,i0)              _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_l(r0,r1,r2)               _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_l(r0,r1,i0)               _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Store helpers.  The str_* forms expand directly to the ST1, ST2, ST4
+ * and ST8 macros; sti_*, stxr_* and stxi_* forward to static functions
+ * taking the implicit _jit state first.  In the sti_* and stxi_* forms
+ * the jit_word_t immediate is the first operand. */
+#define str_c(r0,r1)                   ST1(r0,r1)
+#define sti_c(i0,r0)                   _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_s(r0,r1)                   ST2(r0,r1)
+#define sti_s(i0,r0)                   _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_i(r0,r1)                   ST4(r0,r1)
+#define sti_i(i0,r0)                   _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_l(r0,r1)                   ST8(r0,r1)
+#define sti_l(i0,r0)                   _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_c(r0,r1,r2)               _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_c(i0,r0,r1)               _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_s(r0,r1,r2)               _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_s(i0,r0,r1)               _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_i(r0,r1,r2)               _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_i(i0,r0,r1)               _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_l(r0,r1,r2)               _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_l(i0,r0,r1)               _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* Jumps and calls.  The *_p variants return a jit_word_t whereas the
+ * plain immediate forms return nothing (presumably the *_p result is the
+ * location to patch later -- confirm against the definitions). */
+#define jmpr(r0)                       _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#define jmpi(i0)                       _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#define jmpi_p(i0)                     _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#define callr(r0)                      _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#define calli(i0)                      _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#define calli_p(i0)                    _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* Function prolog/epilog emitters; both receive a jit_node_t pointer. */
+#define prolog(node)                   _prolog(_jit,node)
+static void _prolog(jit_state_t*,jit_node_t*);
+#define epilog(node)                   _epilog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+/* Variadic-argument support. */
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+/* NOTE: the macro's first argument is named "node" but the function's
+ * matching parameter is declared as jit_code_t. */
+#define patch_at(node,instr,label)     _patch_at(_jit,node,instr,label)
+static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+#if __BYTE_ORDER == __BIG_ENDIAN
+/*
+ * Big-endian build: return w with its eight bytes in reversed order.
+ * The value is viewed through a union of a long and an 8-char array.
+ */
+static jit_word_t
+byte_swap_if_big_endian(jit_word_t w)
+{
+    union {
+        char    c[8];
+        long    w;
+    } src, dst;
+    int         k;
+
+    src.w = w;
+    /* dst byte k receives src byte 7-k */
+    for (k = 0; k < 8; k++)
+        dst.c[k] = src.c[7 - k];
+    return (dst.w);
+}
+#else
+/* Any other byte order: the value is passed through untouched. */
+#define byte_swap_if_big_endian(w)             (w)
+#endif
+
+/*
+ * Write one bundle at the current code pointer and drop the first n
+ * entries from the pending-instruction queue.
+ *
+ *   n          number of queued entries consumed by this bundle
+ *   tm         template value handed to set_bundle
+ *   s0,s1,s2   the three slot values handed to set_bundle
+ *
+ * l and h are scratch variables required by the set_bundle macro.
+ * The queue is compacted by copying it as an array of jit_word_t
+ * (assumes each _jitc->inst entry is exactly one jit_word_t wide --
+ * TODO confirm against the structure declaration).
+ */
+static void
+_out(jit_state_t *_jit, int n, int tm,
+     jit_word_t s0, jit_word_t s1, jit_word_t s2)
+{
+    jit_word_t          l, h;
+    jit_word_t         *queue;
+    int                 from;
+
+    set_bundle(_jit->pc.ul, l, h, tm, s0, s1, s2);
+    _jit->pc.ul += 2;
+
+    /* Slide the entries that were not emitted down to index 0 */
+    queue = (jit_word_t *)_jitc->inst;
+    from = n;
+    while (from < _jitc->ioff) {
+        queue[from - n] = queue[from];
+        ++from;
+    }
+    _jitc->ioff -= n;
+}
+
+/* Slot values used by _flush() to pad a bundle and by _stop() when the
+ * queue is empty.  nop_m and nop_i have the same value; nop_b differs. */
+#define nop_m          0x0008000000L
+#define nop_i          0x0008000000L
+#define nop_b          0x4000000000L
+/*
+ * Request a stop after the most recently queued instruction.
+ * The live register set and the predicate mask are reset first.  If the
+ * queue is empty there is nothing to flag, so a nop_m tagged INST_Ms is
+ * queued instead.
+ */
+static void
+_stop(jit_state_t *_jit)
+{
+    /* Forget every register and predicate recorded so far */
+    jit_regset_set_ui(&_jitc->regs, 0);
+    _jitc->pred = 0;
+
+    if (_jitc->ioff == 0) {
+        /* Nothing queued: queue a nop carrying the stop */
+        inst(nop_m, INST_Ms);
+        return;
+    }
+    /* Flag the stop on the last queued instruction */
+    _jitc->inst[_jitc->ioff - 1].t |= INST_STOP;
+}
+
+/*
+ * Emit everything that is still queued.
+ * A stop is requested first when any register is recorded in
+ * _jitc->regs or any predicate in _jitc->pred; taken branches are
+ * supposed to not need a stop, so none is forced when nothing is live.
+ * flush() is then called at least once and repeated until the queue
+ * is empty.
+ */
+static void
+_sync(jit_state_t *_jit)
+{
+    int                 live;
+
+    live = jit_regset_cmp_ui(&_jitc->regs, 0) != 0;
+    if (live || _jitc->pred)
+        stop();
+    for (;;) {
+        flush();
+        if (!_jitc->ioff)
+            break;
+    }
+}
+
+/* Per-slot instruction tags.  The digit is the slot index: slot 0 uses
+ * the INST_* value as is, slot 1 shifts it left by 4 bits and slot 2 by
+ * 8 bits, so a three-slot combination fits in 12 bits.  A name ending
+ * in '_' maps to INST_<unit>, one ending in 's' to INST_<unit>s
+ * (presumably "followed by a stop" -- confirm against the INST_*
+ * definitions). */
+#define A_0            INST_A
+#define As0            INST_As
+#define I_0            INST_I
+#define Is0            INST_Is
+#define M_0            INST_M
+#define Ms0            INST_Ms
+#define F_0            INST_F
+#define Fs0            INST_Fs
+#define B_0            INST_B
+#define Bs0            INST_Bs
+#define L_0            INST_L
+#define Ls0            INST_Ls
+#define X_0            INST_X
+#define Xs0            INST_Xs
+#define A_1            (INST_A<<4)
+#define As1            (INST_As<<4)
+#define I_1            (INST_I<<4)
+#define Is1            (INST_Is<<4)
+#define M_1            (INST_M<<4)
+#define Ms1            (INST_Ms<<4)
+#define F_1            (INST_F<<4)
+#define Fs1            (INST_Fs<<4)
+#define B_1            (INST_B<<4)
+#define Bs1            (INST_Bs<<4)
+#define L_1            (INST_L<<4)
+#define Ls1            (INST_Ls<<4)
+#define X_1            (INST_X<<4)
+#define Xs1            (INST_Xs<<4)
+#define A_2            (INST_A<<8)
+#define As2            (INST_As<<8)
+#define I_2            (INST_I<<8)
+#define Is2            (INST_Is<<8)
+#define M_2            (INST_M<<8)
+#define Ms2            (INST_Ms<<8)
+#define F_2            (INST_F<<8)
+#define Fs2            (INST_Fs<<8)
+#define B_2            (INST_B<<8)
+#define Bs2            (INST_Bs<<8)
+#define L_2            (INST_L<<8)
+#define Ls2            (INST_Ls<<8)
+#define X_2            (INST_X<<8)
+#define Xs2            (INST_Xs<<8)
+
+/* Slot combinations accepted by templat() and dispatched on in _flush().
+ * Each name spells the tag of the queued instructions in order, two
+ * characters per instruction, and expands to the OR of the matching
+ * per-slot tags.  Multi-tag expansions are parenthesized so that they
+ * keep their value when used inside a larger expression. */
+#define I_             I_0
+#define I_I_           (I_0|I_1)
+#define I_Is           (I_0|Is1)
+#define I_B_           (I_0|B_1)
+#define I_Bs           (I_0|Bs1)
+#define Is             Is0
+#define IsI_           (Is0|I_1)
+#define IsIs           (Is0|Is1)
+#define M_             M_0
+#define M_I_           (M_0|I_1)
+#define M_Is           (M_0|Is1)
+#define M_M_           (M_0|M_1)
+#define M_Ms           (M_0|Ms1)
+#define M_F_           (M_0|F_1)
+#define M_Fs           (M_0|Fs1)
+#define M_B_           (M_0|B_1)
+#define M_Bs           (M_0|Bs1)
+#define M_I_I_         (M_0|I_1|I_2)
+#define M_I_Is         (M_0|I_1|Is2)
+#define M_I_B_         (M_0|I_1|B_2)
+#define M_I_Bs         (M_0|I_1|Bs2)
+#define M_IsI_         (M_0|Is1|I_2)
+#define M_IsIs         (M_0|Is1|Is2)
+#define M_M_I_         (M_0|M_1|I_2)
+#define M_M_Is         (M_0|M_1|Is2)
+#define M_M_F_         (M_0|M_1|F_2)
+#define M_M_Fs         (M_0|M_1|Fs2)
+#define M_M_B_         (M_0|M_1|B_2)
+#define M_M_Bs         (M_0|M_1|Bs2)
+#define M_F_I_         (M_0|F_1|I_2)
+#define M_F_Is         (M_0|F_1|Is2)
+#define M_F_B_         (M_0|F_1|B_2)
+#define M_F_Bs         (M_0|F_1|Bs2)
+#define M_B_B_         (M_0|B_1|B_2)
+#define M_B_Bs         (M_0|B_1|Bs2)
+#define M_L_X_         (M_0|L_1|X_2)
+#define M_L_Xs         (M_0|L_1|Xs2)
+#define Ms             Ms0
+#define MsI_           (Ms0|I_1)
+#define MsIs           (Ms0|Is1)
+#define MsM_           (Ms0|M_1)
+#define MsMs           (Ms0|Ms1)
+#define MsM_I_         (Ms0|M_1|I_2)
+#define MsM_Is         (Ms0|M_1|Is2)
+#define F_             F_0
+#define F_I_           (F_0|I_1)
+#define F_Is           (F_0|Is1)
+#define F_B_           (F_0|B_1)
+#define F_Bs           (F_0|Bs1)
+#define Fs             Fs0
+#define B_             B_0
+#define B_B_           (B_0|B_1)
+#define B_Bs           (B_0|Bs1)
+#define B_B_B_         (B_0|B_1|B_2)
+#define B_B_Bs         (B_0|B_1|Bs2)
+#define Bs             Bs0
+#define L_X_           (L_0|X_1)
+#define L_Xs           (L_0|Xs1)
+
+/*
+ * Membership test for slot combinations.
+ * Returns cc itself when it equals one of the combinations listed
+ * below, and 0 for any other value.
+ */
+static jit_word_t
+templat(jit_word_t cc)
+{
+    jit_word_t          known = 0;
+
+    switch (cc) {
+        /* first instruction is I */
+        case I_:     case I_I_:   case I_Is:   case I_B_:   case I_Bs:
+        case Is:     case IsI_:   case IsIs:
+        /* first instruction is M, one or two instructions */
+        case M_:     case M_I_:   case M_Is:   case M_M_:   case M_Ms:
+        case M_F_:   case M_Fs:   case M_B_:   case M_Bs:
+        /* first instruction is M, three instructions */
+        case M_I_I_: case M_I_Is: case M_I_B_: case M_I_Bs:
+        case M_IsI_: case M_IsIs:
+        case M_M_I_: case M_M_Is: case M_M_F_: case M_M_Fs:
+        case M_M_B_: case M_M_Bs:
+        case M_F_I_: case M_F_Is: case M_F_B_: case M_F_Bs:
+        case M_B_B_: case M_B_Bs:
+        case M_L_X_: case M_L_Xs:
+        /* first instruction is Ms */
+        case Ms:     case MsI_:   case MsIs:   case MsM_:   case MsMs:
+        case MsM_I_: case MsM_Is:
+        /* first instruction is F */
+        case F_:     case F_I_:   case F_Is:   case F_B_:   case F_Bs:
+        case Fs:
+        /* first instruction is B */
+        case B_:     case B_B_:   case B_Bs:   case B_B_B_: case B_B_Bs:
+        case Bs:
+        /* L followed by X */
+        case L_X_:   case L_Xs:
+            known = cc;
+            break;
+        default:
+            break;
+    }
+    return (known);
+}
+
+/* The match* functions recurse, attempting to find a template for A-
+ * instructions, which may be executed in either an M- or an I- unit.
+ * They use a heuristic of trying M- first for slots 0 and 2, and I-
+ * first for slot 1, but try all possible matches.
+ */
+/* Slot 2 without stop: try cc with M_2 first, then with I_2.
+ * Returns the accepted combination, or 0 when templat() rejects both. */
+static jit_word_t
+match_2(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | M_2);
+    if (found == 0)
+        found = templat(cc | I_2);
+    return (found);
+}
+
+/* Slot 2 with stop: try cc with Ms2 first, then with Is2.
+ * Returns the accepted combination, or 0 when templat() rejects both. */
+static jit_word_t
+matchs2(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | Ms2);
+    if (found == 0)
+        found = templat(cc | Is2);
+    return (found);
+}
+
+/* Dispatch on the slot-2 tag (bits 8-11) of cc: an A_2 tag is resolved
+ * by match_2(), an As2 tag by matchs2(), both called with the slot-2
+ * bits cleared.  Any other tag yields 0. */
+static jit_word_t
+match2(jit_word_t cc)
+{
+    const jit_word_t    unit = cc & 0xf00;
+    const jit_word_t    rest = cc & ~0xf00;
+
+    if (unit == A_2)
+        return (match_2(rest));
+    if (unit == As2)
+        return (matchs2(rest));
+    return (0);
+}
+
+/* Slot 1 without stop.  Candidates are tried in this order, the first
+ * non-zero result wins: templat() with I_1, templat() with M_1, then
+ * match2() with I_1 and match2() with M_1.  Returns 0 if all fail. */
+static jit_word_t
+match_1(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | I_1);
+    if (!found)
+        found = templat(cc | M_1);
+    if (!found)
+        found = match2(cc | I_1);
+    if (!found)
+        found = match2(cc | M_1);
+    return (found);
+}
+
+/* Slot 1 with stop.  Candidates are tried in this order, the first
+ * non-zero result wins: templat() with Is1, templat() with Ms1, then
+ * match2() with Is1 and match2() with Ms1.  Returns 0 if all fail. */
+static jit_word_t
+matchs1(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | Is1);
+    if (!found)
+        found = templat(cc | Ms1);
+    if (!found)
+        found = match2(cc | Is1);
+    if (!found)
+        found = match2(cc | Ms1);
+    return (found);
+}
+
+/* Dispatch on the slot-1 tag (bits 4-7) of cc: an A_1 tag is resolved
+ * by match_1(), an As1 tag by matchs1(), both called with the slot-1
+ * bits cleared.  Any other tag yields 0. */
+static jit_word_t
+match1(jit_word_t cc)
+{
+    const jit_word_t    unit = cc & 0x0f0;
+    const jit_word_t    rest = cc & ~0x0f0;
+
+    if (unit == A_1)
+        return (match_1(rest));
+    if (unit == As1)
+        return (matchs1(rest));
+    return (0);
+}
+
+/* Slot 0 without stop.  Candidates are tried in this order, the first
+ * non-zero result wins: templat() with M_0, templat() with I_0, then
+ * match1() with M_0 and match1() with I_0.  Returns 0 if all fail. */
+static jit_word_t
+match_0(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | M_0);
+    if (!found)
+        found = templat(cc | I_0);
+    if (!found)
+        found = match1(cc | M_0);
+    if (!found)
+        found = match1(cc | I_0);
+    return (found);
+}
+
+/* Slot 0 with stop.  Candidates are tried in this order, the first
+ * non-zero result wins: templat() with Ms0, templat() with Is0, then
+ * match1() with Ms0 and match1() with Is0.  Returns 0 if all fail. */
+static jit_word_t
+matchs0(jit_word_t cc)
+{
+    jit_word_t          found;
+
+    found = templat(cc | Ms0);
+    if (!found)
+        found = templat(cc | Is0);
+    if (!found)
+        found = match1(cc | Ms0);
+    if (!found)
+        found = match1(cc | Is0);
+    return (found);
+}
+
+/* Dispatch on the slot-0 tag (bits 0-3) of cc: an A_0 tag is resolved
+ * by match_0(), an As0 tag by matchs0(), both called with the slot-0
+ * bits cleared.  Any other tag yields 0. */
+static jit_word_t
+match0(jit_word_t cc)
+{
+    const jit_word_t    unit = cc & 0x00f;
+    const jit_word_t    rest = cc & ~0x00f;
+
+    if (unit == A_0)
+        return (match_0(rest));
+    if (unit == As0)
+        return (matchs0(rest));
+    return (0);
+}
+
+static void
+_flush(jit_state_t *_jit)
+{
+    int                        n, soff;
+    jit_word_t         t, cc, tm, s0, s1, s2;
+
+    if (!_jitc->ioff)
+       return;
+    for (cc = 0, n = soff = 0; n < _jitc->ioff; n++, soff += 4)
+       cc |= (jit_uword_t)(_jitc->inst[n].t) << soff;
+
+    soff = 0xf00;
+    while (soff) {
+       /* Try to find a template, or reduce down
+        * to one instruction if no template match */
+       if ((t = templat(cc))) {
+           cc = t;
+           break;
+       }
+       /* A- instructions may be execute in M- or I- unit */
+       if ((t = match0(cc))) {
+           cc = t;
+           break;
+       }
+       cc &= ~soff;
+       soff >>= 4;
+    }
+    assert(soff);
+
+    /* Prefer tail nop if need to add some nop, so that patching is easier */
+#define ii(n)          _jitc->inst[n].i
+    switch (cc) {
+       case I_:
+           n = 1;              tm = TM_M_I_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case I_I_:
+           n = 2;              tm = TM_M_I_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_Is:
+           n = 2;              tm = TM_M_I_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_B_:
+           n = 2;              tm = TM_M_I_B_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_Bs:
+           n = 2;              tm = TM_M_I_Bs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case Is:
+           n = 1;              tm = TM_M_IsI_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case IsI_:
+           n = 2;              tm = TM_M_IsI_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case IsIs:
+           n = 2;              tm = TM_M_IsIs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case M_:
+           n = 1;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = nop_i;             s2 = nop_i;
+           break;
+       case M_I_:
+           n = 2;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Is:
+           n = 2;              tm = TM_M_IsI_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_M_:
+           n = 2;              tm = TM_M_M_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Ms:
+           n = 2;              tm = TM_M_M_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_F_:
+           n = 2;              tm = TM_M_F_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Fs:
+           n = 2;              tm = TM_M_F_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_B_:
+           n = 2;              tm = TM_M_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case M_Bs:
+           n = 2;              tm = TM_M_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case M_I_I_:
+           n = 3;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_Is:
+           n = 3;              tm = TM_M_I_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_B_:
+           n = 3;              tm = TM_M_I_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_Bs:
+           n = 3;              tm = TM_M_I_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_IsI_:
+           n = 3;              tm = TM_M_IsI_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_IsIs:
+           n = 3;              tm = TM_M_IsIs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_I_:
+           n = 3;              tm = TM_M_M_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Is:
+           n = 3;              tm = TM_M_M_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_F_:
+           n = 3;              tm = TM_M_M_F_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Fs:
+           n = 3;              tm = TM_M_M_Fs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_B_:
+           n = 3;              tm = TM_M_M_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Bs:
+           n = 3;              tm = TM_M_M_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_I_:
+           n = 3;              tm = TM_M_F_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_Is:
+           n = 3;              tm = TM_M_F_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_B_:
+           n = 3;              tm = TM_M_F_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_Bs:
+           n = 3;              tm = TM_M_F_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_B_B_:
+           n = 3;              tm = TM_M_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_B_Bs:
+           n = 3;              tm = TM_M_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_L_X_:
+           n = 3;              tm = TM_M_L_X_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_L_Xs:
+           n = 3;              tm = TM_M_L_Xs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case Ms:
+           n = 1;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = nop_m;             s2 = nop_i;
+           break;
+       case MsI_:
+           n = 2;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = nop_m;             s2 = ii(1);
+           break;
+       case MsIs:
+           n = 2;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = nop_m;             s2 = ii(1);
+           break;
+       case MsM_:
+           n = 2;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case MsMs:
+           n = 2;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case MsM_I_:
+           n = 3;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case MsM_Is:
+           n = 3;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case F_:
+           n = 1;              tm = TM_M_F_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case F_I_:
+           n = 2;              tm = TM_M_F_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_Is:
+           n = 2;              tm = TM_M_F_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_B_:
+           n = 2;              tm = TM_M_F_B_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_Bs:
+           n = 2;              tm = TM_M_F_Bs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case Fs:
+           n = 1;              tm = TM_M_F_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case B_:
+           n = 1;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = nop_b;             s2 = nop_b;
+           break;
+       case B_B_:
+           n = 2;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case B_Bs:
+           n = 2;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case B_B_B_:
+           n = 3;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case B_B_Bs:
+           n = 3;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case Bs:
+           n = 1;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = nop_b;             s2 = nop_b;
+           break;
+       case L_X_:
+           n = 2;              tm = TM_M_L_X_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case L_Xs:
+           n = 2;              tm = TM_M_L_Xs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       default:
+           abort();
+    }
+    out(n, tm, s0, s1, s2);
+}
+
+/*
+ * Append one instruction to the pending queue.
+ *
+ *   i   encoded instruction word
+ *   t   unit/stop tag (one of the INST_* values)
+ *
+ * When the queue already holds more than two entries, flush() is called
+ * first to make room.
+ *
+ * An IA-64 instruction slot is 41 bits wide, so i must not have any of
+ * bits 41..63 set.  The mask previously used here, 0x11111e0000000000,
+ * only tested a sparse subset of those bits and let malformed
+ * encodings through unnoticed.
+ */
+static void
+_inst(jit_state_t *_jit, jit_word_t i, jit_uint8_t t)
+{
+    if (_jitc->ioff > 2)
+        flush();
+    assert(!(i & ~0x1ffffffffffL));
+    _jitc->inst[_jitc->ioff].i = i;
+    _jitc->inst[_jitc->ioff].t = t;
+    ++_jitc->ioff;
+}
+
+/*
+ * Queue an INST_A instruction word with this layout:
+ *   8 at bit 37, x4 (4 bits) at 29, x2 (2 bits) at 27,
+ *   r3 (7 bits) at 20, r2 (7 bits) at 13, r1 (7 bits) at 6,
+ *   _p (6 bits) at 0.
+ * r2/r3, _p and r1 are passed through TSTREG2/TSTPRED/TSTREG1 before
+ * queueing and r1 through SETREG afterwards (presumably hazard
+ * tracking for stops -- confirm against those macros).
+ */
+static void
+_A1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x4, jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x4 &  ~0xfL) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 8L << 37;
+    op |= x4 << 29;
+    op |= x2 << 27;
+    op |= r3 << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_A);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_A instruction word carrying an 8-bit signed immediate:
+ *   8 at bit 37, bit 7 of im at 36, x4 (4 bits) at 29, x2 (2 bits) at 27,
+ *   r3 (7 bits) at 20, low 7 bits of im at 13, r1 (7 bits) at 6,
+ *   _p (6 bits) at 0.
+ * im must lie in [-128, 127].
+ */
+static void
+_A3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x4, jit_word_t x2, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x4 &  ~0xfL) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert(im >= -128 && im <= 127);
+    assert((r1 & ~0x7f) == 0);
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 8L << 37;
+    op |= ((im >> 7) & 1L) << 36;
+    op |= x4 << 29;
+    op |= x2 << 27;
+    op |= r3 << 20;
+    op |= (im & 0x7fL) << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_A);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_A instruction word carrying a 14-bit signed immediate:
+ *   8 at bit 37, bit 13 of im at 36, x2 (2 bits) at 34,
+ *   bits 7..12 of im at 27, r3 (7 bits) at 20, low 7 bits of im at 13,
+ *   r1 (7 bits) at 6, _p (6 bits) at 0.
+ * im must lie in [-8192, 8191].
+ */
+static void
+_A4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x2, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert(im >= -8192 && im <= 8191);
+    assert((r1  & ~0x7f) == 0);
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 8L << 37;
+    op |= ((im >> 13) & 1L) << 36;
+    op |= x2 << 34;
+    op |= ((im >> 7) & 0x3fL) << 27;
+    op |= r3 << 20;
+    op |= (im & 0x7fL) << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_A);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_A instruction word carrying a 22-bit signed immediate:
+ *   9 at bit 37, sign at 36, imm9d at 27, imm5c at 22, r3 (2 bits) at 20,
+ *   imm7b at 13, r1 (7 bits) at 6, _p (6 bits) at 0.
+ * im must lie in [-2097152, 2097151]; r3 is limited to two bits.
+ */
+static void
+_A5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t          sign, imm5c, imm9d, imm7b, op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((r3  & ~0x3L) == 0);
+    assert(im >= -2097152 && im <= 2097151);
+    assert((r1  & ~0x7fL) == 0);
+    /* imm22 = sign_ext(s << 21 | imm5c << 16 | imm9d << 7 | imm7b, 22) */
+    sign  = (im & 0x200000) >> 21;
+    imm5c = (im & 0x1f0000) >> 16;
+    imm9d = (im &   0xff80) >>  7;
+    imm7b =  im &     0x7f;
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 9L << 37;
+    op |= sign << 36;
+    op |= imm9d << 27;
+    op |= imm5c << 22;
+    op |= r3 << 20;
+    op |= imm7b << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_A);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_A instruction word that writes two predicates:
+ *   o (4 bits) at bit 37, x2 (2 bits) at 34, ta at 33, p2 (6 bits) at 27,
+ *   r3 (7 bits) at 20, r2 (7 bits) at 13, c at 12, p1 (6 bits) at 6,
+ *   _p (6 bits) at 0.
+ * After queueing, non-zero p1 and p2 are recorded in _jitc->pred.
+ *
+ * p2 is checked against 6 bits: it sits directly below ta, so a wider
+ * value would corrupt that field.  The predicate bit is computed in a
+ * 64-bit unsigned type because shifting the int constant 1 by 32 or
+ * more is undefined behaviour and predicates range up to 63.
+ * NOTE(review): confirm _jitc->pred is wide enough to hold bit 63.
+ */
+static void
+_A6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta, jit_word_t p2,
+    jit_word_t r3, jit_word_t r2, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    inst((o<<37)|(x2<<34)|(ta<<33)|(p2<<27)|(r3<<20)|
+         (r2<<13)|(c<<12)|(p1<<6)|_p, INST_A);
+    if (p1)
+        _jitc->pred |= (jit_uword_t)1 << p1;
+    if (p2)
+        _jitc->pred |= (jit_uword_t)1 << p2;
+}
+
+/*
+ * Queue an INST_A instruction word that writes two predicates and has
+ * no r2 operand:
+ *   o (4 bits) at bit 37, constant 1 at 36, x2 (2 bits) at 34, ta at 33,
+ *   p2 (6 bits) at 27, r3 (7 bits) at 20, c at 12, p1 (6 bits) at 6,
+ *   _p (6 bits) at 0.
+ * After queueing, non-zero p1 and p2 are recorded in _jitc->pred.
+ *
+ * p2 is checked against 6 bits: it sits directly below ta, so a wider
+ * value would corrupt that field.  The predicate bit is computed in a
+ * 64-bit unsigned type because shifting the int constant 1 by 32 or
+ * more is undefined behaviour and predicates range up to 63.
+ * NOTE(review): confirm _jitc->pred is wide enough to hold bit 63.
+ */
+static void
+_A7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta,
+    jit_word_t p2, jit_word_t r3, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    inst((o<<37)|(1L<<36)|(x2<<34)|(ta<<33)|
+         (p2<<27)|(r3<<20)|(c<<12)|(p1<<6)|_p, INST_A);
+    if (p1)
+        _jitc->pred |= (jit_uword_t)1 << p1;
+    if (p2)
+        _jitc->pred |= (jit_uword_t)1 << p2;
+}
+
+/*
+ * Queue an INST_A instruction word that writes two predicates and takes
+ * an 8-bit signed immediate:
+ *   o (4 bits) at bit 37, bit 7 of im at 36, x2 (2 bits) at 34, ta at 33,
+ *   p2 (6 bits) at 27, r3 (7 bits) at 20, low 7 bits of im at 13,
+ *   c at 12, p1 (6 bits) at 6, _p (6 bits) at 0.
+ * im must lie in [-128, 127].  After queueing, non-zero p1 and p2 are
+ * recorded in _jitc->pred.
+ *
+ * p2 is checked against 6 bits: it sits directly below ta, so a wider
+ * value would corrupt that field.  The predicate bit is computed in a
+ * 64-bit unsigned type because shifting the int constant 1 by 32 or
+ * more is undefined behaviour and predicates range up to 63.
+ * NOTE(review): confirm _jitc->pred is wide enough to hold bit 63.
+ */
+static void
+_A8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta, jit_word_t p2,
+    jit_word_t r3, jit_word_t im, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(im >= -128 && im <= 127);
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    inst((o<<37)|(((im>>7)&1L)<<36)|(x2<<34)|(ta<<33)|(p2<<27)|(r3<<20)|
+         ((im&0x7fL)<<13)|(c<<12)|(p1<<6)|_p, INST_A);
+    if (p1)
+        _jitc->pred |= (jit_uword_t)1 << p1;
+    if (p2)
+        _jitc->pred |= (jit_uword_t)1 << p2;
+}
+
+/*
+ * Queue an INST_A instruction word with this layout:
+ *   8 at bit 37, za at 36, constant 1 at 34, zb at 33, x4 (4 bits) at 29,
+ *   x2 (2 bits) at 27, r3 (7 bits) at 20, r2 (7 bits) at 13,
+ *   r1 (7 bits) at 6, _p (6 bits) at 0.
+ */
+static void
+_A9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x4,
+    jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((za &  ~0x1L) == 0);
+    assert((zb &  ~0x1L) == 0);
+    assert((x4 &  ~0xfL) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 8L << 37;
+    op |= za << 36;
+    op |= 1L << 34;
+    op |= zb << 33;
+    op |= x4 << 29;
+    op |= x2 << 27;
+    op |= r3 << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_A);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_I instruction word with this layout:
+ *   7 at bit 37, constant 1 at 33, ct (2 bits) at 30, x2 (2 bits) at 28,
+ *   r3 (7 bits) at 20, r2 (7 bits) at 13, r1 (7 bits) at 6,
+ *   _p (6 bits) at 0.
+ */
+static void
+_I1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ct, jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((ct &  ~0x3L) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 7L << 37;
+    op |= 1L << 33;
+    op |= ct << 30;
+    op |= x2 << 28;
+    op |= r3 << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_I instruction word with this layout:
+ *   7 at bit 37, za at 36, xa (2 bits) at 34, zb at 33, xc (2 bits) at 30,
+ *   xb (2 bits) at 28, r3 (7 bits) at 20, r2 (7 bits) at 13,
+ *   r1 (7 bits) at 6, _p (6 bits) at 0.
+ *
+ * The qualifying predicate _p is now part of the encoded word, as in
+ * every sibling encoder; it was previously validated and passed to
+ * TSTPRED but never encoded, so a predicated use was emitted as an
+ * unconditional instruction.
+ */
+static void
+_I2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t xa, jit_word_t zb, jit_word_t xc,
+    jit_word_t xb ,jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(xa &  ~0x3L));
+    assert(!(zb &  ~0x1L));
+    assert(!(xc &  ~0x3L));
+    assert(!(xb &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((7L<<37)|(za<<36)|(xa<<34)|(zb<<33)|(xc<<30)|
+         (xb<<28)|(r3<<20)|(r2<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_I instruction word with this layout:
+ *   7 at bit 37, 3 at 34, 2 at 30, 2 at 28, mb (4 bits) at 20,
+ *   r2 (7 bits) at 13, r1 (7 bits) at 6, _p (6 bits) at 0.
+ */
+static void
+_I3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t mb, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((mb &  ~0xfL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG1(r2);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 7L << 37;
+    op |= 3L << 34;
+    op |= 2L << 30;
+    op |= 2L << 28;
+    op |= mb << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_I instruction word with this layout:
+ *   7 at bit 37, 3 at 34, 1 at 33, 2 at 30, 2 at 28, mh (8 bits) at 20,
+ *   r2 (7 bits) at 13, r1 (7 bits) at 6, _p (6 bits) at 0.
+ */
+static void
+_I4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t mh, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((mh & ~0xffL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG1(r2);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 7L << 37;
+    op |= 3L << 34;
+    op |= 1L << 33;
+    op |= 2L << 30;
+    op |= 2L << 28;
+    op |= mh << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Queue an INST_I instruction word with this layout:
+ *   7 at bit 37, za at 36, zb at 33, x2 (2 bits) at 28,
+ *   r3 (7 bits) at 20, r2 (7 bits) at 13, r1 (7 bits) at 6,
+ *   _p (6 bits) at 0.
+ */
+static void
+_I5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x2,
+    jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t          op;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((za &  ~0x1L) == 0);
+    assert((zb &  ~0x1L) == 0);
+    assert((x2 &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    op  = 7L << 37;
+    op |= za << 36;
+    op |= zb << 33;
+    op |= x2 << 28;
+    op |= r3 << 20;
+    op |= r2 << 13;
+    op |= r1 << 6;
+    op |= _p;
+    inst(op, INST_I);
+    SETREG(r1);
+}
+
+/* I6 encoder: like I5 but takes a 5-bit count ct (placed at bit 14)
+ * instead of a second source register; emitted as INST_I. */
+static void
+_I6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x2,
+    jit_word_t r3, jit_word_t ct, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(ct & ~0x1fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (7L<<37)|(za<<36)|(1L<<34)|(zb<<33)|(x2<<28);
+    insn |= (r3<<20)|(ct<<14)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/* I7 encoder: packs za, zb, a fixed bit 30 and r3/r2/r1 with the
+ * predicate; emitted as INST_I. */
+static void
+_I7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (7L<<37)|(za<<36)|(zb<<33)|(1L<<30);
+    insn |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I8 encoder: asserts field widths, then packs za/zb, the 5-bit immediate
+ * im (bit 20), r2, r1 and the predicate into a word emitted as INST_I.
+ * The predicate _p is OR-ed into the low six bits like in the sibling
+ * encoders; without it a non-zero _p would be validated and passed to
+ * TSTPRED() but never reach the instruction word.
+ */
+static void
+_I8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t im, jit_word_t r2, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(im & ~0x1fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((7L<<37)|(za<<36)|(3L<<34)|(zb<<33)|(1L<<30)|(1L<<28)|
+        (im<<20)|(r2<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/* I9 encoder: fixed bits 34, 33 and 28, a 2-bit x2 at bit 30, source r3
+ * and destination r1 with the predicate; emitted as INST_I. */
+static void
+_I9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x2, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (7L<<37)|(1L<<34)|(1L<<33)|(x2<<30)|(1L<<28);
+    insn |= (r3<<20)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/* I10 encoder: 6-bit count ct at bit 27 with r3/r2/r1 and the predicate;
+ * emitted as INST_I. */
+static void
+_I10(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ct, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(ct & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (5L<<37)|(3L<<34)|(ct<<27);
+    insn |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I11 encoder: packs len (bit 27), r3 (bit 20), pos (bit 14), y (bit 13),
+ * r1 (bit 6) and the predicate; emitted as INST_I.
+ * pos occupies the six bits 14..19 (r3 starts at bit 20), so positions up
+ * to 63 are representable; the width check accepts the full 6-bit range
+ * instead of stopping at 31.
+ */
+static void
+_I11(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t r3,
+     jit_word_t pos, jit_word_t y, jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(r3  & ~0x7fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(y   &  ~0x1L));
+    assert(!(r1  & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((5L<<37)|(1L<<34)|(len<<27)|(r3<<20)|
+        (pos<<14)|(y<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/* I12 encoder: 6-bit len at bit 27 and 6-bit pos at bit 20, source r2 and
+ * destination r1 with the predicate; emitted as INST_I. */
+static void
+_I12(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t pos, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(r2  & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (5L<<37)|(1L<<34)|(1L<<33)|(len<<27);
+    insn |= (pos<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I13 encoder: immediate variant of I12.  The 8-bit immediate is split
+ * into bit 7 (placed at bit 36) and the low seven bits (placed at bit 13).
+ * The width check therefore has to accept eight bits; with a 7-bit check
+ * the (im>>7) extraction below could never be non-zero.  This matches the
+ * 8-bit check used by _I27 for the same split.
+ */
+static void
+_I13(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t pos, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(im  & ~0xffL));
+    assert(!(r1  & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((5L<<37)|(((im>>7)&1L)<<36)|(1L<<34)|(1L<<33)|(len<<27)|
+        (1L<<26)|(pos<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I14 encoder: packs s (bit 36), len (bit 27), r3 (bit 20), pos (bit 14),
+ * r1 (bit 6) and the predicate; emitted as INST_I.
+ * pos sits in bits 14..19, a 6-bit field, so the width check accepts
+ * positions 0..63 rather than only 0..31.
+ */
+static void
+_I14(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t s, jit_word_t len, jit_word_t r3, jit_word_t pos, jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(s   &  ~0x1L));
+    assert(!(len & ~0x3fL));
+    assert(!(r3  & ~0x7fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(r1  & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((5L<<37)|(s<<36)|(3L<<34)|(1L<<33)|
+        (len<<27)|(r3<<20)|(pos<<14)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/* I15 encoder: 6-bit pos at bit 31, 4-bit len at bit 27, r3/r2/r1 and the
+ * predicate; emitted as INST_I. */
+static void
+_I15(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t pos, jit_word_t len,
+     jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p  & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(len &  ~0xfL));
+    assert(!(r3  & ~0x7fL));
+    assert(!(r2  & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (4L<<37)|(pos<<31)|(len<<27);
+    insn |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I16 encoder: packs tb (bit 36), ta (bit 33), target predicate p2
+ * (bit 27), r3 (bit 20), ps (bit 14), c (bit 12), target predicate p1
+ * (bit 6) and the qualifying predicate _p; emitted as INST_I.  Non-zero
+ * p1/p2 are then recorded in _jitc->pred.
+ *
+ *  - _p is OR-ed into the word (it was validated but not encoded).
+ *  - p2 is limited to six bits: it is shifted to bit 27 and a seventh bit
+ *    would land on bit 33, which belongs to ta.
+ *  - the pred mask is built with a long constant so the shift is not
+ *    performed in int for predicate numbers of 31 and above.
+ *    NOTE(review): assumes _jitc->pred is at least as wide as long.
+ */
+static void
+_I16(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t tb, jit_word_t ta, jit_word_t p2,
+     jit_word_t r3, jit_word_t ps, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(tb &  ~0x1L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(ps & ~0x3fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    inst((5L<<37)|(tb<<36)|(ta<<33)|(p2<<27)|
+        (r3<<20)|(ps<<14)|(c<<12)|(p1<<6)|_p, INST_I);
+    if (p1)
+       _jitc->pred |= 1L << p1;
+    if (p2)
+       _jitc->pred |= 1L << p2;
+}
+
+/*
+ * I17 encoder: like I16 without the ps field and with a fixed bit 13.
+ * Packs tb, ta, p2 (bit 27), r3, c, p1 (bit 6) and _p; emitted as INST_I,
+ * after which non-zero p1/p2 are recorded in _jitc->pred.
+ *
+ *  - p2 is limited to six bits: shifted to bit 27, a seventh bit would
+ *    overlap ta at bit 33.
+ *  - the pred mask uses a long constant so the shift is not done in int.
+ *    NOTE(review): assumes _jitc->pred is at least as wide as long.
+ */
+static void
+_I17(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t tb, jit_word_t ta, jit_word_t p2,
+     jit_word_t r3, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(tb &  ~0x1L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    inst((5L<<37)|(tb<<36)|(ta<<33)|(p2<<27)|
+        (r3<<20)|(1L<<13)|(c<<12)|(p1<<6)|_p, INST_I);
+    if (p1)
+       _jitc->pred |= 1L << p1;
+    if (p2)
+       _jitc->pred |= 1L << p2;
+}
+
+/*
+ * I18 encoder: 21-bit immediate plus the y flag; emitted as INST_I.
+ * The immediate is split the same way as in _M48: bit 20 goes to bit 36
+ * of the word and the low 20 bits go to bits 6..25.  Bit 20 must not be
+ * placed at bit 26, which is where y lives, and the low part must keep all
+ * 20 bits, otherwise bits 16..19 of the immediate are lost.
+ */
+static void
+_I18(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im, jit_word_t y)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    assert(!(y  &      ~0x1L));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_I);
+}
+
+/*
+ * I19 encoder: a bare 21-bit immediate; emitted as INST_I.
+ * Bit 20 of the immediate is placed at bit 36 of the word (as _M37 does
+ * for the same split) instead of being OR-ed unshifted into bit 0, where
+ * it collided with the predicate.  The low part keeps all 20 bits.
+ */
+static void
+_I19(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|((im&0xfffffL)<<6)|_p, INST_I);
+}
+
+/*
+ * I20 encoder: register r2 plus a 21-bit immediate split into bit 20,
+ * bits 7..19 (placed at bit 20) and bits 0..6 (placed at bit 6); emitted
+ * as INST_I.  Bit 20 of the immediate goes to bit 36 of the word, the same
+ * placement _M20x uses for the identical split; left unshifted it would
+ * overlap the predicate in bit 0.
+ */
+static void
+_I20(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(im & ~0x1fffffL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|(1L<<33)|(((im>>7)&0x1fffL)<<20)|
+        (r2<<13)|((im&0x7fL)<<6)|_p, INST_I);
+}
+
+/*
+ * I21 encoder: packs im (bit 24), ih (bit 23), x (bit 22), wh (bit 20),
+ * r2 (bit 13), b1 (bit 6) and the predicate; emitted as INST_I.
+ * b1 is three bits wide, so it only uses bits 6..8 and never reaches the
+ * predicate field in bits 0..5; _p is OR-ed in like in the other encoders
+ * instead of being validated and then dropped.
+ */
+static void
+_I21(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im, jit_word_t ih, jit_word_t x,
+     jit_word_t wh, jit_word_t r2, jit_word_t b1)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im &    ~0x1ffL));
+    assert(!(ih &      ~0x1L));
+    assert(!(x  &      ~0x1L));
+    assert(!(wh &      ~0x3L));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(b1 &      ~0x7L));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    inst((7L<<33)|(im<<24)|(ih<<23)|(x<<22)|(wh<<20)|
+        (r2<<13)|(b1<<6)|_p, INST_I);
+}
+
+/* I22 encoder: fixed code 0x31 at bit 27, b2 at bit 13, r1 at bit 6 and
+ * the predicate; emitted as INST_I.
+ * NOTE(review): r1 is encoded but, unlike in _I25/_I28, not passed to
+ * TSTREG1()/SETREG() -- confirm this is intentional. */
+static void
+_I22(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t b2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(b2 &  ~0x7L));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    insn = (0x31L<<27)|(b2<<13)|(r1<<6)|_p;
+    inst(insn, INST_I);
+}
+
+/*
+ * I23 encoder: register r2 plus a 16-bit immediate split into bit 15
+ * (placed at bit 36), bits 7..14 (placed at bit 24) and bits 0..6.
+ * The low seven bits are shifted to bit 6; left unshifted they would be
+ * OR-ed on top of the predicate, which owns bits 0..5 of the word.
+ * Emitted as INST_I.
+ */
+static void
+_I23(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &   ~0x3fL));
+    assert(!(r2 &   ~0x7fL));
+    assert(!(im & ~0xffffL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    inst((((im>>15)&1L)<<36)|(3L<<33)|(((im>>7)&0xffL)<<24)|
+        (r2<<13)|((im&0x7fL)<<6)|_p, INST_I);
+}
+
+/* I24 encoder: 28-bit immediate split into bit 27 (placed at bit 36) and
+ * the low 27 bits (placed at bit 6), plus the predicate; emitted as
+ * INST_I. */
+static void
+_I24(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    jit_word_t         insn;
+    assert(!(_p &      ~0x3fL));
+    assert(!(im & ~0xfffffffL));
+    TSTPRED(_p);
+    insn  = (((im>>27)&1L)<<36)|(2L<<33);
+    insn |= ((im&0x7ffffffL)<<6)|_p;
+    inst(insn, INST_I);
+}
+
+/* I25 encoder: 6-bit x6 at bit 27 and destination r1 at bit 6 with the
+ * predicate; emitted as INST_I, then r1 is recorded with SETREG(). */
+static void
+_I25(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (x6<<27)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/* I26 encoder: fixed code 0x2a at bit 27, 7-bit ar at bit 20, source r2
+ * at bit 13 and the predicate; emitted as INST_I. */
+static void
+_I26(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar,jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    insn = (0x2aL<<27)|(ar<<20)|(r2<<13)|_p;
+    inst(insn, INST_I);
+}
+
+/* I27 encoder: immediate form of I26.  The 8-bit immediate is split into
+ * bit 7 (placed at bit 36) and the low seven bits (placed at bit 13);
+ * emitted as INST_I. */
+static void
+_I27(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar,jit_word_t im)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(im & ~0xffL));
+    TSTPRED(_p);
+    insn  = (((im>>7)&1L)<<36)|(0xaL<<27);
+    insn |= (ar<<20)|((im&0x7fL)<<13)|_p;
+    inst(insn, INST_I);
+}
+
+/* I28 encoder: fixed code 0x32 at bit 27, 7-bit ar at bit 20, destination
+ * r1 at bit 6 and the predicate; emitted as INST_I, then r1 is recorded
+ * with SETREG(). */
+static void
+_I28(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (0x32L<<27)|(ar<<20)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/* I29 encoder: 6-bit x6 at bit 27, source r3 at bit 20, destination r1 at
+ * bit 6 and the predicate; emitted as INST_I. */
+static void
+_I29(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3,jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (x6<<27)|(r3<<20)|(r1<<6)|_p;
+    inst(insn, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * I30 encoder: packs tb (bit 36), ta (bit 33), target predicate p2
+ * (bit 27), a fixed bit 19, the 5-bit immediate (bit 14), a fixed bit 13,
+ * c (bit 12), target predicate p1 (bit 6) and _p; emitted as INST_I.
+ * Non-zero p1/p2 are then recorded in _jitc->pred.
+ *
+ *  - p2 is now encoded at bit 27, the position _I16/_I17 use; it used to
+ *    be validated and recorded in pred without reaching the word.
+ *  - p1 is checked against six bits like every other predicate number
+ *    (its field spans bits 6..11, c starts at bit 12).
+ *  - the pred mask uses a long constant so the shift is not done in int.
+ *    NOTE(review): assumes _jitc->pred is at least as wide as long.
+ */
+static void
+_I30(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ta, jit_word_t tb, jit_word_t p2,
+     jit_word_t im, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(ta &  ~0x1L));
+    assert(!(tb &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(im & ~0x1fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTPRED(_p);
+    inst((5L<<37)|(tb<<36)|(ta<<33)|(p2<<27)|(1L<<19)|(im<<14)|
+        (1L<<13)|(c<<12)|(p1<<6)|_p, INST_I);
+    if (p1)
+       _jitc->pred |= 1L << p1;
+    if (p2)
+       _jitc->pred |= 1L << p2;
+}
+
+/* M1 encoder: x6 (bit 30), ht (bit 28), x (bit 27), address register r3
+ * (bit 20) and destination r1 (bit 6) with the predicate; emitted through
+ * inst() as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t x, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (4L<<37)|(x6<<30)|(ht<<28)|(x<<27);
+    insn |= (r3<<20)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/* M2 encoder: like M1 with bit 36 set and an extra register r2 at bit 13.
+ * Emitted as INST_M; both r1 and r3 are recorded with SETREG(). */
+static void
+_M2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (4L<<37)|(1L<<36)|(x6<<30)|(ht<<28);
+    insn |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+    SETREG(r3);
+}
+
+/*
+ * M3 encoder: immediate variant of M2.  The immediate is split into bit 8
+ * (placed at bit 36), bit 7 (placed at bit 27) and the low seven bits
+ * (placed at bit 13), i.e. nine bits in total, so the representable signed
+ * range is -256..255 inclusive; the range check accepts -256 as well.
+ * Emitted as INST_M; both r1 and r3 are recorded with SETREG().
+ */
+static void
+_M3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(im >= -256 && im <= 255);
+    assert(!(r1 &  ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((5L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+    SETREG(r3);
+}
+
+/*
+ * M5 encoder: registers r3 (bit 20) and r2 (bit 13) plus an immediate
+ * split into bit 8 (at bit 36), bit 7 (at bit 27) and the low seven bits
+ * (at bit 6).  Nine bits hold the signed range -256..255 inclusive, so
+ * the range check accepts -256 too.  Emitted as INST_M; r3 is recorded
+ * with SETREG().
+ */
+static void
+_M5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    assert(im >= -256 && im <= 255);
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    inst((5L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|(r2<<13)|((im&0x7fL)<<6)|_p, INST_M);
+    SETREG(r3);
+}
+
+/* M6 encoder: x6 (bit 30), ht (bit 28), x (bit 27), r3 (bit 20) and r2
+ * (bit 13) with the predicate; emitted as INST_M.  No register is
+ * recorded with SETREG(). */
+static void
+_M6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t x, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    insn  = (4L<<37)|(x6<<30)|(ht<<28)|(x<<27);
+    insn |= (r3<<20)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M13 encoder: x6, ht, r3 (bit 20) and r2 (bit 13) with the predicate;
+ * emitted as INST_M.  r3 goes through TSTREG1() while a non-zero r2 goes
+ * through TSTFREG1(). */
+static void
+_M13(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r3);
+    if (r2)
+       TSTFREG1(r2);
+    TSTPRED(_p);
+    insn = (6L<<37)|(x6<<30)|(ht<<28)|(r3<<20)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M14 encoder: like M13 with bit 36 set; both r2 and r3 go through
+ * TSTREG2().  Emitted as INST_M. */
+static void
+_M14(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    insn  = (6L<<37)|(1L<<36)|(x6<<30)|(ht<<28);
+    insn |= (r3<<20)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M15 encoder: r3 plus a 9-bit immediate split into bit 8 (at bit 36),
+ * bit 7 (at bit 27) and the low seven bits (at bit 13); emitted as
+ * INST_M. */
+static void
+_M15(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im & ~0x1ffL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    insn  = (7L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28);
+    insn |= (((im>>7)&1L)<<27)|(r3<<20)|((im&0x7fL)<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M16 encoder: x6, ht, a fixed bit 27, r3/r2/r1 and the predicate;
+ * emitted as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M16(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (4L<<37)|(x6<<30)|(ht<<28)|(1L<<27);
+    insn |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/* M17 encoder: like M16 but with a 3-bit immediate at bit 13 in place of
+ * r2; emitted as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M17(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(im &  ~0x7L));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn  = (4L<<37)|(x6<<30)|(ht<<28)|(1L<<27);
+    insn |= (r3<<20)|(im<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/* M20x encoder: 3-bit x3 (bit 33), register r2 (bit 13) and a 21-bit
+ * immediate split into bit 20 (at bit 36), bits 7..19 (at bit 20) and
+ * bits 0..6 (at bit 6).  r2 goes through TSTREG1() when x3 is 1 and
+ * through TSTFREG1() otherwise.  Emitted as INST_M. */
+static void
+_M20x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x3, jit_word_t r2, jit_word_t im)
+{
+    jit_word_t         insn;
+    assert(!(_p &     ~0x3fL));
+    assert(!(x3 &      ~0x7L));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(im & ~0x1fffffL));
+    if (x3 != 1)
+       TSTFREG1(r2);
+    else
+       TSTREG1(r2);
+    TSTPRED(_p);
+    insn  = (1L<<37)|(((im>>20)&1L)<<36)|(x3<<33);
+    insn |= (((im>>7)&0x1fffL)<<20)|(r2<<13)|((im&0x7fL)<<6)|_p;
+    inst(insn, INST_M);
+}
+
+/*
+ * M22x encoder: 3-bit x3 (bit 33), register r1 (bit 6) and a 21-bit
+ * immediate split into bit 20 (placed at bit 36) and the low 20 bits
+ * (placed at bit 13, ending right below x3 at bit 33).  The low part is
+ * masked with 20 bits so that bits 16..19 of the immediate are kept.
+ * r1 is treated as a general register when x3 < 6 and as a floating
+ * point register otherwise.  Emitted as INST_M.
+ */
+static void
+_M22x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x3, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(x3 &      ~0x7L));
+    assert(!(im & ~0x1fffffL));
+    assert(!(r1 &     ~0x7fL));
+    TSTPRED(_p);
+    if (x3 < 6)
+       TSTREG1(r1);
+    else
+       TSTFREG1(r1);
+    inst((((im>>20)&1L)<<36)|(x3<<33)|((im&0xfffffL)<<13)|(r1<<6)|_p, INST_M);
+    if (x3 < 6)
+       SETREG(r1);
+    else
+       SETFREG(r1);
+}
+
+/* M24 encoder: 2-bit x2 at bit 31 and 4-bit x4 at bit 27 with the
+ * predicate; emitted as INST_M. */
+static void
+_M24(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x2, jit_word_t x4)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(x4 &  ~0xfL));
+    TSTPRED(_p);
+    insn = (x2<<31)|(x4<<27)|_p;
+    inst(insn, INST_M);
+}
+
+/* M26x encoder: fixed bit 31, 4-bit x4 at bit 27 and register r1 at
+ * bit 6 with the predicate; emitted as INST_M.  r1 goes through
+ * TSTREG1() when x4 is 2 and through TSTFREG1() otherwise. */
+static void
+_M26x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x4, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x4 &  ~0xfL));
+    assert(!(r1 & ~0x7fL));
+    if (x4 != 2)
+       TSTFREG1(r1);
+    else
+       TSTREG1(r1);
+    TSTPRED(_p);
+    insn = (1L<<31)|(x4<<27)|(r1<<6)|_p;
+    inst(insn, INST_M);
+}
+
+/* M28 encoder: flag x at bit 36, fixed code 0x30 at bit 27 and register
+ * r3 at bit 20 with the predicate; emitted as INST_M. */
+static void
+_M28(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x, jit_word_t r3)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x<<36)|(0x30L<<27)|(r3<<20)|_p;
+    inst(insn, INST_M);
+}
+
+/*
+ * M29 encoder: fixed code 0x2a at bit 27, ar at bit 20 and source r2 at
+ * bit 13 with the predicate; emitted as INST_M.
+ * ar is placed in the seven bits 20..26, the same field the I-unit
+ * counterpart _I26 checks against 0x7f, so the width check accepts the
+ * full 7-bit range instead of only 0..7.
+ */
+static void
+_M29(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r2)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    inst((1L<<37)|(0x2aL<<27)|(ar<<20)|(r2<<13)|_p, INST_M);
+}
+
+/*
+ * M30 encoder: immediate form of M29.  The 8-bit immediate is split into
+ * bit 7 (placed at bit 36) and the low seven bits (placed at bit 13);
+ * emitted as INST_M.
+ * ar occupies the seven bits 20..26 (as in _I27), so the width check
+ * accepts the full 7-bit range instead of only 0..7.
+ */
+static void
+_M30(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t im)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(im & ~0xffL));
+    TSTPRED(_p);
+    inst((((im>>7)&1L)<<36)|(2L<<31)|(0x8L<<27)|
+        (ar<<20)|((im&0x7fL)<<13)|_p, INST_M);
+}
+
+/*
+ * M31 encoder: fixed code 0x22 at bit 27, ar at bit 20 and destination r1
+ * at bit 6 with the predicate; emitted as INST_M, then r1 is recorded
+ * with SETREG().
+ * ar occupies the seven bits 20..26 (as in _I28), so the width check
+ * accepts the full 7-bit range instead of only 0..7.
+ */
+static void
+_M31(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((1L<<37)|(0x22L<<27)|(ar<<20)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * M32 encoder: fixed code 0x2c at bit 27, cr at bit 20 and source r2 at
+ * bit 13 with the predicate; emitted as INST_M.
+ * cr is placed in the seven bits 20..26 (the next field starts at
+ * bit 27), so the width check accepts the full 7-bit range instead of
+ * only 0..7.
+ */
+static void
+_M32(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t cr, jit_word_t r2)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(cr & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    inst((1L<<37)|(0x2cL<<27)|(cr<<20)|(r2<<13)|_p, INST_M);
+}
+
+/*
+ * M33 encoder: fixed code 0x24 at bit 27, cr at bit 20 and destination r1
+ * at bit 6 with the predicate; emitted as INST_M, then r1 is recorded
+ * with SETREG().
+ * cr is placed in the seven bits 20..26, so the width check accepts the
+ * full 7-bit range instead of only 0..7.
+ */
+static void
+_M33(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t cr, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(cr & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst((1L<<37)|(0x24L<<27)|(cr<<20)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+}
+
+/* M34 encoder: sor (bit 27), sol (bit 20), sof (bit 13) and r1 (bit 6)
+ * with the predicate; emitted as INST_M.
+ * NOTE(review): r1 is encoded but not passed to TSTREG1()/SETREG() --
+ * confirm this is intentional. */
+static void
+_M34(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t sor, jit_word_t sol, jit_word_t sof, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p  & ~0x3fL));
+    assert(!(sor &  ~0xfL));
+    assert(!(sol & ~0x7fL));
+    assert(!(sof & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    TSTPRED(_p);
+    insn  = (1L<<37)|(6L<<33)|(sor<<27);
+    insn |= (sol<<20)|(sof<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+}
+
+/* M35 encoder: 6-bit x6 at bit 27 and source r2 at bit 13 with the
+ * predicate; emitted as INST_M. */
+static void
+_M35(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x6<<27)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M36 encoder: 6-bit x6 at bit 27 and destination r1 at bit 6 with the
+ * predicate; emitted as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M36(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (1L<<37)|(x6<<27)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * M37 encoder: a bare 21-bit immediate split into bit 20 (placed at
+ * bit 36) and the low 20 bits (placed at bit 6); emitted as INST_M.
+ * Because bit 20 is extracted, the width check has to allow 21 bits, and
+ * the low part is masked with 20 bits so bits 16..19 are not discarded.
+ */
+static void
+_M37(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|((im&0xfffffL)<<6)|_p, INST_M);
+}
+
+/* M38 encoder: 6-bit x6 at bit 27 with r3/r2/r1 and the predicate;
+ * emitted as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M38(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    assert(!(r1 &  ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/* M39 encoder: like M38 but with a 3-bit immediate at bit 13 in place of
+ * r2; emitted as INST_M, then r1 is recorded with SETREG(). */
+static void
+_M39(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im &   ~0x7L));
+    assert(!(r1 &  ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(im<<13)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/* M40 encoder: x6 at bit 27, r3 at bit 20 and a 3-bit immediate at
+ * bit 13 with the predicate; emitted as INST_M. */
+static void
+_M40(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t im)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im &   ~0x7L));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(im<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M41 encoder: x6 at bit 27 and source r2 at bit 13 with the predicate;
+ * emitted as INST_M. */
+static void
+_M41(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r2 &  ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x6<<27)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M42 encoder: x6 at bit 27, r3 at bit 20 and r2 at bit 13 with the
+ * predicate; emitted as INST_M.
+ * NOTE(review): only r2 is passed to TSTREG1() here, whereas _M45 with
+ * the same layout checks both r2 and r3 -- confirm r3 needs no check. */
+static void
+_M42(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    TSTREG1(r2);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M43 encoder: x6 at bit 27, r3 at bit 20 and destination r1 at bit 6
+ * with the predicate; emitted as INST_M, then r1 is recorded with
+ * SETREG().
+ * NOTE(review): r3 is not passed to TSTREG1(), unlike in _M46 which has
+ * the same layout -- confirm this is intentional. */
+static void
+_M43(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r1 &  ~0x7fL));
+    TSTPRED(_p);
+    TSTREG1(r1);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * M44 encoder: 4-bit x4 at bit 27 plus a 24-bit immediate split into
+ * bit 23 (placed at bit 36), bits 21..22 (placed at bit 31) and the low
+ * 21 bits (placed at bit 6, ending right below x4 at bit 27).
+ * The width check allows all 24 bits that are extracted below, and the
+ * low part is masked with 21 bits so bits 17..20 are not discarded.
+ * Emitted as INST_M.
+ */
+static void
+_M44(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x4, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(x4 &      ~0xfL));
+    assert(!(im & ~0xffffffL));
+    TSTPRED(_p);
+    inst((((im>>23)&1L)<<36)|(((im>>21)&3L)<<31)|
+        (x4<<27)|((im&0x1fffffL)<<6)|_p, INST_M);
+}
+
+/* M45 encoder: x6 at bit 27, r3 at bit 20 and r2 at bit 13 with the
+ * predicate; both registers go through TSTREG2().  Emitted as INST_M. */
+static void
+_M45(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         insn;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(r2<<13)|_p;
+    inst(insn, INST_M);
+}
+
+/* M46 encoder: x6 at bit 27, r3 at bit 20 and r1 at bit 6 with the
+ * predicate; emitted as INST_M.  The TSTREG1()/SETREG() bookkeeping for
+ * r1 is skipped when r1 is 0. */
+static void
+_M46(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    if (r1)
+       TSTREG1(r1);
+    insn = (1L<<37)|(x6<<27)|(r3<<20)|(r1<<6)|_p;
+    inst(insn, INST_M);
+    if (r1)
+       SETREG(r1);
+}
+
+/*
+ * M48 encoder: flag y at bit 26, a fixed bit 27 and a 21-bit immediate
+ * split into bit 20 (placed at bit 36) and the low 20 bits (placed at
+ * bit 6, ending right below y); emitted as INST_M.
+ * Because bit 20 is extracted, the width check allows 21 bits, and the
+ * low part is masked with 20 bits so bits 16..19 are not discarded.
+ */
+static void
+_M48(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t y, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(y  &      ~0x1L));
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_M);
+}
+
+/* B1 encoder: d (bit 35), wh (bit 33), p (bit 12), tp (bit 6) and a
+ * signed 21-bit displacement split into bit 20 (at bit 36) and the low
+ * 20 bits (at bit 13); emitted through inst() as INST_B. */
+static void
+_B1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t im, jit_word_t p, jit_word_t tp)
+{
+    jit_word_t         insn;
+    assert(!(_p &    ~0x3fL));
+    assert(!(d  &     ~0x1L));
+    assert(!(wh &     ~0x3L));
+    assert(im >= -1048576 && im <= 1048575);
+    assert(!(p  &     ~0x1L));
+    assert(!(tp &     ~0x7L));
+    TSTPRED(_p);
+    insn  = (4L<<37)|(((im>>20)&1L)<<36)|(d<<35)|(wh<<33);
+    insn |= ((im&0xfffffL)<<13)|(p<<12)|(tp<<6)|_p;
+    inst(insn, INST_B);
+}
+
+/*
+ * B3 encoder: same layout as B1 with a different leading code and a
+ * branch register number b at bit 6; emitted as INST_B.
+ * b is checked against three bits, the width _B4 and _B5 use for their
+ * branch register operands (the field spans bits 6..8); a 2-bit check
+ * rejected branch registers 4..7.
+ */
+static void
+_B3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t im, jit_word_t p, jit_word_t b)
+{
+    assert(!(_p &    ~0x3fL));
+    assert(!(d  &     ~0x1L));
+    assert(!(wh &     ~0x3L));
+    assert(im >= -1048576 && im <= 1048575);
+    assert(!(p  &     ~0x1L));
+    assert(!(b  &     ~0x7L));
+    TSTPRED(_p);
+    inst((5L<<37)|(((im>>20)&1L)<<36)|(d<<35)|(wh<<33)|
+        ((im&0xfffffL)<<13)|(p<<12)|(b<<6)|_p, INST_B);
+}
+
+/* B4 encoder: d (bit 35), wh (bit 33), x6 (bit 27), branch register b
+ * (bit 13), p (bit 12) and tp (bit 6) with the predicate; emitted as
+ * INST_B. */
+static void
+_B4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t x6,
+    jit_word_t b, jit_word_t p, jit_word_t tp)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(d  &  ~0x1L));
+    assert(!(wh &  ~0x3L));
+    assert(!(x6 & ~0x3fL));
+    assert(!(b  &  ~0x7L));
+    assert(!(p  &  ~0x1L));
+    assert(!(tp &  ~0x7L));
+    TSTPRED(_p);
+    insn  = (d<<35)|(wh<<33)|(x6<<27);
+    insn |= (b<<13)|(p<<12)|(tp<<6)|_p;
+    inst(insn, INST_B);
+}
+
+/* B5 encoder: d (bit 35), wh (bit 32), branch registers b2 (bit 13) and
+ * b1 (bit 6) and p (bit 12) with the predicate; emitted as INST_B.
+ * NOTE(review): wh is shifted by 32 here but by 33 in _B1/_B3/_B4 while
+ * being checked against two bits -- confirm the field position/width. */
+static void
+_B5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t b2, jit_word_t p, jit_word_t b1)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(d  &  ~0x1L));
+    assert(!(wh &  ~0x3L));
+    assert(!(b2 &  ~0x7L));
+    assert(!(p  &  ~0x1L));
+    assert(!(b1 &  ~0x7L));
+    TSTPRED(_p);
+    insn  = (1L<<37)|(d<<35)|(wh<<32);
+    insn |= (b2<<13)|(p<<12)|(b1<<6)|_p;
+    inst(insn, INST_B);
+}
+
+/*
+ * B6 encoder: ih (bit 35), a 9-bit tag split into bits 7..8 (at bit 33)
+ * and bits 0..6 (at bit 6), wh (bit 3) and a displacement split into
+ * bit 20 (at bit 36) and the low 20 bits (at bit 13); emitted as INST_B.
+ * The displacement uses the same 21-bit split as _B1, so it is checked
+ * with the same signed range; the previous 17-bit check rejected values
+ * the encoding below can represent.
+ */
+static void
+_B6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ih, jit_word_t im, jit_word_t tag, jit_word_t wh)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(ih  &     ~0x1L));
+    assert(im >= -1048576 && im <= 1048575);
+    assert(!(tag &   ~0x1ffL));
+    assert(!(wh  &     ~0x3L));
+    TSTPRED(_p);
+    inst((7L<<37)|(((im>>20)&1L)<<36)|(ih<<35)|(((tag>>7)&3L)<<33)|
+        ((im&0xfffffL)<<13)|((tag&0x7fL)<<6)|(wh<<3)|_p, INST_B);
+}
+
+/* B7 encoder: ih (bit 35), x6 (bit 27), branch register b2 (bit 13),
+ * wh (bit 3) and a 9-bit tag split into bits 7..8 (at bit 33) and
+ * bits 0..6 (at bit 6); emitted as INST_B. */
+static void
+_B7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ih, jit_word_t x6, jit_word_t b2, jit_word_t tag, jit_word_t wh)
+{
+    jit_word_t         insn;
+    assert(!(_p &   ~0x3fL));
+    assert(!(ih  &   ~0x1L));
+    assert(!(x6  &  ~0x3fL));
+    assert(!(b2  &   ~0x7L));
+    assert(!(tag & ~0x1ffL));
+    assert(!(wh  &   ~0x3L));
+    TSTPRED(_p);
+    insn  = (2L<<37)|(ih<<35)|(((tag>>7)&3L)<<33)|(x6<<27);
+    insn |= (b2<<13)|((tag&0x7fL)<<6)|(wh<<3)|_p;
+    inst(insn, INST_B);
+}
+
+/* B8 encoder: only a 6-bit x6 at bit 27 and the predicate; emitted as
+ * INST_B. */
+static void
+_B8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6)
+{
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    TSTPRED(_p);
+    insn = (x6<<27)|_p;
+    inst(insn, INST_B);
+}
+
+/*
+ * B9 encoder: 4-bit op at bit 37, 6-bit x6 at bit 27 and a 21-bit
+ * immediate split into bit 20 (placed at bit 36) and the low 20 bits
+ * (placed at bit 6); emitted as INST_B.
+ * Because bit 20 is extracted, the width check allows 21 bits, and the
+ * low part is masked with 20 bits so bits 16..19 are not discarded.
+ */
+static void
+_B9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t x6, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(op &      ~0xfL));
+    assert(!(x6 &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((op<<37)|(((im>>20)&1L)<<36)|(x6<<27)|((im&0xfffffL)<<6)|_p, INST_B);
+}
+
+/*
+ * X1 encoder: emits two words, the upper immediate bits as INST_L and the
+ * remaining bits plus the predicate as INST_X.
+ * The range check admits a 62-bit immediate, which is split without gaps
+ * into the low 20 bits (i20), bit 20 (i1) and the upper 41 bits (i41):
+ * 20 + 1 + 41 = 62.  Splitting at bits 21 and 22 instead would skip
+ * bit 20 of the immediate entirely.
+ */
+static void
+_X1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t im)
+{
+    jit_word_t         i41, i1, i20;
+    assert(!(_p &               ~0x3fL));
+    assert(im > -0x2000000000000000 && im <= 0x1fffffffffffffff);
+    i41 = (im >> 21) & 0x1ffffffffffL;
+    i1  = (im >> 20) &           0x1L;
+    i20 =  im        &       0xfffffL;
+    TSTPRED(_p);
+    inst(i41, INST_L);
+    inst((i1<<36)|(i20<<6)|_p, INST_X);
+}
+
+/* X2 encoder: loads a full 64-bit immediate into r1 using two words.
+ * Bits 22..62 of the immediate are emitted as INST_L; bit 63, bit 21,
+ * bits 16..20, bits 7..15 and bits 0..6 are scattered over the INST_X
+ * word together with r1 and the predicate.  r1 is recorded with
+ * SETREG(). */
+static void
+_X2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t r1, jit_word_t im)
+{
+    jit_word_t         sign, upper, ic, imm5, imm9, imm7;
+    jit_word_t         insn;
+    assert(!(_p & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    sign  = (im >> 63) &           0x1L;
+    upper = (im >> 22) & 0x1ffffffffffL;
+    ic    = (im >> 21) &           0x1L;
+    imm5  = (im >> 16) &          0x1fL;
+    imm9  = (im >>  7) &         0x1ffL;
+    imm7  =  im        &          0x7fL;
+    TSTPRED(_p);
+    TSTREG1(r1);
+    inst(upper, INST_L);
+    insn  = (6L<<37)|(sign<<36)|(imm9<<27)|(imm5<<22);
+    insn |= (ic<<21)|(imm7<<13)|(r1<<6)|_p;
+    inst(insn, INST_X);
+    SETREG(r1);
+}
+
+/* X3x encoder: long-displacement branch form emitted as an INST_L word
+ * followed by an INST_X word carrying op, d, wh, p, tp and the predicate.
+ * NOTE(review): the bit positions taken from im below (61, 22.., 0..19)
+ * do not obviously match the formula quoted in the comment inside the
+ * body -- verify against the architecture manual before relying on it. */
+static void
+_X3x(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t op, jit_word_t d, jit_word_t wh,
+     jit_word_t p, jit_word_t tp, jit_word_t im)
+{
+    /* target64 = IP + ((i1 << 59 | imm39 << 20 | imm20b) << 4) */
+    jit_word_t         top, upper, low;
+    jit_word_t         insn;
+    assert(!(_p &               ~0x3fL));
+    assert(!(op &                ~0xfL));
+    assert(!(d  &                ~0x1L));
+    assert(!(wh &                ~0x3L));
+    assert(!(p  &                ~0x1L));
+    assert(!(tp &                ~0x7L));
+    top   = (im >> 61) &           0x1L;
+    upper = (im >> 22) & 0x1ffffffffffL;
+    low   =  im        &       0xfffffL;
+    TSTPRED(_p);
+    inst(upper, INST_L);
+    insn  = (op<<37)|(top<<36)|(d<<35)|(wh<<33);
+    insn |= (low<<13)|(p<<12)|(tp<<6)|_p;
+    inst(insn, INST_X);
+}
+
+/*
+ * X5 encoder: like X1 with an additional fixed bit 27 and the y flag at
+ * bit 26; emits an INST_L word followed by an INST_X word.
+ * The range check admits a 62-bit immediate, which is split without gaps
+ * into the low 20 bits (i20), bit 20 (i1) and the upper 41 bits (i41);
+ * splitting at bits 21 and 22 instead would skip bit 20 entirely.
+ */
+static void
+_X5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t y, jit_word_t im)
+{
+    jit_word_t         i41, i1, i20;
+    assert(!(_p &               ~0x3fL));
+    assert(im > -0x2000000000000000 && im <= 0x1fffffffffffffff);
+    i41 = (im >> 21) & 0x1ffffffffffL;
+    i1  = (im >> 20) &           0x1L;
+    i20 =  im        &       0xfffffL;
+    TSTPRED(_p);
+    inst(i41, INST_L);
+    inst((i1<<36)|(1L<<27)|(y<<26)|(i20<<6)|_p, INST_X);
+}
+
+/* Register-to-register move.  Destination numbers of 120 and above are
+ * rebased onto _jitc->rout before MOV is emitted (the "patch pushargr"
+ * case). */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                dst = r0;
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+    MOV(dst, r1);
+}
+
+/* Load the immediate i0 into r0.  Destination numbers of 120 and above
+ * are rebased onto _jitc->rout (the "patch pushargi" case).  Values in
+ * -2097152..2097151 use MOVI, everything else uses MOVL. */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                dst = r0;
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+    if (i0 < -2097152 || i0 > 2097151)
+       MOVL(dst, i0);
+    else
+       MOVI(dst, i0);
+}
+
+/* Emit a MOVL of i0 into r0 after calling sync(), and return the value
+ * _jit->pc.w had right before the MOVL was emitted. */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         start;
+    sync();
+    start = _jit->pc.w;
+    MOVL(r0, i0);
+    return (start);
+}
+
+/* r0 = r1 + i0.  Immediates in -8192..8191 use ADDS; immediates in
+ * -2097152..2097151 use ADDL when r1 is one of registers 0..3; anything
+ * else is materialized in a temporary register and added with addr(). */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (i0 >= -8192 && i0 <= 8191) {
+       ADDS(r0, i0, r1);
+       return;
+    }
+    if (!(r1 & ~3) && i0 >= -2097152 && i0 <= 2097151) {
+       ADDL(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2, leaving the carry-out in the jit_carry register (which is
+ * allocated on first use).  The carry is computed as "sum < r1" unsigned,
+ * so when r0 aliases r1 the sum is built in a temporary first. */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r0, r1);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       addr(rn(tmp), r1, r2);
+       ltr_u(rn(jit_carry), rn(tmp), r1);
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + i0, leaving the carry-out in the jit_carry register (which is
+ * allocated on first use).  The carry is computed as "sum < r1" unsigned,
+ * so when r0 aliases r1 the sum is built in a temporary first. */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, i0);
+       ltr_u(rn(jit_carry), r0, r1);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r1, i0);
+       ltr_u(rn(jit_carry), rn(tmp), r1);
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + r2 + carry-in.  The incoming carry is copied to a temporary,
+ * then two carry-producing additions are chained: r1 + r2, followed by
+ * adding the saved carry.
+ * NOTE(review): each addcr() rewrites jit_carry, so a carry produced only
+ * by the first addition appears to be lost after the second -- confirm
+ * whether the carry-out of this operation is relied upon. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                cin;
+    assert(jit_carry != _NOREG);
+    cin = jit_get_reg(jit_class_gpr);
+    movr(rn(cin), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(cin));
+    jit_unget_reg(cin);
+}
+
+/* r0 = r1 + i0 + carry-in.  The incoming carry is copied to a temporary,
+ * then addci() and addcr() are chained.
+ * NOTE(review): the second step rewrites jit_carry, so a carry produced
+ * only by the first addition appears to be lost -- confirm whether the
+ * carry-out of this operation is relied upon. */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                cin;
+    assert(jit_carry != _NOREG);
+    cin = jit_get_reg(jit_class_gpr);
+    movr(rn(cin), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(cin));
+    jit_unget_reg(cin);
+}
+
+/* r0 = r1 - i0: the immediate is always loaded into a temporary register
+ * and subtracted with subr(). */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2, leaving the borrow in the jit_carry register (allocated
+ * on first use).  The borrow is computed as "r1 < difference" unsigned,
+ * so when r0 aliases r1 the difference is built in a temporary first. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       subr(rn(tmp), r1, r2);
+       ltr_u(rn(jit_carry), r1, rn(tmp));
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 - i0, implemented as an addition of -i0, leaving the borrow in
+ * the jit_carry register (allocated on first use).  The borrow is
+ * computed as "r1 < difference" unsigned, so when r0 aliases r1 the
+ * difference is built in a temporary first. */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, -i0);
+       ltr_u(rn(jit_carry), r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       addi(rn(tmp), r1, -i0);
+       ltr_u(rn(jit_carry), r1, rn(tmp));
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* Subtract with borrow-in: snapshots the carry register into a scratch GPR,
+ * emits subcr(r0, r1, r2) and then subcr(r0, r0, snapshot).  A carry
+ * register must already exist (asserted). */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    movr(rn(borrow_in), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/* Subtract immediate with borrow-in: saves the incoming carry in a scratch
+ * GPR, emits subci(r0, r1, i0), then subcr(r0, r0, saved carry).  A carry
+ * register must already exist (asserted). */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    movr(rn(borrow_in), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/* Reverse subtract immediate: r0 = i0 - r1.
+ * Immediates in -128..127 are encoded directly with SUBI; anything else is
+ * loaded into a scratch GPR and handed to subr(r0, scratch, r1).
+ * The former ADDL fast path was dropped: ADDL adds an immediate to a
+ * register, so ADDL(r1, -i0, r0) could only form "register - i0", which is
+ * not the reversed subtraction this function must produce. */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -128 && i0 <= 127)
+       SUBI(r0, i0, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       subr(r0, rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Integer multiply routed through FP registers: both operands are moved to
+ * scratch FPRs with SETF_SIG, combined with XMPY_L, and the result is moved
+ * back to r0 with GETF_SIG. */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                lhs_fpr = jit_get_reg(jit_class_fpr);
+    jit_int32_t                rhs_fpr = jit_get_reg(jit_class_fpr);
+
+    SETF_SIG(rn(lhs_fpr), r1);
+    SETF_SIG(rn(rhs_fpr), r2);
+    XMPY_L(rn(lhs_fpr), rn(lhs_fpr), rn(rhs_fpr));
+    GETF_SIG(r0, rn(lhs_fpr));
+    jit_unget_reg(lhs_fpr);
+    jit_unget_reg(rhs_fpr);
+}
+
+/* Multiply by immediate: loads i0 into a scratch GPR and emits
+ * mulr(r0, r1, scratch). */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    mulr(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+#if !defined(__GNUC__)
+/* Local definitions of the division helpers that the divr/remr emitters
+ * call, compiled only when the compiler is not GNU C.  Each one simply
+ * applies the corresponding C operator to its two arguments. */
+static long
+__divdi3(long u, long v)
+{
+    long               quotient = u / v;
+    return quotient;
+}
+
+static unsigned long
+__udivdi3(unsigned long u, unsigned long v)
+{
+    unsigned long      quotient = u / v;
+    return quotient;
+}
+
+static long
+__moddi3(long u, long v)
+{
+    long               remainder = u % v;
+    return remainder;
+}
+
+static unsigned long
+__umoddi3(unsigned long u, unsigned long v)
+{
+    unsigned long      remainder = u % v;
+    return remainder;
+}
+#endif
+
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Signed division performed out of line by calling __divdi3. */
+    MOV(_jitc->rout, r1);              /* dividend; presumably first outgoing argument register */
+    MOV(_jitc->rout + 1, r2);          /* divisor goes in the next register */
+    calli((jit_word_t)__divdi3);
+    MOV(r0, GR_8);                     /* copy the call's result from GR_8 into r0 */
+}
+
+/* Signed divide by immediate: r0 = r1 / i0.
+ * Divisors 1 and -1 reduce to a move / negate.  Positive powers of two are
+ * done with shifts; every other divisor is loaded into a scratch GPR and
+ * passed to divr.
+ * A bare arithmetic right shift rounds toward negative infinity, whereas
+ * divr (via __divdi3, i.e. C division) truncates toward zero, so negative
+ * dividends would get a different quotient on the two paths.  The shift
+ * path therefore first adds (2^k - 1) to negative dividends. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_int32_t                shift;
+    switch (i0) {
+       case 1:
+           movr(r0, r1);
+           return;
+       case -1:
+           negr(r0, r1);
+           return;
+       default:
+           if (i0 > 0 && !(i0 & (i0 - 1))) {
+               /* i0 == 1 was handled above, so 1 <= shift <= 62 here */
+               shift = ffsl(i0) - 1;
+               reg = jit_get_reg(jit_class_gpr);
+               rshi(rn(reg), r1, 63);          /* all ones if r1 < 0, else 0 */
+               rshi_u(rn(reg), rn(reg), 64 - shift);   /* 2^k - 1 or 0 */
+               addr(rn(reg), r1, rn(reg));     /* biased dividend */
+               rshi(r0, rn(reg), shift);
+               jit_unget_reg(reg);
+               return;
+           }
+           break;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    divr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Unsigned division performed out of line by calling __udivdi3. */
+    MOV(_jitc->rout, r1);              /* dividend; presumably first outgoing argument register */
+    MOV(_jitc->rout + 1, r2);          /* divisor goes in the next register */
+    calli((jit_word_t)__udivdi3);
+    MOV(r0, GR_8);                     /* copy the call's result from GR_8 into r0 */
+}
+
+/* Unsigned divide by immediate.  A divisor of 1 is a plain move, a positive
+ * power of two becomes an unsigned right shift by ffsl(i0) - 1, and any
+ * other divisor is loaded into a scratch GPR and passed to divr_u. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 == 1) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 > 0 && !(i0 & (i0 - 1))) {
+       movr(r0, r1);
+       rshi_u(r0, r0, ffsl(i0) - 1);
+       return;
+    }
+    imm_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(imm_reg), i0);
+    divr_u(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Signed remainder performed out of line by calling __moddi3. */
+    MOV(_jitc->rout, r1);              /* dividend; presumably first outgoing argument register */
+    MOV(_jitc->rout + 1, r2);          /* divisor goes in the next register */
+    calli((jit_word_t)__moddi3);
+    MOV(r0, GR_8);                     /* copy the call's result from GR_8 into r0 */
+}
+
+/* Signed remainder by immediate.  Divisors 1 and -1 copy GR_0 into r0
+ * directly; every other divisor is loaded into a scratch GPR and passed
+ * to remr. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    switch (i0) {
+       case 1:
+       case -1:
+           MOV(r0, GR_0);
+           return;
+       default:
+           break;
+    }
+    imm_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(imm_reg), i0);
+    remr(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Unsigned remainder performed out of line by calling __umoddi3. */
+    MOV(_jitc->rout, r1);              /* dividend; presumably first outgoing argument register */
+    MOV(_jitc->rout + 1, r2);          /* divisor goes in the next register */
+    calli((jit_word_t)__umoddi3);
+    MOV(r0, GR_8);                     /* copy the call's result from GR_8 into r0 */
+}
+
+/* Unsigned remainder by immediate.  A divisor of 1 copies GR_0 into r0, a
+ * positive power of two becomes an AND with i0 - 1, and any other divisor
+ * is loaded into a scratch GPR and passed to remr_u. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 == 1) {
+       MOV(r0, GR_0);
+       return;
+    }
+    if (i0 > 0 && !(i0 & (i0 - 1))) {
+       andi(r0, r1, i0 - 1);
+       return;
+    }
+    imm_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(imm_reg), i0);
+    remr_u(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+/* Multiply through FP registers like mulr, but using XMPY_H when `sign' is
+ * set and XMPY_HU otherwise; the result is moved back to r0 with GETF_SIG. */
+static void
+_mulh(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_bool_t sign)
+{
+    jit_int32_t                lhs_fpr = jit_get_reg(jit_class_fpr);
+    jit_int32_t                rhs_fpr = jit_get_reg(jit_class_fpr);
+
+    SETF_SIG(rn(lhs_fpr), r1);
+    SETF_SIG(rn(rhs_fpr), r2);
+    if (!sign)
+       XMPY_HU(rn(lhs_fpr), rn(lhs_fpr), rn(rhs_fpr));
+    else
+       XMPY_H(rn(lhs_fpr), rn(lhs_fpr), rn(rhs_fpr));
+    GETF_SIG(r0, rn(lhs_fpr));
+    jit_unget_reg(lhs_fpr);
+    jit_unget_reg(rhs_fpr);
+}
+
+/* Two-result multiply: mulr(r2, r3) goes to r0 and mulh(r2, r3, sign) to r1.
+ * If r0 aliases one of the inputs, the mulr result is staged in a scratch
+ * GPR so that mulh still reads the original operands. */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                lo_reg;
+    jit_bool_t         aliased = (r0 == r2 || r0 == r3);
+
+    if (aliased) {
+       lo_reg = jit_get_reg(jit_class_gpr);
+       mulr(rn(lo_reg), r2, r3);
+    }
+    else
+       mulr(r0, r2, r3);
+    mulh(r1, r2, r3, sign);
+    if (aliased) {
+       movr(r0, rn(lo_reg));
+       jit_unget_reg(lo_reg);
+    }
+}
+
+/* Immediate form of iqmulr: loads i0 into a scratch GPR and forwards to
+ * iqmulr(r0, r1, r2, scratch, sign). */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    iqmulr(r0, r1, r2, rn(imm_reg), sign);
+    jit_unget_reg(imm_reg);
+}
+
+/* Quotient and remainder in one go: r0 = r2 / r3 (divr or divr_u depending
+ * on `sign'), r1 = r2 - r3 * quotient.  Whenever an output register aliases
+ * one of the inputs, that result is computed in a scratch GPR and copied to
+ * its destination at the end. */
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                quot_tmp, quot;
+    jit_int32_t                rem_tmp, rem;
+
+    quot = r0;
+    if (r0 == r2 || r0 == r3) {
+       quot_tmp = jit_get_reg(jit_class_gpr);
+       quot = rn(quot_tmp);
+    }
+    rem = r1;
+    if (r1 == r2 || r1 == r3) {
+       rem_tmp = jit_get_reg(jit_class_gpr);
+       rem = rn(rem_tmp);
+    }
+
+    if (!sign)
+       divr_u(quot, r2, r3);
+    else
+       divr(quot, r2, r3);
+    /* remainder = dividend - divisor * quotient */
+    mulr(rem, r3, quot);
+    subr(rem, r2, rem);
+    if (quot != r0) {
+       movr(r0, quot);
+       jit_unget_reg(quot_tmp);
+    }
+    if (rem != r1) {
+       movr(r1, rem);
+       jit_unget_reg(rem_tmp);
+    }
+}
+
+/* Immediate form of iqdivr: loads i0 into a scratch GPR and forwards to
+ * iqdivr(r0, r1, r2, scratch, sign). */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    iqdivr(r0, r1, r2, rn(imm_reg), sign);
+    jit_unget_reg(imm_reg);
+}
+
+/* AND with immediate.  Immediates in -128..127 are encoded directly with
+ * ANDI; anything else is loaded into a scratch GPR and passed to andr.
+ * The former ANDCMI branch tested ~i0 against -128..127, which is exactly
+ * the same set of values as the ANDI test (~i0 == -i0 - 1), so it could
+ * never be reached and has been removed. */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 >= -128 && i0 <= 127)
+       ANDI(r0, i0, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       andr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* OR with immediate.  Immediates outside -128..127 are loaded into a scratch
+ * GPR and passed to orr; the rest are encoded directly with ORI. */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -128 || i0 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       orr(r0, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       ORI(r0, i0, r1);
+}
+
+/* XOR with immediate.  Immediates outside -128..127 are loaded into a
+ * scratch GPR and passed to xorr; the rest are encoded directly with XORI. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -128 || i0 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       xorr(r0, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       XORI(r0, i0, r1);
+}
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+/* 16-bit byte swap: r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff).  The
+ * shifted copy is taken before r0 is written, so r0 may alias r1. */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                hi_byte = jit_get_reg(jit_class_gpr);
+
+    rshi(rn(hi_byte), r1, 8);
+    andi(r0, r1, 0xff);
+    andi(rn(hi_byte), rn(hi_byte), 0xff);
+    lshi(r0, r0, 8);
+    orr(r0, r0, rn(hi_byte));
+    jit_unget_reg(hi_byte);
+}
+
+/* 32-bit byte swap.  Bytes 3, 2 and 1 of r1 are extracted into three scratch
+ * GPRs before r0 is written (so r0 may alias r1); byte 0 is then moved to
+ * bits 24..31 of r0 and the other bytes are OR-ed in at bits 0, 8 and 16. */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                byte3 = jit_get_reg(jit_class_gpr);
+    jit_int32_t                byte2 = jit_get_reg(jit_class_gpr);
+    jit_int32_t                byte1 = jit_get_reg(jit_class_gpr);
+
+    rshi(rn(byte3), r1, 24);
+    rshi(rn(byte2), r1, 16);
+    rshi(rn(byte1), r1,  8);
+    andi(rn(byte3), rn(byte3), 0xff);
+    andi(rn(byte2), rn(byte2), 0xff);
+    andi(rn(byte1), rn(byte1), 0xff);
+    andi(r0, r1, 0xff);
+    lshi(r0, r0, 24);
+    lshi(rn(byte2), rn(byte2), 8);
+    orr(r0, r0, rn(byte3));
+    lshi(rn(byte1), rn(byte1), 16);
+    orr(r0, r0, rn(byte2));
+    orr(r0, r0, rn(byte1));
+    jit_unget_reg(byte1);
+    jit_unget_reg(byte2);
+    jit_unget_reg(byte3);
+}
+#endif
+
+/* Left shift by immediate: loads the count into a scratch GPR and emits
+ * lshr(r0, r1, scratch). */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count_reg), i0);
+    lshr(r0, r1, rn(count_reg));
+    jit_unget_reg(count_reg);
+}
+
+/* Right shift by immediate: loads the count into a scratch GPR and emits
+ * rshr(r0, r1, scratch). */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count_reg), i0);
+    rshr(r0, r1, rn(count_reg));
+    jit_unget_reg(count_reg);
+}
+
+/* Unsigned right shift by immediate: loads the count into a scratch GPR and
+ * emits rshr_u(r0, r1, scratch). */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count_reg), i0);
+    rshr_u(r0, r1, rn(count_reg));
+    jit_unget_reg(count_reg);
+}
+
+static void
+_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Turn the CMP_LT result for (r1, r2) into 1 or 0 in r0. */
+    CMP_LT(PR_6, PR_7, r1, r2);        /* compare outcome lands in predicates p6 / p7 */
+    MOVI_p(r0, 1, PR_6);               /* r0 = 1, predicated on p6 */
+    MOV_p(r0, GR_0, PR_7);             /* r0 = GR_0, predicated on p7 */
+}
+
+/* Signed "r1 < i0" as 1/0 in r0.  Immediates in -127..128 use CMPI_LT on
+ * i0 - 1 with the two predicate targets swapped; other immediates are loaded
+ * into a scratch GPR and compared with CMP_LT.  r0 is then set to 1 under p6
+ * and to GR_0 under p7. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -127 || i0 > 128) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       CMP_LT(PR_6, PR_7, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_LT(PR_7, PR_6, i0 - 1, r1);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Turn the CMP_LTU (unsigned) result for (r1, r2) into 1 or 0 in r0. */
+    CMP_LTU(PR_6, PR_7, r1, r2);       /* compare outcome lands in predicates p6 / p7 */
+    MOVI_p(r0, 1, PR_6);               /* r0 = 1, predicated on p6 */
+    MOV_p(r0, GR_0, PR_7);             /* r0 = GR_0, predicated on p7 */
+}
+
+/* Unsigned "r1 < i0" as 1/0 in r0.
+ * Small immediates are compared with CMPI_LTU on i0 - 1 with the predicate
+ * targets swapped ("i0 - 1 < r1" is the negation of "r1 < i0").  That
+ * identity breaks for i0 == 0, where i0 - 1 wraps to the largest unsigned
+ * value and the test would report "less than" for every r1; zero therefore
+ * takes the register-compare path like any out-of-range immediate. */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 != 0 && i0 >= -127 && i0 <= 128)
+       CMPI_LTU(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LTU(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* "r1 <= r2" expressed as the negation of CMP_LT on (r2, r1). */
+    CMP_LT(PR_6, PR_7, r2, r1);        /* operands swapped: compares r2 against r1 */
+    MOV_p(r0, GR_0, PR_6);             /* r0 = GR_0, predicated on p6 */
+    MOVI_p(r0, 1, PR_7);               /* r0 = 1, predicated on p7 */
+}
+
+/* Immediate form of ler: loads i0 into a scratch GPR and emits
+ * ler(r0, r1, scratch). */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    ler(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Unsigned "r1 <= r2" expressed as the negation of CMP_LTU on (r2, r1). */
+    CMP_LTU(PR_6, PR_7, r2, r1);       /* operands swapped: compares r2 against r1 */
+    MOV_p(r0, GR_0, PR_6);             /* r0 = GR_0, predicated on p6 */
+    MOVI_p(r0, 1, PR_7);               /* r0 = 1, predicated on p7 */
+}
+
+/* Immediate form of ler_u: loads i0 into a scratch GPR and emits
+ * ler_u(r0, r1, scratch). */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    ler_u(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Turn the CMP_EQ result for (r1, r2) into 1 or 0 in r0. */
+    CMP_EQ(PR_6, PR_7, r1, r2);        /* compare outcome lands in predicates p6 / p7 */
+    MOVI_p(r0, 1, PR_6);               /* r0 = 1, predicated on p6 */
+    MOV_p(r0, GR_0, PR_7);             /* r0 = GR_0, predicated on p7 */
+}
+
+/* "r1 == i0" as 1/0 in r0.  Immediates in -128..127 are compared with
+ * CMPI_EQ; others are loaded into a scratch GPR and compared with CMP_EQ.
+ * r0 is then set to 1 under p6 and to GR_0 under p7. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -128 || i0 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       CMP_EQ(PR_6, PR_7, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i0, r1);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* "r1 >= r2" expressed as the negation of CMP_LT on (r1, r2). */
+    CMP_LT(PR_6, PR_7, r1, r2);        /* compare outcome lands in predicates p6 / p7 */
+    MOV_p(r0, GR_0, PR_6);             /* r0 = GR_0, predicated on p6 */
+    MOVI_p(r0, 1, PR_7);               /* r0 = 1, predicated on p7 */
+}
+
+/* Signed "r1 >= i0" as 1/0 in r0.  Immediates in -127..128 use CMPI_LT on
+ * i0 - 1 with the two predicate targets swapped; other immediates are loaded
+ * into a scratch GPR and compared with CMP_LT.  r0 is then set to GR_0 under
+ * p6 and to 1 under p7. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -127 || i0 > 128) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       CMP_LT(PR_6, PR_7, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_LT(PR_7, PR_6, i0 - 1, r1);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Unsigned "r1 >= r2" expressed as the negation of CMP_LTU on (r1, r2). */
+    CMP_LTU(PR_6, PR_7, r1, r2);       /* compare outcome lands in predicates p6 / p7 */
+    MOV_p(r0, GR_0, PR_6);             /* r0 = GR_0, predicated on p6 */
+    MOVI_p(r0, 1, PR_7);               /* r0 = 1, predicated on p7 */
+}
+
+/* Unsigned "r1 >= i0" as 1/0 in r0.
+ * Small immediates are compared with CMPI_LTU on i0 - 1 with the predicate
+ * targets swapped ("i0 - 1 < r1" is equivalent to "r1 >= i0").  That
+ * identity breaks for i0 == 0, where i0 - 1 wraps to the largest unsigned
+ * value and the result would be 0 for every r1 although "r1 >= 0" always
+ * holds; zero therefore takes the register-compare path. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 != 0 && i0 >= -127 && i0 <= 128)
+       CMPI_LTU(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LTU(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+static void
+_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* "r1 > r2" expressed as CMP_LT on the swapped pair (r2, r1). */
+    CMP_LT(PR_6, PR_7, r2, r1);        /* operands swapped: compares r2 against r1 */
+    MOVI_p(r0, 1, PR_6);               /* r0 = 1, predicated on p6 */
+    MOV_p(r0, GR_0, PR_7);             /* r0 = GR_0, predicated on p7 */
+}
+
+/* Immediate form of gtr: loads i0 into a scratch GPR and emits
+ * gtr(r0, r1, scratch). */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    gtr(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Unsigned "r1 > r2" expressed as CMP_LTU on the swapped pair (r2, r1). */
+    CMP_LTU(PR_6, PR_7, r2, r1);       /* operands swapped: compares r2 against r1 */
+    MOVI_p(r0, 1, PR_6);               /* r0 = 1, predicated on p6 */
+    MOV_p(r0, GR_0, PR_7);             /* r0 = GR_0, predicated on p7 */
+}
+
+/* Immediate form of gtr_u: loads i0 into a scratch GPR and emits
+ * gtr_u(r0, r1, scratch). */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(imm_reg), i0);
+    gtr_u(r0, r1, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+}
+
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* "r1 != r2" expressed as the negation of CMP_EQ on (r1, r2). */
+    CMP_EQ(PR_6, PR_7, r1, r2);        /* compare outcome lands in predicates p6 / p7 */
+    MOV_p(r0, GR_0, PR_6);             /* r0 = GR_0, predicated on p6 */
+    MOVI_p(r0, 1, PR_7);               /* r0 = 1, predicated on p7 */
+}
+
+/* "r1 != i0" as 1/0 in r0.  Immediates in -128..127 are compared with
+ * CMPI_EQ; others are loaded into a scratch GPR and compared with CMP_EQ.
+ * r0 is then set to GR_0 under p6 and to 1 under p7. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                imm_reg;
+
+    if (i0 < -128 || i0 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i0);
+       CMP_EQ(PR_6, PR_7, r1, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i0, r1);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Load from the address in r1 with ldr_uc, then apply extr_c in place. */
+    ldr_uc(r0, r1);
+    extr_c(r0, r0);                    /* presumably sign-extends the loaded byte */
+}
+
+/* Load via ldr_c from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_c(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_uc from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_uc(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+static void
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Load from the address in r1 with ldr_us, then apply extr_s in place. */
+    ldr_us(r0, r1);
+    extr_s(r0, r0);                    /* presumably sign-extends the loaded short */
+}
+
+/* Load via ldr_s from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_s(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_us from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_us(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+static void
+_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Load from the address in r1 with ldr_ui, then apply extr_i in place. */
+    ldr_ui(r0, r1);
+    extr_i(r0, r0);                    /* presumably sign-extends the loaded int */
+}
+
+/* Load via ldr_i from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_i(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_ui from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_ui(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_l from the absolute address i0, which is first placed in a
+ * scratch GPR. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_l(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_c. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_c(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_c from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_c does the rest. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_c(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_c(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_uc. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_uc(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_uc from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_uc does the rest. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_uc(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_uc(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_s. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_s(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_s from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_s does the rest. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_s(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_s(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_us. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_us(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_us from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_us does the rest. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_us(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_us(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_i. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_i(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_i from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_i does the rest. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_i(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_i(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_ui. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_ui(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_ui from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_ui does the rest. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_ui(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_ui(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Indexed load: forms r1 + r2 in a scratch GPR and loads through ldr_l. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_l(r0, rn(addr_reg));
+    jit_unget_reg(addr_reg);
+}
+
+/* Load via ldr_l from r1 + i0.  A zero offset loads straight from r1;
+ * otherwise the offset is put in a scratch GPR and ldxr_l does the rest. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off_reg;
+
+    if (!i0) {
+       ldr_l(r0, r1);
+       return;
+    }
+    off_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(off_reg), i0);
+    ldxr_l(r0, r1, rn(off_reg));
+    jit_unget_reg(off_reg);
+}
+
+/* Store r0 via str_c to the absolute address i0, which is first placed in
+ * a scratch GPR. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_c(rn(addr_reg), r0);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r0 via str_s to the absolute address i0, which is first placed in
+ * a scratch GPR. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_s(rn(addr_reg), r0);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r0 via str_i to the absolute address i0, which is first placed in
+ * a scratch GPR. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_i(rn(addr_reg), r0);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r0 via str_l to the absolute address i0, which is first placed in
+ * a scratch GPR. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_l(rn(addr_reg), r0);
+    jit_unget_reg(addr_reg);
+}
+
+/* Indexed store: forms r0 + r1 in a scratch GPR and stores r2 there via
+ * str_c. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_c(rn(addr_reg), r2);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r1 via str_c at r0 + i0.  A zero offset stores straight through r0;
+ * otherwise the address is formed with addi in a scratch GPR. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_c(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_c(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/* Indexed store: forms r0 + r1 in a scratch GPR and stores r2 there via
+ * str_s. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_s(rn(addr_reg), r2);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r1 via str_s at r0 + i0.  A zero offset stores straight through r0;
+ * otherwise the address is formed with addi in a scratch GPR. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_s(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_s(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/* Indexed store: forms r0 + r1 in a scratch GPR and stores r2 there via
+ * str_i. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_i(rn(addr_reg), r2);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r1 via str_i at r0 + i0.  A zero offset stores straight through r0;
+ * otherwise the address is formed with addi in a scratch GPR. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_i(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_i(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/* Indexed store: forms r0 + r1 in a scratch GPR and stores r2 there via
+ * str_l. */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_l(rn(addr_reg), r2);
+    jit_unget_reg(addr_reg);
+}
+
+/* Store r1 via str_l at r0 + i0.  A zero offset stores straight through r0;
+ * otherwise the address is formed with addi in a scratch GPR. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_l(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_l(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/* Conditional branch to i0 on signed r0 < r1: CMP_LT into p6/p7, then a
+ * branch predicated on p6.  Returns the code position recorded just before
+ * the branch is emitted. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LT(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on signed r0 < i1.  Immediates in -127..128 use
+ * CMPI_LT on i1 - 1 with the predicate targets swapped; others go through a
+ * scratch GPR and CMP_LT.  The branch is predicated on p6.  Returns the code
+ * position recorded just before the branch is emitted. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg;
+
+    if (i1 < -127 || i1 > 128) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i1);
+       CMP_LT(PR_6, PR_7, r0, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_LT(PR_7, PR_6, i1 - 1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 < r1: CMP_LTU into p6/p7, then a
+ * branch predicated on p6.  Returns the code position recorded just before
+ * the branch is emitted. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LTU(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 < i1.
+ * Small immediates are compared with CMPI_LTU on i1 - 1 with the predicate
+ * targets swapped.  i1 == 0 is excluded from that path: i1 - 1 would wrap to
+ * the largest unsigned value and the branch would always be taken, although
+ * nothing is below zero in an unsigned compare.  Zero and out-of-range
+ * immediates are loaded into a scratch GPR and compared with CMP_LTU.
+ * Returns the code position recorded just before the branch is emitted. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    if (i1 != 0 && i1 >= -127 && i1 <= 128)
+       CMPI_LTU(PR_7, PR_6, i1 - 1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       CMP_LTU(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    sync();
+    w = _jit->pc.w;
+    BRI_COND((i0 - w) >> 4, PR_6);
+    return (w);
+}
+
+/* Conditional branch to i0 on signed r0 <= r1: CMP_LT on the swapped pair
+ * (r1, r0), then a branch predicated on p7.  Returns the code position
+ * recorded just before the branch is emitted. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LT(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Immediate form of bler: loads i1 into a no-spill scratch GPR, forwards to
+ * bler and returns its result. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(imm_reg), i1);
+    branch_pc = bler(i0, r0, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 <= r1: CMP_LTU on the swapped pair
+ * (r1, r0), then a branch predicated on p7.  Returns the code position
+ * recorded just before the branch is emitted. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LTU(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Immediate form of bler_u: loads i1 into a no-spill scratch GPR, forwards
+ * to bler_u and returns its result. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(imm_reg), i1);
+    branch_pc = bler_u(i0, r0, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on r0 == r1: CMP_EQ into p6/p7, then a branch
+ * predicated on p6.  Returns the code position recorded just before the
+ * branch is emitted. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_EQ(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on r0 == i1.  Immediates in -128..127 use
+ * CMPI_EQ; others go through a scratch GPR and CMP_EQ.  The branch is
+ * predicated on p6.  Returns the code position recorded just before the
+ * branch is emitted. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg;
+
+    if (i1 < -128 || i1 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i1);
+       CMP_EQ(PR_6, PR_7, r0, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on signed r0 >= r1: CMP_LT into p6/p7, then a
+ * branch predicated on p7.  Returns the code position recorded just before
+ * the branch is emitted. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LT(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on signed r0 >= i1.  Immediates in -127..128 use
+ * CMPI_LT on i1 - 1 with the predicate targets swapped; others go through a
+ * scratch GPR and CMP_LT.  The branch is predicated on p7.  Returns the code
+ * position recorded just before the branch is emitted. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg;
+
+    if (i1 < -127 || i1 > 128) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i1);
+       CMP_LT(PR_6, PR_7, r0, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_LT(PR_7, PR_6, i1 - 1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 >= r1: CMP_LTU into p6/p7, then a
+ * branch predicated on p7.  Returns the code position recorded just before
+ * the branch is emitted. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LTU(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 >= i1.
+ * Small immediates are compared with CMPI_LTU on i1 - 1 with the predicate
+ * targets swapped.  i1 == 0 is excluded from that path: i1 - 1 would wrap to
+ * the largest unsigned value and the branch would never be taken, although
+ * "r0 >= 0" always holds in an unsigned compare.  Zero and out-of-range
+ * immediates are loaded into a scratch GPR and compared with CMP_LTU.
+ * Returns the code position recorded just before the branch is emitted. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    if (i1 != 0 && i1 >= -127 && i1 <= 128)
+       CMPI_LTU(PR_7, PR_6, i1 - 1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       CMP_LTU(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    sync();
+    w = _jit->pc.w;
+    BRI_COND((i0 - w) >> 4, PR_7);
+    return (w);
+}
+
+/* Conditional branch to i0 on signed r0 > r1: CMP_LT on the swapped pair
+ * (r1, r0), then a branch predicated on p6.  Returns the code position
+ * recorded just before the branch is emitted. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LT(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Immediate form of bgtr: loads i1 into a no-spill scratch GPR, forwards to
+ * bgtr and returns its result. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(imm_reg), i1);
+    branch_pc = bgtr(i0, r0, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on unsigned r0 > r1: CMP_LTU on the swapped pair
+ * (r1, r0), then a branch predicated on p6.  Returns the code position
+ * recorded just before the branch is emitted. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_LTU(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return branch_pc;
+}
+
+/* Immediate form of bgtr_u: loads i1 into a no-spill scratch GPR, forwards
+ * to bgtr_u and returns its result. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(imm_reg), i1);
+    branch_pc = bgtr_u(i0, r0, rn(imm_reg));
+    jit_unget_reg(imm_reg);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on r0 != r1: CMP_EQ into p6/p7, then a branch
+ * predicated on p7.  Returns the code position recorded just before the
+ * branch is emitted. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    CMP_EQ(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Conditional branch to i0 on r0 != i1.  Immediates in -128..127 use
+ * CMPI_EQ; others go through a scratch GPR and CMP_EQ.  The branch is
+ * predicated on p7.  Returns the code position recorded just before the
+ * branch is emitted. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm_reg;
+
+    if (i1 < -128 || i1 > 127) {
+       imm_reg = jit_get_reg(jit_class_gpr);
+       movi(rn(imm_reg), i1);
+       CMP_EQ(PR_6, PR_7, r0, rn(imm_reg));
+       jit_unget_reg(imm_reg);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & r1) is non-zero: the AND is formed in a scratch
+ * GPR, compared against 0 with CMPI_EQ, and the branch is predicated on p7.
+ * Returns the code position recorded just before the branch is emitted. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                mask_reg = jit_get_reg(jit_class_gpr);
+
+    andr(rn(mask_reg), r0, r1);
+    CMPI_EQ(PR_6, PR_7, 0, rn(mask_reg));
+    jit_unget_reg(mask_reg);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & i1) is non-zero: i1 is loaded into a scratch GPR,
+ * AND-ed with r0 in place, compared against 0 with CMPI_EQ, and the branch
+ * is predicated on p7.  Returns the code position recorded just before the
+ * branch is emitted. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                mask_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(mask_reg), i1);
+    andr(rn(mask_reg), r0, rn(mask_reg));
+    CMPI_EQ(PR_6, PR_7, 0, rn(mask_reg));
+    jit_unget_reg(mask_reg);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & r1) is zero, i.e. all mask bits are clear.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    andr(rn(tmp), r0, r1);
+    /* compare the masked value with zero; branch on PR_6 */
+    CMPI_EQ(PR_6, PR_7, 0, rn(tmp));
+    jit_unget_reg(tmp);
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, PR_6);
+    return (here);
+}
+
+/* Branch to i0 when (r0 & i1) is zero, i.e. all mask bits are clear.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    /* the scratch register first holds the mask, then the masked value */
+    movi(rn(tmp), i1);
+    andr(rn(tmp), r0, rn(tmp));
+    CMPI_EQ(PR_6, PR_7, 0, rn(tmp));
+    jit_unget_reg(tmp);
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, PR_6);
+    return (here);
+}
+
+/* Signed r0 += r1 followed by a branch to i0 that depends on overflow:
+ * with carry non zero the branch is taken on overflow, otherwise it is
+ * taken when there was no overflow.  The sum is stored in r0 before
+ * the branch.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                neg;
+    jit_int32_t                gt;
+    jit_int32_t                lt;
+    /* sum = r0 + r1;  overflow = r1 < 0 ? r0 < sum : sum < r0 */
+    neg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gt = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lt = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lti(rn(neg), r1, 0);               /* neg = r1 < 0 */
+    addr(rn(gt), r0, r1);              /* gt holds the sum for now */
+    ltr(rn(lt), rn(gt), r0);           /* lt = sum < r0 */
+    ltr(rn(gt), r0, rn(gt));           /* gt = r0 < sum */
+    CMPI_EQ(PR_6, PR_7, 0, rn(neg));
+    /* if (neg == 0) p8 = lt == 0, p9 = lt != 0 */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(lt), PR_6);
+    /* if (neg != 0) p8 = gt == 0, p9 = gt != 0 */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(gt), PR_7);
+    addr(r0, r0, r1);
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, carry ? PR_9 : PR_8);
+    jit_unget_reg(lt);
+    jit_unget_reg(gt);
+    jit_unget_reg(neg);
+    return (here);
+}
+
+/* Immediate form of baddr: i1 is loaded in a scratch register that is
+ * then used as the second operand.
+ * Returns whatever position baddr reports for the branch. */
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = baddr(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* Unsigned r0 += r1 followed by a branch to i0 that depends on the
+ * carry out: with carry non zero the branch is taken when the sum
+ * wrapped around, otherwise it is taken when it did not.  The sum is
+ * stored in r0 before the branch.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_baddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                sum;
+    jit_int32_t                wrap;
+    sum = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    wrap = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    addr(rn(sum), r0, r1);
+    /* the addition wrapped if the result is below the first operand */
+    ltr_u(rn(wrap), rn(sum), r0);
+    CMPI_EQ(PR_6, PR_7, 0, rn(wrap));
+    MOV(r0, rn(sum));
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, carry ? PR_7 : PR_6);
+    jit_unget_reg(wrap);
+    jit_unget_reg(sum);
+    return (here);
+}
+
+/* Immediate form of baddr_u: i1 is loaded in a scratch register that
+ * is then used as the second operand.
+ * Returns whatever position baddr_u reports for the branch. */
+static jit_word_t
+_baddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = baddr_u(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* Signed r0 -= r1 followed by a branch to i0 that depends on overflow:
+ * with carry non zero the branch is taken on overflow, otherwise it is
+ * taken when there was no overflow.  The difference is stored in r0
+ * before the branch.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                pos;
+    jit_int32_t                gt;
+    jit_int32_t                lt;
+    /* dif = r0 - r1;  overflow = 0 < r1 ? r0 < dif : dif < r0 */
+    pos = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gt = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    lt = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    gti(rn(pos), r1, 0);               /* pos = r1 > 0 */
+    subr(rn(gt), r0, r1);              /* gt holds the difference for now */
+    ltr(rn(lt), rn(gt), r0);           /* lt = dif < r0 */
+    ltr(rn(gt), r0, rn(gt));           /* gt = r0 < dif */
+    CMPI_EQ(PR_6, PR_7, 0, rn(pos));
+    /* if (pos == 0) p8 = lt == 0, p9 = lt != 0 */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(lt), PR_6);
+    /* if (pos != 0) p8 = gt == 0, p9 = gt != 0 */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(gt), PR_7);
+    subr(r0, r0, r1);
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, carry ? PR_9 : PR_8);
+    jit_unget_reg(lt);
+    jit_unget_reg(gt);
+    jit_unget_reg(pos);
+    return (here);
+}
+
+/* Immediate form of bsubr: i1 is loaded in a scratch register that is
+ * then used as the second operand.
+ * Returns whatever position bsubr reports for the branch. */
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bsubr(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* Unsigned r0 -= r1 followed by a branch to i0 that depends on the
+ * borrow: with carry non zero the branch is taken when the difference
+ * wrapped around, otherwise it is taken when it did not.  The
+ * difference is stored in r0 before the branch.
+ * Returns the code position recorded right before the branch. */
+static jit_word_t
+_bsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                dif;
+    jit_int32_t                wrap;
+    dif = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    wrap = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    subr(rn(dif), r0, r1);
+    /* the subtraction wrapped if the first operand is below the result */
+    ltr_u(rn(wrap), r0, rn(dif));
+    CMPI_EQ(PR_6, PR_7, 0, rn(wrap));
+    MOV(r0, rn(dif));
+    sync();
+    here = _jit->pc.w;
+    BRI_COND((i0 - here) >> 4, carry ? PR_7 : PR_6);
+    jit_unget_reg(wrap);
+    jit_unget_reg(dif);
+    return (here);
+}
+
+/* Immediate form of bsubr_u: i1 is loaded in a scratch register that
+ * is then used as the second operand.
+ * Returns whatever position bsubr_u reports for the branch. */
+static jit_word_t
+_bsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bsubr_u(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* Jump to the address held in general register r0. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* copy the target to branch register BR_6, then branch through it */
+    MOV_br_rn(BR_6, r0);
+    BR(BR_6);
+}
+
+/* Emit an unconditional jump to the address i0.
+ * The short form (BRI) is used when the displacement fits it, the long
+ * form (BRL) otherwise. */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d;
+    sync();
+    /* displacement counted in 16 byte bundles, not in bytes */
+    d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+    /* The short branch only stores a sign bit plus 20 bits of bundle
+     * displacement (see how patch_at encodes it), so the bundle count
+     * must fit in 21 signed bits; the 25 bit range only applies to
+     * the byte offset before the shift. */
+    if (d >= -1048576 && d <= 1048575)
+       BRI(d);
+    else
+       BRL(d);
+}
+
+/* Emit a patchable unconditional jump to i0; the long form is always
+ * used, whatever the distance.
+ * Returns the code position recorded right before the jump. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         here;
+    sync();
+    here = _jit->pc.w;
+    /* displacement counted in 16 byte units */
+    BRL(((jit_word_t)i0 - here) >> 4);
+    return (here);
+}
+
+/* Call through the two words r0 points to: the first one is moved to
+ * BR_6 and used as call target, the second one is loaded in GR_1.
+ * NOTE(review): this looks like a function descriptor (entry point
+ * followed by gp), and LD8_inc presumably leaves r0 advanced by 8;
+ * confirm against the LD8_inc definition. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    LD8_inc(rn(tmp), r0, 8);
+    MOV_br_rn(BR_6, rn(tmp));
+    jit_unget_reg(tmp);
+    /* second word, read after r0 was updated by the load above */
+    LD8(GR_1, r0);
+    BR_CALL(BR_0, BR_6);
+}
+
+/* Call the function designated by the constant i0: the value is
+ * loaded in a scratch register and handed over to callr. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Patchable variant of calli: the target is loaded with movi_p so that
+ * it can be rewritten later.
+ * Returns the position reported by movi_p for the load. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         here;
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    here = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* Emit the prolog of the function being compiled: compute the stack
+ * size, pick the registers that hold the saved state, allocate the
+ * register frame, set up r4 as frame pointer, adjust the stack pointer
+ * and save the JIT_F0 to JIT_F5 registers found in the function regset.
+ * Nothing is emitted when the function has assume_frame set.
+ * The node argument is not used here. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                regno, nlocal, nout;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
+    /* alen - aoff rounded up to a multiple of 16 */
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                              _jitc->function->self.aoff) + 15) & -16;
+
+    /* Scan down from _R115 to _R40 for the highest register in use;
+     * the one right after it is the first of the save area */
+    regno = _R115;
+    while (regno >= _R40 &&
+          !jit_regset_tstbit(&_jitc->function->regset, regno))
+       regno--;
+    _jitc->breg = rn(regno) + 1;
+    /* breg to breg + 4 are written below, output registers follow */
+    _jitc->rout = _jitc->breg + 5;
+    nlocal = _jitc->rout - GR_32;
+
+    /* Number of output argument registers required */
+    if (_jitc->function->define_frame)
+       nout = 8;
+    else {
+       regno = _OUT7;
+       while (regno >= _OUT0 &&
+              !jit_regset_tstbit(&_jitc->function->regset, regno))
+           --regno;
+       nout = (regno + 1) - _OUT0;
+    }
+
+    /* It is not known if div/mod functions will be called
+     * (sqrt* needs one), so always keep at least two */
+    if (nout < 2)
+       nout = 2;
+
+    /* Match gcc prolog */
+    ALLOC(_jitc->breg + 1, nlocal, nout);
+    MOV(_jitc->breg + 2, GR_12);
+    MOV_rn_br(_jitc->breg, BR_0);
+    MOV(_jitc->breg + 3, GR_1);
+
+    /* lightning specific, use r4 as frame pointer */
+    MOV(_jitc->breg + 4, GR_4);
+    addi(GR_4, GR_12, -(stack_framesize + params_offset));
+
+    /* adjust stack pointer */
+    addi(GR_12, GR_12, -(stack_framesize +
+                        (params_offset << 1) + _jitc->function->stack));
+
+    /* Save the float registers in use, 16 bytes apart starting at
+     * the frame pointer; GR_2 holds the address of each slot */
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F0))
+       STF_SPILL(GR_4, rn(JIT_F0));
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F1)) {
+       addi(GR_2, GR_4, 16);
+       STF_SPILL(GR_2, rn(JIT_F1));
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F2)) {
+       addi(GR_2, GR_4, 32);
+       STF_SPILL(GR_2, rn(JIT_F2));
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F3)) {
+       addi(GR_2, GR_4, 48);
+       STF_SPILL(GR_2, rn(JIT_F3));
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F4)) {
+       addi(GR_2, GR_4, 64);
+       STF_SPILL(GR_2, rn(JIT_F4));
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F5)) {
+       addi(GR_2, GR_4, 80);
+       STF_SPILL(GR_2, rn(JIT_F5));
+    }
+
+    /* Store the current aoff at offset aoffoff from the frame pointer */
+    if (_jitc->function->allocar) {
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, GR_4, rn(regno));
+       jit_unget_reg(regno);
+    }
+
+    /* Varargs function: store GR_32 + vagp and up, 8 bytes each,
+     * starting at offset 112 + vagp * 8 from the frame pointer */
+    if (_jitc->function->self.call & jit_call_varargs) {
+       for (regno = _jitc->function->vagp; regno < 8; ++regno)
+           stxi(112 + regno * 8, GR_4, GR_32 + regno);
+    }
+}
+
+/* Emit the epilog of the function being compiled: reload the JIT_F0 to
+ * JIT_F5 registers found in the function regset, restore the state
+ * saved in breg to breg + 4 by the prolog and return.
+ * Nothing is emitted when the function has assume_frame set.
+ * The node argument is not used here. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    /* Float registers come back from the slots the prolog used, 16
+     * bytes apart starting at the frame pointer */
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F0))
+       LDF_FILL(rn(JIT_F0), GR_4);
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F1)) {
+       addi(GR_2, GR_4, 16);
+       LDF_FILL(rn(JIT_F1), GR_2);
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F2)) {
+       addi(GR_2, GR_4, 32);
+       LDF_FILL(rn(JIT_F2), GR_2);
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F3)) {
+       addi(GR_2, GR_4, 48);
+       LDF_FILL(rn(JIT_F3), GR_2);
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F4)) {
+       addi(GR_2, GR_4, 64);
+       LDF_FILL(rn(JIT_F4), GR_2);
+    }
+    if (jit_regset_tstbit(&_jitc->function->regset, JIT_F5)) {
+       addi(GR_2, GR_4, 80);
+       LDF_FILL(rn(JIT_F5), GR_2);
+    }
+    /* Match gcc epilog */
+    MOV(GR_1, _jitc->breg + 3);
+    MOV_I_ar_rn(AR_PFS, _jitc->breg + 1);
+    MOV_br_rn(BR_0, _jitc->breg);
+    MOV(GR_12, _jitc->breg + 2);
+    /* Restore r4, that lightning uses as frame pointer */
+    MOV(GR_4, _jitc->breg + 4);
+    BR_RET(BR_0);
+    flush();
+}
+
+/* Initialize the va_list in r0 so that it points to the first
+ * variable argument; only valid in a varargs function. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    if (_jitc->function->vagp >= 8)
+       /* no slot left below index 8, start at offset self.size */
+       addi(r0, GR_4, _jitc->function->self.size);
+    else
+       /* start at the slot of index vagp, 8 bytes per slot from 112 */
+       addi(r0, GR_4, 112 + _jitc->function->vagp * 8);
+}
+
+/* Fetch the next variable argument: r0 receives the value r1 points
+ * to and r1 is moved 8 bytes forward; only valid in a varargs
+ * function. */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Read the argument the va_list currently designates... */
+    ldr(r0, r1);
+    /* ...and step the va_list to the following one. */
+    addi(r1, r1, 8);
+}
+
+/* Rewrite the bundle found at address instr so that the instruction it
+ * holds refers to label.  code selects the layout to patch: movi and
+ * calli carry a 64 bit immediate, jmpi a long branch displacement, and
+ * anything else is expected to be a short branch in slot 0. */
+static void
+_patch_at(jit_state_t *_jit, jit_code_t code,
+         jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          l, h, tm, s0, s1, s2;
+    union {
+       jit_word_t       w;
+       jit_word_t      *p;
+    } at;
+    jit_word_t          i1, i41, i20, ic, i5, i9, i7;
+    jit_word_t          n;
+    at.w = instr;
+    get_bundle(at.p, l, h, tm, s0, s1, s2);
+    switch (code) {
+       case jit_code_movi:
+           /* Handle jit functions as C function, so that jit function
+            * pointers can be passed to C code, and jit code does not
+            * need to try to differentiate them. */
+       case jit_code_calli:
+           /* split the 64 bit value in the fields of the instruction */
+           i1  = (label >> 63) &           0x1L;
+           i41 = (label >> 22) & 0x1ffffffffffL;
+           ic  = (label >> 21) &           0x1L;
+           i5  = (label >> 16) &          0x1fL;
+           i9  = (label >>  7) &         0x1ffL;
+           i7  =  label        &          0x7fL;
+           s1 = i41;
+           assert((tm & ~1) == TM_M_L_X_ &&
+                  (s2 & 0xfL<<37) == (6L<<37) &&
+                  s0 == nop_m);
+           /* keep the bits selected by the mask, replace the fields */
+           s2 &= (6L<<37)|(0x7fL<<6);
+           s2 |= (i1<<36)|(i9<<27)|(i5<<22)|(ic<<21)|(i7<<13);
+           break;
+       case jit_code_jmpi:
+           /* kludge to hide function descriptors; check that gp
+            * is zero, what is done for all jit functions */
+           if (_jitc->jump && ((long *)label)[1] == 0) {
+               /* label is a known prolog: jump 16 bytes after it */
+               for (n = 0; n < _jitc->prolog.offset; n++) {
+                   if (_jitc->prolog.ptr[n] == label) {
+                       label += 16;
+                       break;
+                   }
+               }
+           }
+           /* displacement counted in 16 byte units */
+           ic = (label - instr) >> 4;
+           i1  = (ic >> 61) &           0x1L;
+           i41 = (ic >> 22) & 0x1ffffffffffL;
+           i20 =  ic        &       0xfffffL;
+           assert((tm & ~1) == TM_M_L_X_ &&
+                  (s2 & 0xfL<<37) == (0xcL<<37) &&
+                  s0 == nop_m);
+           s1 = i41;
+           s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
+           s2 |= (i1<<36)|(i20<<13);
+           break;
+       default:
+           /* Only B1 in slot 0 expected due to need to either
+            * a stop to update predicates, or a sync before
+            * unconditional short branch */
+           ic = (label - instr) >> 4;
+           assert((s0 >> 37) == 4 && (s0 & (7 << 6)) == 0);
+           s0 &= (4L<<37)|(7L<<33)|(1L<<12)|0x1f;
+           /* sign bit taken from bit 20, then the low 20 bits */
+           s0 |= (((ic>>20)&1L)<<36)|((ic&0xfffffL)<<13);
+           break;
+    }
+    set_bundle(at.p, l, h, tm, s0, s1, s2);
+}
+#endif
diff --git a/deps/lightning/lib/jit_ia64-fpu.c b/deps/lightning/lib/jit_ia64-fpu.c
new file mode 100644 (file)
index 0000000..19cc381
--- /dev/null
@@ -0,0 +1,1762 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#define SF_S0                          0
+#define SF_S1                          1
+#define SF_S2                          2
+#define SF_S3                          3
+
+#define TSTFREG1(r0)                                                   \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->regs, r0 + 128))                  \
+           stop();                                                     \
+    } while (0)
+#define TSTFREG2(r0, r1)                                               \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->regs, r0 + 128) ||                \
+           jit_regset_tstbit(&_jitc->regs, r1 + 128))                  \
+           stop();                                                     \
+    } while (0)
+#define TSTFREG3(r0, r1, r2)                                           \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->regs, r0 + 128) ||                \
+           jit_regset_tstbit(&_jitc->regs, r1 + 128) ||                \
+           jit_regset_tstbit(&_jitc->regs, r2 + 128))                  \
+           stop();                                                     \
+    } while (0)
+#define SETFREG(r0)            jit_regset_setbit(&_jitc->regs, r0 + 128)
+
+/* libm */
+extern float sqrtf(float);
+extern double sqrt(double);
+#define M7(x6,ht,r3,r2,f1)             _M7(_jit,0,x6,ht,r3,r2,f1)
+static void _M7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M8(x6,ht,r3,im,f1)             _M8(_jit,0,x6,ht,r3,im,f1)
+static void _M8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M9(x6,ht,r3,f1)                        _M9(_jit,0,x6,ht,r3,f1)
+static void _M9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M10(x6,ht,r3,r2,im)            _M10(_jit,0,x6,ht,r3,r2,im)
+static void _M10(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M11(x6,ht,r3,f2,f1)            _M11(_jit,0,x6,ht,r3,f2,f1)
+static void _M11(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M12(x6,ht,r3,f2,f1)            _M12(_jit,0,x6,ht,r3,f2,f1)
+static void _M12(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define M18(x6,r2,f1)                  _M18(_jit,0,x6,r2,f1)
+static void _M18(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M19(x6,f2,r1)                  _M19(_jit,0,x6,f2,r1)
+static void _M19(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M21(f2,im)                     M20x(0x3,f2,im)
+#define M23(x3,im,f1)                  M22x(x3,im,f1)
+#define M27(f1)                                M26x(3,f1)
+#define F1(op,x,sf,f4,f3,f2,f1)                F1_(_jit,0,op,x,sf,f4,f3,f2,f1)
+#define F2(x2,f4,f3,f2,f1)             F1(0xe,1,x2,f4,f3,f2,f1)
+#define F3(f4,f3,f2,f1)                        F1(0xe,0,0,f4,f3,f2,f1)
+static void F1_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F4(rb,sf,ra,p2,f3,f2,ta,p1)    F4_(_jit,0,rb,sf,ra,p2,f3,f2,ta,p1)
+static void F4_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F5(p2,fc,f2,ta,p1)             F5_(_jit,0,p2,fc,f2,ta,p1)
+static void F5_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define F6x(op,q,sf,p2,f3,f2,f1)       F6x_(_jit,0,op,q,sf,p2,f3,f2,f1)
+#define F6(op,sf,p2,f3,f2,f1)          F6x(op,0,sf,p2,f3,f2,f1)
+#define F7(op,sf,p2,f3,f1)             F6x(op,1,sf,p2,f3,0,f1)
+static void F6x_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F8(op,sf,x6,f3,f2,f1)          F8_(_jit,0,op,sf,x6,f3,f2,f1)
+#define F9(op,x6,f3,f2,f1)             F8(op,0,x6,f3,f2,f1)
+#define F10(op,sf,x6,f2,f1)            F8(op,sf,x6,0,f2,f1)
+#define F11(x6,f2,f1)                  F8(0,0,x6,0,f2,f1)
+static void F8_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define F12(sf,x6,omsk,amsk)           F12_(_jit,0,sf,x6,omsk,amsk)
+#define F13(sf,x6)                     F12(sf,x6,0,0)
+static void F12_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define F14x(sf,x,x6,im)               F14x_(_jit,0,sf,x,x6,im)
+#define F14(sf,im)                     F14x(sf,0,8,im)
+#define F15(im)                                F14x(0,0,0,im)
+static void F14x_(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t,jit_word_t)
+    maybe_unused;
+#define F16(y,im)                      F16_(_jit,0,y,im)
+static void F16_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t)maybe_unused;
+
+/* break */
+#define BREAK_F(im)                    F15(im)
+/* chk */
+#define CHK_S(f2,im)                   M21(f2,im)
+#define CHKF_A_NC(f1,im)               M23(0x6,im,f1)
+#define CHKF_A_CLR(f1,im)              M23(0x7,im,f1)
+/* fabs */
+#define FABS(f1,f3)                    FMERGE_S(f1,0,f3)
+/* fadd */
+#define FADD(f1,f3,f2)                 FMA(f1,f3,1,f2)
+#define FADD_S(f1,f3,f2)               FMA_S(f1,f3,1,f2)
+#define FADD_D(f1,f3,f2)               FMA_D(f1,f3,1,f2)
+/* famax */
+#define FAMAX(f1,f2,f3)                        F8(0,SF_S0,0x17,f3,f2,f1)
+/* famin */
+#define FAMIN(f1,f2,f3)                        F8(0,SF_S0,0x16,f3,f2,f1)
+/* fand */
+#define FAND(f1,f2,f3)                 F9(0,0x2c,f3,f2,f1)
+/* fandcm */
+#define FANDCM(f1,f2,f3)               F9(0,0x2d,f3,f2,f1)
+/* fchkf */
+#define FCHKF(im)                      F14(SF_S0,im)
+/* fclass */
+#define FCLASS_M(p1,p2,f2,fc)          F5(p2,fc,f2,0,p1)
+#define FCLASS_M_UNC(p1,p2,f2,fc)      F5(p2,fc,f2,1,p1)
+/* fclrf */
+#define FCLRF()                                F13(SF_S0,5)
+/* fcmp */
+#define FCMP_EQ(p1,p2,f2,f3)           F4(0,SF_S0,0,p2,f3,f2,0,p1)
+#define FCMP_LT(p1,p2,f2,f3)           F4(1,SF_S0,0,p2,f3,f2,0,p1)
+#define FCMP_LE(p1,p2,f2,f3)           F4(0,SF_S0,1,p2,f3,f2,0,p1)
+#define FCMP_UNORD(p1,p2,f2,f3)                F4(1,SF_S0,1,p2,f3,f2,0,p1)
+#define FCMP_EQ_UNC(p1,p2,f2,f3)       F4(0,SF_S0,0,p2,f3,f2,1,p1)
+#define FCMP_LT_UNC(p1,p2,f2,f3)       F4(1,SF_S0,0,p2,f3,f2,1,p1)
+#define FCMP_LE_UNC(p1,p2,f2,f3)       F4(0,SF_S0,1,p2,f3,f2,1,p1)
+#define FCMP_UNORD_UNC(p1,p2,f2,f3)    F4(1,SF_S0,1,p2,f3,f2,1,p1)
+/* fcvt.fx */
+#define FCVT_FX(f1,f2)                 F10(0,SF_S0,0x18,f2,f1)
+#define FCVT_FXU(f1,f2)                        F10(0,SF_S0,0x19,f2,f1)
+#define FCVT_FX_TRUNC(f1,f2)           F10(0,SF_S0,0x1a,f2,f1)
+#define FCVT_FXU_TRUNC(f1,f2)          F10(0,SF_S0,0x1b,f2,f1)
+/* fcvt.xf */
+#define FCVT_XF(f1,f2)                 F11(0x1c,f2,f1)
+/* fcvt.xuf */
+#define FCVT_XUF(f1,f3)                        FMA(f1,f3,1,0)
+/* fma */
+#define FMA(f1,f3,f4,f2)               F1(0x8,0,SF_S0,f4,f3,f2,f1)
+#define FMA_p(f1,f3,f4,f2,sf,_p)       F1_(_jit,_p,0x8,0,sf,f4,f3,f2,f1)
+#define FMA_S(f1,f3,f4,f2)             F1(0x8,1,SF_S0,f4,f3,f2,f1)
+#define FMA_S_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x8,1,sf,f4,f3,f2,f1)
+#define FMA_D(f1,f3,f4,f2)             F1(0x9,0,SF_S0,f4,f3,f2,f1)
+#define FMA_D_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x9,0,sf,f4,f3,f2,f1)
+/* fmax */
+#define FMAX(f1,f2,f3)                 F8(0,SF_S0,0x15,f3,f2,f1)
+/* fmerge */
+#define FMERGE_S(f1,f2,f3)             F9(0,0x10,f3,f2,f1)
+#define FMERGE_NS(f1,f2,f3)            F9(0,0x11,f3,f2,f1)
+#define FMERGE_SE(f1,f2,f3)            F9(0,0x12,f3,f2,f1)
+/* fmin */
+#define FMIN(f1,f2,f3)                 F8(0,SF_S0,0x14,f3,f2,f1)
+/* fmix */
+#define FMIX_LR(f1,f2,f3)              F9(0,0x39,f3,f2,f1)
+#define FMIX_R(f1,f2,f3)               F9(0,0x3a,f3,f2,f1)
+#define FMIX_L(f1,f2,f3)               F9(0,0x3b,f3,f2,f1)
+/* fmpy */
+#define FMPY(f1,f3,f4)                 FMA(f1,f3,f4,0)
+#define FMPY_p(f1,f3,f4,sf,_p)         FMA_p(f1,f3,f4,0,sf,_p)
+#define FMPY_S(f1,f3,f4)               FMA_S(f1,f3,f4,0)
+#define FMPY_S_p(f1,f3,f4,sf,_p)       FMA_S_p(f1,f3,f4,0,sf,_p)
+#define FMPY_D(f1,f3,f4)               FMA_D(f1,f3,f4,0)
+#define FMPY_D_p(f1,f3,f4,sf,_p)       FMA_D_p(f1,f3,f4,0,sf,_p)
+/* fms */
+#define FMS(f1,f3,f4,f2)               F1(0xa,0,SF_S0,f4,f3,f2,f1)
+#define FMS_S(f1,f3,f4,f2)             F1(0xa,1,SF_S0,f4,f3,f2,f1)
+#define FMS_D(f1,f3,f4,f2)             F1(0xb,0,SF_S0,f4,f3,f2,f1)
+/* fneg */
+#define FNEG(f1,f3)                    FMERGE_NS(f1,f3,f3)
+/* fnegabs */
+#define FNEGABS(f1,f3)                 FMERGE_NS(f1,0,f3)
+/* fnma */
+#define FNMA(f1,f3,f4,f2)              F1(0xc,0,SF_S0,f4,f3,f2,f1)
+#define FNMA_p(f1,f3,f4,f2,sf,_p)      F1_(_jit,_p,0xc,0,sf,f4,f3,f2,f1)
+#define FNMA_S(f1,f3,f4,f2)            F1(0xc,1,SF_S0,f4,f3,f2,f1)
+#define FNMA_D(f1,f3,f4,f2)            F1(0xd,0,SF_S0,f4,f3,f2,f1)
+/* fnmpy */
+#define FNMPY(f1,f3,f4)                        FNMA(f1,f3,f4,0)
+/* fnorm */
+#define FNORM(f1,f3)                   FMA(f1,f3,1,0)
+#define FNORM_S(f1,f3)                 FMA_S(f1,f3,1,0)
+#define FNORM_D(f1,f3)                 FMA_D(f1,f3,1,0)
+/* for */
+#define FOR(f1,f2,f3)                  F9(0,0x2e,f3,f2,f1)
+/* fpabs */
+#define FPABS(f1,f3)                   FPMERGE_S(f1,0,f3)
+/* fpack */
+#define FPACK(f1,f2,f3)                        F9(0,0x28,f3,f2,f1)
+/* fpamax */
+#define FPAMAX(f1,f2,f3)               F8(1,SF_S0,0x17,f3,f2,f1)
+/* fpamin */
+#define FPAMIN(f1,f2,f3)               F8(1,SF_S0,0x16,f3,f2,f1)
+/* fpcmp */
+#define FPCMP_EQ(f1,f2,f3)             F8(1,SF_S0,0x30,f3,f2,f1)
+#define FPCMP_LT(f1,f2,f3)             F8(1,SF_S0,0x31,f3,f2,f1)
+#define FPCMP_LE(f1,f2,f3)             F8(1,SF_S0,0x32,f3,f2,f1)
+#define FPCMP_UNORD(f1,f2,f3)          F8(1,SF_S0,0x33,f3,f2,f1)
+#define FPCMP_NEQ(f1,f2,f3)            F8(1,SF_S0,0x34,f3,f2,f1)
+#define FPCMP_NLT(f1,f2,f3)            F8(1,SF_S0,0x35,f3,f2,f1)
+#define FPCMP_NLE(f1,f2,f3)            F8(1,SF_S0,0x36,f3,f2,f1)
+#define FPCMP_ORD(f1,f2,f3)            F8(1,SF_S0,0x37,f3,f2,f1)
+/* fpcvt.fx */
+#define FPCVT_FX(f1,f2)                        F10(1,SF_S0,0x18,f2,f1)
+#define FPCVT_FXU(f1,f2)               F10(1,SF_S0,0x19,f2,f1)
+#define FPCVT_FX_TRUNC(f1,f2)          F10(1,SF_S0,0x1a,f2,f1)
+#define FPCVT_FXU_TRUNC(f1,f2)         F10(1,SF_S0,0x1b,f2,f1)
+/* fpma: operands are passed to F1 in the same order as for FMA; the
+ * f2 argument used to be dropped, with f3 encoded in its place */
+#define FPMA(f1,f3,f4,f2)              F1(0x9,1,SF_S0,f4,f3,f2,f1)
+/* fpmax */
+#define FPMAX(f1,f2,f3)                        F8(1,SF_S0,0x15,f3,f2,f1)
+/* fpmerge */
+#define FPMERGE_S(f1,f2,f3)            F9(1,0x10,f3,f2,f1)
+#define FPMERGE_NS(f1,f2,f3)           F9(1,0x11,f3,f2,f1)
+#define FPMERGE_SE(f1,f2,f3)           F9(1,0x12,f3,f2,f1)
+/* fpmin */
+#define FPMIN(f1,f2,f3)                        F8(1,SF_S0,0x14,f3,f2,f1)
+/* fpmpy */
+#define FPMPY(f1,f3,f4)                        FPMA(f1,f3,f4,0)
+/* fpms: operands are passed to F1 in the same order as for FMS; the
+ * f2 argument used to be dropped, with f3 encoded in its place */
+#define FPMS(f1,f3,f4,f2)              F1(0xb,1,SF_S0,f4,f3,f2,f1)
+/* fpneg */
+#define FPNEG(f1,f3)                   FPMERGE_NS(f1,f3,f3)
+/* fpnegabs */
+#define FPNEGABS(f1,f3)                        FPMERGE_NS(f1,0,f3)
+/* fpnma: operands are passed to F1 in the same order as for FNMA; the
+ * f2 argument used to be dropped, with f3 encoded in its place */
+#define FPNMA(f1,f3,f4,f2)             F1(0xd,1,SF_S0,f4,f3,f2,f1)
+/* fpnmpy */
+#define FPNMPY(f1,f3,f4)               FPNMA(f1,f3,f4,0)
+/* fprcpa */
+#define FPRCPA(f1,p2,f2,f3)            F6(1,SF_S0,p2,f3,f2,f1)
+/* fprsqrta */
+#define FPRSQRTA(f1,p2,f3)             F7(1,SF_S0,p2,f3,f1)
+/* frcpa */
+#define FRCPA(f1,p2,f2,f3)             F6(0,SF_S0,p2,f3,f2,f1)
+/* frsqrta */
+#define FRSQRTA(f1,p2,f3)              F7(0,SF_S0,p2,f3,f1)
+/* fselect */
+#define FSELECT(f1,f3,f4,f2)           F3(f4,f3,f2,f1)
+#define FSETC(amsk,omsk)               F12(SF_S0,4,omsk,amsk)
+/* fsub */
+#define FSUB(f1,f3,f2)                 FMS(f1,f3,1,f2)
+#define FSUB_S(f1,f3,f2)               FMS_S(f1,f3,1,f2)
+#define FSUB_D(f1,f3,f2)               FMS_D(f1,f3,1,f2)
+/* fswap */
+#define FSWAP(f1,f2,f3)                        F9(0,0x34,f3,f2,f1)
+#define FSWAP_NL(f1,f2,f3)             F9(0,0x35,f3,f2,f1)
+#define FSWAP_NR(f1,f2,f3)             F9(0,0x36,f3,f2,f1)
+/* fsxt */
+#define FSXT_R(f1,f2,f3)               F9(0,0x3c,f3,f2,f1)
+#define FSXT_L(f1,f2,f3)               F9(0,0x3d,f3,f2,f1)
+/* fxor */
+#define FXOR(f1,f2,f3)                 F9(0,0x2f,f3,f2,f1)
+/* getf */
+#define GETF_S(r1,f2)                  M19(0x1e,f2,r1)
+#define GETF_D(r1,f2)                  M19(0x1f,f2,r1)
+#define GETF_EXP(r1,f2)                        M19(0x1d,f2,r1)
+#define GETF_SIG(r1,f2)                        M19(0x1c,f2,r1)
+/* hint */
+#define HINT_F(im)                     F16(1,im)
+/* invala */
+#define INVALAF_E(f1)                  M27(f1)
+/* ldf */
+#define LDFS(f1,r3)                    M9(0x02,LD_NONE,r3,f1)
+#define LDFD(f1,r3)                    M9(0x03,LD_NONE,r3,f1)
+#define LDF8(f1,r3)                    M9(0x01,LD_NONE,r3,f1)
+#define LDFE(f1,r3)                    M9(0x00,LD_NONE,r3,f1)
+#define LDFS_S(f1,r3)                  M9(0x06,LD_NONE,r3,f1)
+#define LDFD_S(f1,r3)                  M9(0x07,LD_NONE,r3,f1)
+#define LDF8_S(f1,r3)                  M9(0x05,LD_NONE,r3,f1)
+#define LDFE_S(f1,r3)                  M9(0x04,LD_NONE,r3,f1)
+#define LDFS_A(f1,r3)                  M9(0x0a,LD_NONE,r3,f1)
+#define LDFD_A(f1,r3)                  M9(0x0b,LD_NONE,r3,f1)
+#define LDF8_A(f1,r3)                  M9(0x09,LD_NONE,r3,f1)
+#define LDFE_A(f1,r3)                  M9(0x08,LD_NONE,r3,f1)
+#define LDFS_SA(f1,r3)                 M9(0x0e,LD_NONE,r3,f1)
+#define LDFD_SA(f1,r3)                 M9(0x0f,LD_NONE,r3,f1)
+#define LDF8_SA(f1,r3)                 M9(0x0d,LD_NONE,r3,f1)
+#define LDFE_SA(f1,r3)                 M9(0x0c,LD_NONE,r3,f1)
+#define LDF_FILL(f1,r3)                        M9(0x1b,LD_NONE,r3,f1)
+#define LDFS_C_CLR(f1,r3)              M9(0x22,LD_NONE,r3,f1)
+#define LDFD_C_CLR(f1,r3)              M9(0x23,LD_NONE,r3,f1)
+/*
+ * Floating-point load variants (continuation of the load macro table).
+ * Forms without a suffix operand expand to M9; the `_inc` forms taking a
+ * second general register (r2) expand to M7, and the `_inc` forms taking an
+ * immediate (im) expand to M8.  The first macro argument is always the
+ * 6-bit x6 selector, the second the LD_NONE hint.
+ * NOTE(review): prefixes are not uniform in this run (LDFS_A_inc sits next
+ * to LDXFD_A_inc, LDIF_FILL_inc next to LDXFS_FILL_inc) -- confirm the
+ * intended names before relying on them.
+ * NOTE(review): the LDIF*_inc macros call their second parameter f3, but it
+ * is forwarded in the slot that appears to be M8's base register (r3).
+ */
+#define LDF8_C_CLR(f1,r3)              M9(0x21,LD_NONE,r3,f1)
+#define LDFE_C_CLR(f1,r3)              M9(0x20,LD_NONE,r3,f1)
+#define LDFS_C_NC(f1,r3)               M9(0x26,LD_NONE,r3,f1)
+#define LDFD_C_NC(f1,r3)               M9(0x27,LD_NONE,r3,f1)
+#define LDF8_C_NC(f1,r3)               M9(0x25,LD_NONE,r3,f1)
+#define LDFE_C_NC(f1,r3)               M9(0x24,LD_NONE,r3,f1)
+#define LDFS_inc(f1,r3,r2)             M7(0x02,LD_NONE,r3,r2,f1)
+#define LDFD_inc(f1,r3,r2)             M7(0x03,LD_NONE,r3,r2,f1)
+#define LDF8_inc(f1,r3,r2)             M7(0x01,LD_NONE,r3,r2,f1)
+#define LDFE_inc(f1,r3,r2)             M7(0x00,LD_NONE,r3,r2,f1)
+#define LDFS_S_inc(f1,r3,r2)           M7(0x06,LD_NONE,r3,r2,f1)
+#define LDFD_S_inc(f1,r3,r2)           M7(0x07,LD_NONE,r3,r2,f1)
+#define LDF8_S_inc(f1,r3,r2)           M7(0x05,LD_NONE,r3,r2,f1)
+#define LDFE_S_inc(f1,r3,r2)           M7(0x04,LD_NONE,r3,r2,f1)
+#define LDFS_A_inc(f1,r3,r2)           M7(0x0a,LD_NONE,r3,r2,f1)
+#define LDXFD_A_inc(f1,r3,r2)          M7(0x0b,LD_NONE,r3,r2,f1)
+#define LDXF8_A_inc(f1,r3,r2)          M7(0x09,LD_NONE,r3,r2,f1)
+#define LDXFE_A_inc(f1,r3,r2)          M7(0x08,LD_NONE,r3,r2,f1)
+#define LDXFS_SA_inc(f1,r3,r2)         M7(0x0e,LD_NONE,r3,r2,f1)
+#define LDXFD_SA_inc(f1,r3,r2)         M7(0x0f,LD_NONE,r3,r2,f1)
+#define LDXF8_SA_inc(f1,r3,r2)         M7(0x0d,LD_NONE,r3,r2,f1)
+#define LDXFE_SA_inc(f1,r3,r2)         M7(0x0c,LD_NONE,r3,r2,f1)
+#define LDXFS_FILL_inc(f1,r3,r2)       M7(0x1b,LD_NONE,r3,r2,f1)
+#define LDXFS_C_CLR_inc(f1,r3,r2)      M7(0x22,LD_NONE,r3,r2,f1)
+#define LDXFD_C_CLR_inc(f1,r3,r2)      M7(0x23,LD_NONE,r3,r2,f1)
+#define LDXF8_C_CLR_inc(f1,r3,r2)      M7(0x21,LD_NONE,r3,r2,f1)
+#define LDXFE_C_CLR_inc(f1,r3,r2)      M7(0x20,LD_NONE,r3,r2,f1)
+#define LDXFS_C_NC_inc(f1,r3,r2)       M7(0x26,LD_NONE,r3,r2,f1)
+#define LDXFD_C_NC_inc(f1,r3,r2)       M7(0x27,LD_NONE,r3,r2,f1)
+#define LDXF8_C_NC_inc(f1,r3,r2)       M7(0x25,LD_NONE,r3,r2,f1)
+#define LDXFE_C_NC_inc(f1,r3,r2)       M7(0x24,LD_NONE,r3,r2,f1)
+#define LDIFS_inc(f1,f3,im)            M8(0x02,LD_NONE,f3,im,f1)
+#define LDIFD_inc(f1,f3,im)            M8(0x03,LD_NONE,f3,im,f1)
+#define LDIF8_inc(f1,f3,im)            M8(0x01,LD_NONE,f3,im,f1)
+#define LDIFE_inc(f1,f3,im)            M8(0x00,LD_NONE,f3,im,f1)
+#define LDIFS_S_inc(f1,f3,im)          M8(0x06,LD_NONE,f3,im,f1)
+#define LDIFD_S_inc(f1,f3,im)          M8(0x07,LD_NONE,f3,im,f1)
+#define LDIF8_S_inc(f1,f3,im)          M8(0x05,LD_NONE,f3,im,f1)
+#define LDIFE_S_inc(f1,f3,im)          M8(0x04,LD_NONE,f3,im,f1)
+#define LDIFS_A_inc(f1,f3,im)          M8(0x0a,LD_NONE,f3,im,f1)
+#define LDIFD_A_inc(f1,f3,im)          M8(0x0b,LD_NONE,f3,im,f1)
+#define LDIF8_A_inc(f1,f3,im)          M8(0x09,LD_NONE,f3,im,f1)
+#define LDIFE_A_inc(f1,f3,im)          M8(0x08,LD_NONE,f3,im,f1)
+#define LDIF_FILL_inc(f1,f3,im)                M8(0x1b,LD_NONE,f3,im,f1)
+#define LDIFS_C_CLR_inc(f1,f3,im)      M8(0x22,LD_NONE,f3,im,f1)
+#define LDIFD_C_CLR_inc(f1,f3,im)      M8(0x23,LD_NONE,f3,im,f1)
+#define LDIF8_C_CLR_inc(f1,f3,im)      M8(0x21,LD_NONE,f3,im,f1)
+#define LDIFE_C_CLR_inc(f1,f3,im)      M8(0x20,LD_NONE,f3,im,f1)
+#define LDIFS_C_NC_inc(f1,f3,im)       M8(0x26,LD_NONE,f3,im,f1)
+#define LDIFD_C_NC_inc(f1,f3,im)       M8(0x27,LD_NONE,f3,im,f1)
+#define LDIF8_C_NC_inc(f1,f3,im)       M8(0x25,LD_NONE,f3,im,f1)
+#define LDIFE_C_NC_inc(f1,f3,im)       M8(0x24,LD_NONE,f3,im,f1)
+/*
+ * ldfp - loads that target two FP registers (f1 and f2) from base r3.
+ * The LDFP* macros expand to M11; the LDIFP* macros expand to M12, whose
+ * emitter additionally records r3 as modified.  The x6 selectors mirror
+ * the single-register load table (S/D/8 and the _S, _A, _SA, _C_CLR,
+ * _C_NC variants); there is no "E" form here.
+ */
+#define LDFPS(f1,f2,r3)                        M11(0x02,LD_NONE,r3,f2,f1)
+#define LDFPD(f1,f2,r3)                        M11(0x03,LD_NONE,r3,f2,f1)
+#define LDFP8(f1,f2,r3)                        M11(0x01,LD_NONE,r3,f2,f1)
+#define LDFPS_S(f1,f2,r3)              M11(0x06,LD_NONE,r3,f2,f1)
+#define LDFPD_S(f1,f2,r3)              M11(0x07,LD_NONE,r3,f2,f1)
+#define LDFP8_S(f1,f2,r3)              M11(0x05,LD_NONE,r3,f2,f1)
+#define LDFPS_A(f1,f2,r3)              M11(0x0a,LD_NONE,r3,f2,f1)
+#define LDFPD_A(f1,f2,r3)              M11(0x0b,LD_NONE,r3,f2,f1)
+#define LDFP8_A(f1,f2,r3)              M11(0x09,LD_NONE,r3,f2,f1)
+#define LDFPS_SA(f1,f2,r3)             M11(0x0e,LD_NONE,r3,f2,f1)
+#define LDFPD_SA(f1,f2,r3)             M11(0x0f,LD_NONE,r3,f2,f1)
+#define LDFP8_SA(f1,f2,r3)             M11(0x0d,LD_NONE,r3,f2,f1)
+#define LDFPS_C_CLR(f1,f2,r3)          M11(0x22,LD_NONE,r3,f2,f1)
+#define LDFPD_C_CLR(f1,f2,r3)          M11(0x23,LD_NONE,r3,f2,f1)
+#define LDFP8_C_CLR(f1,f2,r3)          M11(0x21,LD_NONE,r3,f2,f1)
+#define LDFPS_C_NC(f1,f2,r3)           M11(0x26,LD_NONE,r3,f2,f1)
+#define LDFPD_C_NC(f1,f2,r3)           M11(0x27,LD_NONE,r3,f2,f1)
+#define LDFP8_C_NC(f1,f2,r3)           M11(0x25,LD_NONE,r3,f2,f1)
+#define LDIFPS(f1,f2,r3)               M12(0x02,LD_NONE,r3,f2,f1)
+#define LDIFPD(f1,f2,r3)               M12(0x03,LD_NONE,r3,f2,f1)
+#define LDIFP8(f1,f2,r3)               M12(0x01,LD_NONE,r3,f2,f1)
+#define LDIFPS_S(f1,f2,r3)             M12(0x06,LD_NONE,r3,f2,f1)
+#define LDIFPD_S(f1,f2,r3)             M12(0x07,LD_NONE,r3,f2,f1)
+#define LDIFP8_S(f1,f2,r3)             M12(0x05,LD_NONE,r3,f2,f1)
+#define LDIFPS_A(f1,f2,r3)             M12(0x0a,LD_NONE,r3,f2,f1)
+#define LDIFPD_A(f1,f2,r3)             M12(0x0b,LD_NONE,r3,f2,f1)
+#define LDIFP8_A(f1,f2,r3)             M12(0x09,LD_NONE,r3,f2,f1)
+#define LDIFPS_SA(f1,f2,r3)            M12(0x0e,LD_NONE,r3,f2,f1)
+#define LDIFPD_SA(f1,f2,r3)            M12(0x0f,LD_NONE,r3,f2,f1)
+#define LDIFP8_SA(f1,f2,r3)            M12(0x0d,LD_NONE,r3,f2,f1)
+#define LDIFPS_C_CLR(f1,f2,r3)         M12(0x22,LD_NONE,r3,f2,f1)
+#define LDIFPD_C_CLR(f1,f2,r3)         M12(0x23,LD_NONE,r3,f2,f1)
+#define LDIFP8_C_CLR(f1,f2,r3)         M12(0x21,LD_NONE,r3,f2,f1)
+#define LDIFPS_C_NC(f1,f2,r3)          M12(0x26,LD_NONE,r3,f2,f1)
+#define LDIFPD_C_NC(f1,f2,r3)          M12(0x27,LD_NONE,r3,f2,f1)
+#define LDIFP8_C_NC(f1,f2,r3)          M12(0x25,LD_NONE,r3,f2,f1)
+/* mov - Move Floating-point Register; expressed as FMERGE_S with the
+ * source register given for both source operands. */
+#define MOVF(f1,f3)                    FMERGE_S(f1,f3,f3)
+/* nop - F-unit no-op, F16 with its first argument (y) fixed at 0 */
+#define NOP_F(im)                      F16(0,im)
+/* setf - write FP register f1 from general register r2 (M18 forms,
+ * distinguished only by the x6 selector) */
+#define SETF_S(f1,r2)                  M18(0x1e,r2,f1)
+#define SETF_D(f1,r2)                  M18(0x1f,r2,f1)
+#define SETF_EXP(f1,r2)                        M18(0x1d,r2,f1)
+#define SETF_SIG(f1,r2)                        M18(0x1c,r2,f1)
+/* stf - FP stores: address in r3, value in f2.  Plain forms use M13,
+ * the `_inc` forms take an extra immediate and use M10. */
+#define STFS(r3,f2)                    M13(0x32,ST_NONE,r3,f2)
+#define STFD(r3,f2)                    M13(0x33,ST_NONE,r3,f2)
+#define STF8(r3,f2)                    M13(0x31,ST_NONE,r3,f2)
+#define STFE(r3,f2)                    M13(0x30,ST_NONE,r3,f2)
+#define STF_SPILL(r3,f2)               M13(0x3b,ST_NONE,r3,f2)
+#define STFS_inc(r3,f2,im)             M10(0x32,ST_NONE,r3,f2,im)
+#define STFD_inc(r3,f2,im)             M10(0x33,ST_NONE,r3,f2,im)
+#define STF8_inc(r3,f2,im)             M10(0x31,ST_NONE,r3,f2,im)
+#define STFE_inc(r3,f2,im)             M10(0x30,ST_NONE,r3,f2,im)
+#define STF_SPILL_inc(r3,f2,im)                M10(0x3b,ST_NONE,r3,f2,im)
+/* xma - F2 forms selected by the first argument; XMA_LU is simply an
+ * alias of XMA_L (identical encoding) */
+#define XMA_L(f1,f3,f4,f2)             F2(0,f4,f3,f2,f1)
+#define XMA_LU(f1,f3,f4,f2)            XMA_L(f1,f3,f4,f2)
+#define XMA_H(f1,f3,f4,f2)             F2(3,f4,f3,f2,f1)
+#define XMA_HU(f1,f3,f4,f2)            F2(2,f4,f3,f2,f1)
+/* xmpy - the matching xma form with register number 0 passed as f2 */
+#define XMPY_L(f1,f3,f4)               XMA_L(f1,f3,f4,0)
+#define XMPY_LU(f1,f3,f4)              XMA_L(f1,f3,f4,0)
+#define XMPY_H(f1,f3,f4)               XMA_H(f1,f3,f4,0)
+#define XMPY_HU(f1,f3,f4)              XMA_HU(f1,f3,f4,0)
+
+/*
+ * Backend entry points for moves, unary operations and conversions.
+ * Each lower-case macro either expands straight to an instruction macro
+ * or forwards to a static helper, supplying the implicit `_jit` state as
+ * first argument.  Where the float (_f) form is defined in terms of the
+ * double (_d) form, both share one implementation.  Immediate variants
+ * receive a pointer to the constant rather than the value itself.
+ */
+#define movr_f(r0,r1)                  movr_d(r0,r1)
+#define movr_d(r0,r1)                  MOVF(r0,r1)
+#define movi_f(r0,i0)                  _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#define movi_d(r0,i0)                  _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#define movr_w_f(r0,r1)                        _movr_w_f(_jit,r0,r1)
+static void _movr_w_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movr_f_w(r0,r1)                        _movr_f_w(_jit,r0,r1)
+static void _movr_f_w(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movr_w_d(r0,r1)                        _movr_w_d(_jit,r0,r1)
+static void _movr_w_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movr_d_w(r0,r1)                        _movr_d_w(_jit,r0,r1)
+static void _movr_d_w(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movi_f_w(r0,i0)                        _movi_f_w(_jit,r0,i0)
+static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
+#define movi_d_w(r0,i0)                        _movi_d_w(_jit,r0,i0)
+static void _movi_d_w(jit_state_t*,jit_int32_t,jit_float64_t*);
+#define absr_f(r0,r1)                  absr_d(r0,r1)
+#define absr_d(r0,r1)                  FABS(r0,r1)
+#define negr_f(r0,r1)                  negr_d(r0,r1)
+#define negr_d(r0,r1)                  FNEG(r0,r1)
+#define sqrtr_f(r0,r1)                 _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define sqrtr_d(r0,r1)                 _sqrtr_d(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define extr_f_d(r0,r1)                        FNORM_D(r0,r1)
+#define extr_d_f(r0,r1)                        FNORM_S(r0,r1)
+#define extr_f(r0,r1)                  _extr_f(_jit,r0,r1)
+static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define extr_d(r0,r1)                  _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+/* All four truncation entry points funnel into the single _truncr_d_l helper. */
+#define truncr_f_i(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_d_i(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_f_l(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_d_l(r0,r1)              _truncr_d_l(_jit,r0,r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+/*
+ * Arithmetic entry points.  Register-register add/sub/mul expand directly
+ * to the _S (float) or _D (double) instruction macro; division and every
+ * immediate form go through a static helper.  rsbr_* is subr_* with the
+ * two source operands exchanged.  Immediate operands are passed by pointer.
+ */
+#define addr_f(r0,r1,r2)               FADD_S(r0,r1,r2)
+#define addi_f(r0,r1,i0)               _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define addr_d(r0,r1,r2)               FADD_D(r0,r1,r2)
+#define addi_d(r0,r1,i0)               _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define subr_f(r0,r1,r2)               FSUB_S(r0,r1,r2)
+#define subi_f(r0,r1,i0)               _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define subr_d(r0,r1,r2)               FSUB_D(r0,r1,r2)
+#define subi_d(r0,r1,i0)               _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define rsbr_f(r0,r1,r2)               subr_f(r0,r2,r1)
+#define rsbi_f(r0,r1,i0)               _rsbi_f(_jit,r0,r1,i0)
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define rsbr_d(r0,r1,r2)               subr_d(r0,r2,r1)
+#define rsbi_d(r0,r1,i0)               _rsbi_d(_jit,r0,r1,i0)
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define mulr_f(r0,r1,r2)               FMPY_S(r0,r1,r2)
+#define muli_f(r0,r1,i0)               _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define mulr_d(r0,r1,r2)               FMPY_D(r0,r1,r2)
+#define muli_d(r0,r1,i0)               _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define divr_f(r0,r1,r2)               _divr_f(_jit,r0,r1,r2)
+static void _divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_f(r0,r1,i0)               _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define divr_d(r0,r1,r2)               _divr_d(_jit,r0,r1,r2)
+static void _divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_d(r0,r1,i0)               _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/*
+ * Comparison entry points (lt, le, eq, ge, gt, ne, the un* family, ltgt,
+ * ord, unord).  Every group follows the same pattern: the register-register
+ * float form is an alias of the double form, which calls one static helper;
+ * the immediate forms have separate _f and _d helpers that take a pointer
+ * to the constant.  All helpers return void and take the destination as r0.
+ */
+#define ltr_f(r0,r1,r2)                        ltr_d(r0,r1,r2)
+#define ltr_d(r0,r1,r2)                        _ltr_d(_jit,r0,r1,r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti_f(r0,r1,i0)                        _lti_f(_jit,r0,r1,i0)
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define lti_d(r0,r1,i0)                        _lti_d(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ler_f(r0,r1,r2)                        ler_d(r0,r1,r2)
+#define ler_d(r0,r1,r2)                        _ler_d(_jit,r0,r1,r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_f(r0,r1,i0)                        _lei_f(_jit,r0,r1,i0)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define lei_d(r0,r1,i0)                        _lei_d(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define eqr_f(r0,r1,r2)                        eqr_d(r0,r1,r2)
+#define eqr_d(r0,r1,r2)                        _eqr_d(_jit,r0,r1,r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi_f(r0,r1,i0)                        _eqi_f(_jit,r0,r1,i0)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define eqi_d(r0,r1,i0)                        _eqi_d(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ger_f(r0,r1,r2)                        ger_d(r0,r1,r2)
+#define ger_d(r0,r1,r2)                        _ger_d(_jit,r0,r1,r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_f(r0,r1,i0)                        _gei_f(_jit,r0,r1,i0)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define gei_d(r0,r1,i0)                        _gei_d(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define gtr_f(r0,r1,r2)                        gtr_d(r0,r1,r2)
+#define gtr_d(r0,r1,r2)                        _gtr_d(_jit,r0,r1,r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti_f(r0,r1,i0)                        _gti_f(_jit,r0,r1,i0)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define gti_d(r0,r1,i0)                        _gti_d(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ner_f(r0,r1,r2)                        ner_d(r0,r1,r2)
+#define ner_d(r0,r1,r2)                        _ner_d(_jit,r0,r1,r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei_f(r0,r1,i0)                        _nei_f(_jit,r0,r1,i0)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define nei_d(r0,r1,i0)                        _nei_d(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unltr_f(r0,r1,r2)              unltr_d(r0,r1,r2)
+#define unltr_d(r0,r1,r2)              _unltr_d(_jit,r0,r1,r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unlti_f(r0,r1,i0)              _unlti_f(_jit,r0,r1,i0)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unlti_d(r0,r1,i0)              _unlti_d(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unler_f(r0,r1,r2)              unler_d(r0,r1,r2)
+#define unler_d(r0,r1,r2)              _unler_d(_jit,r0,r1,r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unlei_f(r0,r1,i0)              _unlei_f(_jit,r0,r1,i0)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unlei_d(r0,r1,i0)              _unlei_d(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define uneqr_f(r0,r1,r2)              uneqr_d(r0,r1,r2)
+#define uneqr_d(r0,r1,r2)              _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define uneqi_f(r0,r1,i0)              _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define uneqi_d(r0,r1,i0)              _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unger_f(r0,r1,r2)              unger_d(r0,r1,r2)
+#define unger_d(r0,r1,r2)              _unger_d(_jit,r0,r1,r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ungei_f(r0,r1,i0)              _ungei_f(_jit,r0,r1,i0)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ungei_d(r0,r1,i0)              _ungei_d(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ungtr_f(r0,r1,r2)              ungtr_d(r0,r1,r2)
+#define ungtr_d(r0,r1,r2)              _ungtr_d(_jit,r0,r1,r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ungti_f(r0,r1,i0)              _ungti_f(_jit,r0,r1,i0)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ungti_d(r0,r1,i0)              _ungti_d(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ltgtr_f(r0,r1,r2)              ltgtr_d(r0,r1,r2)
+#define ltgtr_d(r0,r1,r2)              _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ltgti_f(r0,r1,i0)              _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ltgti_d(r0,r1,i0)              _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ordr_f(r0,r1,r2)               ordr_d(r0,r1,r2)
+#define ordr_d(r0,r1,r2)               _ordr_d(_jit,r0,r1,r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ordi_f(r0,r1,i0)               _ordi_f(_jit,r0,r1,i0)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ordi_d(r0,r1,i0)               _ordi_d(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unordr_f(r0,r1,r2)             unordr_d(r0,r1,r2)
+#define unordr_d(r0,r1,r2)             _unordr_d(_jit,r0,r1,r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unordi_f(r0,r1,i0)             _unordi_f(_jit,r0,r1,i0)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unordi_d(r0,r1,i0)             _unordi_d(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/*
+ * Load/store entry points.  The plain register forms (ldr_*, str_*) expand
+ * directly to LDFS/LDFD and STFS/STFD; absolute (ldi/sti) and indexed
+ * (ldxr/ldxi, stxr/stxi) forms are implemented by static helpers.
+ * Immediate addresses and offsets are jit_word_t.  Note the argument order:
+ * loads take the destination first, stores take the address/offset first.
+ */
+#define ldr_f(r0,r1)                   LDFS(r0,r1)
+#define ldi_f(r0,i0)                   _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_f(r0,r1,r2)               _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_f(r0,r1,i0)               _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_d(r0,r1)                   LDFD(r0,r1)
+#define ldi_d(r0,i0)                   _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_d(r0,r1,r2)               _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_d(r0,r1,i0)               _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define str_f(r0,r1)                   STFS(r0,r1)
+#define sti_f(i0,r0)                   _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_f(r0,r1,r2)               _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_f(i0,r0,r1)               _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define str_d(r0,r1)                   STFD(r0,r1)
+#define sti_d(i0,r0)                   _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_d(r0,r1,r2)               _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_d(i0,r0,r1)               _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/*
+ * Conditional-branch entry points, one group per comparison kind.  As with
+ * the comparison macros, the register-register float form aliases the
+ * double form and the immediate forms have their own _f/_d helpers taking
+ * a pointer to the constant.  The first argument (i0) is a jit_word_t and
+ * every helper returns a jit_word_t.
+ * NOTE(review): i0 is presumably the branch target and the return value a
+ * patch location -- confirm against the helper definitions.
+ */
+#define bltr_f(i0,r0,r1)               bltr_d(i0,r0,r1)
+#define bltr_d(i0,r0,r1)               _bltr_d(_jit,i0,r0,r1)
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_f(i0,r0,i1)               _blti_f(_jit,i0,r0,i1)
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define blti_d(i0,r0,i1)               _blti_d(_jit,i0,r0,i1)
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bler_f(i0,r0,r1)               bler_d(i0,r0,r1)
+#define bler_d(i0,r0,r1)               _bler_d(_jit,i0,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_f(i0,r0,i1)               _blei_f(_jit,i0,r0,i1)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define blei_d(i0,r0,i1)               _blei_d(_jit,i0,r0,i1)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define beqr_f(i0,r0,r1)               beqr_d(i0,r0,r1)
+#define beqr_d(i0,r0,r1)               _beqr_d(_jit,i0,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi_f(i0,r0,i1)               _beqi_f(_jit,i0,r0,i1)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define beqi_d(i0,r0,i1)               _beqi_d(_jit,i0,r0,i1)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bger_f(i0,r0,r1)               bger_d(i0,r0,r1)
+#define bger_d(i0,r0,r1)               _bger_d(_jit,i0,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_f(i0,r0,i1)               _bgei_f(_jit,i0,r0,i1)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bgei_d(i0,r0,i1)               _bgei_d(_jit,i0,r0,i1)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bgtr_f(i0,r0,r1)               bgtr_d(i0,r0,r1)
+#define bgtr_d(i0,r0,r1)               _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_f(i0,r0,i1)               _bgti_f(_jit,i0,r0,i1)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bgti_d(i0,r0,i1)               _bgti_d(_jit,i0,r0,i1)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bner_f(i0,r0,r1)               bner_d(i0,r0,r1)
+#define bner_d(i0,r0,r1)               _bner_d(_jit,i0,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei_f(i0,r0,i1)               _bnei_f(_jit,i0,r0,i1)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bnei_d(i0,r0,i1)               _bnei_d(_jit,i0,r0,i1)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunltr_f(i0,r0,r1)             bunltr_d(i0,r0,r1)
+#define bunltr_d(i0,r0,r1)             _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunlti_f(i0,r0,i1)             _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunlti_d(i0,r0,i1)             _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunler_f(i0,r0,r1)             bunler_d(i0,r0,r1)
+#define bunler_d(i0,r0,r1)             _bunler_d(_jit,i0,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunlei_f(i0,r0,i1)             _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunlei_d(i0,r0,i1)             _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define buneqr_f(i0,r0,r1)             buneqr_d(i0,r0,r1)
+#define buneqr_d(i0,r0,r1)             _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define buneqi_f(i0,r0,i1)             _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define buneqi_d(i0,r0,i1)             _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunger_f(i0,r0,r1)             bunger_d(i0,r0,r1)
+#define bunger_d(i0,r0,r1)             _bunger_d(_jit,i0,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bungei_f(i0,r0,i1)             _bungei_f(_jit,i0,r0,i1)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bungei_d(i0,r0,i1)             _bungei_d(_jit,i0,r0,i1)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bungtr_f(i0,r0,r1)             bungtr_d(i0,r0,r1)
+#define bungtr_d(i0,r0,r1)             _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bungti_f(i0,r0,i1)             _bungti_f(_jit,i0,r0,i1)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bungti_d(i0,r0,i1)             _bungti_d(_jit,i0,r0,i1)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bltgtr_f(i0,r0,r1)             bltgtr_d(i0,r0,r1)
+#define bltgtr_d(i0,r0,r1)             _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bltgti_f(i0,r0,i1)             _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bltgti_d(i0,r0,i1)             _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bordr_f(i0,r0,r1)              bordr_d(i0,r0,r1)
+#define bordr_d(i0,r0,r1)              _bordr_d(_jit,i0,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bordi_f(i0,r0,i1)              _bordi_f(_jit,i0,r0,i1)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bordi_d(i0,r0,i1)              _bordi_d(_jit,i0,r0,i1)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunordr_f(i0,r0,r1)            bunordr_d(i0,r0,r1)
+#define bunordr_d(i0,r0,r1)            _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunordi_f(i0,r0,i1)            _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunordi_d(i0,r0,i1)            _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+/* Variadic-argument fetch of a double: forwards two register numbers to
+ * the _vaarg_d helper. */
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * Emit one M7-form instruction word, tagged INST_M.
+ *
+ * _p is the 6-bit predicate field, x6 the 6-bit selector, ht the 2-bit
+ * hint, r3/r2 general register numbers and f1 an FP register number
+ * (7 bits each).  After emission both f1 and r3 are recorded as written.
+ */
+static void
+_M7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    /* Every operand has to fit its bit field. */
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTREG2(r2, r3);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+
+    insn  = (6L << 37) | (1L << 36);
+    insn |= (x6 << 30) | (ht << 28);
+    insn |= (r3 << 20) | (r2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETFREG(f1);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M8-form instruction word (FP load with immediate update of
+ * r3), tagged INST_M.
+ *
+ * _p is the 6-bit predicate field, x6 the 6-bit selector, ht the 2-bit
+ * hint, r3 the base register, f1 the FP target and im a signed 9-bit
+ * immediate in [-256, 255].  The immediate is scattered over three
+ * fields: sign (bit 8 of im) at bit 36, bit 7 of im at bit 27 and the low
+ * seven bits at bits 19:13.  f1 and r3 are recorded as written.
+ */
+static void
+_M8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t f1)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    /* full two's-complement 9-bit range, including -256 */
+    assert(im >= -256 && im <= 255);
+    assert(!(f1 &  ~0x7fL));
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+    /* The "i" field is bit 7 of the immediate, not a second copy of the
+     * sign bit; using bit 8 mis-encoded 128..255 and -256..-129. */
+    inst((7L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+         (((im>>7)&1L)<<27)|(r3<<20)|((im&0x7fL)<<13)|(f1<<6)|_p, INST_M);
+    SETFREG(f1);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M9-form instruction word, tagged INST_M.
+ *
+ * Fields: _p (6-bit predicate), x6 (6-bit selector), ht (2-bit hint),
+ * r3 (general register) and f1 (FP register).  Only f1 is recorded as
+ * written.
+ */
+static void
+_M9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+
+    insn = (6L << 37) | (x6 << 30) | (ht << 28) | (r3 << 20) | (f1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETFREG(f1);
+}
+
+/*
+ * Emit one M10-form instruction word (FP store with immediate update of
+ * r3), tagged INST_M.
+ *
+ * _p is the 6-bit predicate field, x6 the 6-bit selector, ht the 2-bit
+ * hint, r3 the base register, f2 the FP source and im a signed 9-bit
+ * immediate in [-256, 255].  The immediate is scattered over three
+ * fields: sign (bit 8 of im) at bit 36, bit 7 of im at bit 27 and the low
+ * seven bits at bits 12:6.  r3 is recorded as written.
+ */
+static void
+_M10(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t im)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(f2 &  ~0x7fL));
+    /* full two's-complement 9-bit range, including -256 */
+    assert(im >= -256 && im <= 255);
+    TSTREG1(r3);
+    TSTFREG1(f2);
+    TSTPRED(_p);
+    /* The "i" field is bit 7 of the immediate, not a second copy of the
+     * sign bit; using bit 8 mis-encoded 128..255 and -256..-129. */
+    inst((7L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+         (((im>>7)&1L)<<27)|(r3<<20)|(f2<<13)|((im&0x7fL)<<6)|_p, INST_M);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M11-form instruction word, tagged INST_M.
+ *
+ * Fields: _p (6-bit predicate), x6 (6-bit selector), ht (2-bit hint),
+ * r3 (general register) and the two FP registers f2 and f1.  Bit 27 is
+ * always set.  Both FP registers are recorded as written.
+ */
+static void
+_M11(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTFREG2(f1, f2);
+
+    insn  = (6L << 37) | (x6 << 30) | (ht << 28) | (1L << 27);
+    insn |= (r3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETFREG(f1);
+    SETFREG(f2);
+}
+
+/*
+ * Emit one M12-form instruction word, tagged INST_M.
+ *
+ * Same operand set as the M11 form, but with bit 36 set in addition to
+ * bit 27, and with r3 recorded as written alongside f1 and f2.
+ */
+static void
+_M12(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTREG1(r3);
+    TSTPRED(_p);
+    TSTFREG2(f1, f2);
+
+    insn  = (6L << 37) | (1L << 36) | (x6 << 30) | (ht << 28);
+    insn |= (1L << 27) | (r3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETFREG(f1);
+    SETFREG(f2);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M18-form instruction word, tagged INST_M (used by the SETF_*
+ * macros).
+ *
+ * Fields: _p (6-bit predicate), x6 (6-bit selector), r2 (general register
+ * source) and f1 (FP register, recorded as written).
+ */
+static void
+_M18(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTREG1(r2);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+
+    insn = (6L << 37) | (x6 << 30) | (1L << 27) | (r2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETFREG(f1);
+}
+
+/*
+ * Emit one M19-form instruction word, tagged INST_M: the mirror image of
+ * the M18 form, with an FP register source (f2) and a general register
+ * destination (r1, recorded as written).
+ */
+static void
+_M19(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t f2, jit_word_t r1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+
+    TSTFREG1(f2);
+    TSTPRED(_p);
+    TSTREG1(r1);
+
+    insn = (4L << 37) | (x6 << 30) | (1L << 27) | (f2 << 13) | (r1 << 6) | _p;
+    inst(insn, INST_M);
+
+    SETREG(r1);
+}
+
+/*
+ * Emit one F1-form instruction word, tagged INST_F.
+ *
+ * op is the 4-bit major opcode, x a 1-bit selector, sf the 2-bit status
+ * field, f4/f3/f2 the FP source registers and f1 the FP destination
+ * (recorded as written).  _p is the 6-bit predicate field.
+ */
+static void
+F1_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t x, jit_word_t sf,
+    jit_word_t f4, jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((op &  ~0xfL) == 0);
+    assert((x  &  ~0x1L) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((f4 & ~0x7fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTFREG3(f2, f3, f4);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+
+    insn  = (op << 37) | (x << 36) | (sf << 34) | (f4 << 27);
+    insn |= (f3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_F);
+
+    SETFREG(f1);
+}
+
+/*
+ * Emit one F4-form instruction word, tagged INST_F.
+ *
+ * rb, ra and ta are 1-bit selectors, sf the 2-bit status field, f3/f2 the
+ * FP source registers and p1/p2 the two 6-bit predicate targets.  _p is
+ * the 6-bit predicate field of the instruction itself.  Any non-zero
+ * target predicate is recorded in _jitc->pred.
+ */
+static void
+F4_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t rb, jit_word_t sf, jit_word_t ra, jit_word_t p2,
+    jit_word_t f3, jit_word_t f2, jit_word_t ta, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(rb &  ~0x1L));
+    assert(!(sf &  ~0x3L));
+    assert(!(ra &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(f3 & ~0x7fL));
+    assert(!(f2 & ~0x7fL));
+    assert(!(ta &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTFREG2(f2, f3);
+    TSTPRED(_p);
+    inst((4L<<37)|(rb<<36)|(sf<<34)|(ra<<33)|(p2<<27)|
+         (f3<<20)|(f2<<13)|(ta<<12)|(p1<<6)|_p, INST_F);
+    /* p1/p2 may be as large as 63; shifting a plain int 1 by 31 or more
+     * is undefined, so the mask is built in unsigned long. */
+    if (p1)
+        _jitc->pred |= 1UL << p1;
+    if (p2)
+        _jitc->pred |= 1UL << p2;
+}
+
+/*
+ * Emit one F5-form instruction word, tagged INST_F.
+ *
+ * p1/p2 are the 6-bit predicate targets, fc a 9-bit class mask, f2 the FP
+ * source register and ta a 1-bit selector.  _p is the 6-bit predicate
+ * field of the instruction itself.  Any non-zero target predicate is
+ * recorded in _jitc->pred.
+ *
+ * NOTE(review): fc is split as "upper two bits at 34:33, lower seven at
+ * 26:20".  The architecture manual describes the 9-bit class as
+ * fclass7c << 2 | fc2, i.e. the *low* two bits in the 2-bit field --
+ * confirm which layout callers of this emitter expect.
+ */
+static void
+F5_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t p2, jit_word_t fc, jit_word_t f2, jit_word_t ta, jit_word_t p1)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(p2 &  ~0x3fL));
+    assert(!(fc & ~0x1ffL));
+    assert(!(f2 &  ~0x7fL));
+    assert(!(ta &   ~0x1L));
+    assert(!(p1 &  ~0x3fL));
+    TSTFREG1(f2);
+    TSTPRED(_p);
+    inst((5L<<37)|(((fc>>7)&3L)<<33)|(p2<<27)|
+         ((fc&0x7fL)<<20)|(f2<<13)|(ta<<12)|(p1<<6)|_p, INST_F);
+    /* p1/p2 may be as large as 63; shifting a plain int 1 by 31 or more
+     * is undefined, so the mask is built in unsigned long. */
+    if (p1)
+        _jitc->pred |= 1UL << p1;
+    if (p2)
+        _jitc->pred |= 1UL << p2;
+}
+
+/*
+ * Emit one F6/F7-style instruction word, tagged INST_F: an FP operation
+ * that writes both an FP register (f1) and a predicate (p2).
+ *
+ * op is the 4-bit major opcode, q a 1-bit selector, sf the 2-bit status
+ * field, f3/f2 the FP sources.  Bit 33 is always set.  _p is the 6-bit
+ * predicate field of the instruction itself.  f1 is recorded as written
+ * and a non-zero p2 is recorded in _jitc->pred.
+ */
+static void
+F6x_(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t op, jit_word_t q, jit_word_t sf,
+     jit_word_t p2,  jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(op &  ~0xfL));
+    assert(!(q  &  ~0x1L));
+    assert(!(sf &  ~0x3L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(f3 & ~0x7fL));
+    assert(!(f2 & ~0x7fL));
+    assert(!(f1 & ~0x7fL));
+    TSTFREG2(f2, f3);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+    inst((op<<37)|(q<<36)|(sf<<34)|(1L<<33)|
+         (p2<<27)|(f3<<20)|(f2<<13)|(f1<<6)|_p, INST_F);
+    SETFREG(f1);
+    /* p2 may be as large as 63; shifting a plain int 1 by 31 or more is
+     * undefined, so the mask is built in unsigned long. */
+    if (p2)
+        _jitc->pred |= 1UL << p2;
+}
+
+/*
+ * Emit one F8-form instruction word, tagged INST_F.
+ *
+ * op is the 4-bit major opcode, sf the 2-bit status field, x6 the 6-bit
+ * selector, f3/f2 the FP sources and f1 the FP destination (recorded as
+ * written).  _p is the 6-bit predicate field.
+ */
+static void
+F8_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t sf, jit_word_t x6,
+    jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t insn;
+
+    assert((_p & ~0x3fL) == 0);
+    assert((op &  ~0xfL) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+
+    TSTFREG2(f2, f3);
+    TSTPRED(_p);
+    TSTFREG1(f1);
+
+    insn  = (op << 37) | (sf << 34) | (x6 << 27);
+    insn |= (f3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(insn, INST_F);
+
+    SETFREG(f1);
+}
+
+/*
+ * Build and emit an instruction word tagged INST_F from a status field
+ * selector and two 7-bit masks.
+ *
+ *   _p    qualifying predicate (bits 5:0)
+ *   sf    2-bit status field selector (bit 34)
+ *   x6    6-bit opcode extension (bit 27)
+ *   omsk  7-bit mask placed at bit 20
+ *   amsk  7-bit mask placed at bit 13
+ */
+static void
+F12_(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t sf, jit_word_t x6, jit_word_t omsk, jit_word_t amsk)
+{
+    assert(!(_p   & ~0x3fL));
+    assert(!(sf   &  ~0x3L));
+    assert(!(x6   & ~0x3fL));
+    assert(!(omsk & ~0x7fL));
+    assert(!(amsk & ~0x7fL));
+    TSTPRED(_p);
+    /* The qualifying predicate occupies bits 5:0 like in every other
+     * encoder here; it used to be validated but never encoded. */
+    inst((sf<<34)|(x6<<27)|(omsk<<20)|(amsk<<13)|_p, INST_F);
+}
+
+/*
+ * Build and emit an instruction word tagged INST_F that carries a 21-bit
+ * immediate: bit 20 of im goes to bit 36, the low 20 bits go to bits 25:6.
+ *
+ *   _p  qualifying predicate (bits 5:0)
+ *   sf  2-bit status field selector (bit 34)
+ *   x   1-bit opcode extension (bit 33)
+ *   x6  6-bit opcode extension (bit 27)
+ *   im  immediate, at most 21 significant bits
+ */
+static void
+F14x_(jit_state_t* _jit, jit_word_t _p,
+      jit_word_t sf,  jit_word_t x, jit_word_t x6, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(sf &      ~0x3L));
+    assert(!(x  &      ~0x1L));
+    assert(!(x6 &     ~0x3fL));
+    /* The field is 20 bits plus the separately stored bit 20; the range
+     * check and the mask now agree with the (im>>20) extraction below.
+     * Previously bit 16 passed the check but was silently dropped. */
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|(sf<<34)|(x<<33)|
+        (x6<<27)|((im&0xfffffL)<<6)|_p, INST_F);
+}
+
+/*
+ * Build and emit an F16-format (nop.f / hint.f) instruction word, tagged
+ * INST_F.
+ *
+ *   _p  qualifying predicate (bits 5:0)
+ *   y   1-bit selector stored at bit 26
+ *   im  immediate, at most 21 significant bits: bit 20 goes to bit 36,
+ *       the low 20 bits to bits 25:6
+ *
+ * The opcode extension x6 is the constant 1 and occupies bits 32:27, so
+ * it is encoded as 1L<<27 with y directly below it.  The two fields used
+ * to be swapped, which left x6 at zero when y was 0.
+ */
+static void
+F16_(jit_state_t* _jit, jit_word_t _p,
+     jit_word_t y, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(y  &      ~0x1L));
+    /* Range check and mask agree with the (im>>20) extraction below. */
+    assert(!(im & ~0x1fffffL));
+    TSTPRED(_p);
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_F);
+}
+
+/*
+ * Load the single-precision constant *i0 into floating-point register r0.
+ * With _jitc->no_data set, the value's bit pattern is materialised in a
+ * scratch general register and transferred with SETF_S; otherwise the
+ * value is loaded from the address i0.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_f(r0, (jit_word_t)i0);
+       return;
+    }
+    bits.f = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i & 0xffffffff);
+    SETF_S(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the double-precision constant *i0 into floating-point register r0.
+ * With _jitc->no_data set, the value's bit pattern is materialised in a
+ * scratch general register and transferred with SETF_D; otherwise the
+ * value is loaded from the address i0.
+ */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_word_t       w;
+       jit_float64_t    d;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_d(r0, (jit_word_t)i0);
+       return;
+    }
+    bits.d = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.w);
+    SETF_D(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Move general register r1 to floating-point register r0 as a single
+ * precision value (SETF_S).  Source numbers from 120 up are rebased onto
+ * _jitc->rout first; the original note says this is the only expected
+ * use (with out0 == 120).
+ */
+static void
+_movr_w_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                src = r1;
+
+    if (src >= 120)
+       src = _jitc->rout + (src - 120);
+    SETF_S(r0, src);
+}
+
+/*
+ * Move floating-point register r1 to general register r0 as a single
+ * precision value (GETF_S).  Destination numbers from 120 up are rebased
+ * onto _jitc->rout first; the original note says this is the only
+ * expected use (with out0 == 120).
+ */
+static void
+_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                dst = r0;
+
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+    GETF_S(dst, r1);
+}
+
+/*
+ * Place the bit pattern of the single-precision constant *i0 in general
+ * register r0.  Destination numbers from 120 up are rebased onto
+ * _jitc->rout first (expected use: out0 == 120).  With _jitc->no_data set
+ * the pattern is emitted as an immediate, otherwise it is loaded from the
+ * address i0.
+ */
+static void
+_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    jit_data_t         bits;
+
+    if (r0 >= 120)
+       r0 = _jitc->rout + (r0 - 120);
+    if (!_jitc->no_data) {
+       ldi_i(r0, (jit_word_t)i0);
+       return;
+    }
+    bits.f = *i0;
+    movi(r0, bits.q.l);
+}
+
+/*
+ * Move general register r1 to floating-point register r0 as a double
+ * precision value (SETF_D).  Source numbers from 120 up are rebased onto
+ * _jitc->rout first; the original note says this is the only expected
+ * use (with out0 == 120).
+ */
+static void
+_movr_w_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                src = r1;
+
+    if (src >= 120)
+       src = _jitc->rout + (src - 120);
+    SETF_D(r0, src);
+}
+
+/*
+ * Move floating-point register r1 to general register r0 as a double
+ * precision value (GETF_D).  Destination numbers from 120 up are rebased
+ * onto _jitc->rout first; the original note says this is the only
+ * expected use (with out0 == 120).
+ */
+static void
+_movr_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                dst = r0;
+
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+    GETF_D(dst, r1);
+}
+
+/*
+ * Place the bit pattern of the double-precision constant *i0 in general
+ * register r0.  Destination numbers from 120 up are rebased onto
+ * _jitc->rout first (expected use: out0 == 120).  With _jitc->no_data set
+ * the pattern is emitted as an immediate, otherwise it is loaded from the
+ * address i0.
+ */
+static void
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_word_t       w;
+       jit_float64_t    d;
+    } bits;
+
+    if (r0 >= 120)
+       r0 = _jitc->rout + (r0 - 120);
+    if (!_jitc->no_data) {
+       ldi_l(r0, (jit_word_t)i0);
+       return;
+    }
+    bits.d = *i0;
+    movi(r0, bits.w);
+}
+
+/*
+ * fpr_opi(name, type, size) defines _<name>i_<type>(): the variant of a
+ * floating-point operation whose last operand is a constant.  It grabs a
+ * scratch floating-point register, loads *i0 into it with movi_<type>,
+ * runs the register form <name>r_<type>(r0, r1, scratch) and releases
+ * the scratch register.
+ */
+#define fpr_opi(name, type, size)                                      \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                        \
+}
+/*
+ * fpr_bopi(name, type, size) defines _b<name>i_<type>(): the variant of a
+ * floating-point compare-and-branch whose second operand is a constant.
+ * The scratch register is requested with jit_class_nospill in addition
+ * to jit_class_fpr.  Returns whatever b<name>r_<type>() returns.
+ */
+#define fpr_bopi(name, type, size)                                     \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
+                                         jit_class_nospill);           \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+/* Single (f, 32-bit) and double (d, 64-bit) shorthands for the above. */
+#define fopi(name)                     fpr_opi(name, f, 32)
+#define fbopi(name)                    fpr_bopi(name, f, 32)
+#define dopi(name)                     fpr_opi(name, d, 64)
+#define dbopi(name)                    fpr_bopi(name, d, 64)
+
+/* Constant-operand arithmetic: _addi_f, _subi_f, ... _divi_d. */
+fopi(add)
+fopi(sub)
+fopi(rsb)
+fopi(mul)
+fopi(div)
+dopi(add)
+dopi(sub)
+dopi(rsb)
+dopi(mul)
+dopi(div)
+
+/* translation from gcc -O0 */
+/*
+ * Single-precision division r0 = r1 / r2, emitted as a fixed sequence of
+ * FRCPA followed by fused multiply-add steps that are all predicated on
+ * PR_6.  Three scratch floating-point registers are used and released.
+ * NOTE(review): presumably FRCPA yields a reciprocal approximation and
+ * sets PR_6 when the refinement steps are needed -- confirm against the
+ * ISA manual.  GR_1 is passed where a floating-point register number is
+ * expected; presumably it stands for f1 -- confirm.
+ * The instruction order is significant; do not reorder.
+ */
+static void
+_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0, t1, t2;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    t2 = jit_get_reg(jit_class_fpr);
+    FRCPA(rn(t0), PR_6, r1, r2);
+    FNMA_p(rn(t1), r2, rn(t0), GR_1, SF_S1, PR_6);
+    FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
+    FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
+    FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
+    /* Only the last two result-producing steps use the _S (single) forms. */
+    FMPY_S_p(rn(t2), r1, rn(t1), SF_S1, PR_6);
+    FNMA_p(rn(t0), r2, rn(t2), r1, SF_S1, PR_6);
+    /* Final step writes r0 and is the only one using SF_S0. */
+    FMA_S_p(r0, rn(t0), rn(t1), rn(t2), SF_S0, PR_6);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Double-precision division r0 = r1 / r2.  Same instruction sequence as
+ * _divr_f, with the _D forms in place of the _S forms for the last
+ * result-producing steps.  All steps after FRCPA are predicated on PR_6.
+ * NOTE(review): GR_1 is passed where a floating-point register number is
+ * expected; presumably it stands for f1 -- confirm.
+ * The instruction order is significant; do not reorder.
+ */
+static void
+_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0, t1, t2;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    t2 = jit_get_reg(jit_class_fpr);
+    FRCPA(rn(t0), PR_6, r1, r2);
+    FNMA_p(rn(t1), r2, rn(t0), GR_1, SF_S1, PR_6);
+    FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
+    FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
+    FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
+    FMPY_D_p(rn(t2), r1, rn(t1), SF_S1, PR_6);
+    FNMA_p(rn(t0), r2, rn(t2), r1, SF_S1, PR_6);
+    /* Final step writes r0 and is the only one using SF_S0. */
+    FMA_D_p(r0, rn(t0), rn(t1), rn(t2), SF_S0, PR_6);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Convert the integer in general register r1 to single precision in
+ * floating-point register r0: the integer is transferred to a scratch
+ * floating-point register (SETF_SIG), converted (FCVT_XF) and then
+ * normalised in place with FNORM_S.
+ */
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_fpr);
+
+    SETF_SIG(rn(tmp), r1);
+    FCVT_XF(r0, rn(tmp));
+    FNORM_S(r0, r0);
+
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Convert the integer in general register r1 to double precision in
+ * floating-point register r0: the integer is transferred to a scratch
+ * floating-point register (SETF_SIG), converted (FCVT_XF) and then
+ * normalised in place with FNORM_D.
+ */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_fpr);
+
+    SETF_SIG(rn(tmp), r1);
+    FCVT_XF(r0, rn(tmp));
+    FNORM_D(r0, r0);
+
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Truncate the floating-point value in register r1 to an integer in
+ * general register r0.  The conversion is done into a scratch
+ * floating-point register (FCVT_FX_TRUNC) whose significand is then
+ * copied to r0 with GETF_SIG.
+ *
+ * r0 is a general register here, so there is nothing to normalise once
+ * GETF_SIG has run.  The FNORM(r0, r0) that used to follow operated on
+ * the floating-point register that merely shares r0's number and has
+ * been dropped.
+ */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    FCVT_FX_TRUNC(rn(reg), r1);
+    GETF_SIG(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = (r1 < r2) for floating-point r1, r2; r0 is a general register.
+ * FCMP_LT targets the predicate pair PR_6/PR_7: r0 receives 1 under PR_6
+ * and GR_0 under PR_7 (presumably the complementary predicate).
+ */
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LT(PR_6, PR_7, r1, r2);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _lti_f and _lti_d. */
+fopi(lt)
+dopi(lt)
+
+/*
+ * r0 = (r1 <= r2) for floating-point r1, r2; r0 is a general register.
+ * FCMP_LE targets the predicate pair PR_6/PR_7: r0 receives 1 under PR_6
+ * and GR_0 under PR_7 (presumably the complementary predicate).
+ */
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LE(PR_6, PR_7, r1, r2);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _lei_f and _lei_d. */
+fopi(le)
+dopi(le)
+
+/*
+ * r0 = (r1 == r2) for floating-point r1, r2; r0 is a general register.
+ * FCMP_EQ targets the predicate pair PR_6/PR_7: r0 receives 1 under PR_6
+ * and GR_0 under PR_7 (presumably the complementary predicate).
+ */
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_EQ(PR_6, PR_7, r1, r2);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _eqi_f and _eqi_d. */
+fopi(eq)
+dopi(eq)
+
+/*
+ * r0 = (r1 >= r2) for floating-point r1, r2; r0 is a general register.
+ * Implemented as "r2 <= r1": FCMP_LE is issued with the operands swapped,
+ * then r0 receives 1 under PR_6 and GR_0 under PR_7.
+ */
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LE(PR_6, PR_7, r2, r1);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _gei_f and _gei_d. */
+fopi(ge)
+dopi(ge)
+
+/*
+ * r0 = (r1 > r2) for floating-point r1, r2; r0 is a general register.
+ * Implemented as "r2 < r1": FCMP_LT is issued with the operands swapped,
+ * then r0 receives 1 under PR_6 and GR_0 under PR_7.
+ */
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LT(PR_6, PR_7, r2, r1);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _gti_f and _gti_d. */
+fopi(gt)
+dopi(gt)
+
+/*
+ * r0 = (r1 != r2) for floating-point r1, r2; r0 is a general register.
+ * Uses the equality compare with the results inverted: r0 receives GR_0
+ * under PR_6 and 1 under PR_7.
+ */
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_EQ(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _nei_f and _nei_d. */
+fopi(ne)
+dopi(ne)
+
+/*
+ * "Unordered or less than": r0 = !(r2 <= r1).
+ * FCMP_LE is issued on (r2, r1); r0 receives GR_0 under PR_6 and 1 under
+ * PR_7.  Presumably PR_7 is also set when the operands are unordered,
+ * which is what makes this the "un" variant -- confirm against the ISA.
+ */
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LE(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _unlti_f and _unlti_d. */
+fopi(unlt)
+dopi(unlt)
+
+/*
+ * "Unordered or less than or equal": r0 = !(r2 < r1).
+ * FCMP_LT is issued on (r2, r1); r0 receives GR_0 under PR_6 and 1 under
+ * PR_7.  Presumably PR_7 is also set when the operands are unordered --
+ * confirm against the ISA.
+ */
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LT(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _unlei_f and _unlei_d. */
+fopi(unle)
+dopi(unle)
+
+/*
+ * "Unordered or equal": r0 starts at 1 and is cleared to GR_0 when either
+ * r1 < r2 (PR_8) or r2 < r1 (PR_6) holds.  Two predicate pairs are used
+ * so both compare results stay live until the predicated moves.
+ */
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MOVI(r0, 1);
+    FCMP_LT(PR_8, PR_9, r1, r2);
+    FCMP_LT(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_8);             /* !(r1 < r2) && !(r2 < r1) */
+    MOV_p(r0, GR_0, PR_6);
+}
+/* Constant-operand wrappers _uneqi_f and _uneqi_d. */
+fopi(uneq)
+dopi(uneq)
+
+/*
+ * "Unordered or greater than or equal": r0 = !(r1 < r2).
+ * FCMP_LT is issued on (r1, r2); r0 receives GR_0 under PR_6 and 1 under
+ * PR_7.  Presumably PR_7 is also set when the operands are unordered --
+ * confirm against the ISA.
+ */
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LT(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _ungei_f and _ungei_d. */
+fopi(unge)
+dopi(unge)
+
+/*
+ * "Unordered or greater than": r0 = !(r1 <= r2).
+ * FCMP_LE is issued on (r1, r2); r0 receives GR_0 under PR_6 and 1 under
+ * PR_7.  Presumably PR_7 is also set when the operands are unordered --
+ * confirm against the ISA.
+ */
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_LE(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _ungti_f and _ungti_d. */
+fopi(ungt)
+dopi(ungt)
+
+/*
+ * "Less than or greater than": r0 starts at 1 and is cleared to GR_0 when
+ * the operands compare equal (PR_8) or unordered (PR_6).
+ */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MOVI(r0, 1);
+    FCMP_EQ(PR_8, PR_9, r1, r2);
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_8);
+    MOV_p(r0, GR_0, PR_6);
+}
+/* Constant-operand wrappers _ltgti_f and _ltgti_d. */
+fopi(ltgt)
+dopi(ltgt)
+
+/*
+ * "Ordered": r0 = 0 when r1 and r2 compare unordered, 1 otherwise.
+ * FCMP_UNORD targets PR_6/PR_7; r0 receives GR_0 under PR_6 and 1 under
+ * PR_7.
+ */
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+/* Constant-operand wrappers _ordi_f and _ordi_d. */
+fopi(ord)
+dopi(ord)
+
+/*
+ * "Unordered": r0 = 1 when r1 and r2 compare unordered, 0 otherwise.
+ * FCMP_UNORD targets PR_6/PR_7; r0 receives 1 under PR_6 and GR_0 under
+ * PR_7.
+ */
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+/* Constant-operand wrappers _unordi_f and _unordi_d. */
+fopi(unord)
+dopi(unord)
+
+/*
+ * Load a single-precision value from absolute address i0 into
+ * floating-point register r0.  The address is first materialised in a
+ * scratch general register.
+ */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_f(r0, rn(addr_reg));
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Load a single-precision value from address r1 + r2 into floating-point
+ * register r0.  The sum is computed into a scratch general register.
+ */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_f(r0, rn(addr_reg));
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Load a single-precision value from address r1 + i0 into floating-point
+ * register r0.
+ *
+ * A zero offset loads straight through r1; otherwise the address is
+ * computed into a scratch general register.  The test is on the offset
+ * i0, matching _stxi_f; it used to test the destination register r0,
+ * which would have ignored a non-zero offset whenever r0 was 0 and
+ * wasted an add for a zero offset otherwise.
+ */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0) {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       ldr_f(r0, r1);
+}
+
+/*
+ * Load a double-precision value from absolute address i0 into
+ * floating-point register r0.  The address is first materialised in a
+ * scratch general register.
+ */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    ldr_d(r0, rn(addr_reg));
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Load a double-precision value from address r1 + r2 into floating-point
+ * register r0.  The sum is computed into a scratch general register.
+ */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r1, r2);
+    ldr_d(r0, rn(addr_reg));
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Load a double-precision value from address r1 + i0 into floating-point
+ * register r0.
+ *
+ * A zero offset loads straight through r1; otherwise the address is
+ * computed into a scratch general register.  The test is on the offset
+ * i0, matching _stxi_d; it used to test the destination register r0,
+ * which would have ignored a non-zero offset whenever r0 was 0 and
+ * wasted an add for a zero offset otherwise.
+ */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0) {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       ldr_d(r0, r1);
+}
+
+/*
+ * Store the single-precision value in floating-point register r0 to
+ * absolute address i0.  The address is first materialised in a scratch
+ * general register.
+ */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_f(rn(addr_reg), r0);
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Store the single-precision value in floating-point register r2 to
+ * address r0 + r1.  The sum is computed into a scratch general register.
+ */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_f(rn(addr_reg), r2);
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Store the single-precision value in floating-point register r1 to
+ * address r0 + i0.  A zero offset stores straight through r0; otherwise
+ * the address is computed into a scratch general register.
+ */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_f(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_f(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Store the double-precision value in floating-point register r0 to
+ * absolute address i0.  The address is first materialised in a scratch
+ * general register.
+ */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    movi(rn(addr_reg), i0);
+    str_d(rn(addr_reg), r0);
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Store the double-precision value in floating-point register r2 to
+ * address r0 + r1.  The sum is computed into a scratch general register.
+ */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg(jit_class_gpr);
+
+    addr(rn(addr_reg), r0, r1);
+    str_d(rn(addr_reg), r2);
+
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Store the double-precision value in floating-point register r1 to
+ * address r0 + i0.  A zero offset stores straight through r0; otherwise
+ * the address is computed into a scratch general register.
+ */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (!i0) {
+       str_d(r0, r1);
+       return;
+    }
+    addr_reg = jit_get_reg(jit_class_gpr);
+    addi(rn(addr_reg), r0, i0);
+    str_d(rn(addr_reg), r1);
+    jit_unget_reg(addr_reg);
+}
+
+/*
+ * Single-precision square root, r0 = sqrtf(r1), done by calling the C
+ * library function: the operand is moved to register number GR_8, sqrtf
+ * is called, and the result is copied from the same register to r0.
+ * NOTE(review): GR_8 is used here as a floating-point register number,
+ * presumably f8 (first FP argument and FP return value) -- confirm.
+ */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr_f(GR_8, r1);
+    calli((jit_word_t)sqrtf);
+    MOVF(r0, GR_8);
+}
+
+/*
+ * Double-precision square root, r0 = sqrt(r1), done by calling the C
+ * library function: the operand is moved to register number GR_8, sqrt
+ * is called, and the result is copied from the same register to r0.
+ * NOTE(review): GR_8 is used here as a floating-point register number,
+ * presumably f8 (first FP argument and FP return value) -- confirm.
+ */
+static void
+_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr_d(GR_8, r1);
+    calli((jit_word_t)sqrt);
+    MOVF(r0, GR_8);
+}
+
+/*
+ * Branch to i0 when r0 < r1 (floating-point).  The displacement is
+ * encoded in 16-byte units relative to the code position sampled after
+ * sync().  Returns that position, presumably so the branch can be
+ * patched later.
+ */
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LT(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(lt)
+dbopi(lt)
+
+/*
+ * Branch to i0 when r0 <= r1 (floating-point).  The displacement is
+ * encoded in 16-byte units relative to the code position sampled after
+ * sync().  Returns that position, presumably so the branch can be
+ * patched later.
+ */
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LE(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(le)
+dbopi(le)
+
+/*
+ * Branch to i0 when r0 == r1 (floating-point).  The displacement is
+ * encoded in 16-byte units relative to the code position sampled after
+ * sync().  Returns that position, presumably so the branch can be
+ * patched later.
+ */
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(eq)
+dbopi(eq)
+
+/*
+ * Branch to i0 when r0 >= r1 (floating-point), tested as "r1 <= r0" with
+ * the compare operands swapped.  The displacement is encoded in 16-byte
+ * units relative to the code position sampled after sync().  Returns
+ * that position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LE(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(ge)
+dbopi(ge)
+
+/*
+ * Branch to i0 when r0 > r1 (floating-point), tested as "r1 < r0" with
+ * the compare operands swapped.  The displacement is encoded in 16-byte
+ * units relative to the code position sampled after sync().  Returns
+ * that position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LT(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(gt)
+dbopi(gt)
+
+/*
+ * Branch to i0 when r0 != r1 (floating-point): an equality compare whose
+ * second predicate target, PR_7, qualifies the branch.  The displacement
+ * is encoded in 16-byte units relative to the code position sampled
+ * after sync().  Returns that position, presumably so the branch can be
+ * patched later.
+ */
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(ne)
+dbopi(ne)
+
+/*
+ * "Unordered or less than" branch: taken when !(r1 <= r0), i.e. the
+ * branch is qualified by PR_7, the second predicate target of
+ * FCMP_LE(r1, r0).  The displacement is encoded in 16-byte units
+ * relative to the code position sampled after sync().  Returns that
+ * position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LE(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(unlt)
+dbopi(unlt)
+
+/*
+ * "Unordered or less than or equal" branch: taken when !(r1 < r0), i.e.
+ * the branch is qualified by PR_7, the second predicate target of
+ * FCMP_LT(r1, r0).  The displacement is encoded in 16-byte units
+ * relative to the code position sampled after sync().  Returns that
+ * position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LT(PR_6, PR_7, r1, r0);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(unle)
+dbopi(unle)
+
+/*
+ * "Unordered or equal" branch to i0.
+ *
+ * Layout of the emitted code:
+ *     if unordered      -> L1
+ *     if not equal      -> L2
+ *   L1: unconditional branch to i0   (its position is returned)
+ *   L2: fall through
+ * The two internal branches are emitted with placeholder displacements
+ * and fixed up with patch_at() once the positions of L1 and L2 are
+ * known.  Statement order matters: each sync() precedes the sampling of
+ * _jit->pc.w that names a branch or label position.
+ */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         junord, jne;
+    FCMP_UNORD(PR_8, PR_9, r0, r1);
+    sync();
+    /* junord L1 */
+    junord = _jit->pc.w;
+    BRI_COND(3, PR_8);
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    sync();
+    /* jne L2 */
+    jne = _jit->pc.w;
+    BRI_COND(2, PR_7);
+    sync();
+    w = _jit->pc.w;
+    /* L1: */
+    patch_at(jit_code_bunordr_d, junord, _jit->pc.w);
+    BRI((i0 - w) >> 4);                /* unconditional jump to patch */
+    sync();
+    /* L2: */
+    patch_at(jit_code_bner_d, jne, _jit->pc.w);
+    return (w);
+}
+fbopi(uneq)
+dbopi(uneq)
+
+/*
+ * "Unordered or greater than or equal" branch: taken when !(r0 < r1),
+ * i.e. the branch is qualified by PR_7, the second predicate target of
+ * FCMP_LT(r0, r1).  The displacement is encoded in 16-byte units
+ * relative to the code position sampled after sync().  Returns that
+ * position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LT(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(unge)
+dbopi(unge)
+
+/*
+ * "Unordered or greater than" branch: taken when !(r0 <= r1), i.e. the
+ * branch is qualified by PR_7, the second predicate target of
+ * FCMP_LE(r0, r1).  The displacement is encoded in 16-byte units
+ * relative to the code position sampled after sync().  Returns that
+ * position, presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_LE(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(ungt)
+dbopi(ungt)
+
+/*
+ * "Less than or greater than" branch to i0.
+ *
+ * Layout of the emitted code:
+ *     if equal          -> L1
+ *     if unordered      -> L1
+ *     unconditional branch to i0     (its position is returned)
+ *   L1: fall through
+ * The two internal branches are emitted with placeholder displacements
+ * and fixed up with patch_at() once the position of L1 is known.
+ * Statement order matters: each sync() precedes the sampling of
+ * _jit->pc.w that names a branch or label position.
+ */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_word_t         jeq, junord;
+    FCMP_EQ(PR_8, PR_9, r0, r1);
+    /* jeq L1 */
+    sync();
+    jeq = _jit->pc.w;
+    BRI_COND(4, PR_8);
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    /* junord L1 */
+    sync();
+    junord = _jit->pc.w;
+    BRI_COND(2, PR_6);
+    sync();
+    w = _jit->pc.w;
+    BRI((i0 - w) >> 4);                /* unconditional jump to patch */
+    /* L1 */
+    sync();
+    patch_at(jit_code_beqr_d, jeq, _jit->pc.w);
+    patch_at(jit_code_bordr_d, junord, _jit->pc.w);
+    return (w);
+}
+fbopi(ltgt)
+dbopi(ltgt)
+
+/*
+ * "Ordered" branch: taken when r0 and r1 do not compare unordered, i.e.
+ * the branch is qualified by PR_7, the second predicate target of
+ * FCMP_UNORD.  The displacement is encoded in 16-byte units relative to
+ * the code position sampled after sync().  Returns that position,
+ * presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_7);
+    return (branch_pc);
+}
+fbopi(ord)
+dbopi(ord)
+
+/*
+ * "Unordered" branch: taken when r0 and r1 compare unordered (PR_6 of
+ * FCMP_UNORD).  The displacement is encoded in 16-byte units relative to
+ * the code position sampled after sync().  Returns that position,
+ * presumably so the branch can be patched later.
+ */
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    sync();
+    branch_pc = _jit->pc.w;
+    BRI_COND((i0 - branch_pc) >> 4, PR_6);
+    return (branch_pc);
+}
+fbopi(unord)
+dbopi(unord)
+
+/*
+ * Fetch the next double from a variable-argument list.
+ *   r0  floating-point register receiving the value
+ *   r1  general register holding the va_list pointer; advanced by 8
+ * Only valid inside a function declared with jit_call_varargs (asserted).
+ */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Load argument. */
+    ldr_d(r0, r1);
+    /* Update va_list. */
+    addi(r1, r1, 8);
+}
+#endif
diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c
new file mode 100644 (file)
index 0000000..59826d9
--- /dev/null
@@ -0,0 +1,402 @@
+
+#if __WORDSIZE == 64
+#define JIT_INSTR_MAX 224
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    224,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    16,        /* va_start */
+    32,        /* va_arg */
+    32,        /* va_arg_d */
+    0, /* va_end */
+    16,        /* addr */
+    32,        /* addi */
+    32,        /* addcr */
+    48,        /* addci */
+    64,        /* addxr */
+    64,        /* addxi */
+    16,        /* subr */
+    32,        /* subi */
+    32,        /* subcr */
+    48,        /* subci */
+    64,        /* subxr */
+    64,        /* subxi */
+    32,        /* rsbi */
+    48,        /* mulr */
+    64,        /* muli */
+    112,       /* qmulr */
+    112,       /* qmuli */
+    112,       /* qmulr_u */
+    112,       /* qmuli_u */
+    80,        /* divr */
+    96,        /* divi */
+    80,        /* divr_u */
+    96,        /* divi_u */
+    144,       /* qdivr */
+    144,       /* qdivi */
+    144,       /* qdivr_u */
+    144,       /* qdivi_u */
+    80,        /* remr */
+    96,        /* remi */
+    80,        /* remr_u */
+    96,        /* remi_u */
+    16,        /* andr */
+    32,        /* andi */
+    16,        /* orr */
+    32,        /* ori */
+    16,        /* xorr */
+    32,        /* xori */
+    16,        /* lshr */
+    16,        /* lshi */
+    16,        /* rshr */
+    16,        /* rshi */
+    16,        /* rshr_u */
+    16,        /* rshi_u */
+    16,        /* negr */
+    16,        /* comr */
+    32,        /* ltr */
+    32,        /* lti */
+    32,        /* ltr_u */
+    32,        /* lti_u */
+    32,        /* ler */
+    32,        /* lei */
+    32,        /* ler_u */
+    32,        /* lei_u */
+    32,        /* eqr */
+    32,        /* eqi */
+    32,        /* ger */
+    32,        /* gei */
+    32,        /* ger_u */
+    32,        /* gei_u */
+    32,        /* gtr */
+    32,        /* gti */
+    32,        /* gtr_u */
+    32,        /* gti_u */
+    32,        /* ner */
+    32,        /* nei */
+    16,        /* movr */
+    16,        /* movi */
+    16,        /* extr_c */
+    16,        /* extr_uc */
+    16,        /* extr_s */
+    16,        /* extr_us */
+    16,        /* extr_i */
+    16,        /* extr_ui */
+    64,        /* htonr_us */
+    160,       /* htonr_ui */
+    16,        /* htonr_ul */
+    16,        /* ldr_c */
+    32,        /* ldi_c */
+    16,        /* ldr_uc */
+    32,        /* ldi_uc */
+    16,        /* ldr_s */
+    32,        /* ldi_s */
+    16,        /* ldr_us */
+    32,        /* ldi_us */
+    16,        /* ldr_i */
+    32,        /* ldi_i */
+    16,        /* ldr_ui */
+    32,        /* ldi_ui */
+    16,        /* ldr_l */
+    32,        /* ldi_l */
+    32,        /* ldxr_c */
+    48,        /* ldxi_c */
+    16,        /* ldxr_uc */
+    32,        /* ldxi_uc */
+    32,        /* ldxr_s */
+    48,        /* ldxi_s */
+    16,        /* ldxr_us */
+    32,        /* ldxi_us */
+    32,        /* ldxr_i */
+    48,        /* ldxi_i */
+    16,        /* ldxr_ui */
+    32,        /* ldxi_ui */
+    16,        /* ldxr_l */
+    32,        /* ldxi_l */
+    16,        /* str_c */
+    32,        /* sti_c */
+    16,        /* str_s */
+    32,        /* sti_s */
+    16,        /* str_i */
+    32,        /* sti_i */
+    16,        /* str_l */
+    32,        /* sti_l */
+    16,        /* stxr_c */
+    32,        /* stxi_c */
+    16,        /* stxr_s */
+    32,        /* stxi_s */
+    16,        /* stxr_i */
+    32,        /* stxi_i */
+    16,        /* stxr_l */
+    32,        /* stxi_l */
+    32,        /* bltr */
+    32,        /* blti */
+    32,        /* bltr_u */
+    32,        /* blti_u */
+    32,        /* bler */
+    32,        /* blei */
+    32,        /* bler_u */
+    32,        /* blei_u */
+    32,        /* beqr */
+    48,        /* beqi */
+    32,        /* bger */
+    32,        /* bgei */
+    32,        /* bger_u */
+    32,        /* bgei_u */
+    32,        /* bgtr */
+    32,        /* bgti */
+    32,        /* bgtr_u */
+    32,        /* bgti_u */
+    32,        /* bner */
+    48,        /* bnei */
+    32,        /* bmsr */
+    48,        /* bmsi */
+    32,        /* bmcr */
+    48,        /* bmci */
+    96,        /* boaddr */
+    112,       /* boaddi */
+    64,        /* boaddr_u */
+    64,        /* boaddi_u */
+    96,        /* bxaddr */
+    112,       /* bxaddi */
+    64,        /* bxaddr_u */
+    64,        /* bxaddi_u */
+    112,       /* bosubr */
+    112,       /* bosubi */
+    64,        /* bosubr_u */
+    64,        /* bosubi_u */
+    112,       /* bxsubr */
+    112,       /* bxsubi */
+    64,        /* bxsubr_u */
+    64,        /* bxsubi_u */
+    16,        /* jmpr */
+    16,        /* jmpi */
+    32,        /* callr */
+    48,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    128,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    16,        /* addr_f */
+    48,        /* addi_f */
+    16,        /* subr_f */
+    48,        /* subi_f */
+    48,        /* rsbi_f */
+    16,        /* mulr_f */
+    48,        /* muli_f */
+    160,       /* divr_f */
+    192,       /* divi_f */
+    16,        /* negr_f */
+    16,        /* absr_f */
+    80,        /* sqrtr_f */
+    32,        /* ltr_f */
+    64,        /* lti_f */
+    32,        /* ler_f */
+    64,        /* lei_f */
+    32,        /* eqr_f */
+    64,        /* eqi_f */
+    32,        /* ger_f */
+    64,        /* gei_f */
+    32,        /* gtr_f */
+    64,        /* gti_f */
+    32,        /* ner_f */
+    64,        /* nei_f */
+    32,        /* unltr_f */
+    64,        /* unlti_f */
+    32,        /* unler_f */
+    64,        /* unlei_f */
+    48,        /* uneqr_f */
+    96,        /* uneqi_f */
+    32,        /* unger_f */
+    64,        /* ungei_f */
+    32,        /* ungtr_f */
+    64,        /* ungti_f */
+    48,        /* ltgtr_f */
+    96,        /* ltgti_f */
+    32,        /* ordr_f */
+    64,        /* ordi_f */
+    32,        /* unordr_f */
+    64,        /* unordi_f */
+    32,        /* truncr_f_i */
+    32,        /* truncr_f_l */
+    48,        /* extr_f */
+    16,        /* extr_d_f */
+    16,        /* movr_f */
+    32,        /* movi_f */
+    16,        /* ldr_f */
+    32,        /* ldi_f */
+    16,        /* ldxr_f */
+    32,        /* ldxi_f */
+    16,        /* str_f */
+    32,        /* sti_f */
+    16,        /* stxr_f */
+    32,        /* stxi_f */
+    32,        /* bltr_f */
+    64,        /* blti_f */
+    32,        /* bler_f */
+    64,        /* blei_f */
+    32,        /* beqr_f */
+    64,        /* beqi_f */
+    32,        /* bger_f */
+    64,        /* bgei_f */
+    32,        /* bgtr_f */
+    64,        /* bgti_f */
+    32,        /* bner_f */
+    64,        /* bnei_f */
+    32,        /* bunltr_f */
+    64,        /* bunlti_f */
+    32,        /* bunler_f */
+    64,        /* bunlei_f */
+    80,        /* buneqr_f */
+    112,       /* buneqi_f */
+    32,        /* bunger_f */
+    64,        /* bungei_f */
+    32,        /* bungtr_f */
+    64,        /* bungti_f */
+    80,        /* bltgtr_f */
+    112,       /* bltgti_f */
+    32,        /* bordr_f */
+    64,        /* bordi_f */
+    32,        /* bunordr_f */
+    64,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    16,        /* addr_d */
+    48,        /* addi_d */
+    16,        /* subr_d */
+    48,        /* subi_d */
+    48,        /* rsbi_d */
+    16,        /* mulr_d */
+    48,        /* muli_d */
+    160,       /* divr_d */
+    192,       /* divi_d */
+    16,        /* negr_d */
+    16,        /* absr_d */
+    80,        /* sqrtr_d */
+    32,        /* ltr_d */
+    64,        /* lti_d */
+    32,        /* ler_d */
+    64,        /* lei_d */
+    32,        /* eqr_d */
+    64,        /* eqi_d */
+    32,        /* ger_d */
+    64,        /* gei_d */
+    32,        /* gtr_d */
+    64,        /* gti_d */
+    32,        /* ner_d */
+    64,        /* nei_d */
+    32,        /* unltr_d */
+    64,        /* unlti_d */
+    32,        /* unler_d */
+    64,        /* unlei_d */
+    48,        /* uneqr_d */
+    96,        /* uneqi_d */
+    32,        /* unger_d */
+    64,        /* ungei_d */
+    32,        /* ungtr_d */
+    64,        /* ungti_d */
+    48,        /* ltgtr_d */
+    96,        /* ltgti_d */
+    32,        /* ordr_d */
+    64,        /* ordi_d */
+    32,        /* unordr_d */
+    64,        /* unordi_d */
+    32,        /* truncr_d_i */
+    32,        /* truncr_d_l */
+    48,        /* extr_d */
+    16,        /* extr_f_d */
+    16,        /* movr_d */
+    32,        /* movi_d */
+    16,        /* ldr_d */
+    32,        /* ldi_d */
+    16,        /* ldxr_d */
+    32,        /* ldxi_d */
+    16,        /* str_d */
+    32,        /* sti_d */
+    16,        /* stxr_d */
+    32,        /* stxi_d */
+    32,        /* bltr_d */
+    64,        /* blti_d */
+    32,        /* bler_d */
+    64,        /* blei_d */
+    32,        /* beqr_d */
+    64,        /* beqi_d */
+    32,        /* bger_d */
+    64,        /* bgei_d */
+    32,        /* bgtr_d */
+    64,        /* bgti_d */
+    32,        /* bner_d */
+    64,        /* bnei_d */
+    32,        /* bunltr_d */
+    64,        /* bunlti_d */
+    32,        /* bunler_d */
+    64,        /* bunlei_d */
+    80,        /* buneqr_d */
+    112,       /* buneqi_d */
+    32,        /* bunger_d */
+    64,        /* bungei_d */
+    32,        /* bungtr_d */
+    64,        /* bungti_d */
+    80,        /* bltgtr_d */
+    112,       /* bltgti_d */
+    32,        /* bordr_d */
+    64,        /* bordi_d */
+    32,        /* bunordr_d */
+    64,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    16,        /* movr_d_w */
+    32,        /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c
new file mode 100644 (file)
index 0000000..9207d81
--- /dev/null
@@ -0,0 +1,1769 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+/* True when I is one of the eight register argument slots (0..7). */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
+/* Byte displacement added to an argument slot offset when accessing a
+ * value narrower than 8 bytes: zero on little endian, 8 - sizeof(type)
+ * on big endian.  The big endian expansions are parenthesized so that
+ * each macro stays a single operand wherever it is expanded. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define C_DISP                       0
+#  define S_DISP                       0
+#  define I_DISP                       0
+#  define F_DISP                       0
+#else
+#  define C_DISP                       (8 - sizeof(jit_int8_t))
+#  define S_DISP                       (8 - sizeof(jit_int16_t))
+#  define I_DISP                       (8 - sizeof(jit_int32_t))
+#  define F_DISP                       (8 - sizeof(jit_float32_t))
+#endif
+
+/*
+ * Types
+ */
+/* The va_list handle of this backend is a plain jit_pointer_t. */
+typedef jit_pointer_t jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* Convenience wrapper that passes the implicit _jit state to _patch(). */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+#if defined(__GNUC__)
+/* libgcc */
+extern void __clear_cache(void *, void *);
+#endif
+
+/* The cpu and fpu sources are included here with PROTO defined;
+ * NOTE(review): presumably this selects only their declaration part,
+ * confirm against jit_ia64-cpu.c / jit_ia64-fpu.c. */
+#define PROTO                          1
+#  include "jit_ia64-cpu.c"
+#  include "jit_ia64-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+jit_register_t         _rvs[] = {
+    /* Always 0 */
+    { 0,                "r0"  },
+    /* Global Pointer */
+    { rc(sav)|1,        "r1"  },
+    /* Used when a register cannot be allocated */
+    { 2,                "r2"  },
+    /* First scratch register */
+    { rc(gpr)|3,        "r3"  },
+    /* Use r4 as lightning fp register */
+    { rc(sav)|4,        "r4"  },
+    /* Do not touch callee save registers not automatically spill/reloaded */
+    { rc(sav)|5,        "r5"  },       { rc(sav)|6,            "r6"  },
+    { rc(sav)|7,        "r7"  },
+    /* Do not touch return register for the sake of simplicity, besides
+     * having JIT_R0 being the same as JIT_RET usually an optimization */
+    { 8,                "r8"  },
+    /* Return registers, use as temporaries */
+    { rc(gpr)|9,        "r9"  },
+    { rc(gpr)|10,       "r10" },       { rc(gpr)|11,           "r11" },
+    /* Stack pointer */
+    { rc(sav)|12,       "r12" },
+    /* Thread pointer */
+    { rc(sav)|13,       "r13" },
+    /* (Usually) assembly temporaries */
+    { rc(gpr)|31,       "r31" },       { rc(gpr)|30,           "r30" },
+    { rc(gpr)|29,       "r29" },       { rc(gpr)|28,           "r28" },
+    { rc(gpr)|27,       "r27" },       { rc(gpr)|26,           "r26" },
+    { rc(gpr)|25,       "r25" },       { rc(gpr)|24,           "r24" },
+    { rc(gpr)|23,       "r23" },       { rc(gpr)|22,           "r22" },
+    { rc(gpr)|21,       "r21" },       { rc(gpr)|20,           "r20" },
+    { rc(gpr)|19,       "r19" },       { rc(gpr)|18,           "r18" },
+    { rc(gpr)|17,       "r17" },       { rc(gpr)|16,           "r16" },
+    { rc(gpr)|15,       "r15" },       { rc(gpr)|14,           "r14" },
+    /* Do not allow allocating r32-r41 as temporaries for the sake of
+     * avoiding the need of extra complexity in the non backend code */
+    { rc(arg)|32,       "r32" },       { rc(arg)|33,           "r33" },
+    { rc(arg)|34,       "r34" },       { rc(arg)|35,           "r35" },
+    { rc(arg)|36,       "r36" },       { rc(arg)|37,           "r37" },
+    { rc(arg)|38,       "r38" },       { rc(arg)|39,           "r39" },
+    /* JIT_R0-JIT_V3 */
+    { rc(gpr)|rc(sav)|40, "r40" },     { rc(gpr)|rc(sav)|41,   "r41" },
+    { rc(gpr)|rc(sav)|42, "r42" },     { rc(gpr)|rc(sav)|43,   "r43" },
+    { rc(gpr)|rc(sav)|44, "r44" },     { rc(gpr)|rc(sav)|45,   "r45" },
+    { rc(gpr)|rc(sav)|46, "r46" },     { rc(gpr)|rc(sav)|47,   "r47" },
+    /* Temporaries/locals */
+    { rc(gpr)|rc(sav)|48, "r48" },     { rc(gpr)|rc(sav)|49,   "r49" },
+    { rc(gpr)|rc(sav)|50, "r50" },     { rc(gpr)|rc(sav)|51,   "r51" },
+    { rc(gpr)|rc(sav)|52, "r52" },     { rc(gpr)|rc(sav)|53,   "r53" },
+    { rc(gpr)|rc(sav)|54, "r54" },     { rc(gpr)|rc(sav)|55,   "r55" },
+    { rc(gpr)|rc(sav)|56, "r56" },     { rc(gpr)|rc(sav)|57,   "r57" },
+    { rc(gpr)|rc(sav)|58, "r58" },     { rc(gpr)|rc(sav)|59,   "r59" },
+    { rc(gpr)|rc(sav)|60, "r60" },     { rc(gpr)|rc(sav)|61,   "r61" },
+    { rc(gpr)|rc(sav)|62, "r62" },     { rc(gpr)|rc(sav)|63,   "r63" },
+    { rc(gpr)|rc(sav)|64, "r64" },     { rc(gpr)|rc(sav)|65,   "r65" },
+    { rc(gpr)|rc(sav)|66, "r66" },     { rc(gpr)|rc(sav)|67,   "r67" },
+    { rc(gpr)|rc(sav)|68, "r68" },     { rc(gpr)|rc(sav)|69,   "r69" },
+    { rc(gpr)|rc(sav)|70, "r70" },     { rc(gpr)|rc(sav)|71,   "r71" },
+    { rc(gpr)|rc(sav)|72, "r72" },     { rc(gpr)|rc(sav)|73,   "r73" },
+    { rc(gpr)|rc(sav)|74, "r74" },     { rc(gpr)|rc(sav)|75,   "r75" },
+    { rc(gpr)|rc(sav)|76, "r76" },     { rc(gpr)|rc(sav)|77,   "r77" },
+    { rc(gpr)|rc(sav)|78, "r78" },     { rc(gpr)|rc(sav)|79,   "r79" },
+    { rc(gpr)|rc(sav)|80, "r80" },     { rc(gpr)|rc(sav)|81,   "r81" },
+    { rc(gpr)|rc(sav)|82, "r82" },     { rc(gpr)|rc(sav)|83,   "r83" },
+    { rc(gpr)|rc(sav)|84, "r84" },     { rc(gpr)|rc(sav)|85,   "r85" },
+    { rc(gpr)|rc(sav)|86, "r86" },     { rc(gpr)|rc(sav)|87,   "r87" },
+    { rc(gpr)|rc(sav)|88, "r88" },     { rc(gpr)|rc(sav)|89,   "r89" },
+    { rc(gpr)|rc(sav)|90, "r90" },     { rc(gpr)|rc(sav)|91,   "r91" },
+    { rc(gpr)|rc(sav)|92, "r92" },     { rc(gpr)|rc(sav)|93,   "r93" },
+    { rc(gpr)|rc(sav)|94, "r94" },     { rc(gpr)|rc(sav)|95,   "r95" },
+    { rc(gpr)|rc(sav)|96, "r96" },     { rc(gpr)|rc(sav)|97,   "r97" },
+    { rc(gpr)|rc(sav)|98, "r98" },     { rc(gpr)|rc(sav)|99,   "r99" },
+    { rc(gpr)|rc(sav)|100,"r100"},     { rc(gpr)|rc(sav)|101,  "r101"},
+    { rc(gpr)|rc(sav)|102,"r102"},     { rc(gpr)|rc(sav)|103,  "r103"},
+    { rc(gpr)|rc(sav)|104,"r104"},     { rc(gpr)|rc(sav)|105,  "r105"},
+    { rc(gpr)|rc(sav)|106,"r106"},     { rc(gpr)|rc(sav)|107,  "r107"},
+    { rc(gpr)|rc(sav)|108,"r108"},     { rc(gpr)|rc(sav)|109,  "r109"},
+    { rc(gpr)|rc(sav)|110,"r110"},     { rc(gpr)|rc(sav)|111,  "r111"},
+    { rc(gpr)|rc(sav)|112,"r112"},     { rc(gpr)|rc(sav)|113,  "r113"},
+    { rc(gpr)|rc(sav)|114,"r114"},
+    /* Do not enable these because no matter what, want 13 free registers,
+     * 5 for prolog and epilog and 8 for outgoing arguments */
+    { 115,              "r115"},
+    { 116,              "r116"},       { 117,                  "r117"},
+    { 118,              "r118"},       { 119,                  "r119"},
+    { 120,              "r120"},       { 121,                  "r121"},
+    { 122,              "r122"},       { 123,                  "r123"},
+    { 124,              "r124"},       { 125,                  "r125"},
+    { 126,              "r126"},       { 127,                  "r127"},
+    /* Always 0.0 */
+    { 0,                "f0"  },
+    /* Always 1.0 */
+    { 1,                "f1"  },
+    /* Do not touch callee save registers not automatically spill/reloaded */
+    { rc(sav)|2,        "f2"  },       { rc(sav)|3,            "f3"  },
+    { rc(sav)|4,        "f4"  },       { rc(sav)|5,            "f5"  },
+    /* Scratch */
+    { rc(fpr)|6,        "f6"  },       { rc(fpr)|7,            "f7"  },
+    /* Do not allocate for the sake of simplification */
+    { rc(arg)|8,        "f8"  },
+    /* Scratch - Argument/return registers */
+    { rc(arg)|9,        "f9"  },
+    { rc(arg)|10,       "f10" },       { rc(arg)|11,           "f11" },
+    { rc(arg)|12,       "f12" },       { rc(arg)|13,           "f13" },
+    { rc(arg)|14,       "f14" },       { rc(arg)|15,           "f15" },
+    /* Do not touch callee save registers not automatically spill/reloaded */
+    { rc(sav)|16,       "f16" },       { rc(sav)|17,           "f17" },
+    { rc(sav)|18,       "f18" },       { rc(sav)|19,           "f19" },
+    { rc(sav)|20,       "f20" },       { rc(sav)|21,           "f21" },
+    { rc(sav)|22,       "f22" },       { rc(sav)|23,           "f23" },
+    { rc(sav)|24,       "f24" },       { rc(sav)|25,           "f25" },
+    { rc(sav)|26,       "f26" },       { rc(sav)|27,           "f27" },
+    { rc(sav)|28,       "f28" },       { rc(sav)|29,           "f29" },
+    { rc(sav)|30,       "f30" },       { rc(sav)|31,           "f31" },
+    /* Scratch */
+    { rc(fpr)|32,       "f32" },       { rc(fpr)|33,           "f33" },
+    { rc(fpr)|34,       "f34" },       { rc(fpr)|35,           "f35" },
+    { rc(fpr)|36,       "f36" },       { rc(fpr)|37,           "f37" },
+    { rc(fpr)|38,       "f38" },       { rc(fpr)|39,           "f39" },
+    { rc(fpr)|40,       "f40" },       { rc(fpr)|41,           "f41" },
+    { rc(fpr)|42,       "f42" },       { rc(fpr)|43,           "f43" },
+    { rc(fpr)|44,       "f44" },       { rc(fpr)|45,           "f45" },
+    { rc(fpr)|46,       "f46" },       { rc(fpr)|47,           "f47" },
+    { rc(fpr)|48,       "f48" },       { rc(fpr)|49,           "f49" },
+    { rc(fpr)|50,       "f50" },       { rc(fpr)|51,           "f51" },
+    { rc(fpr)|52,       "f52" },       { rc(fpr)|53,           "f53" },
+    { rc(fpr)|54,       "f54" },       { rc(fpr)|55,           "f55" },
+    { rc(fpr)|56,       "f56" },       { rc(fpr)|57,           "f57" },
+    { rc(fpr)|58,       "f58" },       { rc(fpr)|59,           "f59" },
+    { rc(fpr)|60,       "f60" },       { rc(fpr)|61,           "f61" },
+    { rc(fpr)|62,       "f62" },       { rc(fpr)|63,           "f63" },
+    { rc(fpr)|64,       "f64" },       { rc(fpr)|65,           "f65" },
+    { rc(fpr)|66,       "f66" },       { rc(fpr)|67,           "f67" },
+    { rc(fpr)|68,       "f68" },       { rc(fpr)|69,           "f69" },
+    { rc(fpr)|70,       "f70" },       { rc(fpr)|71,           "f71" },
+    { rc(fpr)|72,       "f72" },       { rc(fpr)|73,           "f73" },
+    { rc(fpr)|74,       "f74" },       { rc(fpr)|75,           "f75" },
+    { rc(fpr)|76,       "f76" },       { rc(fpr)|77,           "f77" },
+    { rc(fpr)|78,       "f78" },       { rc(fpr)|79,           "f79" },
+    { rc(fpr)|80,       "f80" },       { rc(fpr)|81,           "f81" },
+    { rc(fpr)|82,       "f82" },       { rc(fpr)|83,           "f83" },
+    { rc(fpr)|84,       "f84" },       { rc(fpr)|85,           "f85" },
+    { rc(fpr)|86,       "f86" },       { rc(fpr)|87,           "f87" },
+    { rc(fpr)|88,       "f88" },       { rc(fpr)|89,           "f89" },
+    { rc(fpr)|90,       "f90" },       { rc(fpr)|91,           "f91" },
+    { rc(fpr)|92,       "f92" },       { rc(fpr)|93,           "f93" },
+    { rc(fpr)|94,       "f94" },       { rc(fpr)|95,           "f95" },
+    { rc(fpr)|96,       "f96" },       { rc(fpr)|97,           "f97" },
+    { rc(fpr)|98,       "f98" },       { rc(fpr)|99,           "f99" },
+    { rc(fpr)|100,      "f100"},       { rc(fpr)|101,          "f101"},
+    { rc(fpr)|102,      "f102"},       { rc(fpr)|103,          "f103"},
+    { rc(fpr)|104,      "f104"},       { rc(fpr)|105,          "f105"},
+    { rc(fpr)|106,      "f106"},       { rc(fpr)|107,          "f107"},
+    { rc(fpr)|108,      "f108"},       { rc(fpr)|109,          "f109"},
+    { rc(fpr)|110,      "f110"},       { rc(fpr)|111,          "f111"},
+    { rc(fpr)|112,      "f112"},       { rc(fpr)|113,          "f113"},
+    { rc(fpr)|114,      "f114"},       { rc(fpr)|115,          "f115"},
+    { rc(fpr)|116,      "f116"},       { rc(fpr)|117,          "f117"},
+    { rc(fpr)|118,      "f118"},       { rc(fpr)|119,          "f119"},
+#if 0
+    /* commented to fit a jit_regset_t in 256 bits, so that the fake
+     * O0-O7 registers are easily patched when an argument is pushed */
+    { rc(fpr)|120,      "f120"},       { rc(fpr)|121,          "f121"},
+    { rc(fpr)|122,      "f122"},       { rc(fpr)|123,          "f123"},
+    { rc(fpr)|124,      "f124"},       { rc(fpr)|125,          "f125"},
+    { rc(fpr)|126,      "f126"},       { rc(fpr)|127,          "f127"},
+#endif
+    /* Fake registers to patch in movr and movi arguments */
+    { rc(arg)|120,      "o0"  },       { rc(arg)|121,          "o1"  },
+    { rc(arg)|122,      "o2"  },       { rc(arg)|123,          "o3"  },
+    { rc(arg)|124,      "o4"  },       { rc(arg)|125,          "o5"  },
+    { rc(arg)|126,      "o6"  },       { rc(arg)|127,          "o7"  },
+    { _NOREG,           "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU probing hook; this backend has nothing to detect, so the body is
+ * intentionally empty. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Per-state initialization: records the register count, creates the
+ * register set in _jitc->regs and marks jit_carry as unassigned. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    /* - 1 leaves out the trailing { _NOREG, "<none>" } entry of _rvs */
+    _jitc->reglen = jit_size(_rvs) - 1;
+    jit_regset_new(&_jitc->regs);
+    jit_carry = _NOREG;
+}
+
+/* Start a new function.  Any function still open is closed with
+ * jit_epilog() first; then a jit_function_t slot is taken from the
+ * functions vector (grown 16 entries at a time), its bookkeeping is
+ * reset, and the prolog/epilog nodes are created.  Only the prolog
+ * node is linked into the node list here. */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                offset;
+
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    /* index of the slot this function will occupy */
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    /* params_offset << 1, once for our arguments, once for arguments
+     * to any called function; required because offsets are computed
+     * as JIT_FP displacement */
+    _jitc->function->self.size = stack_framesize + (params_offset << 1);
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+    _jitc->function->self.call = jit_call_default;
+    /* one jit_int32_t entry per register in _rvs */
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    /* the epilog node is created now but not linked in this function */
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve LENGTH bytes in the current function's frame and return the
+ * resulting (downward growing) offset.  Before reserving, the running
+ * offset is aligned down to 2 bytes for a length of 2, to 4 bytes for
+ * lengths 3 and 4, and to 8 bytes for everything except 0 and 1.  While
+ * the code is not being realized, an allocai node recording the offset
+ * and length is also emitted. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+    if (length == 2)
+       _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       _jitc->function->self.aoff &= -8;
+    /* lengths 0 and 1 need no alignment */
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/* Dynamic stack allocation: V is the register holding the requested
+ * size and U receives the updated value of the per-function slot kept
+ * at JIT_FP + aoffoff.  The slot itself is created with jit_allocai()
+ * the first time allocar is used in a function. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    /* reg = -v with the low four bits cleared: a negative adjustment
+     * that is a multiple of 16 */
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -16);
+    /* apply the same adjustment to the saved slot value and to JIT_SP */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+    jit_dec_synth();
+}
+
+/* Return from the current function: emits an unconditional jump that is
+ * patched to target the function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *instr;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    /* jump to epilog */
+    instr = jit_jmpi();
+    jit_patch_at(instr, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the value of register U: copies it to JIT_RET, then emits the
+ * jump to the epilog through jit_ret(). */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    jit_movr(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate U: loads it into JIT_RET, then emits the jump to
+ * the epilog through jit_ret(). */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the float held in register U: copies it to JIT_FRET with
+ * movr_f, then emits the jump to the epilog through jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the float constant U: loads it into JIT_FRET with movi_f, then
+ * emits the jump to the epilog through jit_ret(). */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double held in register U: copies it to JIT_FRET with
+ * movr_d, then emits the jump to the epilog through jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double constant U: loads it into JIT_FRET with movi_d,
+ * then emits the jump to the epilog through jit_ret(). */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Close the current function: links its (so far unlinked) epilog node
+ * into the node list and clears _jitc->function. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* the epilog node must not have been linked yet */
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether argument node U has a slot number in the 0..7 range.
+ * U must be an arg, arg_f or arg_d node.
+ * NOTE(review): _jit_arg_f()/_jit_arg_d() give float arguments passed
+ * in floating point registers the slots 8..15, for which this returns
+ * false; confirm that is the intended answer for callers. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    assert(u->code == jit_code_arg ||
+          u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Mark the start of the variable argument list.  Outside of a prepare
+ * sequence this flags the function being defined as varargs, remembers
+ * in vagp how many integer argument slots were already taken, and links
+ * the ellipsis node to the prolog.  Inside a prepare sequence it flags
+ * the call being built instead and links the node to the prepare.
+ * Either flag may only be set once. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare) {
+       /* declaring the arguments of the current function */
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+       _jitc->function->vagp = _jitc->function->self.argi;
+       jit_link_prolog();
+    }
+    else {
+       /* building the argument list of a call */
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+       jit_link_prepare();
+    }
+    jit_dec_synth();
+}
+
+/* Pass the va_list held in register U to a call; on this backend it is
+ * pushed exactly like an ordinary integer register argument. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the current function and return
+ * its node.  The node's first operand is the argument location: the
+ * register slot number while slots 0..7 are available, otherwise the
+ * frame offset of a newly reserved word.  The second operand is the
+ * running argument count. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->self.argi)) {
+       /* out of register slots: take a word from the frame */
+       offset = func->self.size;
+       func->self.size += sizeof(jit_word_t);
+    }
+    else
+       offset = func->self.argi++;
+    node = jit_new_node_ww(jit_code_arg, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Declare the next float argument of the current function and return
+ * its node.  While register slots are available the location is
+ * 8 + the floating point argument index, or, once the function has been
+ * flagged varargs, the integer slot number; the integer slot counter
+ * advances in both cases.  Otherwise the location is the frame offset
+ * of a newly reserved word. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->self.argi)) {
+       offset = func->self.size;
+       func->self.size += sizeof(jit_word_t);
+    }
+    else {
+       if (func->self.call & jit_call_varargs)
+           offset = func->self.argi;
+       else
+           offset = 8 + func->self.argf++;
+       ++func->self.argi;
+    }
+    node = jit_new_node_ww(jit_code_arg_f, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Declare the next double argument of the current function and return
+ * its node.  While register slots are available the location is
+ * 8 + the floating point argument index, or, once the function has been
+ * flagged varargs, the integer slot number; the integer slot counter
+ * advances in both cases.  Otherwise the location is the frame offset
+ * of a newly reserved jit_float64_t. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->self.argi)) {
+       offset = func->self.size;
+       func->self.size += sizeof(jit_float64_t);
+    }
+    else {
+       if (func->self.call & jit_call_varargs)
+           offset = func->self.argi;
+       else
+           offset = 8 + func->self.argf++;
+       ++func->self.argi;
+    }
+    node = jit_new_node_ww(jit_code_arg_d, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Fetch integer argument V into register U as a signed char.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_c; any other location is loaded with ldxi_c from the frame at
+ * JIT_FP, with the offset adjusted by C_DISP. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_c(u, JIT_FP, slot + C_DISP);
+    else
+       jit_extr_c(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U as an unsigned char.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_uc; any other location is loaded with ldxi_uc from the frame at
+ * JIT_FP, with the offset adjusted by C_DISP. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_uc(u, JIT_FP, slot + C_DISP);
+    else
+       jit_extr_uc(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U as a signed short.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_s; any other location is loaded with ldxi_s from the frame at
+ * JIT_FP, with the offset adjusted by S_DISP. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_s(u, JIT_FP, slot + S_DISP);
+    else
+       jit_extr_s(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U as an unsigned short.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_us; any other location is loaded with ldxi_us from the frame at
+ * JIT_FP, with the offset adjusted by S_DISP. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_us(u, JIT_FP, slot + S_DISP);
+    else
+       jit_extr_us(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U as a signed int.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_i; any other location is loaded with ldxi_i from the frame at
+ * JIT_FP, with the offset adjusted by I_DISP. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_i(u, JIT_FP, slot + I_DISP);
+    else
+       jit_extr_i(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U as an unsigned int.  Slots
+ * 0..7 are read from the incoming registers starting at _R32 through
+ * extr_ui; any other location is loaded with ldxi_ui from the frame at
+ * JIT_FP, with the offset adjusted by I_DISP. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi_ui(u, JIT_FP, slot + I_DISP);
+    else
+       jit_extr_ui(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument V into register U at full word width.  Slots
+ * 0..7 are copied from the incoming registers starting at _R32; any
+ * other location is loaded from the frame at JIT_FP with no
+ * displacement adjustment. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_ldxi(u, JIT_FP, slot);
+    else
+       jit_movr(u, _R32 + slot);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument V with the value of register U: slots
+ * 0..7 are written to the incoming registers starting at _R32, any
+ * other location is stored to the frame at JIT_FP. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot))
+       jit_stxi(slot, JIT_FP, u);
+    else
+       jit_movr(_R32 + slot, u);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument V with the immediate U: slots 0..7 are
+ * loaded directly into the incoming registers starting at _R32; for any
+ * other location the constant goes through a temporary general purpose
+ * register and is stored to the frame at JIT_FP. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    slot = v->u.w;
+    if (!jit_arg_reg_p(slot)) {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(slot, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    else
+       jit_movi(_R32 + slot, u);
+    jit_dec_synth();
+}
+
+/* Fetch float argument V into register U.  Locations 8..15 name the
+ * floating point argument registers starting at _F8; locations 0..7
+ * are converted with movr_w_f from the register at _OUT0 + location;
+ * anything else is loaded from the frame at JIT_FP, with the offset
+ * adjusted by F_DISP.  The two register ranges cannot overlap, so the
+ * order in which they are tested is irrelevant. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    slot = v->u.w;
+    if (jit_arg_reg_p(slot - 8))
+       jit_movr_f(u, _F8 + (slot - 8));
+    else if (jit_arg_reg_p(slot))
+       jit_movr_w_f(u, _OUT0 + slot);
+    else
+       jit_ldxi_f(u, JIT_FP, slot + F_DISP);
+    jit_dec_synth();
+}
+
+/* Overwrite float argument V with the value of register U.  Locations
+ * 0..7 are written with movr_f_w to the register at _OUT0 + location;
+ * locations 8..15 name the floating point argument registers starting
+ * at _F8; anything else is stored to the frame at JIT_FP. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr_f_w(_OUT0 + v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movr_f(_F8 + (v->u.w - 8), u);
+    else
+       /* F_DISP adjusts the frame offset, matching the load done by
+        * getarg_f; it must not be added to the source register */
+       jit_stxi_f(v->u.w + F_DISP, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite float argument V with the constant U.  Locations 0..7 are
+ * written with movi_f_w to the register at _OUT0 + location; locations
+ * 8..15 name the floating point argument registers starting at _F8; for
+ * anything else the constant goes through a temporary floating point
+ * register and is stored to the frame at JIT_FP. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movi_f_w(_OUT0 + v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f(_F8 + (v->u.w - 8), u);
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       /* F_DISP adjusts the frame offset, matching the load done by
+        * getarg_f; it must not be added to the source register */
+       jit_stxi_f(v->u.w + F_DISP, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch double argument V into register U.  Locations 8..15 name the
+ * floating point argument registers starting at _F8; locations 0..7
+ * are converted with movr_w_d from the register at _OUT0 + location;
+ * anything else is loaded from the frame at JIT_FP.  The two register
+ * ranges cannot overlap, so the order in which they are tested is
+ * irrelevant. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_reg_p(slot - 8))
+       jit_movr_d(u, _F8 + (slot - 8));
+    else if (jit_arg_reg_p(slot))
+       jit_movr_w_d(u, _OUT0 + slot);
+    else
+       jit_ldxi_d(u, JIT_FP, slot);
+    jit_dec_synth();
+}
+
+/* Overwrite double argument V with the value of register U.  Locations
+ * 8..15 name the floating point argument registers starting at _F8;
+ * locations 0..7 are written with movr_d_w to the register at
+ * _OUT0 + location; anything else is stored to the frame at JIT_FP.
+ * The two register ranges cannot overlap, so the order in which they
+ * are tested is irrelevant. */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_reg_p(slot - 8))
+       jit_movr_d(_F8 + (slot - 8), u);
+    else if (jit_arg_reg_p(slot))
+       jit_movr_d_w(_OUT0 + slot, u);
+    else
+       jit_stxi_d(slot, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite double argument V with the constant U.  Locations 8..15
+ * name the floating point argument registers starting at _F8;
+ * locations 0..7 are written with movi_d_w to the register at
+ * _OUT0 + location; for anything else the constant goes through a
+ * temporary floating point register and is stored to the frame at
+ * JIT_FP.  The two register ranges cannot overlap, so the order in
+ * which they are tested is irrelevant. */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_reg_p(slot - 8))
+       jit_movi_d(_F8 + (slot - 8), u);
+    else if (jit_arg_reg_p(slot))
+       jit_movi_d_w(_OUT0 + slot, u);
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(slot, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register U as the next integer argument of the call being
+ * prepared.  The first eight go to the fake outgoing registers starting
+ * at _OUT0; later ones are stored at JIT_SP + params_offset + the bytes
+ * already pushed, and the pushed size grows by one word. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_function_t     *func;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       jit_stxi(func->call.size + params_offset, JIT_SP, u);
+       func->call.size += sizeof(jit_word_t);
+    }
+    else {
+       jit_movr(_OUT0 + func->call.argi, u);
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate U as the next integer argument of the call being
+ * prepared.  The first eight are loaded into the fake outgoing
+ * registers starting at _OUT0; later ones go through a temporary
+ * general purpose register and are stored at JIT_SP + params_offset +
+ * the bytes already pushed, and the pushed size grows by one word. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_function_t     *func;
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(func->call.size + params_offset, JIT_SP, regno);
+       func->call.size += sizeof(jit_word_t);
+       jit_unget_reg(regno);
+    }
+    else {
+       jit_movi(_OUT0 + func->call.argi, u);
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the float in register U as the next argument of the call being
+ * prepared.  While register slots remain, a call flagged varargs gets
+ * the value in the fake outgoing register _OUT0 + slot via movr_f_w,
+ * any other call gets it in the next floating point argument register
+ * starting at _F8; the integer slot counter advances either way.  With
+ * no slot left the value is stored at JIT_SP + params_offset + F_DISP +
+ * the bytes already pushed, and the pushed size grows by one word. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_function_t     *func;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       jit_stxi_f(func->call.size + params_offset + F_DISP,
+                  JIT_SP, u);
+       func->call.size += sizeof(jit_word_t);
+    }
+    else {
+       if (func->call.call & jit_call_varargs)
+           jit_movr_f_w(_OUT0 + func->call.argi, u);
+       else {
+           jit_movr_f(_F8 + func->call.argf, u);
+           ++func->call.argf;
+       }
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the float constant U as the next argument of the call being
+ * prepared.  While register slots remain, a call flagged varargs gets
+ * the value in the fake outgoing register _OUT0 + slot via movi_f_w,
+ * any other call gets it in the next floating point argument register
+ * starting at _F8; the integer slot counter advances either way.  With
+ * no slot left the constant goes through a temporary floating point
+ * register and is stored at JIT_SP + params_offset + F_DISP + the bytes
+ * already pushed, and the pushed size grows by one word. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_function_t     *func;
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(func->call.size + params_offset + F_DISP,
+                  JIT_SP, regno);
+       func->call.size += sizeof(jit_word_t);
+       jit_unget_reg(regno);
+    }
+    else {
+       if (func->call.call & jit_call_varargs)
+           jit_movi_f_w(_OUT0 + func->call.argi, u);
+       else {
+           jit_movi_f(_F8 + func->call.argf, u);
+           ++func->call.argf;
+       }
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the double in register U as the next argument of the call being
+ * prepared.  While register slots remain, a call flagged varargs gets
+ * the value in the fake outgoing register _OUT0 + slot via movr_d_w,
+ * any other call gets it in the next floating point argument register
+ * starting at _F8; the integer slot counter advances either way.  With
+ * no slot left the value is stored at JIT_SP + params_offset + the
+ * bytes already pushed, and the pushed size grows by
+ * sizeof(jit_float64_t). */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_function_t     *func;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       jit_stxi_d(func->call.size + params_offset, JIT_SP, u);
+       func->call.size += sizeof(jit_float64_t);
+    }
+    else {
+       if (func->call.call & jit_call_varargs)
+           jit_movr_d_w(_OUT0 + func->call.argi, u);
+       else {
+           jit_movr_d(_F8 + func->call.argf, u);
+           ++func->call.argf;
+       }
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the double constant U as the next argument of the call being
+ * prepared.  While register slots remain, a call flagged varargs gets
+ * the value in the fake outgoing register _OUT0 + slot via movi_d_w,
+ * any other call gets it in the next floating point argument register
+ * starting at _F8; the integer slot counter advances either way.  With
+ * no slot left the constant goes through a temporary floating point
+ * register and is stored at JIT_SP + params_offset + the bytes already
+ * pushed, and the pushed size grows by sizeof(jit_float64_t). */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_function_t     *func;
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->call.argi)) {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(func->call.size + params_offset, JIT_SP, regno);
+       func->call.size += sizeof(jit_float64_t);
+       jit_unget_reg(regno);
+    }
+    else {
+       if (func->call.call & jit_call_varargs)
+           jit_movi_d_w(_OUT0 + func->call.argi, u);
+       else {
+           jit_movi_d(_F8 + func->call.argf, u);
+           ++func->call.argf;
+       }
+       ++func->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Tell whether REGNO is used as an argument register by call NODE.
+ * This backend always answers no; both parameters are ignored. */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    /* Argument registers are allocated from the pool of unused registers */
+    return (0);
+}
+
+/* Finish the call being prepared by calling through register R0.  The
+ * function's alen is raised to the stack bytes this call pushed if that
+ * is larger, the call node records the number of integer (v) and
+ * floating point (w) register arguments, and the per-call counters and
+ * the prepare state are reset. */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_function_t     *func;
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    func = _jitc->function;
+    if (func->call.size > func->self.alen)
+       func->self.alen = func->call.size;
+    call = jit_callr(r0);
+    call->v.w = func->call.argi;
+    call->w.w = func->call.argf;
+    func->call.size = 0;
+    func->call.argf = 0;
+    func->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/* Finish the call being prepared by calling the address I0 and return
+ * the call node.  The function's alen is raised to the stack bytes this
+ * call pushed if that is larger, the call node records the number of
+ * integer (v) and floating point (w) register arguments, and the
+ * per-call counters and the prepare state are reset. */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    func = _jitc->function;
+    if (func->call.size > func->self.alen)
+       func->self.alen = func->call.size;
+    node = jit_calli(i0);
+    node->v.w = func->call.argi;
+    node->w.w = func->call.argf;
+    func->call.size = 0;
+    func->call.argf = 0;
+    func->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+/* Fetch the result of the last call into R0, converting JIT_RET with
+ * extr_c. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call into R0, converting JIT_RET with
+ * extr_uc. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call into R0, converting JIT_RET with
+ * extr_s. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call into R0, converting JIT_RET with
+ * extr_us. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_i: fetch a call result into r0 by applying extr_i to the integer
+ * return register JIT_RET.  The instruction is bracketed as one
+ * synthesized retval_i node.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    jit_extr_i(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_ui: fetch a call result into r0 by applying extr_ui to the
+ * integer return register JIT_RET.  The instruction is bracketed as one
+ * synthesized retval_ui node.
+ */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_l: fetch a call result into r0 with a plain register move from
+ * the integer return register JIT_RET (no extension instruction is used).
+ * The move is bracketed as one synthesized retval_l node.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    jit_movr(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_f: fetch a call result into r0 with a movr_f from the
+ * floating-point return register JIT_FRET.
+ *
+ * The move is bracketed as one synthesized retval_f node.  The node code
+ * must be retval_f (it used to be opened as retval_l, a copy-paste slip)
+ * so that the recorded node names the operation that was actually
+ * requested, in line with every other retval_* helper in this file.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    jit_movr_f(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_d: fetch a call result into r0 with a movr_d from the
+ * floating-point return register JIT_FRET.  The move is bracketed as one
+ * synthesized retval_d node.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    jit_movr_d(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+/*
+ * Generate machine code for the whole node list starting at _jitc->head.
+ *
+ * Each node is dispatched through one big switch to the matching
+ * instruction emitter.  Labels, notes and names record the current program
+ * counter in node->u.w.  Branches, jumps, calls and movi whose target has
+ * not been emitted yet are recorded with patch() and fixed up by the final
+ * loop once every target address is known.
+ *
+ * A function body may be emitted more than once: the prolog case takes a
+ * snapshot in `undo`, and when the epilog finds _jitc->again set the state
+ * is rewound to that snapshot and emission restarts at the prolog.
+ *
+ * Returns _jit->code.ptr on success, or NULL as soon as the program
+ * counter reaches _jitc->code.end.
+ */
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    /* snapshot taken at each prolog, restored if the function is redone */
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+       jit_word_t       prolog_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    /* If did resize the code buffer, these were not reset */
+    _jitc->ioff = 0;
+    jit_regset_set_ui(&_jitc->regs, 0);
+    _jitc->pred = 0;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    undo.prolog_offset = 0;
+    /* locate the first node that is not a label, note or name */
+    for (node = _jitc->head; node; node = node->next)
+       if (node->code != jit_code_label &&
+           node->code != jit_code_note &&
+           node->code != jit_code_name)
+           break;
+    if (node && (node->code != jit_code_prolog ||
+                !(_jitc->functions.ptr + node->w.w)->assume_frame)) {
+       /* code may start with a jump so add an initial function descriptor */
+       word = _jit->pc.w + 16;
+       il(word);               /* addr */
+       il(0);                  /* gp */
+    }
+    /* Each helper below expands to one `case` of the dispatch switch.  The
+     * suffix names the operand kinds in order: r is a register (mapped
+     * through rn()), w an immediate word, f a pointer to float data taken
+     * from a data node.  The b* variants are branches whose target node is
+     * node->u.n: when the target already carries jit_flag_patch its address
+     * (temp->u.w) is used directly, otherwise the branch is emitted against
+     * the current pc and registered with patch() for the fixup loop. */
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    /* main emission loop: one node per iteration */
+    for (node = _jitc->head; node; node = node->next) {
+       /* out of room in the code buffer: give up and report it */
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+       value = jit_classify(node->code);
+#if GET_JIT_SIZE
+       sync();
+#endif
+#if DEVEL_DISASSEMBLER
+       /* FIXME DEVEL_DISASSEMBLER should become DISASSEMBLER,
+        * but a "real" DEVEL_DISASSEMBLER should be required
+        * to turn the below "#if 0" into "#if 1" */
+#  if 0                /* Since disassembly outputs 3 instructions at a time,
+                * make it "#if 1" for more clear debug output. */
+       sync();
+#  endif
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               /* nothing done */
+               break;
+           case jit_code_note:         case jit_code_name:
+               sync();
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               sync();
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(hton, _ul);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rr(ext, _f);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rr(ext, _d);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   /* a jump emitted right after the initial descriptor:
+                    * flag it so prologs also get a function descriptor */
+                   if (_jit->pc.uc == _jit->code.ptr + 16)
+                       _jitc->jump = 1;
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli_p(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               jit_regset_set_ui(&_jitc->regs, 0);
+               _jitc->pred = 0;
+               sync();
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               /* snapshot everything the epilog needs to rewind to here */
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+               undo.prolog_offset = _jitc->prolog.offset;
+           restart_function:
+               _jitc->again = 0;
+               if (_jitc->jump && !_jitc->function->assume_frame) {
+                   /* remember prolog to hide offset adjustment for a jump
+                    * to the start of a function, what is expected to be
+                    * a common practice as first jit instruction */
+                   if (_jitc->prolog.offset >= _jitc->prolog.length) {
+                       _jitc->prolog.length += 16;
+                       jit_realloc((jit_pointer_t *)&_jitc->prolog.ptr,
+                                   (_jitc->prolog.length - 16) *
+                                   sizeof(jit_word_t),
+                                   _jitc->prolog.length * sizeof(jit_word_t));
+                   }
+                   _jitc->prolog.ptr[_jitc->prolog.offset++] = _jit->pc.w;
+                   /* function descriptor */
+                   word = _jit->pc.w + 16;
+                   il(word);                   /* addr */
+                   il(0);                      /* gp */
+               }
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               /* `again` was raised while emitting this function (the
+                * code setting it is not in this routine): forget the
+                * addresses of its labels, rewind to the prolog snapshot
+                * and emit the whole function once more */
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   _jitc->prolog.offset = undo.prolog_offset;
+                   _jitc->ioff = 0;
+                   jit_regset_set_ui(&_jitc->regs, 0);
+                   _jitc->pred = 0;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               sync();
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               jit_regset_set_ui(&_jitc->regs, 0);
+               _jitc->pred = 0;
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           /* the codes below emit nothing themselves in this switch */
+           case jit_code_live:
+           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:             case jit_code_getarg_ui:
+           case jit_code_getarg_l:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_ui:            case jit_code_retval_l:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           case jit_code_movr_w_f:
+               movr_w_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_f_w:
+               movr_f_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_w_d:
+               movr_w_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_d_w:
+               movr_d_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_f_w:
+               movi_f_w(rn(node->u.w), node->v.n->u.p);
+               break;
+           case jit_code_movi_d_w:
+               movi_d_w(rn(node->u.w), node->v.n->u.p);
+               break;
+           default:
+               abort();
+       }
+       /* give back the register held in jit_carry, unless this node is a
+        * note or one of the addc/addx/subc/subx instructions */
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+#if GET_JIT_SIZE
+       sync();
+#endif
+       jit_regarg_clr(node, value);
+       /* the only register that may remain in regarg is jit_carry */
+       if (jit_regset_cmp_ui(&_jitc->regarg, 0) != 0) {
+           assert(jit_regset_scan1(&_jitc->regarg, 0) == jit_carry);
+           assert(jit_regset_scan1(&_jitc->regarg, jit_carry + 1) == ULONG_MAX);
+       }
+       assert(_jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+    sync();
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrrw
+#undef case_rrf
+#undef case_rrw
+#undef case_rrrr
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    /* fix up every reference recorded by patch(): by now each target node
+     * holds its final address in u.w (movi keeps its target in v.n, all
+     * other patched codes in u.n) */
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(node->code, _jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_ia64-cpu.c"
+#  include "jit_ia64-fpu.c"
+#undef CODE
+
+/*
+ * Flush the caches for the freshly written code in [fptr, tptr).
+ *
+ * The range is widened outwards to whole pages (page size obtained from
+ * sysconf) and an "fc" instruction is issued every 32 bytes across it.
+ * Does nothing when not built with a GNU C compatible compiler.
+ *
+ * NOTE(review): no sync.i / srlz.i follows the fc loop -- confirm against
+ * the Itanium manual whether callers rely on something else to serialize.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t         page, first, last, addr;
+
+    page = sysconf(_SC_PAGE_SIZE);
+    /* round the start down and the end up to a page boundary */
+    first = (jit_word_t)fptr & -page;
+    last = (((jit_word_t)tptr) + page - 1) & -page;
+#  if 0
+    __clear_cache((void *)first, (void *)last);
+#  else
+    /* __clear_cache is a noop in (old?) gcc, but cache flush is
+     * required on a multi processor Linux system. */
+    addr = first;
+    while (addr < last) {
+       asm volatile("fc %0" :: "r"(addr) : "memory");
+       addr += 32;
+    }
+#  endif
+#endif
+}
+
+/* Use r2, which is reserved, so that no jit_get_reg call is required; also
+ * note that addil needs a register that fits in 2 bits, so, if using a
+ * register other than r2 it must be less than r8 (or check for a smaller
+ * immediate, i.e. i0 >= -8192 && i0 <= 8191) */
+/* Load (ldr) into r0 from the address r1 + i0; the address is formed in
+ * GR_2.
+ * NOTE(review): the asserted upper bound is exclusive, so i0 == 2097151
+ * is rejected -- confirm that this is intended. */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    assert(-2097152 <= i0 && i0 < 2097151);
+    /* GR_2 = r1 + i0 */
+    addi(GR_2, rn(r1), i0);
+    /* r0 = [GR_2] */
+    ldr(rn(r0), GR_2);
+}
+
+/* Store (str) r1 at the address r0 + i0; the address is formed in GR_2.
+ * NOTE(review): the asserted upper bound is exclusive, so i0 == 2097151
+ * is rejected -- confirm that this is intended. */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
+{
+    assert(-2097152 <= i0 && i0 < 2097151);
+    /* GR_2 = r0 + i0 */
+    addi(GR_2, rn(r0), i0);
+    /* [GR_2] = r1 */
+    str(GR_2, rn(r1));
+}
+
+/* Load (ldr_d) into the floating-point register r0 from the address
+ * r1 + i0; the address is formed in GR_2.
+ * NOTE(review): the asserted upper bound is exclusive, so i0 == 2097151
+ * is rejected -- confirm that this is intended. */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    assert(-2097152 <= i0 && i0 < 2097151);
+    /* GR_2 = r1 + i0 */
+    addi(GR_2, rn(r1), i0);
+    /* r0 = [GR_2] */
+    ldr_d(rn(r0), GR_2);
+}
+
+/* Store (str_d) the floating-point register r1 at the address r0 + i0;
+ * the address is formed in GR_2.
+ * NOTE(review): the asserted upper bound is exclusive, so i0 == 2097151
+ * is rejected -- confirm that this is intended. */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    assert(-2097152 <= i0 && i0 < 2097151);
+    /* GR_2 = r0 + i0 */
+    addi(GR_2, rn(r0), i0);
+    /* [GR_2] = r1 */
+    str_d(GR_2, rn(r1));
+}
+
+/*
+ * Record that the instruction at `instr` refers to a target whose address
+ * is not known yet, so that it can be fixed up later.
+ *
+ * `node` must carry jit_flag_node and its target (v.n for movi, u.n for
+ * everything else) must not be flagged jit_flag_patch yet.  The patch
+ * table grows by 1024 entries whenever it is full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_patch_t                *slot;
+
+    assert(node->flag & jit_flag_node);
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+    /* table full: extend it before taking the next slot */
+    if (_jitc->patches.length <= _jitc->patches.offset) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+    slot = _jitc->patches.ptr + _jitc->patches.offset++;
+    slot->inst = instr;
+    slot->node = node;
+}
diff --git a/deps/lightning/lib/jit_memory.c b/deps/lightning/lib/jit_memory.c
new file mode 100644 (file)
index 0000000..4d7f92d
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+#include <sys/mman.h>
+
+/*
+ * Prototypes
+ */
+static void *jit_default_alloc_func(size_t);
+static void *jit_default_realloc_func(void*, size_t);
+static void jit_default_free_func(void *);
+
+/*
+ * Initialization
+ */
+/* Allocator hooks called by jit_alloc, jit_realloc and jit_free.  They
+ * start out as the malloc/realloc/free wrappers defined in this file and
+ * are replaced by jit_set_memory_functions. */
+static jit_alloc_func_ptr jit_alloc_ptr = jit_default_alloc_func;
+static jit_realloc_func_ptr jit_realloc_ptr = jit_default_realloc_func;
+static jit_free_func_ptr jit_free_ptr = jit_default_free_func;
+
+/*
+ * Implementation
+ */
+/* memcpy wrapper that does not call memcpy at all for a zero size.
+ * Returns dst in every case. */
+jit_pointer_t
+jit_memcpy(jit_pointer_t dst, const void * src, jit_word_t size)
+{
+    if (size == 0)
+       return (dst);
+    return (memcpy(dst, src, size));
+}
+
+/* memmove wrapper that does not call memmove at all for a zero size.
+ * Returns dst in every case. */
+jit_pointer_t
+jit_memmove(jit_pointer_t dst, const void *src , jit_word_t size)
+{
+    if (size == 0)
+       return (dst);
+    return (memmove(dst, src, size));
+}
+
+/* Install the allocator hooks used by jit_alloc, jit_realloc and
+ * jit_free.  A NULL argument selects the matching default function
+ * defined in this file. */
+void
+jit_set_memory_functions(jit_alloc_func_ptr alloc_ptr,
+                        jit_realloc_func_ptr realloc_ptr,
+                        jit_free_func_ptr free_ptr)
+{
+    jit_alloc_ptr = alloc_ptr != NULL ?
+       alloc_ptr : jit_default_alloc_func;
+    jit_realloc_ptr = realloc_ptr != NULL ?
+       realloc_ptr : jit_default_realloc_func;
+    jit_free_ptr = free_ptr != NULL ?
+       free_ptr : jit_default_free_func;
+}
+
+/* Report the allocator hooks currently installed.  All three output
+ * pointers are written unconditionally, so none of them may be NULL. */
+void
+jit_get_memory_functions(jit_alloc_func_ptr *alloc_ptr,
+                        jit_realloc_func_ptr *realloc_ptr,
+                        jit_free_func_ptr *free_ptr)
+{
+    *free_ptr = jit_free_ptr;
+    *realloc_ptr = jit_realloc_ptr;
+    *alloc_ptr = jit_alloc_ptr;
+}
+
+/* Allocate size bytes through the current allocation hook and store the
+ * result in *ptr.  The memory is zero filled.  If the hook returns NULL
+ * (out of memory, or possibly size == 0), *ptr is NULL and nothing is
+ * written through it. */
+void
+jit_alloc(jit_pointer_t *ptr, jit_word_t size)
+{
+    *ptr = (*jit_alloc_ptr)(size);
+    /* passing a null pointer to memset is undefined behavior */
+    if (*ptr != NULL)
+       memset(*ptr, 0, size);
+}
+
+/* Resize *ptr from old_size to new_size bytes through the current
+ * reallocation hook and store the result back in *ptr.  When the block
+ * grows, the bytes from old_size up to new_size are zero filled.  If the
+ * hook returns NULL, *ptr is NULL and nothing is written through it. */
+void
+jit_realloc(jit_pointer_t *ptr, jit_word_t old_size, jit_word_t new_size)
+{
+    *ptr = (*jit_realloc_ptr)(*ptr, new_size);
+    /* only clear the new tail when there is a block to write to */
+    if (*ptr != NULL && old_size < new_size)
+       memset((jit_int8_t*)*ptr + old_size, 0, new_size - old_size);
+}
+
+/* Release *ptr through the current free hook and reset *ptr to NULL.
+ * When *ptr is already NULL the hook is not called. */
+void
+jit_free(jit_pointer_t *ptr)
+{
+    if (*ptr == NULL)
+       return;
+    (*jit_free_ptr)(*ptr);
+    *ptr = NULL;
+}
+
+/* Default allocation hook: forwards to malloc. */
+static void *
+jit_default_alloc_func(size_t size)
+{
+    void *block = malloc(size);
+
+    return block;
+}
+
+/* Default reallocation hook: forwards to realloc. */
+static void *
+jit_default_realloc_func(void *ptr, size_t size)
+{
+    void *block = realloc(ptr, size);
+
+    return block;
+}
+
+/* Default release hook: forwards to free. */
+static void
+jit_default_free_func(void *ptr)
+{
+    free(ptr);
+    return;
+}
diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c
new file mode 100644 (file)
index 0000000..8fb7fa1
--- /dev/null
@@ -0,0 +1,3157 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* One 32 bit instruction word, viewed either as a whole (op) or through
+ * one bit-field at a time.  Each member is "padding, then field b"; the
+ * little endian branch pads from bit 0 upwards, so e.g. hc is bits
+ * 31..26, rs 25..21, rt 20..16, rd 15..11, ic 10..6 and tc 5..0.
+ * The big endian branch must describe the same bits, so its padding is
+ * 32 - (little endian padding) - width for every member; in particular
+ * tr (bits 15..6) pads 16 and br (bits 25..6) pads 6, which also keeps
+ * every member within a single 32 bit unit. */
+typedef union {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } hc;
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } rs;
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fm;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rt;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } ft;
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rd;
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } ic;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fd;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 10; } tr;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 20; } br;
+    struct {                           jit_uint32_t b :  6; } tc;
+    struct {                           jit_uint32_t b : 11; } cc;
+    struct {                           jit_uint32_t b : 16; } is;
+    struct {                           jit_uint32_t b : 26; } ii;
+#else
+    struct {                           jit_uint32_t b :  6; } hc;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } rs;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fm;
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rt;
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } ft;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rd;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } ic;
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fd;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b : 10; } tr;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 20; } br;
+    struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } tc;
+    struct {   jit_uint32_t _:21;      jit_uint32_t b : 11; } cc;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b : 16; } is;
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 26; } ii;
+#endif
+    int                                        op;
+} jit_instr_t;
+/* FIXME */
+#  define jit_mips2_p()                        0
+#  define _ZERO_REGNO                  0
+#  define _T0_REGNO                    0x08
+#  define _T1_REGNO                    0x09
+#  define _T2_REGNO                    0x0a
+#  define _T3_REGNO                    0x0b
+#  define _T4_REGNO                    0x0c
+#  define _T5_REGNO                    0x0d
+#  define _T6_REGNO                    0x0e
+#  define _T7_REGNO                    0x0f
+#  define _S0_REGNO                    0x10
+#  define _S1_REGNO                    0x11
+#  define _S2_REGNO                    0x12
+#  define _S3_REGNO                    0x13
+#  define _S4_REGNO                    0x14
+#  define _S5_REGNO                    0x15
+#  define _S6_REGNO                    0x16
+#  define _S7_REGNO                    0x17
+#  define _T8_REGNO                    0x18
+#  define _T9_REGNO                    0x19
+#  define _SP_REGNO                    0x1d
+#  define _BP_REGNO                    0x1e
+#  define _RA_REGNO                    0x1f
+#  define _F16_REGNO                   16
+#  define _F18_REGNO                   18
+#  define _F20_REGNO                   20
+#  define _F22_REGNO                   22
+#  define _F24_REGNO                   24
+#  define _F26_REGNO                   26
+#  define _F28_REGNO                   28
+#  define _F30_REGNO                   30
+/* Word size dependent settings: stack_framesize, plus the unsuffixed
+ * ldr/ldi/ldxi/sti/stxi names, which alias the _i variants when
+ * __WORDSIZE is 32 and the _l variants otherwise. */
+#  if __WORDSIZE == 32
+#    if NEW_ABI
+#      define stack_framesize          144
+#    else
+#      define stack_framesize          112
+#    endif
+#    define ldr(u,v)                   ldr_i(u,v)
+#    define ldi(u,v)                   ldi_i(u,v)
+#    define ldxi(u,v,w)                        ldxi_i(u,v,w)
+#    define sti(u,v)                   sti_i(u,v)
+#    define stxi(u,v,w)                        stxi_i(u,v,w)
+#  else
+#    define stack_framesize            144
+#    define ldr(u,v)                   ldr_l(u,v)
+#    define ldi(u,v)                   ldi_l(u,v)
+#    define ldxi(u,v,w)                        ldxi_l(u,v,w)
+#    define sti(u,v)                   sti_l(u,v)
+#    define stxi(u,v,w)                        stxi_l(u,v,w)
+#  endif
+/* Range predicates for immediates.  The "short" forms test whether the
+ * value fits a signed (-32768..32767) or unsigned (0..65535) 16 bit
+ * field; the "int" forms test the 32 bit ranges and are always true
+ * when __WORDSIZE is 32.  Each argument is evaluated more than once. */
+#  define can_sign_extend_short_p(im)  ((im) >= -32768 && (im) <= 32767)
+#  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+#  if __WORDSIZE == 32
+#    define can_sign_extend_int_p(im)  1
+#    define can_zero_extend_int_p(im)  1
+#  else
+#    define can_sign_extend_int_p(im)                                  \
+       (((im) >= 0 && (im) <=  0x7fffffffL) ||                         \
+        ((im) <  0 && (im) >= -0x80000000L))
+#    define can_zero_extend_int_p(im)  ((im) >= 0 && (im) <= 0xffffffff)
+#  endif
+/* Numeric encodings for instruction fields.  The names are not grouped
+ * by field, so one value can appear under several names (MIPS_J and
+ * MIPS_SRL are both 0x02); which field a constant ends up in is decided
+ * by the macro that uses it, e.g. hi(MIPS_J,...) passes it as hc while
+ * rrit(...,MIPS_SRL) passes it as tc. */
+#  define MIPS_SPECIAL                 0x00
+#  define MIPS_REGIMM                  0x01
+#  define MIPS_J                       0x02
+#  define MIPS_SRL                     0x02
+#  define MIPS_JAL                     0x03
+#  define MIPS_SRA                     0x03
+#  define MIPS_BEQ                     0x04
+#  define MIPS_BNE                     0x05
+#  define MIPS_BLEZ                    0x06
+#  define MIPS_BGTZ                    0x07
+#  define MIPS_ADDI                    0x08
+#  define MIPS_ADDIU                   0x09
+#  define MIPS_SLTI                    0x0a
+#  define MIPS_SLTIU                   0x0b
+#  define MIPS_ANDI                    0x0c
+#  define MIPS_ORI                     0x0d
+#  define MIPS_XORI                    0x0e
+#  define MIPS_LUI                     0x0f
+#  define MIPS_COP0                    0x10
+#  define MIPS_COP1                    0x11
+#  define MIPS_COP2                    0x12
+#  define MIPS_COP1X                   0x13
+#  define MIPS_BEQL                    0x14
+#  define MIPS_BNEL                    0x15
+#  define MIPS_BLEZL                   0x16
+#  define MIPS_BGTZL                   0x17
+#  define MIPS_DADDI                   0x18
+#  define MIPS_DADDIU                  0x19
+#  define MIPS_LDL                     0x1a
+#  define MIPS_LDR                     0x1b
+#  define MIPS_SPECIAL2                        0x1c
+#  define MIPS_JALX                    0x1d
+#  define MIPS_SPECIAL3                        0x1f
+#  define MIPS_LB                      0x20
+#  define MIPS_LH                      0x21
+#  define MIPS_LWL                     0x22
+#  define MIPS_LW                      0x23
+#  define MIPS_LBU                     0x24
+#  define MIPS_LHU                     0x25
+#  define MIPS_LWR                     0x26
+#  define MIPS_LWU                     0x27
+#  define MIPS_SB                      0x28
+#  define MIPS_SH                      0x29
+#  define MIPS_SWL                     0x2a
+#  define MIPS_SW                      0x2b
+#  define MIPS_SWR                     0x2e
+#  define MIPS_CACHE                   0x2f
+#  define MIPS_LL                      0x30
+#  define MIPS_LWC1                    0x31
+#  define MIPS_LWC2                    0x32
+#  define MIPS_PREF                    0x33
+#  define MIPS_LLD                     0x34
+#  define MIPS_LDC1                    0x35
+#  define MIPS_LDC2                    0x36
+#  define MIPS_LD                      0x37
+#  define MIPS_SC                      0x38
+#  define MIPS_SCD                     0x3c
+#  define MIPS_SDC1                    0x3d
+#  define MIPS_SDC2                    0x3e
+#  define MIPS_SWC1                    0x39
+#  define MIPS_SWC2                    0x3a
+#  define MIPS_SD                      0x3f
+#  define MIPS_MF                      0x00
+#  define MIPS_DMF                     0x01
+#  define MIPS_CF                      0x02
+#  define MIPS_MFH                     0x03
+#  define MIPS_MT                      0x04
+#  define MIPS_DMT                     0x05
+#  define MIPS_CT                      0x06
+#  define MIPS_MTH                     0x07
+#  define MIPS_BC                      0x08
+#  define MIPS_WRPGPR                  0x0e
+#  define MIPS_BGZAL                   0x11
+#  define MIPS_MFMC0                   0x11
+#  define MIPS_BCF                     0x00
+#  define MIPS_BLTZ                    0x00
+#  define MIPS_BCT                     0x01
+#  define MIPS_BGEZ                    0x01
+#  define MIPS_BCFL                    0x02
+#  define MIPS_BLTZL                   0x02
+#  define MIPS_BCTL                    0x03
+#  define MIPS_BGEZL                   0x03
+#  define MIPS_TGEI                    0x08
+#  define MIPS_TGEIU                   0x09
+#  define MIPS_TLTI                    0x0a
+#  define MIPS_TLTIU                   0x0b
+#  define MIPS_TEQI                    0x0c
+#  define MIPS_TNEI                    0x0e
+#  define MIPS_BLTZAL                  0x10
+#  define MIPS_BGEZAL                  0x11
+#  define MIPS_BLTZALL                 0x12
+#  define MIPS_BGEZALL                 0x13
+#  define MIPS_SYNCI                   0x1f
+#  define MIPS_WSBH                    0x02
+#  define MIPS_DBSH                    0x02
+#  define MIPS_DSHD                    0x05
+#  define MIPS_SEB                     0x10
+#  define MIPS_SEH                     0x18
+#  define MIPS_MADD                    0x00
+#  define MIPS_SLL                     0x00
+#  define MIPS_EXT                     0x00
+#  define MIPS_DEXTM                   0x01
+#  define MIPS_MADDU                   0x01
+#  define MIPS_MOVFT                   0x01
+#  define MIPS_TLBR                    0x01
+#  define MIPS_MUL                     0x02
+#  define MIPS_DEXTU                   0x02
+#  define MIPS_TLBWI                   0x02
+#  define MIPS_DEXT                    0x03
+#  define MIPS_SLLV                    0x04
+#  define MIPS_INS                     0x04
+#  define MIPS_MSUB                    0x04
+#  define MIPS_DINSM                   0x05
+#  define MIPS_MSUBU                   0x05
+#  define MIPS_SRLV                    0x06
+#  define MIPS_DINSU                   0x06
+#  define MIPS_TLBWR                   0x06
+#  define MIPS_SRAV                    0x07
+#  define MIPS_DINS                    0x07
+#  define MIPS_JR                      0x08
+#  define MIPS_TLBP                    0x08
+#  define MIPS_JALR                    0x09
+#  define MIPS_MOVZ                    0x0a
+#  define MIPS_MOVN                    0x0b
+#  define MIPS_SYSCALL                 0x0c
+#  define MIPS_BREAK                   0x0d
+#  define MIPS_PREFX                   0x0f
+#  define MIPS_SYNC                    0x0f
+#  define MIPS_MFHI                    0x10
+#  define MIPS_MTHI                    0x11
+#  define MIPS_MFLO                    0x12
+#  define MIPS_MTLO                    0x13
+#  define MIPS_DSLLV                   0x14
+#  define MIPS_DSRLV                   0x16
+#  define MIPS_DSRAV                   0x17
+#  define MIPS_MULT                    0x18
+#  define MIPS_ERET                    0x18
+#  define MIPS_MULTU                   0x19
+#  define MIPS_DIV                     0x1a
+#  define MIPS_DIVU                    0x1b
+#  define MIPS_DMULT                   0x1c
+#  define MIPS_DMULTU                  0x1d
+#  define MIPS_DDIV                    0x1e
+#  define MIPS_DDIVU                   0x1f
+#  define MIPS_DERET                   0x1f
+#  define MIPS_ADD                     0x20
+#  define MIPS_CLZ                     0x20
+#  define MIPS_BSHFL                   0x20
+#  define MIPS_ADDU                    0x21
+#  define MIPS_CLO                     0x21
+#  define MIPS_SUB                     0x22
+#  define MIPS_SUBU                    0x23
+#  define MIPS_AND                     0x24
+#  define MIPS_DCLZ                    0x24
+#  define MIPS_DBSHFL                  0x24
+#  define MIPS_OR                      0x25
+#  define MIPS_DCLO                    0x25
+#  define MIPS_XOR                     0x26
+#  define MIPS_NOR                     0x27
+#  define MIPS_SLT                     0x2a
+#  define MIPS_SLTU                    0x2b
+#  define MIPS_DADD                    0x2c
+#  define MIPS_DADDU                   0x2d
+#  define MIPS_DSUB                    0x2e
+#  define MIPS_DSUBU                   0x2f
+#  define MIPS_TGE                     0x30
+#  define MIPS_TGEU                    0x31
+#  define MIPS_TLT                     0x32
+#  define MIPS_TLTU                    0x33
+#  define MIPS_TEQ                     0x34
+#  define MIPS_TNE                     0x36
+#  define MIPS_DSLL                    0x38
+#  define MIPS_DSRL                    0x3a
+#  define MIPS_DSRA                    0x3b
+#  define MIPS_DSLL32                  0x3c
+#  define MIPS_DSRL32                  0x3e
+#  define MIPS_DSRA32                  0x3f
+#  define MIPS_SDBPP                   0x3f
+/* Low level emit helpers.  Every macro here expands to code that uses a
+ * variable named _jit, which must be in scope at the point of use. */
+/* ii: store one word at _jit->pc.ui and advance that pointer.  The
+ * expansion is not parenthesized, so use it only as a statement. */
+#  define ii(i)                                *_jit->pc.ui++ = i
+static void
+_hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+       jit_int32_t,jit_int32_t);
+/* hrrrit takes hc, rs, rt, rd, im and tc; hrrr_t is the same with
+ * im = 0, and rrr_t additionally fixes hc = 0. */
+#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
+#  define hrrr_t(hc,rs,rt,rd,tc)       hrrrit(hc,rs,rt,rd,0,tc)
+#  define rrr_t(rs,rt,rd,tc)           hrrr_t(0,rs,rt,rd,tc)
+#  define hrri(hc,rs,rt,im)            _hrri(_jit,hc,rs,rt,im)
+static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define hi(hc,im)                    _hi(_jit,hc,im)
+static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
+/* NOP ignores its argument and emits a single all-zero word; nop
+ * forwards its argument to _nop. */
+#  define NOP(i0)                      ii(0)
+#  define nop(i0)                      _nop(_jit,i0)
+static void _nop(jit_state_t*,jit_int32_t);
+/* h_ri is hrri with rs = 0; rrit is hrrrit with hc = 0 and rs = 0 */
+#  define h_ri(hc,rt,im)               _hrri(_jit,hc,0,rt,im)
+#  define rrit(rt,rd,im,tc)            _hrrrit(_jit,0,0,rt,rd,im,tc)
+#  define LUI(rt,im)                   h_ri(MIPS_LUI,rt,im)
+#  define ADDU(rd,rs,rt)               rrr_t(rs,rt,rd,MIPS_ADDU)
+#  define DADDU(rd,rs,rt)              rrr_t(rs,rt,rd,MIPS_DADDU)
+#  define ADDIU(rt,rs,im)              hrri(MIPS_ADDIU,rs,rt,im)
+#  define DADDIU(rt,rs,im)             hrri(MIPS_DADDIU,rs,rt,im)
+#  define SUBU(rd,rs,rt)               rrr_t(rs,rt,rd,MIPS_SUBU)
+#  define DSUBU(rd,rs,rt)              rrr_t(rs,rt,rd,MIPS_DSUBU)
+#  define MULT(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
+#  define MULTU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
+#  define DMULT(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
+#  define DMULTU(rs,rt)                        rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
+#  define DIV(rs,rt)                   rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
+#  define DIVU(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
+#  define DDIV(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
+#  define DDIVU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
+#  define SLLV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SLLV)
+#  define SLL(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SLL)
+#  define DSLLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSLLV)
+#  define DSLL(rd,rt,sa)               rrit(rt,rd,sa,MIPS_DSLL)
+#  define DSLL32(rd,rt,sa)             rrit(rt,rd,sa,MIPS_DSLL32)
+#  define SRAV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SRAV)
+#  define SRA(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SRA)
+#  define SRLV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SRLV)
+#  define SRL(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SRL)
+#  define DSRAV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSRAV)
+#  define DSRA(rd,rt,sa)               rrit(rt,rd,sa,MIPS_DSRA)
+#  define DSRA32(rd,rt,sa)             rrit(rt,rd,sa,MIPS_DSRA32)
+#  define DSRLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSRLV)
+#  define DSRL(rd,rt,sa)               rrit(rt,rd,sa,MIPS_DSRL)
+#  define DSRL32(rd,rt,sa)             rrit(rt,rd,sa,MIPS_DSRL32)
+/* Bit-field insert.  In the MIPS encoding of INS/DINS bits 15..11 hold
+ * msb = pos+size-1 and bits 10..6 hold lsb = pos, so msb goes in the rd
+ * slot of hrrrit and pos in the im slot (the slot SLL/SRL use for the
+ * shift amount). */
+#  define INS(rt,rs,pos,size)          hrrrit(MIPS_SPECIAL3,rs,rt,(pos)+(size)-1,pos,MIPS_INS)
+#  define DINS(rt,rs,pos,size)         hrrrit(MIPS_SPECIAL3,rs,rt,(pos)+(size)-1,pos,MIPS_DINS)
+#  define ROTR(rd,rt,sa)               hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
+#  define DROTR(rd,rt,sa)              hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
+#  define MFHI(rd)                     rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
+#  define MFLO(rd)                     rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO)
+#  define MTHI(rs)                     rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTHI)
+#  define MTLO(rs)                     rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTLO)
+#  define AND(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_AND)
+#  define ANDI(rt,rs,im)               hrri(MIPS_ANDI,rs,rt,im)
+#  define OR(rd,rs,rt)                 rrr_t(rs,rt,rd,MIPS_OR)
+#  define ORI(rt,rs,im)                        hrri(MIPS_ORI,rs,rt,im)
+#  define XOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_XOR)
+#  define XORI(rt,rs,im)               hrri(MIPS_XORI,rs,rt,im)
+#  define LB(rt,of,rb)                 hrri(MIPS_LB,rb,rt,of)
+#  define LBU(rt,of,rb)                        hrri(MIPS_LBU,rb,rt,of)
+#  define LH(rt,of,rb)                 hrri(MIPS_LH,rb,rt,of)
+#  define LHU(rt,of,rb)                        hrri(MIPS_LHU,rb,rt,of)
+#  define LW(rt,of,rb)                 hrri(MIPS_LW,rb,rt,of)
+#  define LWU(rt,of,rb)                        hrri(MIPS_LWU,rb,rt,of)
+#  define LD(rt,of,rb)                 hrri(MIPS_LD,rb,rt,of)
+#  define SB(rt,of,rb)                 hrri(MIPS_SB,rb,rt,of)
+#  define SH(rt,of,rb)                 hrri(MIPS_SH,rb,rt,of)
+#  define SW(rt,of,rb)                 hrri(MIPS_SW,rb,rt,of)
+#  define SD(rt,of,rb)                 hrri(MIPS_SD,rb,rt,of)
+#  define WSBH(rd,rt)                  hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
+#  define SEB(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
+#  define SEH(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
+#  define SLT(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_SLT)
+#  define SLTU(rd,rs,rt)               rrr_t(rs,rt,rd,MIPS_SLTU)
+#  define SLTI(rt,rs,im)               hrri(MIPS_SLTI,rs,rt,im)
+#  define SLTIU(rt,rs,im)              hrri(MIPS_SLTIU,rs,rt,im)
+#  define BLTZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BLTZ,im)
+#  define BLEZ(rs,im)                  hrri(MIPS_BLEZ,rs,_ZERO_REGNO,im)
+#  define BEQ(rs,rt,im)                        hrri(MIPS_BEQ,rs,rt,im)
+#  define BGEZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
+#  define BGTZ(rs,im)                  hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
+#  define BNE(rs,rt,im)                        hrri(MIPS_BNE,rs,rt,im)
+/* JALR passes _RA_REGNO in the rd slot.  JR has two candidate
+ * encodings: a JALR with 0 (_ZERO_REGNO) in the rd slot, or the
+ * MIPS_JR function code.  The "#if 1" hardwires the first one, which
+ * per the branch comments is the form that supports MIPS32 R6. */
+#  define JALR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
+#  if 1 /* supports MIPS32 R6 */
+#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
+#  else /* does not support MIPS32 R6 */
+#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
+#  endif
+#  define J(i0)                                hi(MIPS_J,i0)
+#  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
+#  define comr(r0,r1)                  xori(r0,r1,-1)
+#  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
+#  if __WORDSIZE == 32
+#    define addr(rd,rs,rt)             ADDU(rd,rs,rt)
+#    define addiu(r0,r1,i0)            ADDIU(r0,r1,i0)
+#    define subr(rd,rs,rt)             SUBU(rd,rs,rt)
+#    define mult(rs,rt)                        MULT(rs,rt)
+#    define multu(rs,rt)               MULTU(rs,rt)
+#    define div(rs,rt)                 DIV(rs,rt)
+#    define divu(rs,rt)                        DIVU(rs,rt)
+#  else
+#    define addr(rd,rs,rt)             DADDU(rd,rs,rt)
+#    define addiu(r0,r1,i0)            DADDIU(r0,r1,i0)
+#    define subr(rd,rs,rt)             DSUBU(rd,rs,rt)
+#    define mult(rs,rt)                        DMULT(rs,rt)
+#    define multu(rs,rt)               DMULTU(rs,rt)
+#    define div(rs,rt)                 DDIV(rs,rt)
+#    define divu(rs,rt)                        DDIVU(rs,rt)
+#  endif
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addcr(r0,r1,r2)                        _addcr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define addci(r0,r1,i0)                        _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,1)
+#  define qmulr_u(r0,r1,r2,r3)         iqmulr(r0,r1,r2,r3,0)
+#  define iqmulr(r0,r1,r2,r3,cc)       _iqmulr(_jit,r0,r1,r2,r3,cc)
+static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qmuli(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,1)
+#  define qmuli_u(r0,r1,r2,i0)         iqmuli(r0,r1,r2,i0,0)
+#  define iqmuli(r0,r1,r2,i0,cc)       _iqmuli(_jit,r0,r1,r2,i0,cc)
+static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,1)
+#  define qdivr_u(r0,r1,r2,r3)         iqdivr(r0,r1,r2,r3,0)
+#  define iqdivr(r0,r1,r2,r3,cc)       _iqdivr(_jit,r0,r1,r2,r3,cc)
+static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qdivi(r0,r1,r2,i0)           iqdivi(r0,r1,r2,i0,1)
+#  define qdivi_u(r0,r1,r2,i0)         iqdivi(r0,r1,r2,i0,0)
+#  define iqdivi(r0,r1,r2,i0,cc)       _iqdivi(_jit,r0,r1,r2,i0,cc)
+static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define lshr(r0,r1,r2)             SLLV(r0,r1,r2)
+#    define lshi(r0,r1,i0)             SLL(r0,r1,i0)
+#    define rshr(r0,r1,r2)             SRAV(r0,r1,r2)
+#    define rshi(r0,r1,i0)             SRA(r0,r1,i0)
+#    define rshr_u(r0,r1,r2)           SRLV(r0,r1,r2)
+#    define rshi_u(r0,r1,i0)           SRL(r0,r1,i0)
+#  else
+#    define lshr(r0,r1,r2)             DSLLV(r0,r1,r2)
+#    define lshi(r0,r1,i0)             _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define rshr(r0,r1,r2)             DSRAV(r0,r1,r2)
+#    define rshi(r0,r1,i0)             _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define rshr_u(r0,r1,r2)           DSRLV(r0,r1,r2)
+#    define rshi_u(r0,r1,i0)           _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+#  define andr(r0,r1,r2)               AND(r0,r1,r2)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        OR(r0,r1,r2)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               XOR(r0,r1,r2)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define movr(r0,r1)                  orr(r0,r1,_ZERO_REGNO)
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_c(r0,r1)                 LB(r0,0,r1)
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        LBU(r0,0,r1)
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 LH(r0,0,r1)
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        LHU(r0,0,r1)
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 LW(r0,0,r1)
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldr_ui(r0,r1)              LWU(r0,0,r1)
+#    define ldi_ui(r0,i0)              _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldr_l(r0,r1)               LD(r0,0,r1)
+#    define ldi_l(r0,i0)               _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  endif
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2) /* ldxr_*: indexed load into r0 from address r1 + r2 */
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0) /* ldxi_*: indexed load into r0 from address r1 + i0 */
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64 /* the _ui and _l indexed loads exist only for 64-bit words */
+#    define ldxr_ui(r0,r1,r2)          _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_ui(r0,r1,i0)          _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldxr_l(r0,r1,r2)           _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+#  define str_c(r0,r1)                 SB(r1,0,r0) /* str_*: store r1 to the address held in r0 (the store macros take the value register first) */
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0) /* sti_*: store r0 to the immediate address i0 */
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_s(r0,r1)                 SH(r1,0,r0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0,r1)                 SW(r1,0,r0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  if __WORDSIZE == 64 /* the _l store variants exist only for 64-bit words */
+#    define str_l(r0,r1)               SD(r1,0,r0)
+#    define sti_l(i0,r0)               _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  endif
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2) /* stxr_*: store r2 to address r0 + r1 */
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1) /* stxi_*: store r1 to address r0 + i0 */
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64 /* the _l indexed stores exist only for 64-bit words */
+#    define stxr_l(r0,r1,r2)           _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define stxi_l(i0,r0,r1)           _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
+#  if __BYTE_ORDER == __LITTLE_ENDIAN /* little-endian build: htonr_* are out-of-line helpers */
+#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#    if __WORDSIZE == 64
+#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
+static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#    endif
+#  else /* any other byte order: htonr_* reduce to an unsigned extension or a plain move */
+#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    if __WORDSIZE == 32
+#      define htonr_ui(r0,r1)          movr(r0,r1)
+#    else
+#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
+#      define htonr_ul(r0,r1)          movr(r0,r1)
+#    endif
+#  endif
+#  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1) /* extr_*: width-extension of r1 into r0 */
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0,r1)               ANDI(r0,r1,0xff) /* unsigned byte extension is a single ANDI mask */
+#  define extr_s(r0,r1)                        _extr_s(_jit,r0,r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0,r1)               ANDI(r0,r1,0xffff) /* unsigned halfword extension is a single ANDI mask */
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              SLL(r0,r1,0) /* NOTE(review): presumably relies on the 32-bit SLL sign-extending its result on MIPS64 -- confirm */
+#    define extr_ui(r0,r1)             _extr_ui(_jit,r0,r1)
+static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define ltr(r0,r1,r2)                        SLT(r0,r1,r2) /* comparisons write their result to r0; less-than maps directly onto SLT/SLTU */
+#  define lti(r0,r1,i0)                        _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0,r1,r2)              SLTU(r0,r1,r2)
+#  define lti_u(r0,r1,i0)              _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler(r0,r1,r2)                  _ler(_jit,r0,r1,r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei(r0,r1,i0)                  _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler_u(r0,r1,r2)                        _ler_u(_jit,r0,r1,r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_u(r0,r1,i0)                        _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define eqr(r0,r1,r2)                  _eqr(_jit,r0,r1,r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi(r0,r1,i0)                  _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger(r0,r1,r2)                  _ger(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei(r0,r1,i0)                  _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger_u(r0,r1,i0)                        _ger_u(_jit,r0,r1,i0) /* NOTE(review): the third macro parameter is named i0 but is forwarded to a jit_int32_t argument, like r2 in ler_u */
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_u(r0,r1,i0)                        _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0,r1,r2)                        SLT(r0,r2,r1) /* greater-than is SLT with the two source operands swapped */
+#define gti(r0,r1,i0)                  _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0,r1,r2)              SLTU(r0,r2,r1) /* unsigned greater-than is SLTU with the two source operands swapped */
+#  define gti_u(r0,r1,i0)              _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ner(r0,r1,r2)                  _ner(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1) /* compare-and-branch helpers; NOTE(review): i0 is presumably the target and the jit_word_t result the location to patch -- confirm against the definitions */
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1) /* the *i forms compare r0 against the immediate i1 instead of a second register */
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define jmpr(r0)                     _jmpr(_jit,r0) /* unconditional jumps: register form returns nothing, immediate form returns a jit_word_t */
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define boaddr(i0,r0,r1)             _boaddr(_jit,i0,r0,r1) /* boadd, bxadd, bosub and bxsub families; NOTE(review): presumably branch on overflow / on no overflow of the add or sub -- confirm */
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi(i0,r0,i1)             _boaddi(_jit,i0,r0,i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr_u(i0,r0,r1)           _boaddr_u(_jit,i0,r0,r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi_u(i0,r0,i1)           _boaddi_u(_jit,i0,r0,i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr(i0,r0,r1)             _bxaddr(_jit,i0,r0,r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi(i0,r0,i1)             _bxaddi(_jit,i0,r0,i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr_u(i0,r0,r1)           _bxaddr_u(_jit,i0,r0,r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi_u(i0,r0,i1)           _bxaddi_u(_jit,i0,r0,i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0,r0,r1)             _bosubr(_jit,i0,r0,r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi(i0,r0,i1)             _bosubi(_jit,i0,r0,i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr_u(i0,r0,r1)           _bosubr_u(_jit,i0,r0,r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi_u(i0,r0,i1)           _bosubi_u(_jit,i0,r0,i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr(i0,r0,r1)             _bxsubr(_jit,i0,r0,r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi(i0,r0,i1)             _bxsubi(_jit,i0,r0,i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr_u(i0,r0,r1)           _bxsubr_u(_jit,i0,r0,r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi_u(i0,r0,i1)           _bxsubi_u(_jit,i0,r0,i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0,r0,r1)               _bmsr(_jit,i0,r0,r1) /* bms and bmc families; NOTE(review): presumably branch if mask bits set / clear -- confirm */
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmsi(i0,r0,i1)               _bmsi(_jit,i0,r0,i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmcr(i0,r0,r1)               _bmcr(_jit,i0,r0,r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmci(i0,r0,i1)               _bmci(_jit,i0,r0,i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define callr(r0)                    _callr(_jit,r0) /* calls: register target, immediate target, and calli_p which additionally returns a jit_word_t */
+static void _callr(jit_state_t*,jit_int32_t);
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define prolog(node)                 _prolog(_jit,node) /* prolog/epilog emitters, each driven by a jit_node_t */
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(node)                 _epilog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0) /* NOTE(review): presumably variadic-argument support (va_start / va_arg analogues) -- confirm */
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#define patch_abs(instr,label)         _patch_abs(_jit,instr,label) /* patch helpers take two jit_word_t values: an instruction/jump location and a label */
+static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
+#define patch_at(jump,label)           _patch_at(_jit,jump,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/*
+ * Assemble and emit one instruction word from six fields: hc, rs, rt, rd,
+ * ic and tc are stored into the like-named bit-fields of jit_instr_t and
+ * the resulting word is emitted with ii().
+ */
+static void
+_hrrrit(jit_state_t *_jit,jit_int32_t hc,
+       jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
+       jit_int32_t ic, jit_int32_t tc)
+{
+    jit_instr_t                i;
+    /* Start from a defined word, as _hrri() does, so that no bit of i.op
+     * is stored to or emitted while still indeterminate. */
+    i.op = 0;
+    i.tc.b = tc;
+    i.ic.b = ic;
+    i.rd.b = rd;
+    i.rt.b = rt;
+    i.rs.b = rs;
+    i.hc.b = hc;
+    ii(i.op);
+}
+
+/*
+ * Assemble and emit one instruction word from hc, rs, rt and the immediate
+ * im (stored through the "is" bit-field).  The word is cleared first and
+ * emitted with ii().
+ */
+static void
+_hrri(jit_state_t *_jit, jit_int32_t hc,
+      jit_int32_t rs, jit_int32_t rt, jit_int32_t im)
+{
+    jit_instr_t enc;
+
+    enc.op = 0;
+    enc.is.b = im;
+    enc.rt.b = rt;
+    enc.rs.b = rs;
+    enc.hc.b = hc;
+    ii(enc.op);
+}
+
+/*
+ * Assemble and emit one instruction word from hc and the immediate im
+ * (stored through the "ii" bit-field of jit_instr_t), then emit it with ii().
+ */
+static void
+_hi(jit_state_t *_jit, jit_int32_t hc, jit_int32_t im)
+{
+    jit_instr_t                i;
+    /* Start from a defined word, as _hrri() does, so that no bit of i.op
+     * is stored to or emitted while still indeterminate. */
+    i.op = 0;
+    i.ii.b = im;
+    i.hc.b = hc;
+    ii(i.op);
+}
+
+/*
+ * Emit one NOP() per 4 units of i0.  i0 has to be a multiple of 4 so that
+ * the countdown lands exactly on zero; this is checked by the assert.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+       NOP();
+       i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/*
+ * r0 = r1 + i0.
+ * Zero degenerates to a move, an immediate accepted by
+ * can_sign_extend_short_p() is encoded directly in addiu, and anything else
+ * is first loaded into a scratch register.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (can_sign_extend_short_p(i0)) {
+       addiu(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + r2, leaving the carry-out in the jit_carry register (which is
+ * allocated on first use).  The carry is computed with SLTU as (sum < r1),
+ * so when r0 aliases r1 the sum is built in a scratch register to keep the
+ * original r1 available for that comparison.
+ */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addr(r0, r1, r2);
+       SLTU(rn(jit_carry), r0, r1);
+    }
+    else {
+       jit_int32_t sum = jit_get_reg(jit_class_gpr);
+
+       addr(rn(sum), r1, r2);
+       SLTU(rn(jit_carry), rn(sum), r1);
+       movr(r0, rn(sum));
+       jit_unget_reg(sum);
+    }
+}
+
+/*
+ * r0 = r1 + i0, leaving the carry-out in the jit_carry register (allocated
+ * on first use).  A scratch register is always reserved: it holds the
+ * immediate when it does not pass can_sign_extend_short_p(), and it holds
+ * the sum when r0 aliases r1, so that r1 is still intact for the
+ * (sum < r1) carry test.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    jit_int32_t dst;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    tmp = jit_get_reg(jit_class_gpr);
+    /* Destination of the addition: the scratch register when r0 == r1. */
+    dst = (r0 == r1) ? rn(tmp) : r0;
+    if (can_sign_extend_short_p(i0))
+       addiu(dst, r1, i0);
+    else {
+       movi(rn(tmp), i0);
+       addr(dst, r1, rn(tmp));
+    }
+    SLTU(rn(jit_carry), dst, r1);
+    if (r0 == r1)
+       movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + r2 + carry-in, where the carry-in is the current content of the
+ * jit_carry register (which must already have been allocated).
+ * NOTE(review): each addcr() rewrites jit_carry, so the carry produced by
+ * r1 + r2 is replaced by the one produced when adding the carry-in --
+ * confirm this is intended for chains longer than two words.
+ */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t carry_in;
+
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    /* Save the incoming carry: the first addcr() below overwrites it. */
+    movr(rn(carry_in), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 + i0 + carry-in, where the carry-in is the current content of the
+ * jit_carry register (which must already have been allocated).
+ * NOTE(review): the trailing addcr() rewrites jit_carry, replacing the carry
+ * produced by addci() -- confirm this is intended for longer chains.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t carry_in;
+
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    /* Save the incoming carry: addci() below overwrites it. */
+    movr(rn(carry_in), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 - i0.
+ * Zero degenerates to a move.  A short immediate is subtracted by adding its
+ * negation with addiu; the value whose low 16 bits are 0x8000 is excluded
+ * from that path because it is the negated value that gets encoded.
+ * Everything else goes through a scratch register and subr.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000) {
+       addiu(r0, r1, -i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - r2, leaving the borrow in the jit_carry register (allocated on
+ * first use).  The borrow is computed with SLTU as (r1 < difference), so
+ * when r0 aliases r1 the difference is built in a scratch register to keep
+ * the original r1 available for that comparison.
+ */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       SLTU(rn(jit_carry), r1, r0);
+    }
+    else {
+       jit_int32_t diff = jit_get_reg(jit_class_gpr);
+
+       subr(rn(diff), r1, r2);
+       SLTU(rn(jit_carry), r1, rn(diff));
+       movr(r0, rn(diff));
+       jit_unget_reg(diff);
+    }
+}
+
+/*
+ * r0 = r1 - i0, leaving the borrow in the jit_carry register (allocated on
+ * first use).  A scratch register is always reserved: it holds the
+ * immediate when the negated-addiu shortcut does not apply, and it holds
+ * the difference when r0 aliases r1, so that r1 is still intact for the
+ * (r1 < difference) borrow test.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    jit_int32_t dst;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    tmp = jit_get_reg(jit_class_gpr);
+    /* Destination of the subtraction: the scratch register when r0 == r1. */
+    dst = (r0 == r1) ? rn(tmp) : r0;
+    if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000)
+       addiu(dst, r1, -i0);
+    else {
+       movi(rn(tmp), i0);
+       subr(dst, r1, rn(tmp));
+    }
+    SLTU(rn(jit_carry), r1, dst);
+    if (r0 == r1)
+       movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - r2 - borrow-in, where the borrow-in is the current content of
+ * the jit_carry register (which must already have been allocated).
+ * NOTE(review): each subcr() rewrites jit_carry, so the borrow produced by
+ * r1 - r2 is replaced by the one produced when subtracting the borrow-in --
+ * confirm this is intended for chains longer than two words.
+ */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    /* Save the incoming borrow: the first subcr() below overwrites it. */
+    movr(rn(borrow_in), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/*
+ * r0 = r1 - i0 - borrow-in, where the borrow-in is the current content of
+ * the jit_carry register (which must already have been allocated).
+ * NOTE(review): the trailing subcr() rewrites jit_carry, replacing the
+ * borrow produced by subci() -- confirm this is intended for longer chains.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    /* Save the incoming borrow: subci() below overwrites it. */
+    movr(rn(borrow_in), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/*
+ * Reverse subtract: r0 = i0 - r1, obtained as the negation of (r1 - i0).
+ */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* r0 = r1 - i0 */
+    subi(r0, r1, i0);
+    /* r0 = -(r1 - i0) */
+    negr(r0, r0);
+}
+
+/*
+ * r0 = r1 * r2: issue multu on the two sources and fetch the result with
+ * MFLO.
+ */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    multu(r1, r2);
+    MFLO(r0);
+}
+
+/*
+ * r0 = r1 * i0: the immediate is loaded into a scratch register and the
+ * work is delegated to mulr().
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Full multiply of r2 by r3: r0 receives MFLO and r1 receives MFHI.
+ * sign selects mult (non-zero) or multu (zero).
+ */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    if (!sign)
+       multu(r2, r3);
+    else
+       mult(r2, r3);
+    MFLO(r0);
+    MFHI(r1);
+}
+
+/*
+ * Immediate form of iqmulr(): i0 is loaded into a scratch register that is
+ * then used as the second multiplicand.
+ */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqmulr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 / r2: issue div on the two sources and fetch the quotient with
+ * MFLO.
+ */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    div(r1, r2);
+    MFLO(r0);
+}
+
+/*
+ * r0 = r1 / i0: the immediate is loaded into a scratch register and the
+ * work is delegated to divr().
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unsigned r0 = r1 / r2: issue divu on the two sources and fetch the
+ * quotient with MFLO.
+ */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    divu(r1, r2);
+    MFLO(r0);
+}
+
+/*
+ * Unsigned r0 = r1 / i0: the immediate is loaded into a scratch register
+ * and the work is delegated to divr_u().
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Combined divide of r2 by r3: r0 receives MFLO (the register divr() reads
+ * its quotient from) and r1 receives MFHI (the register remr() reads its
+ * remainder from).  sign selects div (non-zero) or divu (zero).
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    if (!sign)
+       divu(r2, r3);
+    else
+       div(r2, r3);
+    MFLO(r0);
+    MFHI(r1);
+}
+
+/*
+ * Immediate form of iqdivr(): i0 is loaded into a scratch register that is
+ * then used as the divisor.
+ */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqdivr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 % r2: issue div on the two sources and fetch the remainder with
+ * MFHI.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    div(r1, r2);
+    MFHI(r0);
+}
+
+/*
+ * r0 = r1 % i0: the immediate is loaded into a scratch register and the
+ * work is delegated to remr().
+ */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unsigned r0 = r1 % r2: issue divu on the two sources and fetch the
+ * remainder with MFHI.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    divu(r1, r2);
+    MFHI(r0);
+}
+
+/*
+ * Unsigned r0 = r1 % i0: the immediate is loaded into a scratch register
+ * and the work is delegated to remr_u().
+ */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/*
+ * r0 = r1 << i0 for 0 <= i0 <= 63.  Amounts below 32 use DSLL; larger ones
+ * use DSLL32 with the amount reduced by 32.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+    if (i0 >= 32)
+       DSLL32(r0, r1, i0 - 32);
+    else
+       DSLL(r0, r1, i0);
+}
+
+/*
+ * r0 = r1 >> i0 for 0 <= i0 <= 63, using the DSRA family.  Amounts below 32
+ * use DSRA; larger ones use DSRA32 with the amount reduced by 32.
+ */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+    if (i0 >= 32)
+       DSRA32(r0, r1, i0 - 32);
+    else
+       DSRA(r0, r1, i0);
+}
+
+/*
+ * Unsigned r0 = r1 >> i0 for 0 <= i0 <= 63, using the DSRL family.  Amounts
+ * below 32 use DSRL; larger ones use DSRL32 with the amount reduced by 32.
+ */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+    if (i0 >= 32)
+       DSRL32(r0, r1, i0 - 32);
+    else
+       DSRL(r0, r1, i0);
+}
+#endif
+
+/*
+ * r0 = r1 & i0.  An immediate accepted by can_zero_extend_short_p() is
+ * encoded directly in ANDI; otherwise it goes through a scratch register.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       ANDI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    AND(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 | i0.  An immediate accepted by can_zero_extend_short_p() is
+ * encoded directly in ORI; otherwise it goes through a scratch register.
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       ORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    OR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 ^ i0.  An immediate accepted by can_zero_extend_short_p() is
+ * encoded directly in XORI; otherwise it goes through a scratch register.
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       XORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    XOR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) /* r0 = i0; the emitted sequence is chosen by the range i0 falls in */
+{
+    if (i0 == 0) /* zero: OR of the zero register with itself */
+       OR(r0, _ZERO_REGNO, _ZERO_REGNO);
+    else if (can_sign_extend_short_p(i0)) /* single addiu from the zero register */
+       addiu(r0, _ZERO_REGNO, i0);
+    else if (can_zero_extend_short_p(i0)) /* single ORI from the zero register */
+       ORI(r0, _ZERO_REGNO, i0);
+    else { /* wider value: build the upper bits first, the low 16 bits are OR-ed in last */
+       if (can_sign_extend_int_p(i0))
+           LUI(r0, i0 >> 16);
+       else if (can_zero_extend_int_p(i0)) {
+           if (i0 & 0xffff0000) { /* load bits 31..16 into the low half, then shift them into place */
+               ORI(r0, _ZERO_REGNO, i0 >> 16);
+               lshi(r0, r0, 16);
+           }
+       }
+#  if __WORDSIZE == 64 /* remaining case, only compiled for 64-bit words */
+       else {
+           movi(r0, (jit_uword_t)i0 >> 32); /* recursive call loads the upper 32 bits as a smaller constant */
+           if (i0 & 0xffff0000) { /* shift by 16, merge bits 31..16, shift by 16 again */
+               lshi(r0, r0, 16);
+               ORI(r0, r0, i0 >> 16);
+               lshi(r0, r0, 16);
+           }
+           else /* bits 31..16 are all zero: one shift by 32 is enough */
+               lshi(r0, r0, 32);
+       }
+#  endif
+       if (i0 & 0xffff) /* skip the final ORI when the low 16 bits are zero */
+           ORI(r0, r0, i0);
+    }
+}
+
+/*
+ * Load i0 into r0 with a sequence that does not depend on the value of i0:
+ * LUI + ORI for 32-bit words, LUI/ORI interleaved with 16-bit shifts for
+ * 64-bit words.  Returns the value of _jit->pc.w sampled before the first
+ * instruction is emitted.
+ * NOTE(review): presumably the fixed length exists so the constant can be
+ * patched afterwards -- confirm against the patch helpers.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    const jit_word_t first_insn = _jit->pc.w;
+
+#  if __WORDSIZE == 32
+    LUI(r0, i0 >> 16);
+    ORI(r0, r0, i0);
+#  else
+    LUI(r0, i0 >> 48);
+    ORI(r0, r0, i0 >> 32);
+    lshi(r0, r0, 16);
+    ORI(r0, r0, i0 >> 16);
+    lshi(r0, r0, 16);
+    ORI(r0, r0, i0);
+#  endif
+
+    return first_insn;
+}
+
+/*
+ * Load a byte with LB from the immediate address i0 into r0.  An address
+ * accepted by can_sign_extend_short_p() is used as the offset from the zero
+ * register; otherwise it is loaded into a scratch register first.
+ */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LB(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a byte with LBU from the immediate address i0 into r0.  An address
+ * accepted by can_sign_extend_short_p() is used as the offset from the zero
+ * register; otherwise it is loaded into a scratch register first.
+ */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LBU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a halfword with LH from the immediate address i0 into r0.  An
+ * address accepted by can_sign_extend_short_p() is used as the offset from
+ * the zero register; otherwise it is loaded into a scratch register first.
+ */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LH(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a halfword with LHU from the immediate address i0 into r0.  An
+ * address accepted by can_sign_extend_short_p() is used as the offset from
+ * the zero register; otherwise it is loaded into a scratch register first.
+ */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a word with LW from the immediate address i0 into r0.  An address
+ * accepted by can_sign_extend_short_p() is used as the offset from the zero
+ * register; otherwise it is loaded into a scratch register first.
+ */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LW(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/*
+ * Load a word with LWU from the immediate address i0 into r0 (64-bit builds
+ * only).  An address accepted by can_sign_extend_short_p() is used as the
+ * offset from the zero register; otherwise it is loaded into a scratch
+ * register first.
+ */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a doubleword with LD from the immediate address i0 into r0 (64-bit
+ * builds only).  An address accepted by can_sign_extend_short_p() is used as
+ * the offset from the zero register; otherwise it is loaded into a scratch
+ * register first.
+ */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LD(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#endif
+
+/*
+ * Indexed byte load (ldr_c) into r0 from address r1 + r2; the address is
+ * formed in a scratch register.
+ */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_c(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed byte load with LB into r0 from address r1 + i0.  An offset
+ * accepted by can_sign_extend_short_p() is encoded in the instruction;
+ * otherwise the address is formed in a scratch register with addi().
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LB(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_c(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed byte load (ldr_uc) into r0 from address r1 + r2; the address is
+ * formed in a scratch register.
+ */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_uc(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed byte load with LBU into r0 from address r1 + i0.  An offset
+ * accepted by can_sign_extend_short_p() is encoded in the instruction;
+ * otherwise the address is formed in a scratch register with addi().
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LBU(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_uc(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed halfword load (ldr_s) into r0 from address r1 + r2; the address
+ * is formed in a scratch register.
+ */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_s(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed halfword load with LH into r0 from address r1 + i0.  An offset
+ * accepted by can_sign_extend_short_p() is encoded in the instruction;
+ * otherwise the address is formed in a scratch register with addi().
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LH(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_s(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed halfword load (ldr_us) into r0 from address r1 + r2; the address
+ * is formed in a scratch register.
+ */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_us(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed halfword load with LHU into r0 from address r1 + i0.  An offset
+ * accepted by can_sign_extend_short_p() is encoded in the instruction;
+ * otherwise the address is formed in a scratch register with addi().
+ */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHU(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_us(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed word load (ldr_i) into r0 from address r1 + r2; the address is
+ * formed in a scratch register.
+ */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_i(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed word load with LW into r0 from address r1 + i0.  An offset
+ * accepted by can_sign_extend_short_p() is encoded in the instruction;
+ * otherwise the address is formed in a scratch register with addi().
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LW(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_i(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+#if __WORDSIZE == 64
+/*
+ * Indexed word load (ldr_ui) into r0 from address r1 + r2 (64-bit builds
+ * only); the address is formed in a scratch register.
+ */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_ui(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed word load with LWU into r0 from address r1 + i0 (64-bit builds
+ * only).  An offset accepted by can_sign_extend_short_p() is encoded in the
+ * instruction; otherwise the address is formed in a scratch register with
+ * addi().
+ */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWU(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_ui(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed doubleword load (ldr_l) into r0 from address r1 + r2 (64-bit
+ * builds only); the address is formed in a scratch register.
+ */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_l(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/*
+ * Indexed doubleword load with LD into r0 from address r1 + i0 (64-bit
+ * builds only).  An offset accepted by can_sign_extend_short_p() is encoded
+ * in the instruction; otherwise the address is formed in a scratch register
+ * with addi().
+ */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       LD(r0, i0, r1);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    addi(rn(ea), r1, i0);
+    ldr_l(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+#endif
+
+/*
+ * Store r0 with SB to the immediate address i0.  An address accepted by
+ * can_sign_extend_short_p() is used as the offset from the zero register;
+ * otherwise it is loaded into a scratch register first.
+ */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       SB(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    movi(rn(ea), i0);
+    str_c(rn(ea), r0);
+    jit_unget_reg(ea);
+}
+
+/*
+ * Store r0 with SH to the immediate address i0.  An address accepted by
+ * can_sign_extend_short_p() is used as the offset from the zero register;
+ * otherwise it is loaded into a scratch register first.
+ */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t ea;
+
+    if (can_sign_extend_short_p(i0)) {
+       SH(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    ea = jit_get_reg(jit_class_gpr);
+    movi(rn(ea), i0);
+    str_s(rn(ea), r0);
+    jit_unget_reg(ea);
+}
+
+/*
+ * sti_i: store from r0 to the absolute address i0.  When i0 passes
+ * can_sign_extend_short_p() it is used as the SW offset from
+ * _ZERO_REGNO; otherwise the address is loaded into a scratch register
+ * and str_i() performs the store.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SW(r0, i0, _ZERO_REGNO);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_i(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/*
+ * sti_l: store from r0 to the absolute address i0.  When i0 passes
+ * can_sign_extend_short_p() it is used as the SD offset from
+ * _ZERO_REGNO; otherwise the address is loaded into a scratch register
+ * and str_l() performs the store.
+ */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SD(r0, i0, _ZERO_REGNO);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_l(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+#endif
+
+/*
+ * stxr_c: build the address r0 + r1 in a scratch register and let
+ * str_c() store r2 through it.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_c(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxi_c: store r1 at r0 + i0.  An offset accepted by
+ * can_sign_extend_short_p() goes straight into one SB; any other offset
+ * is first added to r0 in a scratch register.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SB(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_c(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxr_s: build the address r0 + r1 in a scratch register and let
+ * str_s() store r2 through it.
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_s(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxi_s: store r1 at r0 + i0.  An offset accepted by
+ * can_sign_extend_short_p() goes straight into one SH; any other offset
+ * is first added to r0 in a scratch register.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SH(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_s(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxr_i: build the address r0 + r1 in a scratch register and let
+ * str_i() store r2 through it.
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_i(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxi_i: store r1 at r0 + i0.  An offset accepted by
+ * can_sign_extend_short_p() goes straight into one SW; any other offset
+ * is first added to r0 in a scratch register.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SW(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_i(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/*
+ * stxr_l: build the address r0 + r1 in a scratch register and let
+ * str_l() store r2 through it.
+ */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_l(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * stxi_l: store r1 at r0 + i0.  An offset accepted by
+ * can_sign_extend_short_p() goes straight into one SD; any other offset
+ * is first added to r0 in a scratch register.
+ */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SD(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_l(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+#endif
+
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+/*
+ * htonr_us: r0 = the two low bytes of r1 with their order swapped
+ * (little-endian hosts only; see the enclosing #if).
+ *
+ * The shifted copy of r1 is taken into t0 before r0 is first written, so
+ * the sequence reads r1 completely before overwriting r0.
+ */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    rshi(rn(t0), r1, 8);               /* t0 = r1 >> 8 */
+    andi(r0, r1, 0xff);                        /* r0 = byte 0 of r1 */
+    andi(rn(t0), rn(t0), 0xff);                /* t0 = byte 1 of r1 */
+    lshi(r0, r0, 8);                   /* byte 0 moves up to bits 8..15 */
+    orr(r0, r0, rn(t0));               /* byte 1 lands in bits 0..7 */
+    jit_unget_reg(t0);
+}
+
+/*
+ * htonr_ui: r0 = the four low bytes of r1 in reversed order
+ * (little-endian hosts only; see the enclosing #if).
+ *
+ * Bytes 3, 2 and 1 of r1 are extracted into t0, t1 and t2 before r0 is
+ * first written, so r1 is fully read before r0 is overwritten.
+ */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    rshi(rn(t0), r1, 24);
+    rshi(rn(t1), r1, 16);
+    rshi(rn(t2), r1,  8);
+    andi(rn(t0), rn(t0), 0xff);                /* t0 = byte 3 */
+    andi(rn(t1), rn(t1), 0xff);                /* t1 = byte 2 */
+    andi(rn(t2), rn(t2), 0xff);                /* t2 = byte 1 */
+    andi(r0, r1, 0xff);                        /* r0 = byte 0 */
+    lshi(r0, r0, 24);                  /* byte 0 -> bits 24..31 */
+    lshi(rn(t1), rn(t1), 8);           /* byte 2 -> bits 8..15 */
+    orr(r0, r0, rn(t0));               /* byte 3 -> bits 0..7 */
+    lshi(rn(t2), rn(t2), 16);          /* byte 1 -> bits 16..23 */
+    orr(r0, r0, rn(t1));
+    orr(r0, r0, rn(t2));
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * htonr_ul: byte-reverse r1 into r0 by swapping each 32-bit half with
+ * htonr_ui() and exchanging the halves.
+ *
+ * The upper half of r1 is copied into a scratch register before r0 is
+ * written, so r1 is fully read before r0 is overwritten.
+ */
+static void
+_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    rshi_u(rn(reg), r1, 32);           /* reg = upper 32 bits of r1 */
+    htonr_ui(r0, r1);                  /* r0 = swapped lower half */
+    htonr_ui(rn(reg), rn(reg));                /* reg = swapped upper half */
+    lshi(r0, r0, 32);                  /* swapped lower half becomes the upper half */
+    orr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+#  endif
+
+/*
+ * extr_c: r0 = low 8 bits of r1, sign extended.  Emits a single SEB when
+ * jit_mips2_p() holds, otherwise a left shift followed by an rshi()
+ * right shift by __WORDSIZE - 8.
+ */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips2_p()) {
+        SEB(r0, r1);
+        return;
+    }
+    lshi(r0, r1, __WORDSIZE - 8);
+    rshi(r0, r0, __WORDSIZE - 8);
+}
+
+/*
+ * extr_s: r0 = low 16 bits of r1, sign extended.  Emits a single SEH
+ * when jit_mips2_p() holds, otherwise a left shift followed by an rshi()
+ * right shift by __WORDSIZE - 16.
+ */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips2_p()) {
+        SEH(r0, r1);
+        return;
+    }
+    lshi(r0, r1, __WORDSIZE - 16);
+    rshi(r0, r0, __WORDSIZE - 16);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * extr_ui: r0 = low 32 bits of r1 with the upper 32 bits cleared, done
+ * by shifting left by 32 and then shifting right by 32 with rshi_u().
+ */
+static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, 32);
+    rshi_u(r0, r0, 32);
+}
+#  endif
+
+/*
+ * lti: r0 = (r1 < i0), signed.  A single SLTI is used when i0 passes
+ * can_sign_extend_short_p(); otherwise i0 is loaded into a scratch
+ * register and ltr() does the comparison.
+ */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SLTI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * lti_u: r0 = (r1 < i0), unsigned.  A single SLTIU is used when i0
+ * passes can_sign_extend_short_p(); otherwise i0 is loaded into a
+ * scratch register and ltr_u() does the comparison.
+ */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        SLTIU(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ler: r0 = (r1 <= r2), signed, computed as !(r2 < r1). */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r2, r1);                   /* r0 = r2 < r1 */
+    XORI(r0, r0, 1);                   /* flip the 0/1 result */
+}
+
+/*
+ * lei: r0 = (r1 <= i0), signed.  Comparing against zero uses
+ * _ZERO_REGNO directly; any other immediate is loaded into a scratch
+ * register and passed to ler().
+ */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+        SLT(r0, _ZERO_REGNO, r1);
+        XORI(r0, r0, 1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ler(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ler_u: r0 = (r1 <= r2), unsigned, computed as !(r2 < r1). */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r2, r1);                  /* r0 = r2 < r1 */
+    XORI(r0, r0, 1);                   /* flip the 0/1 result */
+}
+
+/*
+ * lei_u: r0 = (r1 <= i0), unsigned.  Comparing against zero uses
+ * _ZERO_REGNO directly; any other immediate is loaded into a scratch
+ * register and passed to ler_u().
+ */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+        SLTU(r0, _ZERO_REGNO, r1);
+        XORI(r0, r0, 1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ler_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* eqr: r0 = (r1 == r2), computed as !((r1 - r2) != 0). */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);                  /* r0 = r1 - r2 */
+    SLTU(r0, _ZERO_REGNO, r0);         /* r0 = (0 < r0), i.e. difference is nonzero */
+    XORI(r0, r0, 1);                   /* flip the 0/1 result */
+}
+
+/*
+ * eqi: r0 = (r1 == i0).  A nonzero immediate is subtracted first; for
+ * zero, r1 itself is tested.  The "is nonzero" result is then flipped.
+ */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+        SLTU(r0, _ZERO_REGNO, r1);
+    }
+    else {
+        subi(r0, r1, i0);
+        SLTU(r0, _ZERO_REGNO, r0);
+    }
+    XORI(r0, r0, 1);
+}
+
+/* ger: r0 = (r1 >= r2), signed, computed as !(r1 < r2). */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r1, r2);                   /* r0 = r1 < r2 */
+    XORI(r0, r0, 1);                   /* flip the 0/1 result */
+}
+
+/*
+ * gei: r0 = (r1 >= i0), signed.  The immediate is always loaded into a
+ * scratch register and the comparison is delegated to ger().
+ */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ger(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ger_u: r0 = (r1 >= r2), unsigned, computed as !(r1 < r2). */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r1, r2);                  /* r0 = r1 < r2 */
+    XORI(r0, r0, 1);                   /* flip the 0/1 result */
+}
+
+/*
+ * gei_u: r0 = (r1 >= i0), unsigned.  The immediate is always loaded into
+ * a scratch register and the comparison is delegated to ger_u().
+ */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ger_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * gti: r0 = (r1 > i0), signed, computed as (i0 < r1).  Zero is compared
+ * through _ZERO_REGNO; any other immediate goes through a scratch
+ * register.
+ */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+        SLT(r0, _ZERO_REGNO, r1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    SLT(r0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * gti_u: r0 = (r1 > i0), unsigned, computed as (i0 < r1).  Zero is
+ * compared through _ZERO_REGNO; any other immediate goes through a
+ * scratch register.
+ */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (i0 == 0) {
+        SLTU(r0, _ZERO_REGNO, r1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    SLTU(r0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* ner: r0 = (r1 != r2), computed as (0 < (r1 - r2)) unsigned. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);                  /* r0 = r1 - r2 */
+    SLTU(r0, _ZERO_REGNO, r0);         /* r0 = 1 when the difference is nonzero */
+}
+
+/*
+ * nei: r0 = (r1 != i0).  A nonzero immediate is subtracted first; for
+ * zero, r1 itself is tested for being nonzero.
+ */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+        SLTU(r0, _ZERO_REGNO, r1);
+        return;
+    }
+    subi(r0, r1, i0);
+    SLTU(r0, _ZERO_REGNO, r0);
+}
+
+/*
+ * bltr: branch to i0 when r0 < r1 (signed).
+ *
+ * Emits SLT into a scratch register, then BNE against _ZERO_REGNO with
+ * the displacement ((i0 - w) >> 2) - 1, followed by NOP(1).
+ * Returns w, the value of _jit->pc.w sampled just before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    /* Request the scratch register with jit_class_nospill, as every other
+     * branch emitter in this file does (bltr_u, bler, bger, bgtr, ...).
+     * NOTE(review): presumably this keeps spill/reload code from being
+     * emitted around the branch -- confirm against the allocator. */
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(reg), r0, r1);
+    w = _jit->pc.w;
+    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/*
+ * bltr_u: branch to i0 when r0 < r1 (unsigned).  SLTU sets a scratch
+ * flag, BNE tests it against _ZERO_REGNO.  Returns _jit->pc.w as sampled
+ * just before the branch is emitted.
+ */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLTU(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BNE(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * blti: branch to i0 when r0 < i1 (signed).
+ *   i1 == 0                      -> BLTZ on r0, no scratch register
+ *   can_sign_extend_short_p(i1)  -> SLTI into a scratch flag, then BNE
+ *   otherwise                    -> load i1 and delegate to bltr()
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bltr()).
+ */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BLTZ(r0, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    if (can_sign_extend_short_p(i1)) {
+        SLTI(rn(tmp), r0, i1);
+        here = _jit->pc.w;
+        BNE(rn(tmp), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+    }
+    else {
+        movi(rn(tmp), i1);
+        here = bltr(i0, r0, rn(tmp));
+    }
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * blti_u: branch to i0 when r0 < i1 (unsigned).  Uses SLTIU + BNE when
+ * i1 passes can_sign_extend_short_p(), otherwise loads i1 and delegates
+ * to bltr_u().  Returns the branch position.
+ */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    if (can_sign_extend_short_p(i1)) {
+        SLTIU(rn(tmp), r0, i1);
+        here = _jit->pc.w;
+        BNE(rn(tmp), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+    }
+    else {
+        movi(rn(tmp), i1);
+        here = bltr_u(i0, r0, rn(tmp));
+    }
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bler: branch to i0 when r0 <= r1 (signed), i.e. when (r1 < r0) is
+ * false: SLT computes r1 < r0 and BEQ branches on a zero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLT(rn(flag), r1, r0);
+    here = _jit->pc.w;
+    BEQ(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * bler_u: branch to i0 when r0 <= r1 (unsigned), i.e. when (r1 < r0) is
+ * false: SLTU computes r1 < r0 and BEQ branches on a zero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLTU(rn(flag), r1, r0);
+    here = _jit->pc.w;
+    BEQ(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * blei: branch to i0 when r0 <= i1 (signed).  Zero uses BLEZ directly;
+ * any other immediate is loaded into a scratch register and handed to
+ * bler().  Returns the branch position.
+ */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BLEZ(r0, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bler(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * blei_u: branch to i0 when r0 <= i1 (unsigned).  For i1 == 0 this is
+ * emitted as BEQ of r0 against _ZERO_REGNO; any other immediate is
+ * loaded into a scratch register and handed to bler_u().
+ * Returns the branch position.
+ */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BEQ(r0, _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bler_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * beqr: branch to i0 when r0 == r1, using BEQ followed by NOP(1).
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t here = _jit->pc.w;
+
+    BEQ(r0, r1, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    return (here);
+}
+
+/*
+ * beqi: branch to i0 when r0 == i1.  Zero is compared against
+ * _ZERO_REGNO directly; any other immediate is loaded into a scratch
+ * register and handed to beqr().  Returns the branch position.
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BEQ(r0, _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = beqr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bger: branch to i0 when r0 >= r1 (signed), i.e. when (r0 < r1) is
+ * false: SLT computes r0 < r1 and BEQ branches on a zero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLT(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BEQ(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * bger_u: branch to i0 when r0 >= r1 (unsigned), i.e. when (r0 < r1) is
+ * false: SLTU computes r0 < r1 and BEQ branches on a zero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLTU(rn(flag), r0, r1);
+    here = _jit->pc.w;
+    BEQ(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * bgei: branch to i0 when r0 >= i1 (signed).
+ *   i1 == 0                      -> BGEZ on r0, no scratch register
+ *   can_sign_extend_short_p(i1)  -> SLTI into a scratch flag, then BEQ
+ *   otherwise                    -> load i1 and delegate to bger()
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bger()).
+ */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BGEZ(r0, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    if (can_sign_extend_short_p(i1)) {
+        SLTI(rn(tmp), r0, i1);
+        here = _jit->pc.w;
+        BEQ(rn(tmp), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+    }
+    else {
+        movi(rn(tmp), i1);
+        here = bger(i0, r0, rn(tmp));
+    }
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bgei_u: branch to i0 when r0 >= i1 (unsigned).  Uses SLTIU + BEQ when
+ * i1 passes can_sign_extend_short_p(), otherwise loads i1 and delegates
+ * to bger_u().  Returns the branch position.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    if (can_sign_extend_short_p(i1)) {
+        SLTIU(rn(tmp), r0, i1);
+        here = _jit->pc.w;
+        BEQ(rn(tmp), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+    }
+    else {
+        movi(rn(tmp), i1);
+        here = bger_u(i0, r0, rn(tmp));
+    }
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bgtr: branch to i0 when r0 > r1 (signed): SLT computes r1 < r0 and
+ * BNE branches on a nonzero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLT(rn(flag), r1, r0);
+    here = _jit->pc.w;
+    BNE(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * bgtr_u: branch to i0 when r0 > r1 (unsigned): SLTU computes r1 < r0
+ * and BNE branches on a nonzero flag.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t flag = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    SLTU(rn(flag), r1, r0);
+    here = _jit->pc.w;
+    BNE(rn(flag), _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    jit_unget_reg(flag);
+    return (here);
+}
+
+/*
+ * bgti: branch to i0 when r0 > i1 (signed).  Zero uses BGTZ directly;
+ * any other immediate is loaded into a scratch register and handed to
+ * bgtr().  Returns the branch position.
+ */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BGTZ(r0, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bgtr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bgti_u: branch to i0 when r0 > i1 (unsigned).  For i1 == 0 this is
+ * emitted as BNE of r0 against _ZERO_REGNO; any other immediate is
+ * loaded into a scratch register and handed to bgtr_u().
+ * Returns the branch position.
+ */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BNE(r0, _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bgtr_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * bner: branch to i0 when r0 != r1, using BNE followed by NOP(1).
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t here = _jit->pc.w;
+
+    BNE(r0, r1, ((i0 - here) >> 2) - 1);
+    NOP(1);
+    return (here);
+}
+
+/*
+ * bnei: branch to i0 when r0 != i1.  Zero is compared against
+ * _ZERO_REGNO directly; any other immediate is loaded into a scratch
+ * register and handed to bner().  Returns the branch position.
+ */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  here;
+    jit_int32_t tmp;
+
+    if (i1 == 0) {
+        here = _jit->pc.w;
+        BNE(r0, _ZERO_REGNO, ((i0 - here) >> 2) - 1);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    here = bner(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/* jmpr: jump to the address held in r0 (JR), followed by NOP(1). */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    JR(r0);
+    NOP(1);
+}
+
+/*
+ * jmpi: unconditional jump to the address i0.
+ *
+ * When the address following the jump and the target agree under the
+ * mask 0xf0000000, a direct J is emitted; otherwise the target is loaded
+ * with movi_p() into a scratch register and jmpr() is used.
+ * Returns _jit->pc.w as sampled on entry.
+ *
+ * NOTE(review): the mask compares bits 28..31 only; on a 64-bit build
+ * the upper 32 address bits are not compared -- confirm this is
+ * intended.
+ */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t  here = _jit->pc.w;
+    jit_int32_t tmp;
+
+    if (((here + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+        J((i0 & ~0xf0000000) >> 2);
+        NOP(1);
+        return (here);
+    }
+    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (here);
+}
+
+/*
+ * boaddr: r0 += r1, branching to i0 when the signed addition overflows.
+ *
+ * The overflow flag is computed into t1 before the branch; the actual
+ * addition into r0 is emitted after the branch, in its delay slot.
+ * Returns w, the value of _jit->pc.w sampled just before the branch is
+ * emitted.
+ */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 + r1;   overflow = r1 < 0 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(t0), r1, _ZERO_REGNO);      /* t0 = r1 < 0 */
+    addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    addr(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/*
+ * boaddi: r0 += i1, branching to i0 when the signed addition overflows.
+ *
+ * When i1 passes can_sign_extend_short_p() the sequence is emitted
+ * inline with addiu; otherwise i1 is loaded into a temporary and the
+ * work is delegated to boaddr().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by boaddr()).
+ */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       addiu(rn(t1), r0, i1);          /* t1 = r0 + i1 */
+       SLT(rn(t2), r0, rn(t1));        /* t2 = r0 < t1 */
+       SLT(rn(t1), rn(t1), r0);        /* t1 = t1 < r0 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       addiu(r0, r0, i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       /* The temporary is live across the branch emitted by boaddr(), so
+        * request it with jit_class_nospill like the equivalent paths in
+        * bxaddi, bosubi and bxsubi. */
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = boaddr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * boaddr_u: r0 += r1, branching to i0 when the unsigned sum is smaller
+ * than the original r0 (carry out).  The sum is built in a temporary and
+ * moved into r0 in the branch delay slot.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t sum   = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t carry = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    addr(rn(sum), r0, r1);
+    SLTU(rn(carry), rn(sum), r0);
+    here = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(carry), ((i0 - here) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(sum));
+    jit_unget_reg(carry);
+    jit_unget_reg(sum);
+    return (here);
+}
+
+/*
+ * boaddi_u: r0 += i1, branching to i0 when the unsigned addition carries.
+ *
+ * When i1 passes can_sign_extend_short_p() the sum is formed with addiu;
+ * otherwise i1 is loaded into a temporary and the work is delegated to
+ * boaddr_u().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by boaddr_u()).
+ */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    /* The range check must be on the addend i1, which is the value handed
+     * to addiu; i0 is the branch target and says nothing about whether
+     * the immediate fits (compare the signed variant boaddi). */
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       addiu(rn(t0), r0, i1);          /* t0 = r0 + i1 */
+       SLTU(rn(t1), rn(t0), r0);       /* t1 = t0 < r0 (carry) */
+       w = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = boaddr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bxaddr: r0 += r1, branching to i0 when the signed addition does NOT
+ * overflow (same flag computation as boaddr, but tested with BEQ).
+ *
+ * The addition into r0 is emitted after the branch, in its delay slot.
+ * Returns w, the value of _jit->pc.w sampled just before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 + r1;   overflow = r1 < 0 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(t0), r1, _ZERO_REGNO);      /* t0 = r1 < 0 */
+    addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    addr(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/*
+ * bxaddi: r0 += i1, branching to i0 when the signed addition does NOT
+ * overflow.
+ *
+ * When i1 passes can_sign_extend_short_p() the sequence is emitted
+ * inline with addiu; otherwise i1 is loaded into a temporary and the
+ * work is delegated to bxaddr().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bxaddr()).
+ */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       addiu(rn(t1), r0, i1);          /* t1 = r0 + i1 */
+       SLT(rn(t2), r0, rn(t1));        /* t2 = r0 < t1 */
+       SLT(rn(t1), rn(t1), r0);        /* t1 = t1 < r0 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       addiu(r0, r0, i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bxaddr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bxaddr_u: r0 += r1, branching to i0 when the unsigned sum is NOT
+ * smaller than the original r0 (no carry).  The sum is built in a
+ * temporary and moved into r0 in the branch delay slot.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t sum   = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t carry = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    addr(rn(sum), r0, r1);
+    SLTU(rn(carry), rn(sum), r0);
+    here = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(carry), ((i0 - here) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(sum));
+    jit_unget_reg(carry);
+    jit_unget_reg(sum);
+    return (here);
+}
+
+/*
+ * bxaddi_u: r0 += i1, branching to i0 when the unsigned addition does
+ * NOT carry.
+ *
+ * When i1 passes can_sign_extend_short_p() the sum is formed with addiu;
+ * otherwise i1 is loaded into a temporary and the work is delegated to
+ * bxaddr_u().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bxaddr_u()).
+ */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    /* The range check must be on the addend i1, which is the value handed
+     * to addiu; i0 is the branch target and says nothing about whether
+     * the immediate fits (compare the signed variant bxaddi). */
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       addiu(rn(t0), r0, i1);          /* t0 = r0 + i1 */
+       SLTU(rn(t1), rn(t0), r0);       /* t1 = t0 < r0 (carry) */
+       w = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bxaddr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bosubr: r0 -= r1, branching to i0 when the signed subtraction
+ * overflows.
+ *
+ * The overflow flag is computed into t1 before the branch; the actual
+ * subtraction into r0 is emitted after the branch, in its delay slot.
+ * Returns w, the value of _jit->pc.w sampled just before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 - r1;   overflow = 0 < r1 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(t0), _ZERO_REGNO, r1);      /* t0 = 0 < r1 */
+    subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    subr(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/*
+ * bosubi: r0 -= i1, branching to i0 when the signed subtraction
+ * overflows.
+ *
+ * The inline path subtracts by adding -i1 with addiu, so it is taken
+ * only when i1 passes can_sign_extend_short_p() and its low 16 bits are
+ * not 0x8000; otherwise i1 is loaded into a temporary and the work is
+ * delegated to bosubr().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bosubr()).
+ */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       addiu(rn(t1), r0, -i1);         /* t1 = r0 - i1 */
+       SLT(rn(t2), rn(t1), r0);        /* t2 = t1 < r0 */
+       SLT(rn(t1), r0, rn(t1));        /* t1 = r0 < t1 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       addiu(r0, r0, -i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bosubr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bosubr_u: r0 -= r1, branching to i0 when the original r0 is smaller
+ * than the unsigned difference (borrow).  The difference is built in a
+ * temporary and moved into r0 in the branch delay slot.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t diff   = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t borrow = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    subr(rn(diff), r0, r1);
+    SLTU(rn(borrow), r0, rn(diff));
+    here = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(borrow), ((i0 - here) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(diff));
+    jit_unget_reg(borrow);
+    jit_unget_reg(diff);
+    return (here);
+}
+
+/*
+ * bosubi_u: r0 -= i1, branching to i0 when the unsigned subtraction
+ * borrows.
+ *
+ * The inline path subtracts by adding -i1 with addiu, so it is taken
+ * only when i1 passes can_sign_extend_short_p() and its low 16 bits are
+ * not 0x8000; otherwise i1 is loaded into a temporary and the work is
+ * delegated to bosubr_u().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bosubr_u()).
+ */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    /* Both range checks must be on the subtrahend i1, which is the value
+     * negated and handed to addiu; i0 is the branch target (compare the
+     * signed variant bosubi). */
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       addiu(rn(t0), r0, -i1);         /* t0 = r0 - i1 */
+       SLTU(rn(t1), r0, rn(t0));       /* t1 = r0 < t0 (borrow) */
+       w = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bosubr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bxsubr: r0 -= r1, branching to i0 when the signed subtraction does NOT
+ * overflow (same flag computation as bosubr, but tested with BEQ).
+ *
+ * The subtraction into r0 is emitted after the branch, in its delay
+ * slot.  Returns w, the value of _jit->pc.w sampled just before the
+ * branch is emitted.
+ */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 - r1;   overflow = 0 < r1 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    SLT(rn(t0), _ZERO_REGNO, r1);      /* t0 = 0 < r1 */
+    subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    subr(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/*
+ * bxsubi: r0 -= i1, branching to i0 when the signed subtraction does NOT
+ * overflow.
+ *
+ * The inline path subtracts by adding -i1 with addiu, so it is taken
+ * only when i1 passes can_sign_extend_short_p() and its low 16 bits are
+ * not 0x8000; otherwise i1 is loaded into a temporary and the work is
+ * delegated to bxsubr().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bxsubr()).
+ */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t2 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       addiu(rn(t1), r0, -i1);         /* t1 = r0 - i1 */
+       SLT(rn(t2), rn(t1), r0);        /* t2 = t1 < r0 */
+       SLT(rn(t1), r0, rn(t1));        /* t1 = r0 < t1 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       addiu(r0, r0, -i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bxsubr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * bxsubr_u: r0 -= r1, branching to i0 when the original r0 is NOT
+ * smaller than the unsigned difference (no borrow).  The difference is
+ * built in a temporary and moved into r0 in the branch delay slot.
+ * Returns _jit->pc.w as sampled just before the branch is emitted.
+ */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t diff   = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t borrow = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_word_t  here;
+
+    subr(rn(diff), r0, r1);
+    SLTU(rn(borrow), r0, rn(diff));
+    here = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(borrow), ((i0 - here) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(diff));
+    jit_unget_reg(borrow);
+    jit_unget_reg(diff);
+    return (here);
+}
+
+/*
+ * bxsubi_u: r0 -= i1, branching to i0 when the unsigned subtraction does
+ * NOT borrow.
+ *
+ * The inline path subtracts by adding -i1 with addiu, so it is taken
+ * only when i1 passes can_sign_extend_short_p() and its low 16 bits are
+ * not 0x8000; otherwise i1 is loaded into a temporary and the work is
+ * delegated to bxsubr_u().
+ * Returns the branch position (_jit->pc.w before the branch, or the
+ * value returned by bxsubr_u()).
+ */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    /* Both range checks must be on the subtrahend i1, which is the value
+     * negated and handed to addiu; i0 is the branch target (compare the
+     * signed variant bxsubi). */
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       addiu(rn(t0), r0, -i1);         /* t0 = r0 - i1 */
+       SLTU(rn(t1), r0, rn(t0));       /* t1 = r0 < t0 (borrow) */
+       w = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(t0), i1);
+       w = bxsubr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/*
+ * Branch to i0 when (r0 & r1) is not zero.
+ * Returns the code address of the emitted branch instruction.
+ */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                masked;
+    jit_word_t         branch;
+
+    masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(masked), r0, r1);
+
+    branch = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(masked), ((i0 - branch) >> 2) - 1);
+    /* nothing useful to put in the delay slot */
+    NOP(1);
+
+    jit_unget_reg(masked);
+    return (branch);
+}
+
+/*
+ * Branch to i0 when (r0 & i1) is not zero.
+ * Uses ANDI directly when i1 passes can_zero_extend_short_p(), otherwise
+ * loads i1 in a scratch register and defers to bmsr().
+ * Returns the code address of the emitted branch instruction.
+ */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                masked;
+    jit_word_t         branch;
+
+    masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    if (!(can_zero_extend_short_p(i1))) {
+       /* mask too wide for an immediate operand */
+       movi(rn(masked), i1);
+       branch = bmsr(i0, r0, rn(masked));
+    }
+    else {
+       ANDI(rn(masked), r0, i1);
+       branch = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(masked), ((i0 - branch) >> 2) - 1);
+       /* delay slot */
+       NOP(1);
+    }
+    jit_unget_reg(masked);
+    return (branch);
+}
+
+/*
+ * Branch to i0 when (r0 & r1) is zero.
+ * Returns the code address of the emitted branch instruction.
+ */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                masked;
+    jit_word_t         branch;
+
+    masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(masked), r0, r1);
+
+    branch = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(masked), ((i0 - branch) >> 2) - 1);
+    /* nothing useful to put in the delay slot */
+    NOP(1);
+
+    jit_unget_reg(masked);
+    return (branch);
+}
+
+/*
+ * Branch to i0 when (r0 & i1) is zero.
+ * Uses ANDI directly when i1 passes can_zero_extend_short_p(), otherwise
+ * loads i1 in a scratch register and defers to bmcr().
+ * Returns the code address of the emitted branch instruction.
+ */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                masked;
+    jit_word_t         branch;
+
+    masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    if (!(can_zero_extend_short_p(i1))) {
+       /* mask too wide for an immediate operand */
+       movi(rn(masked), i1);
+       branch = bmcr(i0, r0, rn(masked));
+    }
+    else {
+       ANDI(rn(masked), r0, i1);
+       branch = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(masked), ((i0 - branch) >> 2) - 1);
+       /* delay slot */
+       NOP(1);
+    }
+    jit_unget_reg(masked);
+    return (branch);
+}
+
+/*
+ * Call the function whose address is in r0.  The address is also copied
+ * to $t9 when r0 is a different register (presumably because the callee
+ * may expect its own address there -- confirm against the target ABI).
+ */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 != _T9_REGNO) {
+       movr(_T9_REGNO, r0);
+    }
+    JALR(r0);
+    /* delay slot */
+    NOP(1);
+}
+
+/*
+ * Call the function at the absolute address i0: load the address in $t9
+ * and jump-and-link through it.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    /* target address goes through $t9 */
+    movi(_T9_REGNO, i0);
+    JALR(_T9_REGNO);
+    /* delay slot */
+    NOP(1);
+}
+
+/*
+ * Same as calli, but the address is loaded with movi_p and the code
+ * address where that load sequence starts is returned to the caller.
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    /* address of the first instruction of the load sequence */
+    jit_word_t         start = _jit->pc.w;
+
+    movi_p(_T9_REGNO, i0);
+    JALR(_T9_REGNO);
+    /* delay slot */
+    NOP(1);
+
+    return (start);
+}
+
+/*
+ * Floating point registers handled by _prolog()/_epilog(): each entry
+ * whose bit is set in the function regset is stored (stxi_d) on entry and
+ * reloaded (ldxi_d) on exit.  The table order defines the frame layout,
+ * walking down in 8 byte steps from
+ * stack_framesize - 2 * sizeof(jit_word_t).
+ */
+static jit_int32_t fregs[] = {
+    _F30, _F28, _F26, _F24, _F22, _F20,
+#if !NEW_ABI
+    /* only part of the table when not building for NEW_ABI */
+    _F18, _F16,
+#endif
+};
+
+/*
+ * Integer registers handled by _prolog()/_epilog(): each entry whose bit
+ * is set in the function regset is stored (stxi) on entry and reloaded
+ * (ldxi) on exit.  Slots follow the fregs area, one jit_word_t per entry,
+ * in table order.
+ */
+static jit_int32_t iregs[] = {
+    _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0,
+};
+
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                index;
+    jit_int32_t                offset;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
+#if NEW_ABI
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             /* align stack at 16 bytes */
+                              _jitc->function->self.aoff) + 15) & -16;
+#else
+    _jitc->function->stack = ((/* first 16 bytes must be allocated */
+                             (_jitc->function->self.alen > 16 ?
+                              _jitc->function->self.alen : 16) -
+                             /* align stack at 8 bytes */
+                             _jitc->function->self.aoff) + 7) & -8;
+#endif
+    /* callee save registers */
+#if NEW_ABI
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       subi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
+    else
+#endif
+       subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    offset = stack_framesize - (sizeof(jit_word_t) << 1);
+    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
+           stxi_d(offset, _SP_REGNO, rn(fregs[index]));
+    }
+    for (index = 0; index < jit_size(iregs);
+        index++, offset -= sizeof(jit_word_t)) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
+           stxi(offset, _SP_REGNO, rn(iregs[index]));
+    }
+    assert(offset >= sizeof(jit_word_t));
+    stxi(offset, _SP_REGNO, _RA_REGNO);
+    stxi(0, _SP_REGNO, _BP_REGNO);
+    movr(_BP_REGNO, _SP_REGNO);
+
+    /* alloca */
+    if (_jitc->function->stack)
+       subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       index = jit_get_reg(jit_class_gpr);
+       movi(rn(index), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index));
+       jit_unget_reg(index);
+    }
+
+    if (_jitc->function->self.call & jit_call_varargs) {
+#if NEW_ABI
+       index = _jitc->function->vagp;
+#else
+       index = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
+#endif
+       offset = stack_framesize + index * STACK_SLOT;
+       for (; jit_arg_reg_p(index); ++index, offset += STACK_SLOT) {
+#if NEW_ABI
+           SD(rn(_A0 - index), offset, _BP_REGNO);
+#else
+           stxi(offset +  WORD_ADJUST, _BP_REGNO, rn(_A0 - index));
+#endif
+       }
+    }
+}
+
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                index;
+    jit_int32_t                offset;
+    if (_jitc->function->assume_frame)
+       return;
+    /* callee save registers */
+    movr(_SP_REGNO, _BP_REGNO);
+    offset = stack_framesize - (sizeof(jit_word_t) << 1);
+    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
+           ldxi_d(rn(fregs[index]), _SP_REGNO, offset);
+    }
+    for (index = 0; index < jit_size(iregs);
+        index++, offset -= sizeof(jit_word_t)) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
+           ldxi(rn(iregs[index]), _SP_REGNO, offset);
+    }
+    assert(offset >= sizeof(jit_word_t));
+    ldxi(_RA_REGNO, _SP_REGNO, offset);
+    ldxi(_BP_REGNO, _SP_REGNO, 0);
+    JR(_RA_REGNO);
+    /* delay slot */
+#if NEW_ABI
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       addi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
+    else
+#endif
+       addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+}
+
+/*
+ * Initialize the va_list in r0: frame pointer plus the offset of the
+ * first variadic argument.  Only valid inside a varargs function.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_word_t         first;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* default: arguments start at self.size from the frame pointer */
+    first = _jitc->function->self.size;
+#if NEW_ABI
+    /* vagp still names an argument register: start at its 64 bit slot
+     * above the fixed frame instead */
+    if (jit_arg_reg_p(_jitc->function->vagp))
+       first = stack_framesize + _jitc->function->vagp *
+               sizeof(jit_int64_t);
+#endif
+    addi(r0, _BP_REGNO, first);
+}
+
+/*
+ * Fetch the next word sized variadic argument: load it from the va_list
+ * pointer in r1 into r0, then step r1 over one stack slot.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* The value sits WORD_ADJUST bytes inside its slot, when nonzero. */
+#if !WORD_ADJUST
+    ldr(r0, r1);
+#else
+    ldxi(r0, r1, WORD_ADJUST);
+#endif
+
+    /* Point the va_list at the following slot. */
+    addi(r1, r1, STACK_SLOT);
+}
+
+/*
+ * Rewrite the 16 bit immediates of an already emitted "load absolute
+ * address" sequence starting at instr so that it loads label.
+ *
+ * 32 bit words: lui, ori.
+ * 64 bit words: lui, ori, <slot 2>, ori, <slot 4>, ori -- slots 2 and 4
+ * are not touched.
+ * The expected opcode of every patched instruction is asserted.
+ */
+static void
+_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    union {
+       jit_int32_t     *insn;
+       jit_word_t       word;
+    } at;
+    jit_instr_t                i;
+
+    at.word = instr;
+
+    /* first instruction: lui with the topmost 16 bits */
+    i.op = at.insn[0];
+    assert(i.hc.b == MIPS_LUI);
+#if __WORDSIZE == 32
+    i.is.b = label >> 16;
+#else
+    i.is.b = label >> 48;
+#endif
+    at.insn[0] = i.op;
+
+    /* second instruction: ori with the following 16 bits */
+    i.op = at.insn[1];
+    assert(i.hc.b == MIPS_ORI);
+#if __WORDSIZE == 32
+    i.is.b = label;
+#else
+    i.is.b = label >> 32;
+#endif
+    at.insn[1] = i.op;
+
+#if __WORDSIZE != 32
+    /* slot 2 is the lshi */
+    i.op = at.insn[3];
+    assert(i.hc.b == MIPS_ORI);
+    i.is.b = label >> 16;
+    at.insn[3] = i.op;
+
+    /* slot 4 is the lshi */
+    i.op = at.insn[5];
+    assert(i.hc.b == MIPS_ORI);
+    i.is.b = label;
+    at.insn[5] = i.op;
+#endif
+}
+
+/*
+ * Patch the instruction at instr so that it targets label.
+ *
+ * The opcode of the instruction selects what is rewritten:
+ *   - opcodes with a 16 bit immediate get ((label - instr) >> 2) - 1;
+ *   - lui is taken as the start of an absolute address load and is
+ *     handed to patch_abs();
+ *   - j/jal/jalx get the 26 bit instruction index, after asserting that
+ *     the instruction after instr and label share the upper 4 bits of a
+ *     32 bit address.
+ * Anything else triggers the "unhandled branch opcode" assertion.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t                i;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    /* set when the 16 bit relative displacement must be written */
+    int                        relative = 0;
+
+    u.w = instr;
+    i.op = u.i[0];
+    switch (i.hc.b) {
+       /* 16 bit immediate opcodes */
+       case MIPS_REGIMM:
+           switch (i.rt.b) {
+               case MIPS_BLTZ:         case MIPS_BLTZL:
+               case MIPS_BLTZAL:       case MIPS_BLTZALL:
+               case MIPS_BGEZ:         case MIPS_BGEZAL:
+               case MIPS_BGEZALL:      case MIPS_BGEZL:
+               case MIPS_TEQI:         case MIPS_TGEI:
+               case MIPS_TGEIU:        case MIPS_TLTI:
+               case MIPS_TLTIU:        case MIPS_TNEI:
+                   relative = 1;
+                   break;
+               default:
+                   assert(!"unhandled branch opcode");
+                   break;
+           }
+           break;
+
+       case MIPS_COP1:                 case MIPS_COP2:
+           assert(i.rs.b == MIPS_BC);
+           switch (i.rt.b) {
+               case MIPS_BCF:          case MIPS_BCFL:
+               case MIPS_BCT:          case MIPS_BCTL:
+                   relative = 1;
+                   break;
+               default:
+                   assert(!"unhandled branch opcode");
+                   break;
+           }
+           break;
+
+       case MIPS_BLEZ:                 case MIPS_BLEZL:
+       case MIPS_BEQ:                  case MIPS_BEQL:
+       case MIPS_BGTZ:                 case MIPS_BGTZL:
+       case MIPS_BNE:                  case MIPS_BNEL:
+           relative = 1;
+           break;
+
+       case MIPS_LUI:
+           /* absolute address load sequence */
+           patch_abs(instr, label);
+           break;
+
+       case MIPS_J:                    case MIPS_JAL:
+       case MIPS_JALX:
+           assert(((instr + sizeof(jit_int32_t)) & 0xf0000000) ==
+                  (label & 0xf0000000));
+           i.ii.b = (label & ~0xf0000000) >> 2;
+           u.i[0] = i.op;
+           break;
+
+       default:
+           assert(!"unhandled branch opcode");
+           break;
+    }
+
+    if (relative) {
+       /* displacement in instructions, counted from the one after instr */
+       i.is.b = ((label - instr) >> 2) - 1;
+       u.i[0] = i.op;
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c
new file mode 100644 (file)
index 0000000..7513219
--- /dev/null
@@ -0,0 +1,1844 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define BE_P                         (__BYTE_ORDER == __BIG_ENDIAN)
+#  define LE_P                         (__BYTE_ORDER == __LITTLE_ENDIAN)
+#  define MIPS_fmt_S                   0x10            /* float32 */
+#  define MIPS_fmt_D                   0x11            /* float64 */
+#  define MIPS_fmt_W                   0x14            /* int32 */
+#  define MIPS_fmt_L                   0x15            /* int64 */
+#  define MIPS_fmt_PS                  0x16            /* 2 x float32 */
+#  define MIPS_fmt_S_PU                        0x20
+#  define MIPS_fmt_S_PL                        0x26
+#  define MIPS_ADD_fmt                 0x00
+#  define MIPS_LWXC1                   0x00
+#  define MIPS_SUB_fmt                 0x01
+#  define MIPS_LDXC1                   0x01
+#  define MIPS_MUL_fmt                 0x02
+#  define MIPS_DIV_fmt                 0x03
+#  define MIPS_SQRT_fmt                        0x04
+#  define MIPS_ABS_fmt                 0x05
+#  define MIPS_LUXC1                   0x05
+#  define MIPS_MOV_fmt                 0x06
+#  define MIPS_NEG_fmt                 0x07
+#  define MIPS_SWXC1                   0x08
+#  define MIPS_ROUND_fmt_L             0x08
+#  define MIPS_TRUNC_fmt_L             0x09
+#  define MIPS_SDXC1                   0x09
+#  define MIPS_CEIL_fmt_L              0x0a
+#  define MIPS_FLOOR_fmt_L             0x0b
+#  define MIPS_ROUND_fmt_W             0x0c
+#  define MIPS_TRUNC_fmt_W             0x0d
+#  define MIPS_SUXC1                   0x0d
+#  define MIPS_CEIL_fmt_W              0x0e
+#  define MIPS_FLOOR_fmt_W             0x0f
+#  define MIPS_RECIP                   0x15
+#  define MIPS_RSQRT                   0x16
+#  define MIPS_ALNV_PS                 0x1e
+/* Function codes of the format conversions; used as the last hrrrit()
+ * argument by the CVT_<to>_<from> macros of this file.
+ * NOTE(review): CVT_PS_S, CVT_S_PL and CVT_S_PU reference
+ * MIPS_CVT_fmt_PS, MIPS_CVT_fmt_S_PL and MIPS_CVT_fmt_S_PU, none of which
+ * is defined in this list -- confirm they are defined elsewhere or that
+ * those macros are never expanded. */
+#  define MIPS_CVT_fmt_S               0x20
+#  define MIPS_CVT_fmt_D               0x21
+#  define MIPS_CVT_fmt_W               0x24
+#  define MIPS_CVT_fmt_L               0x25
+#  define MIPS_PLL                     0x2c
+#  define MIPS_PLU                     0x2d
+#  define MIPS_PUL                     0x2e
+#  define MIPS_PUU                     0x2f
+#  define MIPS_MADD_fmt_S              (0x20 | MIPS_fmt_S)
+#  define MIPS_MADD_fmt_D              (0x20 | MIPS_fmt_D)
+#  define MIPS_MADD_fmt_PS             (0x20 | MIPS_fmt_PS)
+#  define MIPS_MSUB_fmt_S              (0x28 | MIPS_fmt_S)
+#  define MIPS_MSUB_fmt_D              (0x28 | MIPS_fmt_D)
+#  define MIPS_MSUB_fmt_PS             (0x28 | MIPS_fmt_PS)
+#  define MIPS_NMADD_fmt_S             (0x30 | MIPS_fmt_S)
+#  define MIPS_NMADD_fmt_D             (0x30 | MIPS_fmt_D)
+#  define MIPS_NMADD_fmt_PS            (0x30 | MIPS_fmt_PS)
+#  define MIPS_NMSUB_fmt_S             (0x38 | MIPS_fmt_S)
+#  define MIPS_NMSUB_fmt_D             (0x38 | MIPS_fmt_D)
+#  define MIPS_NMSUB_fmt_PS            (0x38 | MIPS_fmt_PS)
+/* Condition codes of the floating point compares; passed as the cc
+ * argument of c_cond_fmt() by the C_<cond>_{S,D,PS} macros of this file.
+ * NOTE(review): the C_NLT_* and C_NLE_* macros reference MIPS_cond_NLT
+ * and MIPS_cond_NLE, which are not defined in this list (0x3c and 0x3e
+ * are named MIPS_cond_LT and MIPS_cond_LE here) -- confirm they are
+ * defined elsewhere or that those macros are never expanded. */
+#  define MIPS_cond_F                  0x30
+#  define MIPS_cond_UN                 0x31
+#  define MIPS_cond_EQ                 0x32
+#  define MIPS_cond_UEQ                        0x33
+#  define MIPS_cond_OLT                        0x34
+#  define MIPS_cond_ULT                        0x35
+#  define MIPS_cond_OLE                        0x36
+#  define MIPS_cond_ULE                        0x37
+#  define MIPS_cond_SF                 0x38
+#  define MIPS_cond_NGLE               0x39
+#  define MIPS_cond_SEQ                        0x3a
+#  define MIPS_cond_NGL                        0x3b
+#  define MIPS_cond_LT                 0x3c
+#  define MIPS_cond_NGE                        0x3d
+#  define MIPS_cond_LE                 0x3e
+#  define MIPS_cond_UGT                        0x3f
+#  define ADD_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
+#  define ADD_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
+#  define SUB_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
+#  define SUB_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_SUB_fmt)
+#  define MUL_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MUL_fmt)
+#  define MUL_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MUL_fmt)
+#  define DIV_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_DIV_fmt)
+#  define DIV_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_DIV_fmt)
+#  define ABS_S(fd,fs)                 hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_ABS_fmt)
+#  define ABS_D(fd,fs)                 hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_ABS_fmt)
+#  define NEG_S(fd,fs)                 hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_NEG_fmt)
+#  define NEG_D(fd,fs)                 hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_NEG_fmt)
+#  define SQRT_S(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt)
+#  define SQRT_D(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt)
+#  define MFC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
+#  define MTC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
+#  define DMFC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
+#  define DMTC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
+#  define CVT_D_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_D_W(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_W,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_D_L(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_L,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_L_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_L)
+#  define CVT_L_D(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_L)
+#  define CVT_PS_S(fd,fs)              hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_PS)
+#  define CVT_S_D(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_W(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_W,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_L(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_L,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_PL(fd,fs)              hrrrit(MIPS_COP1,MIPS_fmt_PS,0,fs,fd,MIPS_CVT_fmt_S_PL)
+#  define CVT_S_PU(fd,fs)              hrrrit(MIPS_COP1,MIPS_fmt_PS,0,fs,fd,MIPS_CVT_fmt_S_PU)
+#  define CVT_W_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_W)
+#  define CVT_W_D(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_W)
+#  define TRUNC_L_S(fd,fs)             hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_L)
+#  define TRUNC_L_D(fd,fs)             hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_L)
+#  define TRUNC_W_S(fd,fs)             hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_W)
+#  define TRUNC_W_D(fd,fs)             hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_W)
+#  define LWC1(rt, of, rb)             hrri(MIPS_LWC1, rb, rt, of)
+#  define SWC1(rt, of, rb)             hrri(MIPS_SWC1, rb, rt, of)
+#  define LDC1(rt, of, rb)             hrri(MIPS_LDC1, rb, rt, of)
+#  define SDC1(rt, of, rb)             hrri(MIPS_SDC1, rb, rt, of)
+#  define MOV_S(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
+#  define MOV_D(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
+#  define BC1F(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
+#  define BC1T(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
+#  define C_F_S(fs,ft)                 c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
+#  define C_F_D(fs,ft)                 c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
+#  define C_F_PS(fs,ft)                        c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
+#  define C_UN_S(fs,ft)                        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UN)
+#  define C_UN_D(fs,ft)                        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UN)
+#  define C_UN_PS(fs,ft)               c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UN)
+#  define C_EQ_S(fs,ft)                        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_EQ)
+#  define C_EQ_D(fs,ft)                        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_EQ)
+#  define C_EQ_PS(fs,ft)               c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_EQ)
+#  define C_UEQ_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UEQ)
+#  define C_UEQ_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UEQ)
+#  define C_UEQ_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UEQ)
+#  define C_OLT_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_OLT)
+#  define C_OLT_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_OLT)
+#  define C_OLT_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_OLT)
+#  define C_ULT_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_ULT)
+#  define C_ULT_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_ULT)
+#  define C_ULT_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_ULT)
+#  define C_OLE_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_OLE)
+#  define C_OLE_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_OLE)
+#  define C_OLE_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_OLE)
+#  define C_ULE_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_ULE)
+#  define C_ULE_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_ULE)
+#  define C_ULE_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_ULE)
+#  define C_SF_S(fs,ft)                        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_SF)
+#  define C_SF_D(fs,ft)                        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_SF)
+#  define C_SF_PS(fs,ft)               c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_SF)
+#  define C_NGLE_S(fs,ft)              c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGLE)
+#  define C_NGLE_D(fs,ft)              c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGLE)
+#  define C_NGLE_PS(fs,ft)             c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGLE)
+#  define C_SEQ_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_SEQ)
+#  define C_SEQ_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_SEQ)
+#  define C_SEQ_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_SEQ)
+#  define C_NGL_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGL)
+#  define C_NGL_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGL)
+#  define C_NGL_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGL)
+#  define C_NLT_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NLT)
+#  define C_NLT_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NLT)
+#  define C_NLT_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NLT)
+#  define C_NGE_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGE)
+#  define C_NGE_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGE)
+#  define C_NGE_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGE)
+#  define C_NLE_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NLE)
+#  define C_NLE_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NLE)
+#  define C_NLE_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NLE)
+#  define C_UGT_S(fs,ft)               c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UGT)
+#  define C_UGT_D(fs,ft)               c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UGT)
+#  define C_UGT_PS(fs,ft)              c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UGT)
+#  define c_cond_fmt(fm,ft,fs,cc)      _c_cond_fmt(_jit,fm,ft,fs,cc)
+static void
+_c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
+           jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
+#  define addr_f(r0,r1,r2)             ADD_S(r0,r1,r2)
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define addr_d(r0,r1,r2)             ADD_D(r0,r1,r2)
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define subr_f(r0,r1,r2)             SUB_S(r0,r1,r2)
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define subr_d(r0,r1,r2)             SUB_D(r0,r1,r2)
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define rsbr_f(r0,r1,r2)             subr_f(r0,r2,r1)
+#  define rsbi_f(r0,r1,i0)             _rsbi_f(_jit,r0,r1,i0)
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define rsbr_d(r0,r1,r2)             subr_d(r0,r2,r1)
+#  define rsbi_d(r0,r1,i0)             _rsbi_d(_jit,r0,r1,i0)
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define mulr_f(r0,r1,r2)             MUL_S(r0,r1,r2)
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define mulr_d(r0,r1,r2)             MUL_D(r0,r1,r2)
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define divr_f(r0,r1,r2)             DIV_S(r0,r1,r2)
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define divr_d(r0,r1,r2)             DIV_D(r0,r1,r2)
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define absr_f(r0,r1)                        ABS_S(r0,r1)
+#  define absr_d(r0,r1)                        ABS_D(r0,r1)
+#  define negr_f(r0,r1)                        NEG_S(r0,r1)
+#  define negr_d(r0,r1)                        NEG_D(r0,r1)
+#  define sqrtr_f(r0,r1)               SQRT_S(r0,r1)
+#  define sqrtr_d(r0,r1)               SQRT_D(r0,r1)
+#  define movr_w_f(r0, r1)             MTC1(r1, r0)
+#  define movr_f_w(r0, r1)             MFC1(r1, r0)
+#  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
+static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
+static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_f_i(r0, r1)           _truncr_f_i(_jit, r0, r1)
+static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define truncr_f_l(r0, r1)         _truncr_f_l(_jit, r0, r1)
+static void _truncr_f_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define extr_d_f(r0, r1)             CVT_S_D(r0, r1)
+#  define ldr_f(r0, r1)                        LWC1(r0, 0, r1)
+#  define ldi_f(r0, i0)                        _ldi_f(_jit, r0, i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0, r1, r2)           _ldxr_f(_jit, r0, r1, r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0, r1, i0)           _ldxi_f(_jit, r0, r1, i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0, r1)                        SWC1(r1, 0, r0)
+#  define sti_f(i0, r0)                        _sti_f(_jit, i0, r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0, r1, r2)           _stxr_f(_jit, r0, r1, r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0, r0, r1)           _stxi_f(_jit, i0, r0, r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_f(r0, r1)               _movr_f(_jit, r0, r1)
+static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0, i0)               _movi_f(_jit, r0, i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  if NEW_ABI
+#    if __WORDSIZE == 32
+#      define movi64(r0, i0)           _movi64(_jit, r0, i0)
+static void _movi64(jit_state_t*,jit_int32_t,jit_int64_t);
+#    else
+#      define movi64(r0, i0)           movi(r0, i0)
+#    endif
+#    define movr_w_d(r0, r1)           DMTC1(r1, r0)
+#    define movr_d_w(r0, r1)           DMFC1(r0, r1)
+#    define movi_d_w(r0, i0)           _movi_d_w(_jit,r0,i0)
+static void _movi_d_w(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  else
+#    define movr_ww_d(r0, r1, r2)      _movr_ww_d(_jit, r0, r1, r2)
+static void _movr_ww_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define movr_d_ww(r0, r1, r2)      _movr_d_ww(_jit, r0, r1, r2)
+static void _movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define movi_d_ww(r0, r1, i0)      _movi_d_ww(_jit, r0, r1, i0)
+static void _movi_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  endif
+#  define extr_d(r0, r1)               _extr_d(_jit, r0, r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_d_i(r0, r1)           _truncr_d_i(_jit, r0, r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define truncr_d_l(r0, r1)         _truncr_d_l(_jit, r0, r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define ldr_d(r0, r1)                        _ldr_d(_jit, r0, r1)
+static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_d(r0, i0)                        _ldi_d(_jit, r0, i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0, r1, r2)           _ldxr_d(_jit, r0, r1, r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0, r1, i0)           _ldxi_d(_jit, r0, r1, i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_d(r0, r1)                        _str_d(_jit, r0, r1)
+static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_d(i0, r0)                        _sti_d(_jit, i0, r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0, r1, r2)           _stxr_d(_jit, r0, r1, r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0, r0, r1)           _stxi_d(_jit, i0, r0, r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_d(r0, r1)               _movr_d(_jit, r0, r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_d(r0, i0)               _movi_d(_jit, r0, i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  define ltr_f(r0, r1, r2)            _ltr_f(_jit, r0, r1, r2)
+static void _ltr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_f(r0, r1, i2)            _lti_f(_jit, r0, r1, i2)
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+/* Single-precision compare helpers.  Every macro forwards to the static
+ * helper of the same name, supplying the implicit _jit state as first
+ * argument.  The *r_f forms take two register operands after r0; the
+ * *i_f forms take a register and a pointer to a float immediate. */
+#  define ler_f(r0, r1, r2)            _ler_f(_jit, r0, r1, r2)
+static void _ler_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_f(r0, r1, i2)            _lei_f(_jit, r0, r1, i2)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define eqr_f(r0, r1, r2)            _eqr_f(_jit, r0, r1, r2)
+static void _eqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi_f(r0, r1, i2)            _eqi_f(_jit, r0, r1, i2)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ger_f(r0, r1, r2)            _ger_f(_jit, r0, r1, r2)
+static void _ger_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_f(r0, r1, i2)            _gei_f(_jit, r0, r1, i2)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gtr_f(r0, r1, r2)            _gtr_f(_jit, r0, r1, r2)
+static void _gtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_f(r0, r1, i2)            _gti_f(_jit, r0, r1, i2)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ner_f(r0, r1, r2)            _ner_f(_jit, r0, r1, r2)
+static void _ner_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei_f(r0, r1, i2)            _nei_f(_jit, r0, r1, i2)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unltr_f(r0, r1, r2)          _unltr_f(_jit, r0, r1, r2)
+static void _unltr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlti_f(r0, r1, i2)          _unlti_f(_jit, r0, r1, i2)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unler_f(r0, r1, r2)          _unler_f(_jit, r0, r1, r2)
+static void _unler_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlei_f(r0, r1, i2)          _unlei_f(_jit, r0, r1, i2)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define uneqr_f(r0, r1, r2)          _uneqr_f(_jit, r0, r1, r2)
+static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0, r1, i2)          _uneqi_f(_jit, r0, r1, i2)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unger_f(r0, r1, r2)          _unger_f(_jit, r0, r1, r2)
+static void _unger_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungei_f(r0, r1, i2)          _ungei_f(_jit, r0, r1, i2)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungtr_f(r0, r1, r2)          _ungtr_f(_jit, r0, r1, r2)
+static void _ungtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungti_f(r0, r1, i2)          _ungti_f(_jit, r0, r1, i2)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ltgtr_f(r0, r1, r2)          _ltgtr_f(_jit, r0, r1, r2)
+static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0, r1, i2)          _ltgti_f(_jit, r0, r1, i2)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ordr_f(r0, r1, r2)           _ordr_f(_jit, r0, r1, r2)
+static void _ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ordi_f(r0, r1, i2)           _ordi_f(_jit, r0, r1, i2)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unordr_f(r0, r1, r2)         _unordr_f(_jit, r0, r1, r2)
+static void _unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unordi_f(r0, r1, i2)         _unordi_f(_jit, r0, r1, i2)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+/* Single-precision conditional branch helpers.  i0 is the jit_word_t
+ * branch target; each helper returns a jit_word_t.  The b*r_f forms
+ * compare two registers, the b*i_f forms compare a register with a
+ * float immediate passed by pointer. */
+#  define bltr_f(i0, r0, r1)           _bltr_f(_jit, i0, r0, r1)
+static jit_word_t _bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_f(i0, r0, i1)           _blti_f(_jit, i0, r0, i1)
+static jit_word_t
+_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bler_f(i0, r0, r1)           _bler_f(_jit, i0, r0, r1)
+static jit_word_t _bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_f(i0, r0, i1)           _blei_f(_jit, i0, r0, i1)
+static jit_word_t
+_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define beqr_f(i0, r0, r1)           _beqr_f(_jit, i0, r0, r1)
+static jit_word_t _beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_f(i0, r0, i1)           _beqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bger_f(i0, r0, r1)           _bger_f(_jit, i0, r0, r1)
+static jit_word_t _bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_f(i0, r0, i1)           _bgei_f(_jit, i0, r0, i1)
+static jit_word_t
+_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bgtr_f(i0, r0, r1)           _bgtr_f(_jit, i0, r0, r1)
+static jit_word_t _bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_f(i0, r0, i1)           _bgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bner_f(i0, r0, r1)           _bner_f(_jit, i0, r0, r1)
+static jit_word_t _bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_f(i0, r0, i1)           _bnei_f(_jit, i0, r0, i1)
+static jit_word_t
+_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bunltr_f(i0, r0, r1)         _bunltr_f(_jit, i0, r0, r1)
+static jit_word_t _bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_f(i0, r0, i1)         _bunlti_f(_jit, i0, r0, i1)
+static jit_word_t
+_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bunler_f(i0, r0, r1)         _bunler_f(_jit, i0, r0, r1)
+static jit_word_t _bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_f(i0, r0, i1)         _bunlei_f(_jit, i0, r0, i1)
+static jit_word_t
+_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define buneqr_f(i0, r0, r1)         _buneqr_f(_jit, i0, r0, r1)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0, r0, i1)         _buneqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bunger_f(i0, r0, r1)         _bunger_f(_jit, i0, r0, r1)
+static jit_word_t _bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_f(i0, r0, i1)         _bungei_f(_jit, i0, r0, i1)
+static jit_word_t
+_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bungtr_f(i0, r0, r1)         _bungtr_f(_jit, i0, r0, r1)
+static jit_word_t _bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_f(i0, r0, i1)         _bungti_f(_jit, i0, r0, i1)
+static jit_word_t
+_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bltgtr_f(i0, r0, r1)         _bltgtr_f(_jit, i0, r0, r1)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0, r0, i1)         _bltgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bordr_f(i0, r0, r1)          _bordr_f(_jit, i0, r0, r1)
+static jit_word_t _bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_f(i0, r0, i1)          _bordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define bunordr_f(i0, r0, r1)                _bunordr_f(_jit, i0, r0, r1)
+static jit_word_t _bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_f(i0, r0, i1)                _bunordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+/* extr_f_d maps straight onto the CVT_D_S instruction macro; no helper
+ * function is involved. */
+#  define extr_f_d(r0, r1)             CVT_D_S(r0, r1)
+/* Double-precision compare helpers, mirroring the _f set: *r_d forms
+ * take two registers after r0, *i_d forms take a register and a
+ * pointer to a jit_float64_t immediate. */
+#  define ltr_d(r0, r1, r2)            _ltr_d(_jit, r0, r1, r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_d(r0, r1, i2)            _lti_d(_jit, r0, r1, i2)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ler_d(r0, r1, r2)            _ler_d(_jit, r0, r1, r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_d(r0, r1, i2)            _lei_d(_jit, r0, r1, i2)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define eqr_d(r0, r1, r2)            _eqr_d(_jit, r0, r1, r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi_d(r0, r1, i2)            _eqi_d(_jit, r0, r1, i2)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ger_d(r0, r1, r2)            _ger_d(_jit, r0, r1, r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_d(r0, r1, i2)            _gei_d(_jit, r0, r1, i2)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define gtr_d(r0, r1, r2)            _gtr_d(_jit, r0, r1, r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_d(r0, r1, i2)            _gti_d(_jit, r0, r1, i2)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ner_d(r0, r1, r2)            _ner_d(_jit, r0, r1, r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei_d(r0, r1, i2)            _nei_d(_jit, r0, r1, i2)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unltr_d(r0, r1, r2)          _unltr_d(_jit, r0, r1, r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlti_d(r0, r1, i2)          _unlti_d(_jit, r0, r1, i2)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unler_d(r0, r1, r2)          _unler_d(_jit, r0, r1, r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlei_d(r0, r1, i2)          _unlei_d(_jit, r0, r1, i2)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define uneqr_d(r0, r1, r2)          _uneqr_d(_jit, r0, r1, r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_d(r0, r1, i2)          _uneqi_d(_jit, r0, r1, i2)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unger_d(r0, r1, r2)          _unger_d(_jit, r0, r1, r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungei_d(r0, r1, i2)          _ungei_d(_jit, r0, r1, i2)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ungtr_d(r0, r1, r2)          _ungtr_d(_jit, r0, r1, r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungti_d(r0, r1, i2)          _ungti_d(_jit, r0, r1, i2)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltgtr_d(r0, r1, r2)          _ltgtr_d(_jit, r0, r1, r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_d(r0, r1, i2)          _ltgti_d(_jit, r0, r1, i2)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ordr_d(r0, r1, r2)           _ordr_d(_jit, r0, r1, r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ordi_d(r0, r1, i2)           _ordi_d(_jit, r0, r1, i2)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unordr_d(r0, r1, r2)         _unordr_d(_jit, r0, r1, r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unordi_d(r0, r1, i2)         _unordi_d(_jit, r0, r1, i2)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Double-precision conditional branch helpers.  i0 is the jit_word_t
+ * branch target and each helper returns a jit_word_t; b*r_d forms
+ * compare two registers, b*i_d forms compare a register with a
+ * jit_float64_t immediate passed by pointer. */
+#  define bltr_d(i0, r0, r1)           _bltr_d(_jit, i0, r0, r1)
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_d(i0, r0, i1)           _blti_d(_jit, i0, r0, i1)
+static jit_word_t
+_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bler_d(i0, r0, r1)           _bler_d(_jit, i0, r0, r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_d(i0, r0, i1)           _blei_d(_jit, i0, r0, i1)
+static jit_word_t
+_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define beqr_d(i0, r0, r1)           _beqr_d(_jit, i0, r0, r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_d(i0, r0, i1)           _beqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bger_d(i0, r0, r1)           _bger_d(_jit, i0, r0, r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_d(i0, r0, i1)           _bgei_d(_jit, i0, r0, i1)
+static jit_word_t
+_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bgtr_d(i0, r0, r1)           _bgtr_d(_jit, i0, r0, r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_d(i0, r0, i1)           _bgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bner_d(i0, r0, r1)           _bner_d(_jit, i0, r0, r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_d(i0, r0, i1)           _bnei_d(_jit, i0, r0, i1)
+static jit_word_t
+_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bunltr_d(i0, r0, r1)         _bunltr_d(_jit, i0, r0, r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_d(i0, r0, i1)         _bunlti_d(_jit, i0, r0, i1)
+static jit_word_t
+_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bunler_d(i0, r0, r1)         _bunler_d(_jit, i0, r0, r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_d(i0, r0, i1)         _bunlei_d(_jit, i0, r0, i1)
+static jit_word_t
+_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define buneqr_d(i0, r0, r1)         _buneqr_d(_jit, i0, r0, r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_d(i0, r0, i1)         _buneqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bunger_d(i0, r0, r1)         _bunger_d(_jit, i0, r0, r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_d(i0, r0, i1)         _bungei_d(_jit, i0, r0, i1)
+static jit_word_t
+_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bungtr_d(i0, r0, r1)         _bungtr_d(_jit, i0, r0, r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_d(i0, r0, i1)         _bungti_d(_jit, i0, r0, i1)
+static jit_word_t
+_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bltgtr_d(i0, r0, r1)         _bltgtr_d(_jit, i0, r0, r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_d(i0, r0, i1)         _bltgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bordr_d(i0, r0, r1)          _bordr_d(_jit, i0, r0, r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_d(i0, r0, i1)          _bordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define bunordr_d(i0, r0, r1)                _bunordr_d(_jit, i0, r0, r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_d(i0, r0, i1)                _bunordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+/* vaarg_d forwards to _vaarg_d with the implicit _jit state; the helper
+ * takes two register numbers and returns nothing. */
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/* Build one instruction word from the given field values (fm, ft, fs,
+ * cc) with MIPS_COP1 in the hc field, then emit it through ii().
+ * NOTE(review): `i` is never zero-initialised, so this relies on the
+ * five bit-fields together covering the whole word -- confirm against
+ * the jit_instr_t definition. */
+static void
+_c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
+           jit_int32_t ft, jit_int32_t fs, jit_int32_t cc)
+{
+    jit_instr_t                i;
+    i.cc.b = cc;
+    i.fs.b = fs;
+    i.ft.b = ft;
+    i.fm.b = fm;
+    i.hc.b = MIPS_COP1;
+    ii(i.op);
+}
+
+/* fpr_opi(name, type, size) defines _<name>i_<type>(): the immediate
+ * form of a three-operand FP operation.  The generated function grabs
+ * a scratch FPR, loads the float<size> immediate *i0 into it with
+ * movi_<type>, runs the register form <name>r_<type>(r0, r1, scratch)
+ * and releases the scratch register. */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                        \
+}
+/* fpr_bopi(name, type, size) defines _b<name>i_<type>(): the immediate
+ * form of an FP conditional branch.  The generated function loads the
+ * float<size> immediate *i1 into a scratch FPR requested with
+ * jit_class_nospill, emits the register form b<name>r_<type>(i0, r0,
+ * scratch) and hands back whatever that call returned. */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
+                                         jit_class_nospill);           \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+/* Shorthands that fix the type suffix and immediate width:
+ * f = single precision (jit_float32_t), d = double (jit_float64_t). */
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+
+/* Instantiate _addi_f, _subi_f, _rsbi_f, _muli_f and _divi_f. */
+fopi(add)
+fopi(sub)
+fopi(rsb)
+fopi(mul)
+fopi(div)
+
+/* Put the raw 32-bit pattern of the float at *i0 into r0 through the
+ * integer movi path. */
+static void
+_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_float32_t   as_float;
+       jit_int32_t     as_bits;
+    } pun;
+
+    /* Reinterpret the storage; no numeric conversion takes place. */
+    pun.as_float = *i0;
+    movi(r0, pun.as_bits);
+}
+
+/* Convert the integer held in r1 to single precision in r0.  The value
+ * is first moved into a scratch FPR, then converted with the word
+ * (32-bit build) or long (64-bit build) variant of CVT_S. */
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 32
+    MTC1(r1, rn(scratch));
+    CVT_S_W(r0, rn(scratch));
+#  else
+    DMTC1(r1, rn(scratch));
+    CVT_S_L(r0, rn(scratch));
+#  endif
+    jit_unget_reg(scratch);
+}
+
+/* Truncate the single-precision value in r1 to a word: TRUNC_W_S
+ * leaves the result in a scratch FPR, MFC1 then copies it to r0. */
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+    TRUNC_W_S(rn(scratch), r1);
+    MFC1(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+#  if __WORDSIZE == 64
+/* 64-bit builds only: truncate the single-precision value in r1 with
+ * TRUNC_L_S into a scratch FPR and move the result to r0 via DMFC1. */
+static void
+_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+    TRUNC_L_S(rn(scratch), r1);
+    DMFC1(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+#  endif
+
+/* Load a float from the absolute address i0 into r0. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    /* Short address: use it directly as displacement off $zero. */
+    if (can_sign_extend_short_p(i0)) {
+       LWC1(r0, i0, _ZERO_REGNO);
+       return;
+    }
+
+    /* Otherwise build the address in a scratch GPR first. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a float from address r1 + r2 into r0; the sum is formed in a
+ * scratch GPR. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r1, r2);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a float from address r1 + i0 into r0. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    /* Offset fits the instruction's displacement field. */
+    if (can_sign_extend_short_p(i0)) {
+       LWC1(r0, i0, r1);
+       return;
+    }
+
+    /* Too large: compute base + offset into a scratch GPR. */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r0 at the absolute address i0. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp;
+
+    /* Short address: use it directly as displacement off $zero. */
+    if (can_sign_extend_short_p(i0)) {
+       SWC1(r0, i0, _ZERO_REGNO);
+       return;
+    }
+
+    /* Otherwise build the address in a scratch GPR first. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_f(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r2 at address r0 + r1; the sum is formed in a
+ * scratch GPR. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_f(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r1 at address r0 + i0. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+
+    /* Offset fits the instruction's displacement field. */
+    if (can_sign_extend_short_p(i0)) {
+       SWC1(r1, i0, r0);
+       return;
+    }
+
+    /* Too large: compute base + offset into a scratch GPR. */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_f(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Copy single-precision r1 to r0; emits nothing when both name the
+ * same register. */
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    MOV_S(r0, r1);
+}
+
+/* Load the float constant *i0 into FPR r0.
+ *   - all-zero bit pattern: move $zero straight into the FPR;
+ *   - no_data set: materialise the bits in a scratch GPR and move them;
+ *   - otherwise: load from the constant's own address. */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_float32_t   f;
+       jit_int32_t     i;
+    } bits;
+    jit_int32_t tmp;
+
+    bits.f = *i0;
+
+    /* Only a pattern of all zero bits takes this shortcut. */
+    if (bits.i == 0) {
+       MTC1(_ZERO_REGNO, r0);
+       return;
+    }
+
+    if (!_jitc->no_data) {
+       ldi_f(r0, (jit_word_t)i0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i);
+    MTC1(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Instantiate the double-precision immediate arithmetic helpers
+ * _addi_d, _subi_d, _rsbi_d, _muli_d and _divi_d via fpr_opi. */
+dopi(add)
+dopi(sub)
+dopi(rsb)
+dopi(mul)
+dopi(div)
+
+#if NEW_ABI
+/* n32 abi requires 64 bit cpu */
+/* Materialise the 64-bit constant i0 in GPR r0, picking the shortest
+ * sequence the value allows:
+ *   0                       -> OR    r0, $zero, $zero
+ *   signed 16-bit           -> DADDIU r0, $zero, imm
+ *   unsigned 16-bit         -> ORI   r0, $zero, imm
+ *   everything else         -> build the upper bits, shift, then ORI
+ *                              in the low 16 bits when they are nonzero.
+ */
+static void
+_movi64(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0)
+{
+    if (i0 == 0)
+       OR(r0, _ZERO_REGNO, _ZERO_REGNO);
+    /* Full signed 16-bit immediate range.  The lower bound was mistyped
+     * as -32678, which pushed -32768..-32679 onto the long path. */
+    else if (i0 >= -32768 && i0 <= 32767)
+       DADDIU(r0, _ZERO_REGNO, i0);
+    else if (i0 >= 0 && i0 <= 65535)
+       ORI(r0, _ZERO_REGNO, i0);
+    else {
+       /* Positive and below 2^31: LUI places bits 31..16 directly. */
+       if (i0 >= 0 && i0 <= 0x7fffffffLL)
+           LUI(r0, i0 >> 16);
+       /* Bit 31 set but nothing above it: ORI + shift instead of LUI. */
+       else if (i0 >= 0 && i0 <= 0xffffffffLL) {
+           if (i0 & 0xffff0000LL) {
+               ORI(r0, _ZERO_REGNO, (jit_word_t)(i0 >> 16));
+               DSLL(r0, r0, 16);
+           }
+       }
+       /* Negative, or wider than 32 bits: start from the upper word. */
+       else {
+           movi(r0, (jit_word_t)(i0 >> 32));
+           if (i0 & 0xffff0000LL) {
+               DSLL(r0, r0, 16);
+               ORI(r0, r0, (jit_word_t)(i0 >> 16));
+               DSLL(r0, r0, 16);
+           }
+           else
+               DSLL32(r0, r0, 0);
+       }
+       /* Low 16 bits, shared by all three long paths. */
+       if ((jit_word_t)i0 & 0xffff)
+           ORI(r0, r0, (jit_word_t)i0 & 0xffff);
+    }
+}
+
+/* Put the 64-bit pattern of the double at *i0 into GPR r0.  With
+ * no_data set the bits are built with movi64; otherwise they are read
+ * back from the constant's own address with LD. */
+static void
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    jit_word_t where;
+    union {
+       jit_float64_t   d;
+       jit_int64_t     l;
+    } bits;
+
+    if (!_jitc->no_data) {
+       where = (jit_word_t)i0;
+       if (can_sign_extend_short_p(where))
+           LD(r0, where, _ZERO_REGNO);
+       else {
+           /* r0 doubles as the address register for the load. */
+           movi(r0, where);
+           LD(r0, 0, r0);
+       }
+       return;
+    }
+
+    bits.d = *i0;
+    movi64(r0, bits.l);
+}
+
+#else
+/* Move the GPR pair r1/r2 into the FPR pair starting at r0.  The
+ * assert requires r1 and r2 to be consecutive registers.
+ * NOTE(review): BE_P / LE_P are presumably byte-order dependent 0/1
+ * selectors for the two halves -- confirm where they are defined. */
+static void
+_movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 == r2 - 1);
+    MTC1(r1, r0 + BE_P);
+    MTC1(r2, r0 + LE_P);
+}
+
+/* Move the FPR pair starting at r2 into the GPR pair r0/r1.  The
+ * assert requires r0 and r1 to be consecutive registers.
+ * NOTE(review): BE_P / LE_P are presumably byte-order dependent 0/1
+ * selectors for the two halves -- confirm where they are defined. */
+static void
+_movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r0 == r1 - 1);
+    MFC1(r0, r2 + BE_P);
+    MFC1(r1, r2 + LE_P);
+}
+
+/* Load the two 32-bit words of the double at *i0 into r0 and r1.  r0
+ * receives the word stored at the lower address, r1 the following one;
+ * which half of the double that is follows the host byte order. */
+static void
+_movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    union {
+       jit_float64_t   d;
+       jit_int64_t     l;
+       jit_int32_t     i[2];
+    } halves;
+
+    halves.d = *i0;
+    movi(r0, halves.i[0]);
+    movi(r1, halves.i[1]);
+}
+#endif
+
+/* Convert the integer held in r1 to double precision in r0.  The value
+ * is first moved into a scratch FPR, then converted with the word
+ * (32-bit build) or long (64-bit build) variant of CVT_D. */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 32
+    MTC1(r1, rn(scratch));
+    CVT_D_W(r0, rn(scratch));
+#  else
+    DMTC1(r1, rn(scratch));
+    CVT_D_L(r0, rn(scratch));
+#  endif
+    jit_unget_reg(scratch);
+}
+
+/* Truncate the double-precision value in r1 to a word: TRUNC_W_D
+ * leaves the result in a scratch FPR, MFC1 then copies it to r0. */
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+    TRUNC_W_D(rn(scratch), r1);
+    MFC1(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+#  if __WORDSIZE == 64
+/* 64-bit builds only: truncate the double-precision value in r1 with
+ * TRUNC_L_D into a scratch FPR and move the result to r0 via DMFC1. */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_fpr);
+    TRUNC_L_D(rn(scratch), r1);
+    DMFC1(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+#  endif
+
+/* Load a double from the address in r1 into r0.  64-bit and NEW_ABI
+ * builds use one LDC1; otherwise the value is read as two words at
+ * offsets 0 and 4 into the register pair r0 + BE_P / r0 + LE_P. */
+static void
+_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#  if __WORDSIZE == 64 || NEW_ABI
+    LDC1(r0, 0, r1);
+#  else
+    LWC1(r0 + BE_P, 0, r1);
+    LWC1(r0 + LE_P, 4, r1);
+#  endif
+}
+
+/* Load a double from the absolute address i0 into r0.  When the
+ * address (and, for the two-word variant, address + 4) fits a short
+ * displacement it is used off $zero; otherwise the address is first
+ * placed in a scratch GPR. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t tmp;
+#  if __WORDSIZE == 64 || NEW_ABI
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       LDC1(r0, 0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    LDC1(r0, i0, _ZERO_REGNO);
+#  else
+    /* Both word accesses must be encodable to take the short form. */
+    if (!can_sign_extend_short_p(i0) || !can_sign_extend_short_p(i0 + 4)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       LWC1(r0 + BE_P, 0, rn(tmp));
+       LWC1(r0 + LE_P, 4, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    LWC1(r0 + BE_P, i0, _ZERO_REGNO);
+    LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+#  endif
+}
+
+/* Load a double from address r1 + r2 into r0; the sum is formed in a
+ * scratch GPR. */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r1, r2);
+    ldr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a double from address r1 + i0 into r0.  The short-displacement
+ * form is used when the offset (and offset + 4 for the two-word
+ * variant) is encodable; otherwise the address goes through a scratch
+ * GPR and ldr_d. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+#  if __WORDSIZE == 64 || NEW_ABI
+    if (can_sign_extend_short_p(i0)) {
+       LDC1(r0, i0, r1);
+       return;
+    }
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       LWC1(r0 + BE_P, i0, r1);
+       LWC1(r0 + LE_P, i0 + 4, r1);
+       return;
+    }
+#  endif
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r1 at the address in r0.  64-bit and NEW_ABI
+ * builds use one SDC1; otherwise the register pair r1 + BE_P /
+ * r1 + LE_P is written as two words at offsets 0 and 4. */
+static void
+_str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
+{
+#  if __WORDSIZE == 64 || NEW_ABI
+    SDC1(r1, 0, r0);
+#  else
+    SWC1(r1 + BE_P, 0, r0);
+    SWC1(r1 + LE_P, 4, r0);
+#  endif
+}
+
+/* Store the double in r0 at the absolute address i0.  The short form
+ * off $zero is used when the address (and address + 4 for the
+ * two-word variant) is encodable; otherwise the address is placed in
+ * a scratch GPR and str_d does the store. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t tmp;
+#  if __WORDSIZE == 64 || NEW_ABI
+    if (can_sign_extend_short_p(i0)) {
+       SDC1(r0, i0, _ZERO_REGNO);
+       return;
+    }
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       SWC1(r0 + BE_P, i0, _ZERO_REGNO);
+       SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+       return;
+    }
+#  endif
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_d(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r2 at address r0 + r1; the sum is formed in a
+ * scratch GPR. */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_d(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r1 at address r0 + i0.  The short-displacement
+ * form is used when the offset (and offset + 4 for the two-word
+ * variant) is encodable; otherwise the address goes through a scratch
+ * GPR and str_d. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t tmp;
+#  if __WORDSIZE == 64 || NEW_ABI
+    if (can_sign_extend_short_p(i0)) {
+       SDC1(r1, i0, r0);
+       return;
+    }
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       SWC1(r1 + BE_P, i0, r0);
+       SWC1(r1 + LE_P, i0 + 4, r0);
+       return;
+    }
+#  endif
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_d(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Copy double-precision r1 to r0; emits nothing when both name the
+ * same register. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    MOV_D(r0, r1);
+}
+
+/* Load the double constant *i0 into FPR r0.
+ *
+ * 64-bit / NEW_ABI builds treat the value as one 64-bit pattern: zero
+ * is moved from $zero, otherwise the bits are either built in a scratch
+ * GPR (no_data) or loaded from the constant's address.
+ *
+ * Other builds handle the two 32-bit words separately, writing word 0
+ * to r0 + BE_P and word 1 (at address + 4) to r0 + LE_P, with the same
+ * zero / no_data / load-from-memory choice per word. */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_int32_t     i[2];
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } data;
+    jit_int32_t                reg;
+
+    data.d = *i0;
+#  if __WORDSIZE == 64 || NEW_ABI
+    if (data.l) {
+       if (_jitc->no_data) {
+           reg = jit_get_reg(jit_class_gpr);
+           movi64(rn(reg), data.l);
+           DMTC1(rn(reg), r0);
+           jit_unget_reg(reg);
+       }
+       else
+           ldi_d(r0, (jit_word_t)i0);
+    }
+    else
+       DMTC1(_ZERO_REGNO, r0);
+#  else
+    /* reg is only acquired, used and released when no_data is set */
+    if (_jitc->no_data)
+       reg = jit_get_reg(jit_class_gpr);
+    if (data.i[0]) {
+       if (_jitc->no_data) {
+           movi(rn(reg), data.i[0]);
+           MTC1(rn(reg), r0 + BE_P);
+       }
+       else
+           ldi_f(r0 + BE_P, (jit_word_t)i0);
+    }
+    else
+       MTC1(_ZERO_REGNO, r0 + BE_P);
+    if (data.i[1]) {
+       if (_jitc->no_data) {
+           movi(rn(reg), data.i[1]);
+           MTC1(rn(reg), r0 + LE_P);
+       }
+       else
+           ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+    }
+    else
+       MTC1(_ZERO_REGNO, r0 + LE_P);
+    if (_jitc->no_data)
+       jit_unget_reg(reg);
+#  endif
+}
+
+/* r0 = 1 when r1 < r2 with both operands ordered, else 0 (single
+ * precision).  Reads C_OLT_S as MIPS c.olt.s, "ordered less than". */
+static void
+_ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _lti_f: same test against a float immediate (see fpr_opi) */
+fopi(lt)
+
+/* r0 = 1 when r1 <= r2 with both operands ordered, else 0 (single
+ * precision).  Reads C_OLE_S as MIPS c.ole.s, "ordered less or equal". */
+static void
+_ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _lei_f: same test against a float immediate (see fpr_opi) */
+fopi(le)
+
+/* r0 = 1 when r1 == r2, else 0 (single precision).  Reads C_EQ_S as
+ * MIPS c.eq.s, which is false for unordered operands. */
+static void
+_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _eqi_f: same test against a float immediate (see fpr_opi) */
+fopi(eq)
+
+/* r0 = 1 when r1 >= r2 with both operands ordered, else 0 (single
+ * precision).  Implemented as the negation of C_ULT_S ("unordered or
+ * less than", MIPS c.ult.s) by branching on false. */
+static void
+_ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _gei_f: same test against a float immediate (see fpr_opi) */
+fopi(ge)
+
+/* r0 = 1 when r1 > r2 with both operands ordered, else 0 (single
+ * precision).  Implemented as the negation of C_ULE_S ("unordered or
+ * less or equal", MIPS c.ule.s) by branching on false. */
+static void
+_gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _gti_f: same test against a float immediate (see fpr_opi) */
+fopi(gt)
+
+/* r0 = 1 when r1 != r2, else 0 (single precision).  Negates C_EQ_S by
+ * branching on false, so unordered operands also yield 1. */
+static void
+_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _nei_f: same test against a float immediate (see fpr_opi) */
+fopi(ne)
+
+/* r0 = 1 when the operands are unordered or r1 < r2, else 0 (single
+ * precision).  Reads C_ULT_S as MIPS c.ult.s. */
+static void
+_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _unlti_f: same test against a float immediate (see fpr_opi) */
+fopi(unlt)
+
+/* r0 = 1 when the operands are unordered or r1 <= r2, else 0 (single
+ * precision).  Reads C_ULE_S as MIPS c.ule.s. */
+static void
+_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _unlei_f: same test against a float immediate (see fpr_opi) */
+fopi(unle)
+
+/* r0 = 1 when the operands are unordered or equal, else 0 (single
+ * precision).  Reads C_UEQ_S as MIPS c.ueq.s. */
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _uneqi_f: same test against a float immediate (see fpr_opi) */
+fopi(uneq)
+
+/* r0 = 1 when the operands are unordered or r1 >= r2, else 0 (single
+ * precision).  Implemented as the negation of C_OLT_S ("ordered less
+ * than") by branching on false. */
+static void
+_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _ungei_f: same test against a float immediate (see fpr_opi) */
+fopi(unge)
+
+/* r0 = 1 when the operands are unordered or r1 > r2, else 0 (single
+ * precision).  Implemented as the negation of C_OLE_S ("ordered less
+ * or equal") by branching on false. */
+static void
+_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _ungti_f: same test against a float immediate (see fpr_opi) */
+fopi(ungt)
+
+/* r0 = 1 when the operands are ordered and differ (r1 < r2 or
+ * r1 > r2), else 0 (single precision).  Implemented as the negation of
+ * C_UEQ_S ("unordered or equal") by branching on false. */
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _ltgti_f: same test against a float immediate (see fpr_opi) */
+fopi(ltgt)
+
+/* r0 = 1 when r1 and r2 are ordered (neither is a NaN), else 0 (single
+ * precision).  Negates C_UN_S ("unordered", MIPS c.un.s) by branching
+ * on false. */
+static void
+_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1F(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _ordi_f: same test against a float immediate (see fpr_opi) */
+fopi(ord)
+
+/* r0 = 1 when r1 and r2 are unordered (at least one NaN), else 0
+ * (single precision).  Reads C_UN_S as MIPS c.un.s. */
+static void
+_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;
+    /* offset 0 is a placeholder; patch_at() below gets this address */
+    BC1T(0);
+    /* delay slot */
+    movi(r0, 1);
+    /* skipped when the branch is taken, leaving r0 == 1 */
+    movi(r0, 0);
+    patch_at(w, _jit->pc.w);
+}
+/* _unordi_f: same test against a float immediate (see fpr_opi) */
+fopi(unord)
+
+/* Branch to i0 when r1 < r2, both ordered (single precision).
+ * Returns the address of the emitted branch instruction. */
+static jit_word_t
+_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t at;
+    jit_word_t disp;
+
+    C_OLT_S(r1, r2);
+    at = _jit->pc.w;
+    /* Distance in instruction words, less one for the branch itself. */
+    disp = ((i0 - at) >> 2) - 1;
+    BC1T(disp);
+    NOP(1);
+    return (at);
+}
+/* _blti_f: immediate form generated by fpr_bopi */
+fbopi(lt)
+
+/* Branch to i0 when r1 <= r2, both ordered (single precision).
+ * Returns the address of the emitted branch instruction. */
+static jit_word_t
+_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t at;
+    jit_word_t disp;
+
+    C_OLE_S(r1, r2);
+    at = _jit->pc.w;
+    /* Distance in instruction words, less one for the branch itself. */
+    disp = ((i0 - at) >> 2) - 1;
+    BC1T(disp);
+    NOP(1);
+    return (at);
+}
+/* _blei_f: immediate form generated by fpr_bopi */
+fbopi(le)
+
+static jit_word_t
+_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 == r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_EQ_S(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(eq)
+
+static jit_word_t
+_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 >= r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_ULT_S(r1, r2);   /* FP condition := unordered or r1 < r2, i.e. the negation of ordered >= */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(ge)
+
+static jit_word_t
+_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 > r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_ULE_S(r1, r2);   /* FP condition := unordered or r1 <= r2, i.e. the negation of ordered > */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(gt)
+
+static jit_word_t
+_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 != r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_EQ_S(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(ne)
+
+static jit_word_t
+_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 < r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_ULT_S(r1, r2);   /* FP condition := unordered or r1 < r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(unlt)
+
+static jit_word_t
+_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 <= r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_ULE_S(r1, r2);   /* FP condition := unordered or r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(unle)
+
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 == r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(uneq)
+
+static jit_word_t
+_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 >= r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_OLT_S(r1, r2);   /* FP condition := ordered and r1 < r2, i.e. the negation of unordered-or->= */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(unge)
+
+static jit_word_t
+_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 > r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_OLE_S(r1, r2);   /* FP condition := ordered and r1 <= r2, i.e. the negation of unordered-or-> */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(ungt)
+
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 and r2 are ordered and differ; returns the branch pc. */
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(ltgt)
+
+static jit_word_t
+_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if neither r1 nor r2 is NaN; returns the branch pc. */
+    jit_word_t         w;
+    C_UN_S(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(ord)
+
+static jit_word_t
+_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a single-precision branch to address i0, taken if r1 or r2 is NaN; returns the branch pc. */
+    jit_word_t         w;
+    C_UN_S(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+fbopi(unord)
+
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "<": r0 = 1 if r1 < r2, else 0 (0 when an operand is NaN). */
+    jit_word_t         w;
+    C_OLT_D(r1, r2);   /* FP condition := ordered and r1 < r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(lt)
+
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "<=": r0 = 1 if r1 <= r2, else 0 (0 when an operand is NaN). */
+    jit_word_t         w;
+    C_OLE_D(r1, r2);   /* FP condition := ordered and r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(le)
+
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "==": r0 = 1 if r1 == r2, else 0 (0 when an operand is NaN). */
+    jit_word_t         w;
+    C_EQ_D(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(eq)
+
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision ">=": r0 = 1 if r1 >= r2, else 0 (0 when an operand is NaN). */
+    jit_word_t         w;
+    C_ULT_D(r1, r2);   /* FP condition := unordered or r1 < r2, i.e. the negation of ordered >= */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(ge)
+
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision ">": r0 = 1 if r1 > r2, else 0 (0 when an operand is NaN). */
+    jit_word_t         w;
+    C_ULE_D(r1, r2);   /* FP condition := unordered or r1 <= r2, i.e. the negation of ordered > */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(gt)
+
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "!=": r0 = 1 if r1 != r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_EQ_D(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(ne)
+
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered or <": r0 = 1 if r1 < r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_ULT_D(r1, r2);   /* FP condition := unordered or r1 < r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(unlt)
+
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered or <=": r0 = 1 if r1 <= r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_ULE_D(r1, r2);   /* FP condition := unordered or r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(unle)
+
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered or ==": r0 = 1 if r1 == r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(uneq)
+
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered or >=": r0 = 1 if r1 >= r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_OLT_D(r1, r2);   /* FP condition := ordered and r1 < r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(unge)
+
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered or >": r0 = 1 if r1 > r2 or an operand is NaN, else 0. */
+    jit_word_t         w;
+    C_OLE_D(r1, r2);   /* FP condition := ordered and r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(ungt)
+
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "less or greater": r0 = 1 if r1 and r2 are ordered and differ, else 0. */
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(ltgt)
+
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "ordered": r0 = 1 if neither r1 nor r2 is NaN, else 0. */
+    jit_word_t         w;
+    C_UN_D(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1F(0);   /* taken when the condition is false; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(ord)
+
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit double-precision "unordered": r0 = 1 if r1 or r2 is NaN, else 0. */
+    jit_word_t         w;
+    C_UN_D(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the branch emitted next */
+    BC1T(0);   /* taken when the condition is true; offset 0 is a placeholder until patch_at() */
+    /* delay slot */
+    movi(r0, 1);
+    movi(r0, 0);   /* skipped when the branch is taken, so r0 stays 1 */
+    patch_at(w, _jit->pc.w);   /* branch target is the current pc, just past the second movi */
+}
+dopi(unord)
+
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 < r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_OLT_D(r1, r2);   /* FP condition := ordered and r1 < r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(lt)
+
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 <= r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_OLE_D(r1, r2);   /* FP condition := ordered and r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(le)
+
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 == r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_EQ_D(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(eq)
+
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 >= r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_ULT_D(r1, r2);   /* FP condition := unordered or r1 < r2, i.e. the negation of ordered >= */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(ge)
+
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 > r2 (not when unordered); returns the branch pc. */
+    jit_word_t         w;
+    C_ULE_D(r1, r2);   /* FP condition := unordered or r1 <= r2, i.e. the negation of ordered > */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(gt)
+
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 != r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_EQ_D(r1, r2);   /* FP condition := r1 == r2 (false when unordered) */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(ne)
+
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 < r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_ULT_D(r1, r2);   /* FP condition := unordered or r1 < r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(unlt)
+
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 <= r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_ULE_D(r1, r2);   /* FP condition := unordered or r1 <= r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(unle)
+
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 == r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(uneq)
+
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 >= r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_OLT_D(r1, r2);   /* FP condition := ordered and r1 < r2, i.e. the negation of unordered-or->= */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(unge)
+
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 > r2 or the operands are unordered; returns the branch pc. */
+    jit_word_t         w;
+    C_OLE_D(r1, r2);   /* FP condition := ordered and r1 <= r2, i.e. the negation of unordered-or-> */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(ungt)
+
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 and r2 are ordered and differ; returns the branch pc. */
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);   /* FP condition := unordered or r1 == r2 */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(ltgt)
+
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if neither r1 nor r2 is NaN; returns the branch pc. */
+    jit_word_t         w;
+    C_UN_D(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the BC1F emitted next; this is the return value */
+    BC1F(((i0 - w) >> 2) - 1);   /* taken on false condition; word offset counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(ord)
+
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Emit a double-precision branch to address i0, taken if r1 or r2 is NaN; returns the branch pc. */
+    jit_word_t         w;
+    C_UN_D(r1, r2);   /* FP condition := unordered (at least one operand is NaN) */
+    w = _jit->pc.w;   /* pc of the BC1T emitted next; this is the return value */
+    BC1T(((i0 - w) >> 2) - 1);   /* offset in instruction words, counted from the delay slot at w + 4 */
+    NOP(1);   /* occupies the branch delay slot */
+    return (w);
+}
+dbopi(unord)
+
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Emit code that loads a double from the address in r1 (the va_list cursor) into r0, then advances r1 past it. */
+#if !NEW_ABI
+    jit_int32_t                reg;   /* scratch GPR used only by the alignment step below */
+#endif
+    assert(_jitc->function->self.call & jit_call_varargs);   /* only valid inside a function flagged as varargs */
+#if !NEW_ABI
+    /* Align, if required. */
+    reg = jit_get_reg(jit_class_gpr);
+    andi(rn(reg), r1, 7);
+    addr(r1, r1, rn(reg));   /* r1 += r1 & 7: reaches the next multiple of 8 only if r1 is 4-byte aligned -- NOTE(review): confirm that invariant */
+    jit_unget_reg(reg);
+#endif
+
+    /* Load argument. */
+    ldr_d(r0, r1);
+
+    /* Update va_list. */
+    addi(r1, r1, sizeof(jit_float64_t));   /* step over the double just read */
+}
+
+#  undef fopi
+#  undef fbopi
+#  undef dopi
+#  undef dbopi
+#  undef fpr_bopi
+#  undef fpr_opi
+#endif
diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c
new file mode 100644 (file)
index 0000000..613aa00
--- /dev/null
@@ -0,0 +1,1210 @@
+
+#if __WORDSIZE == 32
+#if NEW_ABI
+#define JIT_INSTR_MAX 44 /* 32-bit NEW_ABI: equals the largest entry of the size table that follows (prolog and epilog, 44) */
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    44,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    0, /* va_start */
+    0, /* va_arg */
+    0, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    12,        /* addcr */
+    20,        /* addci */
+    28,        /* addxr */
+    28,        /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    12,        /* subcr */
+    20,        /* subci */
+    28,        /* subxr */
+    28,        /* subxi */
+    16,        /* rsbi */
+    8, /* mulr */
+    16,        /* muli */
+    12,        /* qmulr */
+    20,        /* qmuli */
+    12,        /* qmulr_u */
+    20,        /* qmuli_u */
+    8, /* divr */
+    16,        /* divi */
+    8, /* divr_u */
+    16,        /* divi_u */
+    12,        /* qdivr */
+    16,        /* qdivi */
+    12,        /* qdivr_u */
+    16,        /* qdivi_u */
+    8, /* remr */
+    16,        /* remi */
+    8, /* remr_u */
+    16,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    8, /* comr */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    4, /* lti_u */
+    8, /* ler */
+    12,        /* lei */
+    8, /* ler_u */
+    12,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    8, /* ger */
+    12,        /* gei */
+    8, /* ger_u */
+    12,        /* gei_u */
+    4, /* gtr */
+    8, /* gti */
+    4, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    8, /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    8, /* ldxr_c */
+    4, /* ldxi_c */
+    8, /* ldxr_uc */
+    4, /* ldxi_uc */
+    8, /* ldxr_s */
+    4, /* ldxi_s */
+    8, /* ldxr_us */
+    4, /* ldxi_us */
+    8, /* ldxr_i */
+    4, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    8, /* stxr_c */
+    4, /* stxi_c */
+    8, /* stxr_s */
+    4, /* stxi_s */
+    8, /* stxr_i */
+    4, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    16,        /* blei */
+    12,        /* bler_u */
+    16,        /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    16,        /* bgti */
+    12,        /* bgtr_u */
+    16,        /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    28,        /* boaddr */
+    28,        /* boaddi */
+    16,        /* boaddr_u */
+    20,        /* boaddi_u */
+    28,        /* bxaddr */
+    28,        /* bxaddi */
+    16,        /* bxaddr_u */
+    20,        /* bxaddi_u */
+    28,        /* bosubr */
+    28,        /* bosubi */
+    16,        /* bosubr_u */
+    20,        /* bosubi_u */
+    28,        /* bxsubr */
+    28,        /* bxsubi */
+    16,        /* bxsubr_u */
+    20,        /* bxsubi_u */
+    0, /* jmpr */
+    8, /* jmpi */
+    12,        /* callr */
+    16,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    44,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    28,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    16,        /* eqr_f */
+    28,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    16,        /* gtr_f */
+    28,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    16,        /* unordr_f */
+    28,        /* unordi_f */
+    8, /* truncr_f_i */
+    0, /* truncr_f_l */
+    8, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    4, /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    4, /* stxi_f */
+    12,        /* bltr_f */
+    24,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    24,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    12,        /* bgtr_f */
+    24,        /* bgti_f */
+    12,        /* bner_f */
+    24,        /* bnei_f */
+    12,        /* bunltr_f */
+    24,        /* bunlti_f */
+    12,        /* bunler_f */
+    24,        /* bunlei_f */
+    12,        /* buneqr_f */
+    24,        /* buneqi_f */
+    12,        /* bunger_f */
+    24,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    12,        /* bordr_f */
+    24,        /* bordi_f */
+    12,        /* bunordr_f */
+    24,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    16,        /* addi_d */
+    4, /* subr_d */
+    16,        /* subi_d */
+    16,        /* rsbi_d */
+    4, /* mulr_d */
+    16,        /* muli_d */
+    4, /* divr_d */
+    16,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    28,        /* lti_d */
+    16,        /* ler_d */
+    28,        /* lei_d */
+    16,        /* eqr_d */
+    28,        /* eqi_d */
+    16,        /* ger_d */
+    28,        /* gei_d */
+    16,        /* gtr_d */
+    28,        /* gti_d */
+    16,        /* ner_d */
+    28,        /* nei_d */
+    16,        /* unltr_d */
+    28,        /* unlti_d */
+    16,        /* unler_d */
+    28,        /* unlei_d */
+    16,        /* uneqr_d */
+    28,        /* uneqi_d */
+    16,        /* unger_d */
+    28,        /* ungei_d */
+    16,        /* ungtr_d */
+    28,        /* ungti_d */
+    16,        /* ltgtr_d */
+    28,        /* ltgti_d */
+    16,        /* ordr_d */
+    28,        /* ordi_d */
+    16,        /* unordr_d */
+    28,        /* unordi_d */
+    8, /* truncr_d_i */
+    0, /* truncr_d_l */
+    8, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    12,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    8, /* ldxr_d */
+    4, /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    4, /* stxi_d */
+    12,        /* bltr_d */
+    24,        /* blti_d */
+    12,        /* bler_d */
+    24,        /* blei_d */
+    12,        /* beqr_d */
+    24,        /* beqi_d */
+    12,        /* bger_d */
+    24,        /* bgei_d */
+    12,        /* bgtr_d */
+    24,        /* bgti_d */
+    12,        /* bner_d */
+    24,        /* bnei_d */
+    12,        /* bunltr_d */
+    24,        /* bunlti_d */
+    12,        /* bunler_d */
+    24,        /* bunlei_d */
+    12,        /* buneqr_d */
+    24,        /* buneqi_d */
+    12,        /* bunger_d */
+    24,        /* bungei_d */
+    12,        /* bungtr_d */
+    24,        /* bungti_d */
+    12,        /* bltgtr_d */
+    24,        /* bltgti_d */
+    12,        /* bordr_d */
+    24,        /* bordi_d */
+    12,        /* bunordr_d */
+    24,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    4, /* movr_d_w */
+    12,        /* movi_d_w */
+#endif /* NEW_ABI */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 32
+#if !NEW_ABI
+#define JIT_INSTR_MAX 116 /* 32-bit !NEW_ABI: equals the largest entry of the size table that follows (prolog and epilog, 116) */
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    116,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    20,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    12,        /* addcr */
+    20,        /* addci */
+    28,        /* addxr */
+    28,        /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    12,        /* subcr */
+    20,        /* subci */
+    28,        /* subxr */
+    28,        /* subxi */
+    16,        /* rsbi */
+    8, /* mulr */
+    16,        /* muli */
+    12,        /* qmulr */
+    20,        /* qmuli */
+    12,        /* qmulr_u */
+    20,        /* qmuli_u */
+    8, /* divr */
+    16,        /* divi */
+    8, /* divr_u */
+    16,        /* divi_u */
+    12,        /* qdivr */
+    16,        /* qdivi */
+    12,        /* qdivr_u */
+    16,        /* qdivi_u */
+    8, /* remr */
+    16,        /* remi */
+    8, /* remr_u */
+    16,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    8, /* comr */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    4, /* lti_u */
+    8, /* ler */
+    12,        /* lei */
+    8, /* ler_u */
+    12,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    8, /* ger */
+    12,        /* gei */
+    8, /* ger_u */
+    12,        /* gei_u */
+    4, /* gtr */
+    8, /* gti */
+    4, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    8, /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    20,        /* htonr_us */
+    52,        /* htonr_ui */
+    0, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    8, /* ldxr_uc */
+    16,        /* ldxi_uc */
+    8, /* ldxr_s */
+    16,        /* ldxi_s */
+    8, /* ldxr_us */
+    16,        /* ldxi_us */
+    8, /* ldxr_i */
+    16,        /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    8, /* stxr_c */
+    16,        /* stxi_c */
+    8, /* stxr_s */
+    16,        /* stxi_s */
+    8, /* stxr_i */
+    16,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    16,        /* blei */
+    12,        /* bler_u */
+    16,        /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    16,        /* bgti */
+    12,        /* bgtr_u */
+    16,        /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    28,        /* boaddr */
+    28,        /* boaddi */
+    16,        /* boaddr_u */
+    20,        /* boaddi_u */
+    28,        /* bxaddr */
+    28,        /* bxaddi */
+    16,        /* bxaddr_u */
+    20,        /* bxaddi_u */
+    28,        /* bosubr */
+    28,        /* bosubi */
+    16,        /* bosubr_u */
+    20,        /* bosubi_u */
+    28,        /* bxsubr */
+    28,        /* bxsubi */
+    16,        /* bxsubr_u */
+    20,        /* bxsubi_u */
+    8, /* jmpr */
+    8, /* jmpi */
+    12,        /* callr */
+    16,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    116,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    28,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    16,        /* eqr_f */
+    28,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    16,        /* gtr_f */
+    28,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    16,        /* unordr_f */
+    28,        /* unordi_f */
+    8, /* truncr_f_i */
+    0, /* truncr_f_l */
+    8, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    16,        /* stxi_f */
+    12,        /* bltr_f */
+    24,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    24,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    12,        /* bgtr_f */
+    24,        /* bgti_f */
+    12,        /* bner_f */
+    24,        /* bnei_f */
+    12,        /* bunltr_f */
+    24,        /* bunlti_f */
+    12,        /* bunler_f */
+    24,        /* bunlei_f */
+    12,        /* buneqr_f */
+    24,        /* buneqi_f */
+    12,        /* bunger_f */
+    24,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    12,        /* bordr_f */
+    24,        /* bordi_f */
+    12,        /* bunordr_f */
+    24,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    20,        /* addi_d */
+    4, /* subr_d */
+    20,        /* subi_d */
+    20,        /* rsbi_d */
+    4, /* mulr_d */
+    20,        /* muli_d */
+    4, /* divr_d */
+    20,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    40,        /* lti_d */
+    16,        /* ler_d */
+    40,        /* lei_d */
+    16,        /* eqr_d */
+    40,        /* eqi_d */
+    16,        /* ger_d */
+    40,        /* gei_d */
+    16,        /* gtr_d */
+    40,        /* gti_d */
+    16,        /* ner_d */
+    40,        /* nei_d */
+    16,        /* unltr_d */
+    40,        /* unlti_d */
+    16,        /* unler_d */
+    40,        /* unlei_d */
+    16,        /* uneqr_d */
+    40,        /* uneqi_d */
+    16,        /* unger_d */
+    40,        /* ungei_d */
+    16,        /* ungtr_d */
+    40,        /* ungti_d */
+    16,        /* ltgtr_d */
+    40,        /* ltgti_d */
+    16,        /* ordr_d */
+    40,        /* ordi_d */
+    16,        /* unordr_d */
+    40,        /* unordi_d */
+    8, /* truncr_d_i */
+    0, /* truncr_d_l */
+    8, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    24,        /* movi_d */
+    8, /* ldr_d */
+    16,        /* ldi_d */
+    12,        /* ldxr_d */
+    20,        /* ldxi_d */
+    8, /* str_d */
+    16,        /* sti_d */
+    12,        /* stxr_d */
+    20,        /* stxi_d */
+    12,        /* bltr_d */
+    28,        /* blti_d */
+    12,        /* bler_d */
+    28,        /* blei_d */
+    12,        /* beqr_d */
+    28,        /* beqi_d */
+    12,        /* bger_d */
+    28,        /* bgei_d */
+    12,        /* bgtr_d */
+    28,        /* bgti_d */
+    12,        /* bner_d */
+    36,        /* bnei_d */
+    12,        /* bunltr_d */
+    36,        /* bunlti_d */
+    12,        /* bunler_d */
+    36,        /* bunlei_d */
+    12,        /* buneqr_d */
+    36,        /* buneqi_d */
+    12,        /* bunger_d */
+    36,        /* bungei_d */
+    12,        /* bungtr_d */
+    36,        /* bungti_d */
+    12,        /* bltgtr_d */
+    28,        /* bltgti_d */
+    12,        /* bordr_d */
+    28,        /* bordi_d */
+    12,        /* bunordr_d */
+    36,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    4, /* movr_w_f */
+    8, /* movr_ww_d */
+    0, /* movr_w_d */
+    4, /* movr_f_w */
+    4, /* movi_f_w */
+    8, /* movr_d_ww */
+    8, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* NEW_ABI */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+#define JIT_INSTR_MAX 44
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    44,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    0, /* va_start */
+    0, /* va_arg */
+    0, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    28,        /* addi */
+    12,        /* addcr */
+    36,        /* addci */
+    28,        /* addxr */
+    28,        /* addxi */
+    4, /* subr */
+    28,        /* subi */
+    12,        /* subcr */
+    36,        /* subci */
+    28,        /* subxr */
+    28,        /* subxi */
+    32,        /* rsbi */
+    8, /* mulr */
+    32,        /* muli */
+    12,        /* qmulr */
+    32,        /* qmuli */
+    12,        /* qmulr_u */
+    32,        /* qmuli_u */
+    8, /* divr */
+    32,        /* divi */
+    8, /* divr_u */
+    32,        /* divi_u */
+    12,        /* qdivr */
+    16,        /* qdivi */
+    12,        /* qdivr_u */
+    16,        /* qdivi_u */
+    8, /* remr */
+    32,        /* remi */
+    8, /* remr_u */
+    32,        /* remi_u */
+    4, /* andr */
+    28,        /* andi */
+    4, /* orr */
+    28,        /* ori */
+    4, /* xorr */
+    28,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    8, /* comr */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    4, /* lti_u */
+    8, /* ler */
+    12,        /* lei */
+    8, /* ler_u */
+    12,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    8, /* ger */
+    12,        /* gei */
+    8, /* ger_u */
+    12,        /* gei_u */
+    4, /* gtr */
+    8, /* gti */
+    4, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    28,        /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    8, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    4, /* ldr_ui */
+    12,        /* ldi_ui */
+    4, /* ldr_l */
+    12,        /* ldi_l */
+    8, /* ldxr_c */
+    4, /* ldxi_c */
+    8, /* ldxr_uc */
+    4, /* ldxi_uc */
+    8, /* ldxr_s */
+    4, /* ldxi_s */
+    8, /* ldxr_us */
+    4, /* ldxi_us */
+    8, /* ldxr_i */
+    4, /* ldxi_i */
+    8, /* ldxr_ui */
+    4, /* ldxi_ui */
+    8, /* ldxr_l */
+    4, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    4, /* str_l */
+    12,        /* sti_l */
+    8, /* stxr_c */
+    4, /* stxi_c */
+    8, /* stxr_s */
+    4, /* stxi_s */
+    8, /* stxr_i */
+    4, /* stxi_i */
+    8, /* stxr_l */
+    4, /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    16,        /* blei */
+    12,        /* bler_u */
+    16,        /* blei_u */
+    8, /* beqr */
+    36,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    16,        /* bgti */
+    12,        /* bgtr_u */
+    16,        /* bgti_u */
+    8, /* bner */
+    32,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    28,        /* boaddr */
+    28,        /* boaddi */
+    16,        /* boaddr_u */
+    20,        /* boaddi_u */
+    28,        /* bxaddr */
+    28,        /* bxaddi */
+    16,        /* bxaddr_u */
+    20,        /* bxaddi_u */
+    28,        /* bosubr */
+    28,        /* bosubi */
+    16,        /* bosubr_u */
+    20,        /* bosubi_u */
+    28,        /* bxsubr */
+    28,        /* bxsubi */
+    16,        /* bxsubr_u */
+    20,        /* bxsubi_u */
+    0, /* jmpr */
+    8, /* jmpi */
+    12,        /* callr */
+    32,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    44,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    28,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    16,        /* eqr_f */
+    28,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    16,        /* gtr_f */
+    28,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    16,        /* unordr_f */
+    28,        /* unordi_f */
+    8, /* truncr_f_i */
+    8, /* truncr_f_l */
+    8, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    4, /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    4, /* stxi_f */
+    12,        /* bltr_f */
+    24,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    24,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    12,        /* bgtr_f */
+    24,        /* bgti_f */
+    12,        /* bner_f */
+    24,        /* bnei_f */
+    12,        /* bunltr_f */
+    24,        /* bunlti_f */
+    12,        /* bunler_f */
+    24,        /* bunlei_f */
+    12,        /* buneqr_f */
+    24,        /* buneqi_f */
+    12,        /* bunger_f */
+    24,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    12,        /* bordr_f */
+    24,        /* bordi_f */
+    12,        /* bunordr_f */
+    24,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    16,        /* addi_d */
+    4, /* subr_d */
+    16,        /* subi_d */
+    16,        /* rsbi_d */
+    4, /* mulr_d */
+    16,        /* muli_d */
+    4, /* divr_d */
+    16,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    28,        /* lti_d */
+    16,        /* ler_d */
+    28,        /* lei_d */
+    16,        /* eqr_d */
+    28,        /* eqi_d */
+    16,        /* ger_d */
+    28,        /* gei_d */
+    16,        /* gtr_d */
+    28,        /* gti_d */
+    16,        /* ner_d */
+    28,        /* nei_d */
+    16,        /* unltr_d */
+    28,        /* unlti_d */
+    16,        /* unler_d */
+    28,        /* unlei_d */
+    16,        /* uneqr_d */
+    28,        /* uneqi_d */
+    16,        /* unger_d */
+    28,        /* ungei_d */
+    16,        /* ungtr_d */
+    28,        /* ungti_d */
+    16,        /* ltgtr_d */
+    28,        /* ltgti_d */
+    16,        /* ordr_d */
+    28,        /* ordi_d */
+    16,        /* unordr_d */
+    28,        /* unordi_d */
+    8, /* truncr_d_i */
+    8, /* truncr_d_l */
+    8, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    12,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    8, /* ldxr_d */
+    4, /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    4, /* stxi_d */
+    12,        /* bltr_d */
+    24,        /* blti_d */
+    12,        /* bler_d */
+    24,        /* blei_d */
+    12,        /* beqr_d */
+    24,        /* beqi_d */
+    12,        /* bger_d */
+    24,        /* bgei_d */
+    12,        /* bgtr_d */
+    24,        /* bgti_d */
+    12,        /* bner_d */
+    24,        /* bnei_d */
+    12,        /* bunltr_d */
+    24,        /* bunlti_d */
+    12,        /* bunler_d */
+    24,        /* bunlei_d */
+    12,        /* buneqr_d */
+    24,        /* buneqi_d */
+    12,        /* bunger_d */
+    24,        /* bungei_d */
+    12,        /* bungtr_d */
+    24,        /* bungti_d */
+    12,        /* bltgtr_d */
+    24,        /* bltgti_d */
+    12,        /* bordr_d */
+    24,        /* bordi_d */
+    12,        /* bunordr_d */
+    24,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    4, /* movr_d_w */
+    12,        /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c
new file mode 100644 (file)
index 0000000..dafade8
--- /dev/null
@@ -0,0 +1,1930 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if defined(__linux__)
+#  include <sys/cachectl.h>
+#endif
+
+/*
+ * ABI dependent layout constants.
+ *   NUM_WORD_ARGS  upper bound tested by jit_arg_reg_p()
+ *   STACK_SLOT     bytes added to the frame size per stacked word argument
+ *   STACK_SHIFT    log2(STACK_SLOT), converts a byte size to a slot index
+ */
+#if NEW_ABI
+#  define NUM_WORD_ARGS                        8
+#  define STACK_SLOT                   8
+#  define STACK_SHIFT                  3
+#else
+#  define NUM_WORD_ARGS                        4
+#  define STACK_SLOT                   4
+#  define STACK_SHIFT                  2
+#endif
+/* Extra displacement added when storing a word argument to its stack slot */
+#if NEW_ABI && __BYTE_ORDER == __BIG_ENDIAN && __WORDSIZE == 32
+#  define WORD_ADJUST                  4
+#else
+#  define WORD_ADJUST                  0
+#endif
+/* True if `i' is an argument index in [0, NUM_WORD_ARGS); evaluates `i' twice */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < NUM_WORD_ARGS)
+/*
+ * Displacement inside a stack slot used when loading a char, short or int
+ * argument.  On big endian the narrow value occupies the last bytes of the
+ * slot.  The expansions are parenthesized so that they stay a single operand
+ * whatever expression they are pasted into.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define C_DISP                       0
+#  define S_DISP                       0
+#  define I_DISP                       0
+#else
+#  define C_DISP                       (STACK_SLOT - sizeof(jit_int8_t))
+#  define S_DISP                       (STACK_SLOT - sizeof(jit_int16_t))
+#  define I_DISP                       (STACK_SLOT - sizeof(jit_int32_t))
+#endif
+
+/*
+ * Types
+ */
+/* jit_va_list_t names the struct tag `jit_pointer_t'; no members are declared
+ * here.  NOTE(review): jit_pointer_t is also used as a plain type name in this
+ * file (see the jit_realloc/jit_alloc casts) -- confirm the struct tag is
+ * intentional. */
+typedef struct jit_pointer_t jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* File local helpers.  Each wrapper macro passes the `_jit' state that is in
+ * scope at the call site as the first argument of the real function. */
+#define jit_make_arg(node)             _jit_make_arg(_jit,node)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
+static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
+#define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
+static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* The helper sources are included with PROTO defined.
+ * NOTE(review): presumably this pass only pulls in their declarations and
+ * macros -- confirm against the included files. */
+#define PROTO                          1
+#  include "jit_rewind.c"
+#  include "jit_mips-cpu.c"
+#  include "jit_mips-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/*
+ * Register table.  Each entry pairs a register specification (rc() class
+ * flags or'ed with a register number) with the register name.  The last
+ * entry is the _NOREG terminator, which _jit_init() excludes from reglen.
+ * NOTE(review): the entry order presumably has to match the register
+ * enumeration (_A0, _F12, ...) declared elsewhere -- confirm before
+ * reordering anything.
+ */
+jit_register_t         _rvs[] = {
+    /* general purpose registers without rc(sav) or rc(arg) */
+    { rc(gpr) | 0x01,                  "at" },
+    { rc(gpr) | 0x02,                  "v0" },
+    { rc(gpr) | 0x03,                  "v1" },
+#if !NEW_ABI
+    { rc(gpr) | 0x08,                  "t0" },
+    { rc(gpr) | 0x09,                  "t1" },
+    { rc(gpr) | 0x0a,                  "t2" },
+    { rc(gpr) | 0x0b,                  "t3" },
+#endif
+    { rc(gpr) | 0x0c,                  "t4" },
+    { rc(gpr) | 0x0d,                  "t5" },
+    { rc(gpr) | 0x0e,                  "t6" },
+    { rc(gpr) | 0x0f,                  "t7" },
+    { rc(gpr) | 0x18,                  "t8" },
+    { rc(gpr) | 0x19,                  "t9" },
+    /* general purpose registers flagged rc(sav) */
+    { rc(sav) | rc(gpr) | 0x10,                "s0" },
+    { rc(sav) | rc(gpr) | 0x11,                "s1" },
+    { rc(sav) | rc(gpr) | 0x12,                "s2" },
+    { rc(sav) | rc(gpr) | 0x13,                "s3" },
+    { rc(sav) | rc(gpr) | 0x14,                "s4" },
+    { rc(sav) | rc(gpr) | 0x15,                "s5" },
+    { rc(sav) | rc(gpr) | 0x16,                "s6" },
+    { rc(sav) | rc(gpr) | 0x17,                "s7" },
+    /* entries without rc(gpr) */
+    { 0x00,                            "zero" },
+    { 0x1a,                            "k0" },
+    { 0x1b,                            "k1" },
+    { rc(sav) | 0x1f,                  "ra" },
+    { rc(sav) | 0x1c,                  "gp" },
+    { rc(sav) | 0x1d,                  "sp" },
+    { rc(sav) | 0x1e,                  "fp" },
+    /* a7..a4 exist only with NEW_ABI; note they are not flagged rc(arg) */
+#if NEW_ABI
+    { rc(gpr) | 0x0b,                  "a7" },
+    { rc(gpr) | 0x0a,                  "a6" },
+    { rc(gpr) | 0x09,                  "a5" },
+    { rc(gpr) | 0x08,                  "a4" },
+#endif
+    /* integer argument registers, listed from a3 down to a0 */
+    { rc(arg) | rc(gpr) | 0x07,                "a3" },
+    { rc(arg) | rc(gpr) | 0x06,                "a2" },
+    { rc(arg) | rc(gpr) | 0x05,                "a1" },
+    { rc(arg) | rc(gpr) | 0x04,                "a0" },
+    /* float registers without rc(sav) or rc(arg) */
+    { rc(fpr) | 0x00,                  "$f0" },
+    { rc(fpr) | 0x02,                  "$f2" },
+    { rc(fpr) | 0x04,                  "$f4" },
+    { rc(fpr) | 0x06,                  "$f6" },
+    { rc(fpr) | 0x08,                  "$f8" },
+    { rc(fpr) | 0x0a,                  "$f10" },
+    /* float registers flagged rc(sav) */
+#if !NEW_ABI
+    { rc(sav) | rc(fpr) | 0x10,                "$f16" },
+    { rc(sav) | rc(fpr) | 0x12,                "$f18" },
+#endif
+    { rc(sav) | rc(fpr) | 0x14,                "$f20" },
+    { rc(sav) | rc(fpr) | 0x16,                "$f22" },
+    { rc(sav) | rc(fpr) | 0x18,                "$f24" },
+    { rc(sav) | rc(fpr) | 0x1a,                "$f26" },
+    { rc(sav) | rc(fpr) | 0x1c,                "$f28" },
+    { rc(sav) | rc(fpr) | 0x1e,                "$f30" },
+    /* float argument registers, listed in descending order down to $f12 */
+#if NEW_ABI
+    { rc(arg) | rc(fpr) | 0x13,                "$f19" },
+    { rc(arg) | rc(fpr) | 0x12,                "$f18" },
+    { rc(arg) | rc(fpr) | 0x11,                "$f17" },
+    { rc(arg) | rc(fpr) | 0x10,                "$f16" },
+    { rc(arg) | rc(fpr) | 0x0f,                "$f15" },
+    { rc(arg) | rc(fpr) | 0x0e,                "$f14" },
+    { rc(arg) | rc(fpr) | 0x0d,                "$f13" },
+    { rc(arg) | rc(fpr) | 0x0c,                "$f12" },
+#else
+    { rc(arg) | rc(fpr) | 0x0e,                "$f14" },
+    { rc(arg) | rc(fpr) | 0x0c,                "$f12" },
+#endif
+    /* terminator */
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU feature probing hook; this backend has nothing to detect. */
+void
+jit_get_cpu(void)
+{
+    /* intentionally empty */
+}
+
+/*
+ * Per state backend initialization: record how many registers the table
+ * describes and start with no carry register allocated.
+ */
+void
+_jit_init(jit_state_t *_jit)
+{
+    /* The trailing _NOREG terminator of _rvs is not a register. */
+    _jitc->reglen = jit_size(_rvs) - 1;
+
+    /* Alternatives for carry tracking would be:
+     * o reserving a register for carry        (overkill)
+     * o using MTLO/MFLO                       (performance hit)
+     * Instead a register is kept allocated after carry is set, and it is
+     * implicitly released once it can no longer be tracked.
+     */
+    jit_carry = _NOREG;
+}
+
+/*
+ * Open a new function: close the previous one if it is still open, grow the
+ * functions vector when full, reset the per function argument/frame
+ * bookkeeping and create the prolog and epilog nodes.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 offset;
+
+    /* A function that was not explicitly finished is closed here. */
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    /* Index of the new function in the functions vector. */
+    offset = _jitc->functions.offset;
+    /* The vector grows 16 entries at a time. */
+    if (offset >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    /* Start with the fixed frame size and no arguments or locals. */
+    _jitc->function->self.size = stack_framesize;
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+    _jitc->function->self.call = jit_call_default;
+    /* One jit_int32_t entry per register described by _rvs. */
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    /* The epilog node is created now but only linked by _jit_epilog(). */
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/*
+ * Reserve `length' bytes below the current local area offset and return the
+ * new (lower) offset.  The offset is first aligned down according to the
+ * request size: 2 bytes for length 2, 4 bytes for lengths 3 and 4, 8 bytes
+ * for anything else except lengths 0 and 1, which are not aligned.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t                 mask;
+
+    assert(_jitc->function);
+    if (length == 0 || length == 1)
+       mask = -1;                      /* no alignment */
+    else if (length == 2)
+       mask = -2;
+    else if (length == 3 || length == 4)
+       mask = -4;
+    else
+       mask = -8;
+    _jitc->function->self.aoff &= mask;
+    _jitc->function->self.aoff -= length;
+    /* Only record the synthesized node before code is being realized. */
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/*
+ * Runtime sized stack allocation.  Register `v' holds the requested size;
+ * register `u' receives the updated value of the per function word kept at
+ * FP + aoffoff, which is also written back.  The stack pointer is moved by
+ * the same amount.
+ */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    /* The first allocar in a function reserves the slot that holds the
+     * running offset. */
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    /* reg = -v with the low three bits cleared, i.e. a negative amount
+     * that is a multiple of 8. */
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+    jit_dec_synth();
+}
+
+/* Return from the current function by jumping to its epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    /* the jump target is the epilog of the function being built */
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the integer held in register `u'. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    /* no move needed when the value already sits in JIT_RET */
+    if (u != JIT_RET)
+       jit_movr(JIT_RET, u);
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the integer constant `u'. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    /* load the constant into the return register, then leave */
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single precision value held in register `u'. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (JIT_FRET == u)
+       /* already in place: only mark the register live */
+       jit_live(JIT_FRET);
+    else
+       jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single precision constant `u'. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    /* load the constant into the float return register, then leave */
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double precision value held in register `u'. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (JIT_FRET == u)
+       /* already in place: only mark the register live */
+       jit_live(JIT_FRET);
+    else
+       jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double precision constant `u'. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    /* load the constant into the float return register, then leave */
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/*
+ * Close the function being built: link its epilog node, which must not
+ * have been linked before, and leave no function open.
+ */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    jit_node_t         *epilog;
+
+    assert(_jitc->function);
+    epilog = _jitc->function->epilog;
+    assert(epilog->next == NULL);
+    jit_link(epilog);
+    _jitc->function = NULL;
+}
+
+/*
+ * Tell whether the argument described by node `u' was assigned a register
+ * rather than a stack slot.
+ */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+#if !NEW_ABI
+       /* float arguments: any offset below 8 denotes a register */
+       return (u->u.w < 8);
+#endif
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/*
+ * Assign a location to the next integer argument of the function being
+ * built and return its node.  `node' is reused when not null, otherwise a
+ * new jit_code_arg node is created.  The node's u.w field holds either an
+ * argument register index or a stack offset, v.w the 1 based argument
+ * number.
+ */
+static jit_node_t *
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 offset;
+#if NEW_ABI
+    /* Use the next argument register index while one is available,
+     * otherwise a new stack slot at the current frame size. */
+    if (jit_arg_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else {
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT;
+    }
+#else
+    /* The slot index is derived from the bytes of argument area already
+     * consumed; every argument consumes a slot, register or not. */
+    offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
+    /* Flag that an integer argument was seen (tested by the float and
+     * double variants). */
+    _jitc->function->self.argi = 1;
+    if (offset >= 4)
+       offset = _jitc->function->self.size;
+    _jitc->function->self.size += STACK_SLOT;
+#endif
+    if (node == (jit_node_t *)0)
+       node = jit_new_node(jit_code_arg);
+    else
+       link_node(node);
+    node->u.w = offset;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/*
+ * Assign a location to the next single precision argument and return its
+ * node.  `node' is reused when not null, otherwise a new jit_code_arg_f
+ * node is created.  Encoding of u.w:
+ *   NEW_ABI:  register index, plus 8 when the function is varargs, or a
+ *             stack offset.
+ *   !NEW_ABI: word slot index (< 4), 4 or 6 for the first or second float
+ *             register, or a stack offset.
+ */
+static jit_node_t *
+_jit_make_arg_f(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 offset;
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       offset = _jitc->function->self.argi++;
+       /* +8 is how the getarg/putarg code recognizes the varargs case */
+       if (_jitc->function->self.call & jit_call_varargs)
+           offset += 8;
+    }
+    else {
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT;
+    }
+#else
+    offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
+    if (offset < NUM_WORD_ARGS) {
+       /* Float registers are only used while no integer argument was
+        * seen and the function is not varargs. */
+       if (!_jitc->function->self.argi &&
+           !(_jitc->function->self.call & jit_call_varargs)) {
+           if (offset == 0)
+               offset = 4;
+           else {
+               offset = 6;
+               /* After the second float register no more are used. */
+               _jitc->function->self.argi = 1;
+           }
+           /* Use as flag to rewind in case of varargs function */
+           ++_jitc->function->self.argf;
+       }
+    }
+    else
+       offset = _jitc->function->self.size;
+    _jitc->function->self.size += STACK_SLOT;
+#endif
+    if (node == (jit_node_t *)0)
+       node = jit_new_node(jit_code_arg_f);
+    else
+       link_node(node);
+    node->u.w = offset;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/*
+ * Assign a location to the next double precision argument and return its
+ * node.  `node' is reused when not null, otherwise a new jit_code_arg_d
+ * node is created.  Encoding of u.w:
+ *   NEW_ABI:  register index, plus 8 when the function is varargs, or a
+ *             stack offset.
+ *   !NEW_ABI: word slot index (< 4), slot index + 4 for a float register,
+ *             or a stack offset.
+ */
+static jit_node_t *
+_jit_make_arg_d(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 offset;
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       offset = _jitc->function->self.argi++;
+       /* +8 is how the getarg/putarg code recognizes the varargs case */
+       if (_jitc->function->self.call & jit_call_varargs)
+           offset += 8;
+    }
+    else {
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT;
+    }
+#else
+    /* Doubles are 8 byte aligned: skip a word of padding when needed,
+     * which also ends the use of float argument registers. */
+    if (_jitc->function->self.size & 7) {
+       _jitc->function->self.size += 4;
+       _jitc->function->self.argi = 1;
+    }
+    offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
+    if (offset < NUM_WORD_ARGS) {
+       /* Float registers are only used while no integer argument was
+        * seen and the function is not varargs. */
+       if (!_jitc->function->self.argi &&
+           !(_jitc->function->self.call & jit_call_varargs)) {
+           offset += 4;
+           /* Use as flag to rewind in case of varargs function */
+           ++_jitc->function->self.argf;
+       }
+    }
+    else
+       offset = _jitc->function->self.size;
+    _jitc->function->self.size += sizeof(jit_float64_t);
+#endif
+    if (node == (jit_node_t *)0)
+       node = jit_new_node(jit_code_arg_d);
+    else
+       link_node(node);
+    node->u.w = offset;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/*
+ * Mark the start of the variable argument list, either of the call being
+ * prepared (when inside a prepare) or of the function being defined.
+ * Argument assignments already made may be redone through the rewind
+ * helpers, because varargs changes where float arguments are placed.
+ */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    if (_jitc->prepare) {
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+#if !NEW_ABI
+       /* Float arguments were already pushed: redo the prepare. */
+       if (_jitc->function->call.argf)
+           rewind_prepare();
+#endif
+    }
+    else {
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+#if NEW_ABI
+       /* If varargs start in a register, allocate extra 64 bytes. */
+       if (jit_arg_reg_p(_jitc->function->self.argi))
+           rewind_prolog();
+       /* Do not set during possible rewind. */
+       _jitc->function->self.call |= jit_call_varargs;
+#else
+       /* Here the flag is set before the rewind, the opposite order of
+        * the NEW_ABI branch above. */
+       _jitc->function->self.call |= jit_call_varargs;
+       if (_jitc->function->self.argf)
+           rewind_prolog();
+#endif
+       /* Remember the value of argi at the point of the ellipsis. */
+       _jitc->function->vagp = _jitc->function->self.argi;
+    }
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare)
+       jit_link_prepare();
+    else
+       jit_link_prolog();
+    jit_dec_synth();
+}
+
+/* Push the va_list held in register `u' as a plain word argument. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    /* handled exactly like an integer register argument */
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the function being built. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* a null node asks the helper to create a new one */
+    return (jit_make_arg(NULL));
+}
+
+/* Declare the next single precision argument of the function being built. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* a null node asks the helper to create a new one */
+    return (jit_make_arg_f(NULL));
+}
+
+/* Declare the next double precision argument of the function being built. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    /* a null node asks the helper to create a new one */
+    return (jit_make_arg_d(NULL));
+}
+
+/* Fetch integer argument `v' into register `u' as a signed char. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_extr_c(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as an unsigned char. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_extr_uc(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as a signed short. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_extr_s(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as an unsigned short. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_extr_us(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as a signed int. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+    else {
+       /* in a register: a plain move is enough when int is word sized */
+#if __WORDSIZE == 64
+       jit_extr_i(u, _A0 - v->u.w);
+#else
+       jit_movr(u, _A0 - v->u.w);
+#endif
+    }
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+/* Fetch integer argument `v' into register `u' as an unsigned int. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: u.w is the frame offset of the slot */
+       jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_extr_ui(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as a full 64 bit word. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: the whole slot is loaded, no displacement */
+       jit_ldxi_l(u, _FP, v->u.w);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_movr(u, _A0 - v->u.w);
+    jit_dec_synth();
+}
+#endif
+
+/* Overwrite integer argument `v' with the value of register `u'. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_inc_synth_wp(putargr, u, v);
+    assert(v->code == jit_code_arg);
+    if (!jit_arg_reg_p(v->u.w))
+       /* stacked: store into the argument's frame slot */
+       jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+    else
+       /* in a register: u.w is the argument register index */
+       jit_movr(_A0 - v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument `v' with the constant `u'. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                scratch;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stacked: materialize the constant in a temporary, then store */
+       scratch = jit_get_reg(jit_class_gpr);
+       jit_movi(scratch, u);
+       jit_stxi(v->u.w + WORD_ADJUST, _FP, scratch);
+       jit_unget_reg(scratch);
+    }
+    else
+       jit_movi(_A0 - v->u.w, u);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch single precision argument `v' into float register `u'.
+ * NEW_ABI:  u.w in [0, 8) names a float argument register; u.w in [8, 16)
+ *           is a varargs argument that arrived in integer register
+ *           u.w - 8; anything else is a frame offset.
+ * !NEW_ABI: u.w < 4 is an integer argument register, 4..7 a float
+ *           argument register, anything else a frame offset.
+ */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr_f(u, _F12 - v->u.w);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       /* Parenthesized: `_A0 - v->u.w - 8' would subtract 8 more instead
+        * of undoing the +8 varargs bias. */
+       jit_movr_w_f(u, _A0 - (v->u.w - 8));
+#else
+    if (v->u.w < 4)
+       jit_movr_w_f(u, _A0 - v->u.w);
+    else if (v->u.w < 8)
+       jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
+#endif
+    else
+       jit_ldxi_f(u, _FP, v->u.w);
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite single precision argument `v' with float register `u'.
+ * NEW_ABI:  u.w in [0, 8) names a float argument register; u.w in [8, 16)
+ *           is a varargs argument living in integer register u.w - 8;
+ *           anything else is a frame offset.
+ * !NEW_ABI: u.w < 4 is an integer argument register, 4..7 a float
+ *           argument register, anything else a frame offset.
+ */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr_f(_F12 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       /* Parenthesized: `_A0 - v->u.w - 8' would subtract 8 more instead
+        * of undoing the +8 varargs bias. */
+       jit_movr_f_w(_A0 - (v->u.w - 8), u);
+#else
+    if (v->u.w < 4)
+       jit_movr_f_w(_A0 - v->u.w, u);
+    else if (v->u.w < 8)
+       jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u);
+#endif
+    else
+       jit_stxi_f(v->u.w, _FP, u);
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite single precision argument `v' with the constant `u'.
+ * NEW_ABI:  u.w in [0, 8) names a float argument register; u.w in [8, 16)
+ *           is a varargs argument living in integer register u.w - 8;
+ *           anything else is a frame offset.
+ * !NEW_ABI: u.w < 4 is an integer argument register, 4..7 a float
+ *           argument register, anything else a frame offset.
+ * When the destination is not a float register the constant is first
+ * loaded into a temporary float register.
+ */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movi_f(_F12 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8)) {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       /* Source is the temporary, not the immediate; the register index
+        * is u.w with the +8 varargs bias removed. */
+       jit_movr_f_w(_A0 - (v->u.w - 8), regno);
+       jit_unget_reg(regno);
+    }
+#else
+    if (v->u.w < 4) {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       /* Same register selection as _jit_putargr_f(). */
+       jit_movr_f_w(_A0 - v->u.w, regno);
+       jit_unget_reg(regno);
+    }
+    else if (v->u.w < 8)
+       jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u);
+#endif
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w, _FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch double precision argument `v' into float register `u'.
+ * NEW_ABI:  u.w in [0, 8) names a float argument register; u.w in [8, 16)
+ *           is a varargs argument that arrived in integer register
+ *           u.w - 8; anything else is a frame offset.
+ * !NEW_ABI: u.w < 4 is a pair of integer argument registers, 4..7 a
+ *           float argument register, anything else a frame offset.
+ */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr_d(u, _F12 - v->u.w);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       /* Word to double: the value is read from the integer register
+        * into `u' (the previous code moved in the putarg direction), and
+        * the register index is u.w with the +8 varargs bias removed. */
+       jit_movr_w_d(u, _A0 - (v->u.w - 8));
+#else
+    if (v->u.w < 4)
+       jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
+    else if (v->u.w < 8)
+       jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
+#endif
+    else
+       jit_ldxi_d(u, _FP, v->u.w);
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite double precision argument `v' with float register `u'.
+ * NEW_ABI:  u.w in [0, 8) names a float argument register; u.w in [8, 16)
+ *           is a varargs argument living in integer register u.w - 8;
+ *           anything else is a frame offset.
+ * !NEW_ABI: u.w < 4 is a pair of integer argument registers, 4..7 a
+ *           float argument register, anything else a frame offset.
+ */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr_d(_F12 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       /* Parenthesized: `_A0 - v->u.w - 8' would subtract 8 more instead
+        * of undoing the +8 varargs bias. */
+       jit_movr_d_w(_A0 - (v->u.w - 8), u);
+#else
+    if (v->u.w < 4)
+       jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u);
+    else if (v->u.w < 8)
+       jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u);
+#endif
+    else
+       jit_stxi_d(v->u.w, _FP, u);
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "putargi_d": overwrite the incoming double precision
+ * argument described by node v with the immediate u.
+ * v->u.w selects the destination: a float argument register, integer
+ * argument register(s), or (when no register case matches) an offset
+ * from _FP.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+#if NEW_ABI
+    if (jit_arg_reg_p(v->u.w))
+       jit_movi_d(_F12 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8)) {
+       /* The guard tests (v->u.w - 8), so that is the index of the
+        * integer argument register.  The value moved must be the
+        * temporary that was just loaded, not the immediate itself. */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_movr_d_w(_A0 - (v->u.w - 8), regno);
+       jit_unget_reg(regno);
+    }
+#else
+    if (v->u.w < 4) {
+       /* Materialize the constant in a temporary FPR, then split it
+        * over the integer register pair. */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), regno);
+       jit_unget_reg(regno);
+    }
+    else if (v->u.w < 8)
+       jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u);
+#endif
+    else {
+       /* Argument lives in memory: go through a temporary FPR. */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, _FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargr": pass the integer register u as the next
+ * argument of the call being prepared.
+ *
+ * NEW_ABI: while call.argi still passes jit_arg_reg_p() the value is
+ * moved to _A0 - call.argi and call.argi is incremented; afterwards the
+ * value is stored relative to JIT_SP and call.size grows by STACK_SLOT.
+ *
+ * Otherwise: the slot index is derived from call.size, so call.size
+ * grows by STACK_SLOT for every argument, register or stack.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+#if NEW_ABI
+    assert(_jitc->function);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u);
+       _jitc->function->call.size += STACK_SLOT;
+    }
+#else
+    jit_word_t         offset;
+    assert(_jitc->function);
+    /* Slot index of this argument in the outgoing argument area. */
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    /* NOTE(review): this assigns 1 whereas _jit_pushargi() increments
+     * call.argi; the float/double push routines only test it for
+     * non-zero, but _jit_finishi() copies it to the call node --
+     * confirm the intended value. */
+    _jitc->function->call.argi = 1;
+    if (jit_arg_reg_p(offset))
+       jit_movr(_A0 - offset, u);
+    else
+       jit_stxi(_jitc->function->call.size, JIT_SP, u);
+    _jitc->function->call.size += STACK_SLOT;
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargi": pass the integer constant u as the next
+ * argument of the call being prepared.
+ *
+ * Register arguments are loaded directly with jit_movi(); stack
+ * arguments are first materialized in a temporary GPR and then stored
+ * relative to JIT_SP.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                regno;
+#if !NEW_ABI
+    jit_word_t         offset;
+#endif
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno);
+       _jitc->function->call.size += STACK_SLOT;
+       jit_unget_reg(regno);
+    }
+#else
+    /* Slot index of this argument in the outgoing argument area. */
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    ++_jitc->function->call.argi;
+    if (jit_arg_reg_p(offset))
+       jit_movi(_A0 - offset, u);
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    /* Every argument, register or stack, consumes one slot. */
+    _jitc->function->call.size += STACK_SLOT;
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargr_f": pass the single precision float register u
+ * as the next argument of the call being prepared.
+ *
+ * NEW_ABI: a register argument goes to _F12 - call.argi, or to the
+ * integer register _A0 - call.argi when the call is flagged
+ * jit_call_varargs; remaining arguments are stored relative to JIT_SP.
+ *
+ * Otherwise: the first two slots use a float register only while no
+ * integer argument has been counted (call.argi == 0) and the call is
+ * not varargs; other slots below 4 use an integer register; the rest
+ * goes to the stack.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+#if !NEW_ABI
+    jit_word_t         offset;
+#endif
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       if (!(_jitc->function->call.call & jit_call_varargs))
+           jit_movr_f(_F12 - _jitc->function->call.argi, u);
+       else
+           jit_movr_f_w(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += STACK_SLOT;
+    }
+#else
+    /* Slot index of this argument in the outgoing argument area. */
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    if (offset < 2 && !_jitc->function->call.argi &&
+       !(_jitc->function->call.call & jit_call_varargs)) {
+       ++_jitc->function->call.argf;
+       jit_movr_f(_F12 - offset, u);
+    }
+    else if (offset < 4) {
+       ++_jitc->function->call.argi;
+       jit_movr_f_w(_A0 - offset, u);
+    }
+    else
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+    /* Every argument, register or stack, consumes one slot. */
+    _jitc->function->call.size += STACK_SLOT;
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargi_f": pass the single precision constant u as the
+ * next argument of the call being prepared.
+ *
+ * Destination selection mirrors _jit_pushargr_f(); the immediate is
+ * loaded directly into the chosen argument register, or through a
+ * temporary FPR when it must be stored relative to JIT_SP.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                regno;
+#if !NEW_ABI
+    jit_word_t         offset;
+#endif
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       if (!(_jitc->function->call.call & jit_call_varargs))
+           jit_movi_f(_F12 - _jitc->function->call.argi, u);
+       else
+           jit_movi_f_w(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += STACK_SLOT;
+       jit_unget_reg(regno);
+    }
+#else
+    /* Slot index of this argument in the outgoing argument area. */
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    if (offset < 2 && !_jitc->function->call.argi &&
+       !(_jitc->function->call.call & jit_call_varargs)) {
+       ++_jitc->function->call.argf;
+       jit_movi_f(_F12 - offset, u);
+    }
+    else if (offset < 4) {
+       ++_jitc->function->call.argi;
+       jit_movi_f_w(_A0 - offset, u);
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    /* Every argument, register or stack, consumes one slot. */
+    _jitc->function->call.size += STACK_SLOT;
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargr_d": pass the double precision float register u
+ * as the next argument of the call being prepared.
+ *
+ * NEW_ABI: a register argument goes to _F12 - call.argi, or to the
+ * integer register _A0 - call.argi when the call is flagged
+ * jit_call_varargs; remaining arguments are stored relative to JIT_SP.
+ *
+ * Otherwise: call.size is first rounded up to a multiple of 8.  Slots
+ * below 3 use an integer register pair when an integer argument was
+ * already counted, padding was inserted, or the call is varargs, and a
+ * float register otherwise; the rest goes to the stack.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+#if !NEW_ABI
+    jit_bool_t         adjust;
+    jit_word_t         offset;
+#endif
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       if (!(_jitc->function->call.call & jit_call_varargs))
+           jit_movr_d(_F12 - _jitc->function->call.argi, u);
+       else
+           jit_movr_d_w(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += STACK_SLOT;
+    }
+#else
+    /* Non-zero once any argument has been counted in call.argi. */
+    adjust = !!_jitc->function->call.argi;
+    if (_jitc->function->call.size & 7) {
+       /* Pad so the double starts on an 8 byte boundary. */
+       _jitc->function->call.size += 4;
+       adjust = 1;
+    }
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    if (offset < 3) {
+       if (adjust || (_jitc->function->call.call & jit_call_varargs)) {
+           jit_movr_d_ww(_A0 - offset, _A0 - (offset + 1), u);
+           _jitc->function->call.argi += 2;
+       }
+       else {
+           jit_movr_d(_F12 - (offset >> 1), u);
+           ++_jitc->function->call.argf;
+       }
+    }
+    else
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+    _jitc->function->call.size += sizeof(jit_float64_t);
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "pushargi_d": pass the double precision constant u as the
+ * next argument of the call being prepared.
+ *
+ * Destination selection mirrors _jit_pushargr_d(); the immediate is
+ * loaded directly into the chosen argument register(s), or through a
+ * temporary FPR when it must be stored relative to JIT_SP.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                regno;
+#if !NEW_ABI
+    jit_bool_t         adjust;
+    jit_word_t         offset;
+#endif
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+#if NEW_ABI
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       if (!(_jitc->function->call.call & jit_call_varargs))
+           jit_movi_d(_F12 - _jitc->function->call.argi, u);
+       else
+           jit_movi_d_w(_A0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += STACK_SLOT;
+       jit_unget_reg(regno);
+    }
+#else
+    /* Non-zero once any argument has been counted in call.argi. */
+    adjust = !!_jitc->function->call.argi;
+    if (_jitc->function->call.size & 7) {
+       /* Pad so the double starts on an 8 byte boundary. */
+       _jitc->function->call.size += 4;
+       adjust = 1;
+    }
+    offset = _jitc->function->call.size >> STACK_SHIFT;
+    if (offset < 3) {
+       if (adjust || (_jitc->function->call.call & jit_call_varargs)) {
+           jit_movi_d_ww(_A0 - offset, _A0 - (offset + 1), u);
+           _jitc->function->call.argi += 2;
+       }
+       else {
+           jit_movi_d(_F12 - (offset >> 1), u);
+           ++_jitc->function->call.argf;
+       }
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    _jitc->function->call.size += sizeof(jit_float64_t);
+#endif
+    jit_dec_synth();
+}
+
+/*
+ * Return 1 when register regno is one of the argument registers in use
+ * by the call node: an integer argument register whose index (counted
+ * from _A0) is below node->v.w, or a float argument register whose
+ * index (counted from _F12) is below node->w.w.  Return 0 otherwise.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                klass;
+    jit_int32_t                index;
+
+    klass = jit_class(_rvs[regno].spec);
+    /* Only registers flagged as argument registers can qualify. */
+    if (!(klass & jit_class_arg))
+       return (0);
+
+    if (klass & jit_class_gpr) {
+       index = _A0 - regno;
+       return (index >= 0 && index < node->v.w);
+    }
+    if (klass & jit_class_fpr) {
+       index = _F12 - regno;
+       return (index >= 0 && index < node->w.w);
+    }
+
+    return (0);
+}
+
+/*
+ * Synthesized "finishr": emit the call, through register r0, that ends
+ * the argument sequence started by prepare.
+ *
+ * The target is copied to _T9 and called from there.  The call node
+ * records how many integer (v.w) and float (w.w) argument registers
+ * the call uses; afterwards the per-call counters are reset.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    /* Track the largest outgoing argument area needed by any call. */
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    jit_movr(_T9, r0);
+    call = jit_callr(_T9);
+    /* Record the counters of the call being made (as _jit_finishi()
+     * does), not those of the arguments received by the current
+     * function. */
+    call->v.w = _jitc->function->call.argi;
+#if NEW_ABI
+    call->w.w = call->v.w;
+#else
+    call->w.w = _jitc->function->call.argf;
+#endif
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Synthesized "finishi": emit the call, to the address i0, that ends
+ * the argument sequence started by prepare.
+ *
+ * The address is loaded in _T9 and called from there.  The call node
+ * records how many integer (v.w) and float (w.w) argument registers
+ * the call uses; afterwards the per-call counters are reset.
+ *
+ * Returns the node of the jit_movi() that loads the target address.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    /* Track the largest outgoing argument area needed by any call. */
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_movi(_T9, (jit_word_t)i0);
+    call = jit_callr(_T9);
+    call->v.w = _jitc->function->call.argi;
+#if NEW_ABI
+    call->w.w = call->v.w;
+#else
+    call->w.w = _jitc->function->call.argf;
+#endif
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+/* Fetch a call's return value from JIT_RET into r0 using jit_extr_c(). */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_c(r0, JIT_RET);
+}
+
+/* Fetch a call's return value from JIT_RET into r0 using jit_extr_uc(). */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_uc(r0, JIT_RET);
+}
+
+/* Fetch a call's return value from JIT_RET into r0 using jit_extr_s(). */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_s(r0, JIT_RET);
+}
+
+/* Fetch a call's return value from JIT_RET into r0 using jit_extr_us(). */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_us(r0, JIT_RET);
+}
+
+/*
+ * Fetch a call's int return value from JIT_RET into r0.
+ * With 32 bit words this is a plain move, skipped when r0 already is
+ * JIT_RET; otherwise the value goes through jit_extr_i().
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+#if __WORDSIZE == 32
+    if (r0 == JIT_RET)
+       return;
+    jit_movr(r0, JIT_RET);
+#else
+    jit_extr_i(r0, JIT_RET);
+#endif
+}
+
+#if __WORDSIZE == 64
+/* Fetch a call's return value from JIT_RET into r0 using jit_extr_ui(). */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_ui(r0, JIT_RET);
+}
+
+/*
+ * Fetch a call's return value from JIT_RET into r0 with a plain move;
+ * nothing is emitted when r0 already is JIT_RET.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_RET)
+       return;
+    jit_movr(r0, JIT_RET);
+}
+#endif
+
+/*
+ * Fetch a call's single precision return value from JIT_FRET into r0;
+ * nothing is emitted when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_FRET)
+       return;
+    jit_movr_f(r0, JIT_FRET);
+}
+
+/*
+ * Fetch a call's double precision return value from JIT_FRET into r0;
+ * nothing is emitted when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_FRET)
+       return;
+    jit_movr_d(r0, JIT_FRET);
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#endif
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+#endif
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+#if __WORDSIZE == 64
+               case_rr(hton, _ul);
+#endif
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+#if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#endif
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli_p(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+#if !NEW_ABI
+           case jit_code_movr_w_f:
+               movr_w_f(rn(node->u.w), rn(node->v.w));
+               break;
+#endif
+           case jit_code_movr_f_w:
+               movr_f_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_f_w:
+               assert(node->flag & jit_flag_data);
+               movi_f_w(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+#if NEW_ABI
+           case jit_code_movr_d_w:
+               movr_d_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_d_w:
+               assert(node->flag & jit_flag_data);
+               movi_d_w(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+#else
+           case jit_code_movr_ww_d:
+               movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_movr_d_ww:
+               movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_movi_d_ww:
+               assert(node->flag & jit_flag_data);
+               movi_d_ww(rn(node->u.w), rn(node->v.w),
+                         (jit_float64_t *)node->w.n->u.w);
+               break;
+#endif
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:
+           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+#if __WORDSIZE == 64
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+#if __WORDSIZE == 64
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 ||
+              (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
+       assert(_jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_rewind.c"
+#  include "jit_mips-cpu.c"
+#  include "jit_mips-fpu.c"
+#undef CODE
+
+/*
+ * Flush the instruction cache for the code range [fptr, tptr).
+ * The range is widened to whole pages before calling _flush_cache():
+ * the start is rounded down and the end rounded up to the page size
+ * reported by sysconf().  Does nothing when __linux__ is not defined.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__linux__)
+    jit_word_t         page;
+    jit_word_t         first;
+    jit_word_t         last;
+
+    page = sysconf(_SC_PAGE_SIZE);
+    /* -page clears the in-page bits (assumes a power of two page size) */
+    first = (jit_word_t)fptr & -page;
+    last = ((jit_word_t)tptr + page - 1) & -page;
+    _flush_cache((void *)first, last - first, ICACHE);
+#endif
+}
+
+/*
+ * Out-of-line wrapper that forwards to the ldxi() emitter, with both
+ * register arguments passed through rn() first.
+ * NOTE(review): rn() presumably translates a lightning register index
+ * to the hardware register number -- confirm against its definition.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Out-of-line wrapper that forwards to the stxi() emitter, with both
+ * register arguments passed through rn() first.  The immediate i0 is
+ * the first argument, matching the argument order of stxi().
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Out-of-line wrapper that forwards to the ldxi_d() emitter, with both
+ * register arguments passed through rn() first.
+ * NOTE(review): the _d suffix presumably selects the double precision
+ * variant -- confirm against jit_mips-fpu.c.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Out-of-line wrapper that forwards to the stxi_d() emitter, with both
+ * register arguments passed through rn() first.  The immediate i0 is
+ * the first argument, matching the argument order of stxi_d().
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Queue a fixup: remember that the instruction at address `instr' must
+ * be patched with the address of the node that `node' refers to.  The
+ * queue is drained with patch_at() once all code has been emitted.
+ * The patch array grows in steps of 1024 entries.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps its target node in v, every other code keeps it in u */
+    flag = (node->code == jit_code_movi ? node->v.n : node->u.n)->flag;
+    /* a target that is already marked as defined needs no patch entry */
+    assert(!(flag & jit_flag_patch));
+
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       /* array is full, make room for another 1024 entries */
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   sizeof(jit_patch_t) * _jitc->patches.length,
+                   sizeof(jit_patch_t) * (_jitc->patches.length + 1024));
+       _jitc->patches.length += 1024;
+    }
+
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.offset++;
+}
diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c
new file mode 100644 (file)
index 0000000..475bc96
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2014-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/*
+ * Printable names of the lightning opcodes, one string per entry.
+ * NOTE(review): presumably indexed by jit_code_t, in which case the
+ * order of the entries must stay in sync with that enum -- confirm
+ * against lightning.h before adding, removing or moving an entry.
+ */
+static char *code_name[] = {
+    "data",
+    "live",            "align",
+    "save",            "load",
+    "#name",           "#note",
+    "label",
+    "prolog",
+    "ellipsis",                "va_push",
+    "allocai",         "allocar",
+    "arg",
+    "getarg_c",                "getarg_uc",
+    "getarg_s",                "getarg_us",
+    "getarg_i",                "getarg_ui",
+    "getarg_l",
+    "putargr",         "putargi",
+    "va_start",
+    "va_arg",          "va_arg_d",
+    "va_end",
+    "addr",            "addi",
+    "addcr",           "addci",
+    "addxr",           "addxi",
+    "subr",            "subi",
+    "subcr",           "subci",
+    "subxr",           "subxi",
+    "rsbi",
+    "mulr",            "muli",
+    "qmulr",           "qmuli",
+    "qmulr_u",         "qmuli_u",
+    "divr",            "divi",
+    "divr_u",          "divi_u",
+    "qdivr",           "qdivi",
+    "qdivr_u",         "qdivi_u",
+    "remr",            "remi",
+    "remr_u",          "remi_u",
+    "andr",            "andi",
+    "orr",             "ori",
+    "xorr",            "xori",
+    "lshr",            "lshi",
+    "rshr",            "rshi",
+    "rshr_u",          "rshi_u",
+    "negr",            "comr",
+    "ltr",             "lti",
+    "ltr_u",           "lti_u",
+    "ler",             "lei",
+    "ler_u",           "lei_u",
+    "eqr",             "eqi",
+    "ger",             "gei",
+    "ger_u",           "gei_u",
+    "gtr",             "gti",
+    "gtr_u",           "gti_u",
+    "ner",             "nei",
+    "movr",            "movi",
+    "extr_c",          "extr_uc",
+    "extr_s",          "extr_us",
+    "extr_i",          "extr_ui",
+    "htonr_us",
+    "htonr_ui",                "htonr_ul",
+    "ldr_c",           "ldi_c",
+    "ldr_uc",          "ldi_uc",
+    "ldr_s",           "ldi_s",
+    "ldr_us",          "ldi_us",
+    "ldr_i",           "ldi_i",
+    "ldr_ui",          "ldi_ui",
+    "ldr_l",           "ldi_l",
+    "ldxr_c",          "ldxi_c",
+    "ldxr_uc",         "ldxi_uc",
+    "ldxr_s",          "ldxi_s",
+    "ldxr_us",         "ldxi_us",
+    "ldxr_i",          "ldxi_i",
+    "ldxr_ui",         "ldxi_ui",
+    "ldxr_l",          "ldxi_l",
+    "str_c",           "sti_c",
+    "str_s",           "sti_s",
+    "str_i",           "sti_i",
+    "str_l",           "sti_l",
+    "stxr_c",          "stxi_c",
+    "stxr_s",          "stxi_s",
+    "stxr_i",          "stxi_i",
+    "stxr_l",          "stxi_l",
+    "bltr",            "blti",
+    "bltr_u",          "blti_u",
+    "bler",            "blei",
+    "bler_u",          "blei_u",
+    "beqr",            "beqi",
+    "bger",            "bgei",
+    "bger_u",          "bgei_u",
+    "bgtr",            "bgti",
+    "bgtr_u",          "bgti_u",
+    "bner",            "bnei",
+    "bmsr",            "bmsi",
+    "bmcr",            "bmci",
+    "boaddr",          "boaddi",
+    "boaddr_u",                "boaddi_u",
+    "bxaddr",          "bxaddi",
+    "bxaddr_u",                "bxaddi_u",
+    "bosubr",          "bosubi",
+    "bosubr_u",                "bosubi_u",
+    "bxsubr",          "bxsubi",
+    "bxsubr_u",                "bxsubi_u",
+    "jmpr",            "jmpi",
+    "callr",           "calli",
+    "prepare",
+    "pushargr",                "pushargi",
+    "finishr",         "finishi",
+    "ret",
+    "retr",            "reti",
+    "retval_c",                "retval_uc",
+    "retval_s",                "retval_us",
+    "retval_i",                "retval_ui",
+    "retval_l",
+    "epilog",
+    "arg_f",           "getarg_f",
+    "putargr_f",       "putargi_f",
+    "addr_f",          "addi_f",
+    "subr_f",          "subi_f",
+    "rsbi_f",
+    "mulr_f",          "muli_f",
+    "divr_f",          "divi_f",
+    "negr_f",          "absr_f",
+    "sqrtr_f",
+    "ltr_f",           "lti_f",
+    "ler_f",           "lei_f",
+    "eqr_f",           "eqi_f",
+    "ger_f",           "gei_f",
+    "gtr_f",           "gti_f",
+    "ner_f",           "nei_f",
+    "unltr_f",         "unlti_f",
+    "unler_f",         "unlei_f",
+    "uneqr_f",         "uneqi_f",
+    "unger_f",         "ungei_f",
+    "ungtr_f",         "ungti_f",
+    "ltgtr_f",         "ltgti_f",
+    "ordr_f",          "ordi_f",
+    "unordr_f",                "unordi_f",
+    "truncr_f_i",      "truncr_f_l",
+    "extr_f",          "extr_d_f",
+    "movr_f",          "movi_f",
+    "ldr_f",           "ldi_f",
+    "ldxr_f",          "ldxi_f",
+    "str_f",           "sti_f",
+    "stxr_f",          "stxi_f",
+    "bltr_f",          "blti_f",
+    "bler_f",          "blei_f",
+    "beqr_f",          "beqi_f",
+    "bger_f",          "bgei_f",
+    "bgtr_f",          "bgti_f",
+    "bner_f",          "bnei_f",
+    "bunltr_f",                "bunlti_f",
+    "bunler_f",                "bunlei_f",
+    "buneqr_f",                "buneqi_f",
+    "bunger_f",                "bungei_f",
+    "bungtr_f",                "bungti_f",
+    "bltgtr_f",                "bltgti_f",
+    "bordr_f",         "bordi_f",
+    "bunordr_f",       "bunordi_f",
+    "pushargr_f",      "pushargi_f",
+    "retr_f",          "reti_f",
+    "retval_f",
+    "arg_d",           "getarg_d",
+    "putargr_d",       "putargi_d",
+    "addr_d",          "addi_d",
+    "subr_d",          "subi_d",
+    "rsbi_d",
+    "mulr_d",          "muli_d",
+    "divr_d",          "divi_d",
+    "negr_d",          "absr_d",
+    "sqrtr_d",
+    "ltr_d",           "lti_d",
+    "ler_d",           "lei_d",
+    "eqr_d",           "eqi_d",
+    "ger_d",           "gei_d",
+    "gtr_d",           "gti_d",
+    "ner_d",           "nei_d",
+    "unltr_d",         "unlti_d",
+    "unler_d",         "unlei_d",
+    "uneqr_d",         "uneqi_d",
+    "unger_d",         "ungei_d",
+    "ungtr_d",         "ungti_d",
+    "ltgtr_d",         "ltgti_d",
+    "ordr_d",          "ordi_d",
+    "unordr_d",                "unordi_d",
+    "truncr_d_i",      "truncr_d_l",
+    "extr_d",          "extr_f_d",
+    "movr_d",          "movi_d",
+    "ldr_d",           "ldi_d",
+    "ldxr_d",          "ldxi_d",
+    "str_d",           "sti_d",
+    "stxr_d",          "stxi_d",
+    "bltr_d",          "blti_d",
+    "bler_d",          "blei_d",
+    "beqr_d",          "beqi_d",
+    "bger_d",          "bgei_d",
+    "bgtr_d",          "bgti_d",
+    "bner_d",          "bnei_d",
+    "bunltr_d",                "bunlti_d",
+    "bunler_d",                "bunlei_d",
+    "buneqr_d",                "buneqi_d",
+    "bunger_d",                "bungei_d",
+    "bungtr_d",                "bungti_d",
+    "bltgtr_d",                "bltgti_d",
+    "bordr_d",         "bordi_d",
+    "bunordr_d",       "bunordi_d",
+    "pushargr_d",      "pushargi_d",
+    "retr_d",          "reti_d",
+    "retval_d",
+    "movr_w_f",                "movr_ww_d",
+    "movr_w_d",
+    "movr_f_w",                "movi_f_w",
+    "movr_d_ww",       "movi_d_ww",
+    "movr_d_w",                "movi_d_w",
+};
diff --git a/deps/lightning/lib/jit_note.c b/deps/lightning/lib/jit_note.c
new file mode 100644 (file)
index 0000000..c79b818
--- /dev/null
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+/*
+ * Prototypes
+ */
+/* new_note() and note_search_index() are shorthands that pass the _jit
+ * variable of the calling scope as the first argument */
+#define new_note(u, v)         _new_note(_jit, u, v)
+static jit_note_t *_new_note(jit_state_t *, jit_uint8_t*, char*);
+static void new_line(jit_int32_t,jit_note_t*,char*,jit_int32_t,jit_int32_t);
+#define note_search_index(u)   _note_search_index(_jit, u)
+static jit_int32_t _note_search_index(jit_state_t*, jit_uint8_t*);
+/* the *_insert_index helpers return where a new entry belongs, the
+ * *_search_index helpers return the entry that covers a given offset */
+static jit_int32_t line_insert_index(jit_note_t*,jit_int32_t);
+static jit_int32_t line_search_index(jit_note_t*,jit_int32_t);
+static jit_int32_t offset_insert_index(jit_line_t*,jit_int32_t);
+static jit_int32_t offset_search_index(jit_line_t*,jit_int32_t);
+
+/*
+ * Implementation
+ */
+/* Initialization hook of the annotation module; currently does nothing. */
+void
+jit_init_note(void)
+{
+}
+
+/* Cleanup hook of the annotation module; currently does nothing. */
+void
+jit_finish_note(void)
+{
+}
+
+/*
+ * Create a jit_code_name node and append it to the list of name/note
+ * nodes.  A non NULL name is stored through jit_data(), including its
+ * trailing nul byte; a NULL name leaves v.p as NULL.  The new node
+ * becomes the current name and the current note is reset.
+ * Returns the new node.
+ */
+jit_node_t *
+_jit_name(jit_state_t *_jit, const char *name)
+{
+    jit_node_t         *entry;
+
+    entry = jit_new_node(jit_code_name);
+    if (name == NULL)
+       entry->v.p = NULL;
+    else
+       entry->v.n = jit_data(name, strlen(name) + 1, 1);
+
+    /* append to the singly linked list of name/note nodes */
+    if (_jitc->note.head != NULL)
+       _jitc->note.tail->link = entry;
+    else
+       _jitc->note.head = entry;
+    _jitc->note.tail = entry;
+
+    /* one more name: grow the size estimate by one jit_note_t */
+    _jit->note.length++;
+    _jitc->note.size += sizeof(jit_note_t);
+
+    /* remember previous note is invalid due to name change */
+    _jitc->note.note = NULL;
+    _jitc->note.name = entry;
+
+    return (entry);
+}
+
+/*
+ * Create a jit_code_note node for source file `name' and line number
+ * `line', and append it to the list of name/note nodes.  A non NULL
+ * name is stored through jit_data(), including its trailing nul byte;
+ * a NULL name leaves v.p as NULL.  The running size estimate of the
+ * annotation data is updated, and the new node becomes the current
+ * note.  Returns the new node.
+ */
+jit_node_t *
+_jit_note(jit_state_t *_jit, const char *name, int line)
+{
+    jit_node_t         *node;
+
+    node = jit_new_node(jit_code_note);
+    if (name)
+       node->v.n = jit_data(name, strlen(name) + 1, 1);
+    else
+       node->v.p = NULL;
+    node->w.w = line;
+    if (_jitc->note.head == NULL)
+       _jitc->note.head = _jitc->note.tail = node;
+    else {
+       _jitc->note.tail->link = node;
+       _jitc->note.tail = node;
+    }
+    /* reserve a jit_line_t unless the file name is known to be the same
+     * as in the previous note; a previous note created without a file
+     * name has v.p == NULL, so there is no string to compare against
+     * and v.n must not be dereferenced */
+    if (_jitc->note.note == NULL || name == NULL ||
+       _jitc->note.note->v.p == NULL ||
+       strcmp(name, (char *)_jitc->data.ptr + _jitc->note.note->v.n->u.w))
+       _jitc->note.size += sizeof(jit_line_t);
+    /* one line number and one offset per note */
+    _jitc->note.size += sizeof(jit_int32_t) * 2;
+    return (_jitc->note.note = node);
+}
+
+/*
+ * Build the final annotation tables from the list of name/note nodes.
+ * jit_note_t records are created at _jitc->note.base and filled through
+ * jit_set_note(); afterwards the separately allocated jit_line_t,
+ * line number and offset arrays are copied to _jitc->note.base as well,
+ * and the temporary allocations are released.
+ */
+void
+_jit_annotate(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_note_t         *note;
+    jit_line_t         *line;
+    jit_word_t          length;
+    jit_word_t          note_offset;
+    jit_word_t          line_offset;
+
+    /* initialize pointers in mmaped data area */
+    _jit->note.ptr = (jit_note_t *)_jitc->note.base;
+    _jit->note.length = 0;
+
+    note = NULL;
+    for (node = _jitc->note.head; node; node = node->link) {
+       /* a name node always starts a new jit_note_t */
+       if (node->code == jit_code_name)
+           note = new_note(node->u.p, node->v.p ? node->v.n->u.p : NULL);
+       /* a note node without a file name is ignored */
+       else if (node->v.p) {
+           /* note seen before any name: start an unnamed jit_note_t */
+           if (note == NULL)
+               note = new_note(node->u.p, NULL);
+           /* last argument is the code offset relative to the note start */
+           jit_set_note(note, node->v.n->u.p, node->w.w,
+                        (jit_uint8_t *)node->u.p - note->code);
+       }
+    }
+    /* last note */
+    if (note)
+       note->size = _jit->pc.uc - note->code;
+
+    /* annotations may be very complex with conditions to extend
+     * or ignore redundant notes, as well as add entries to earlier
+     * notes, so, relocate the information to the data buffer,
+     * with likely over allocated reserved space */
+
+    /* relocate jit_line_t objects */
+    for (note_offset = 0; note_offset < _jit->note.length; note_offset++) {
+       note = _jit->note.ptr + note_offset;
+       /* nothing to copy for a note without line information */
+       if ((length = sizeof(jit_line_t) * note->length) == 0)
+           continue;
+       assert(_jitc->note.base + length < _jit->data.ptr + _jit->data.length);
+       jit_memcpy(_jitc->note.base, note->lines, length);
+       jit_free((jit_pointer_t *)&note->lines);
+       note->lines = (jit_line_t *)_jitc->note.base;
+       _jitc->note.base += length;
+    }
+
+    /* relocate offset and line number information */
+    for (note_offset = 0; note_offset < _jit->note.length; note_offset++) {
+       note = _jit->note.ptr + note_offset;
+       for (line_offset = 0; line_offset < note->length; line_offset++) {
+           line = note->lines + line_offset;
+           /* linenos and offsets hold the same number of entries */
+           length = sizeof(jit_int32_t) * line->length;
+           assert(_jitc->note.base + length <
+                  _jit->data.ptr + _jit->data.length);
+           jit_memcpy(_jitc->note.base, line->linenos, length);
+           jit_free((jit_pointer_t *)&line->linenos);
+           line->linenos = (jit_int32_t *)_jitc->note.base;
+           _jitc->note.base += length;
+           assert(_jitc->note.base + length <
+                  _jit->data.ptr + _jit->data.length);
+           jit_memcpy(_jitc->note.base, line->offsets, length);
+           jit_free((jit_pointer_t *)&line->offsets);
+           line->offsets = (jit_int32_t *)_jitc->note.base;
+           _jitc->note.base += length;
+       }
+    }
+}
+
+/*
+ * Record that the code at `offset' bytes from the start of `note' was
+ * generated for line `lineno' of `file'.
+ *
+ * Each jit_line_t of the note describes one file and owns two parallel
+ * arrays, linenos and offsets, kept sorted by offset.  File names are
+ * compared by address, not by content.  Depending on what is already
+ * known the information either starts a new jit_line_t, updates an
+ * existing line number/offset pair, or is inserted as a new pair.
+ */
+void
+_jit_set_note(jit_state_t *_jit, jit_note_t *note,
+             char *file, int lineno, jit_int32_t offset)
+{
+    jit_line_t         *line;
+    jit_int32_t                 index;
+
+    index = line_insert_index(note, offset);
+    /* would append after the last entry, but that entry already is for
+     * the same file: extend it instead of creating a new one */
+    if (note->length && index == note->length &&
+       note->lines[index - 1].file == file)
+       --index;
+    if (index >= note->length || note->lines[index].file != file)
+       new_line(index, note, file, lineno, offset);
+    else {
+       line = note->lines + index;
+       /* from here on index is a position in line->linenos/offsets */
+       index = offset_insert_index(line, offset);
+       if (index < line->length && line->offsets[index] == offset) {
+           /* common case if no code was generated for several source lines */
+           if (line->linenos[index] < lineno)
+               line->linenos[index] = lineno;
+       }
+       else if (index < line->length && line->linenos[index] == lineno) {
+           /* common case of extending entry */
+           if (line->offsets[index] > offset)
+               line->offsets[index] = offset;
+       }
+       else {
+           /* line or offset changed */
+           if ((line->length & 15) == 0) {
+               jit_realloc((jit_pointer_t *)&line->linenos,
+                           line->length * sizeof(jit_int32_t),
+                           (line->length + 17) * sizeof(jit_int32_t));
+               jit_realloc((jit_pointer_t *)&line->offsets,
+                           line->length * sizeof(jit_int32_t),
+                           (line->length + 17) * sizeof(jit_int32_t));
+           }
+           /* inserting before the end: shift the tail up by one.  The
+            * bound is the entry count of this line, not the number of
+            * lines in the note, otherwise an existing pair could be
+            * overwritten instead of moved */
+           if (index < line->length) {
+               jit_memmove(line->linenos + index + 1, line->linenos + index,
+                           sizeof(jit_int32_t) * (line->length - index));
+               jit_memmove(line->offsets + index + 1, line->offsets + index,
+                           sizeof(jit_int32_t) * (line->length - index));
+           }
+           line->linenos[index] = lineno;
+           line->offsets[index] = offset;
+           ++line->length;
+       }
+    }
+}
+
+/*
+ * Look up the annotation that covers the code address `code'.
+ * On success stores the note name, the file name and the line number
+ * through `name', `file' and `lineno' (each may be NULL to skip it) and
+ * returns 1.  Returns 0, leaving the output arguments untouched, when
+ * no annotation covers the address.
+ */
+jit_bool_t
+_jit_get_note(jit_state_t *_jit, jit_pointer_t code,
+             char **name, char **file, jit_int32_t *lineno)
+{
+    jit_uint8_t                *addr;
+    jit_note_t         *note;
+    jit_line_t         *line;
+    jit_int32_t                 pos;
+    jit_int32_t                 offset;
+
+    addr = (jit_uint8_t *)code;
+
+    /* which note? */
+    pos = note_search_index(addr);
+    if (pos >= _jit->note.length)
+       return (0);
+    note = _jit->note.ptr + pos;
+    if (addr < note->code || addr >= note->code + note->size)
+       return (0);
+
+    /* which file inside the note? */
+    offset = addr - note->code;
+    pos = line_search_index(note, offset);
+    if (pos >= note->length)
+       return (0);
+    /* address is before the first annotated offset of the note */
+    if (pos == 0 && offset < note->lines[0].offsets[0])
+       return (0);
+    line = note->lines + pos;
+
+    /* which line number inside the file? */
+    pos = offset_search_index(line, offset);
+    if (pos >= line->length)
+       return (0);
+
+    if (name)
+       *name = note->name;
+    if (file)
+       *file = line->file;
+    if (lineno)
+       *lineno = line->linenos[pos];
+
+    return (1);
+}
+
+/*
+ * Start a new jit_note_t for the code beginning at `code', labeled
+ * `name'.  The size of the previous note, if any, is set to the
+ * distance between its start and `code'.  The record is taken from
+ * _jitc->note.base, which is advanced past it.  Returns the new record.
+ */
+static jit_note_t *
+_new_note(jit_state_t *_jit, jit_uint8_t *code, char *name)
+{
+    jit_note_t         *fresh;
+    jit_note_t         *last;
+
+    /* the previous note ends where this one starts */
+    if (_jit->note.length != 0) {
+       last = &_jit->note.ptr[_jit->note.length - 1];
+       assert(code >= last->code);
+       last->size = code - last->code;
+    }
+
+    fresh = (jit_note_t *)_jitc->note.base;
+    _jitc->note.base += sizeof(jit_note_t);
+    _jit->note.length++;
+
+    fresh->name = name;
+    fresh->code = code;
+
+    return (fresh);
+}
+
+/*
+ * Insert a new jit_line_t for `file' at position `index' of
+ * note->lines, moving later entries up by one, and seed it with a
+ * single line number/offset pair.  The lines array starts with room
+ * for 16 entries and is enlarged when the count reaches 15 modulo 16.
+ */
+static void
+new_line(jit_int32_t index, jit_note_t *note,
+         char *file, jit_int32_t lineno, jit_int32_t offset)
+{
+    jit_line_t         *slot;
+
+    if (note->lines != NULL) {
+       if ((note->length & 15) == 15)
+           jit_realloc((jit_pointer_t *)&note->lines,
+                       sizeof(jit_line_t) * note->length,
+                       sizeof(jit_line_t) * (note->length + 17));
+    }
+    else
+       jit_alloc((jit_pointer_t *)&note->lines, sizeof(jit_line_t) * 16);
+
+    /* open a gap unless appending at the end */
+    if (note->length > index)
+       jit_memmove(note->lines + index + 1, note->lines + index,
+                   sizeof(jit_line_t) * (note->length - index));
+    slot = &note->lines[index];
+    note->length++;
+
+    slot->file = file;
+    slot->length = 1;
+    jit_alloc((jit_pointer_t *)&slot->linenos, sizeof(jit_int32_t) * 16);
+    slot->linenos[0] = lineno;
+    jit_alloc((jit_pointer_t *)&slot->offsets, sizeof(jit_int32_t) * 16);
+    slot->offsets[0] = offset;
+}
+
+/*
+ * Binary search of _jit->note.ptr for the note whose code range
+ * [code, code + size) contains `code'.  Returns the index of that note,
+ * or the index at which the search ran out of candidates; callers must
+ * check the result against _jit->note.length and the note range.
+ */
+static jit_int32_t
+_note_search_index(jit_state_t *_jit, jit_uint8_t *code)
+{
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 mid;
+    jit_note_t         *notes;
+
+    notes = _jit->note.ptr;
+    lo = 0;
+    hi = _jit->note.length;
+    while (lo < hi) {
+       mid = (lo + hi) >> 1;
+       if (code < notes[mid].code)
+           hi = mid;
+       /* code is at or after the note start here */
+       else if (code - notes[mid].code < notes[mid].size)
+           return (mid);
+       else
+           lo = mid + 1;
+    }
+
+    return ((lo + hi) >> 1);
+}
+
+/*
+ * Return the position in note->lines where an entry starting at
+ * `offset' belongs: the index of the first entry whose first offset is
+ * greater than `offset', or note->length if there is none.
+ * Returns 0 when the note has no lines yet.
+ */
+static jit_int32_t
+line_insert_index(jit_note_t *note, jit_int32_t offset)
+{
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 mid;
+    jit_line_t         *lines;
+
+    lines = note->lines;
+    if (lines == NULL)
+       return (0);
+
+    lo = 0;
+    hi = note->length;
+    while (lo < hi) {
+       mid = (lo + hi) >> 1;
+       if (offset < lines[mid].offsets[0])
+           hi = mid;
+       else
+           lo = mid + 1;
+    }
+
+    return ((lo + hi) >> 1);
+}
+
+/*
+ * Binary search of note->lines for the entry that covers `offset':
+ * the last entry, or the entry whose first offset is not greater than
+ * `offset' while the first offset of the next entry is.
+ * Returns 0 when the note has no lines.
+ */
+static jit_int32_t
+line_search_index(jit_note_t *note, jit_int32_t offset)
+{
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 mid;
+    jit_line_t         *lines;
+
+    lines = note->lines;
+    if (lines == NULL)
+       return (0);
+
+    lo = 0;
+    hi = note->length;
+    while (lo < hi) {
+       mid = (lo + hi) >> 1;
+       if (offset < lines[mid].offsets[0])
+           hi = mid;
+       /* offset should be already verified to be in range; the last
+        * entry is tested first so that mid + 1 is never out of bounds */
+       else if (mid == note->length - 1 ||
+                offset < lines[mid + 1].offsets[0])
+           return (mid);
+       else
+           lo = mid + 1;
+    }
+
+    return ((lo + hi) >> 1);
+}
+
+/*
+ * Return the position in line->offsets where `offset' belongs: the
+ * index of the first entry greater than `offset', or line->length if
+ * there is none.
+ */
+static jit_int32_t
+offset_insert_index(jit_line_t *line, jit_int32_t offset)
+{
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 mid;
+    jit_int32_t                *offsets;
+
+    offsets = line->offsets;
+    lo = 0;
+    hi = line->length;
+    while (lo < hi) {
+       mid = (lo + hi) >> 1;
+       if (offset < offsets[mid])
+           hi = mid;
+       else
+           lo = mid + 1;
+    }
+
+    return ((lo + hi) >> 1);
+}
+
+/*
+ * Binary search of line->offsets for the entry that covers `offset':
+ * the last entry, or the entry that is not greater than `offset' while
+ * the next one is.
+ */
+static jit_int32_t
+offset_search_index(jit_line_t *line, jit_int32_t offset)
+{
+    jit_int32_t                 lo;
+    jit_int32_t                 hi;
+    jit_int32_t                 mid;
+    jit_int32_t                *offsets;
+
+    offsets = line->offsets;
+    lo = 0;
+    hi = line->length;
+    while (lo < hi) {
+       mid = (lo + hi) >> 1;
+       if (offset < offsets[mid])
+           hi = mid;
+       /* offset should be already verified to be in range; the last
+        * entry is tested first so that mid + 1 is never out of bounds */
+       else if (mid == line->length - 1 || offset < offsets[mid + 1])
+           return (mid);
+       else
+           lo = mid + 1;
+    }
+
+    return ((lo + hi) >> 1);
+}
diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c
new file mode 100644 (file)
index 0000000..c4397ad
--- /dev/null
@@ -0,0 +1,3654 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* Word-size dependent frame constants and 32-bit immediate predicates.
+ *   gpr_save_area           bytes reserved for saving r14~r31
+ *   params_offset           ABI dependent constant, selected by
+ *                           _CALL_SYSV (32-bit) or _CALL_ELF (64-bit)
+ *   can_sign_extend_int_p   im is within [-0x80000000, 0x7fffffff]
+ *   can_zero_extend_int_p   im is within [0, 0x7fffffff]
+ *   fits_uint32_p           the upper 32 bits of im are all zero
+ * With 32-bit words every value trivially satisfies the predicates. */
+#  if __WORDSIZE == 32
+#    define gpr_save_area              72      /* r14~r31 = 18 * 4 */
+#    if _CALL_SYSV
+#      define params_offset            (sizeof(jit_word_t) << 1)
+#    else
+#      define params_offset            24
+#    endif
+#    define can_sign_extend_int_p(im)  1
+#    define can_zero_extend_int_p(im)  1
+#    define fits_uint32_p(im)          1
+#  else
+#    define gpr_save_area              144     /* r14~r31 = 18 * 8 */
+#    if _CALL_ELF == 2
+#      define params_offset            32
+#    else
+#      define params_offset            48
+#    endif
+#    define can_sign_extend_int_p(im)                                  \
+       (((im) >= 0 && (long)(im) <=  0x7fffffffL) ||                   \
+        ((im) <  0 && (long)(im) >= -0x80000000L))
+#    define can_zero_extend_int_p(im)                                  \
+       ((im) >= 0 && (im) < 0x80000000L)
+/* argument parenthesized so that e.g. fits_uint32_p(a | b) tests the
+ * whole expression and not just its right-hand operand */
+#    define fits_uint32_p(im)          (((im) & 0xffffffff00000000L) == 0)
+#  endif
+/* Floating-point save area size; alloca_offset is the negated sum of
+ * both register save areas. */
+#  define fpr_save_area                        64
+#  define alloca_offset                        -(gpr_save_area + fpr_save_area)
+/* Code emission: store `i' through the current code pointer and advance
+ * it.  ii() always uses pc.ui; iw() uses pc.ui or pc.ul depending on the
+ * word size.  The expansions are unparenthesized assignments, so use
+ * them as full statements only. */
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  if __WORDSIZE == 32
+#    define iw(i)                      *_jit->pc.ui++ = i
+#  else
+#    define iw(i)                      *_jit->pc.ul++ = i
+#  endif
+/* Immediate range checks: signed 16-bit, unsigned 16-bit and signed
+ * 26-bit (-2^25 .. 2^25-1) respectively. */
+#  define can_sign_extend_short_p(im)  ((im) >= -32768 && (im) <= 32767)
+#  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+#  define can_sign_extend_jump_p(im)   ((im) >= -33554432 && (im) <= 33554431)
+/* Hardware register numbers of the registers referenced by name. */
+#  define _R0_REGNO                    0
+#  define _SP_REGNO                    1
+#  define _R2_REGNO                    2
+#  define _R11_REGNO                   11
+#  define _R12_REGNO                   12
+#  define _FP_REGNO                    31
+/* Word-sized load/store aliases: the _i variants with 32-bit words, the
+ * _l variants otherwise. */
+#  if __WORDSIZE == 32
+#    define ldr(r0,r1)                 ldr_i(r0,r1)
+#    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
+#    define ldxr(r0,r1,r2)             ldxr_i(r0,r1,r2)
+#    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
+#    define stxr(r0,r1,r2)             stxr_i(r0,r1,r2)
+#  else
+#    define ldr(r0,r1)                 ldr_l(r0,r1)
+#    define ldxi(r0,r1,i0)             ldxi_l(r0,r1,i0)
+#    define ldxr(r0,r1,r2)             ldxr_l(r0,r1,r2)
+#    define stxi(i0,r0,r1)             stxi_l(i0,r0,r1)
+#    define stxr(r0,r1,r2)             stxr_l(r0,r1,r2)
+#  endif
+/* Instruction-format emitters.  Each FXX(...) wrapper forwards to the
+ * static _FXX() helper with _jit prepended.  Where a trailing-underscore
+ * twin exists (FXO_, FX_, FXL_, FMDS_, FMD_, FXS_) it differs only in
+ * passing 1 instead of 0 as the final argument -- presumably the record
+ * (Rc) or link (LK) bit of the encoding; confirm against the helper
+ * bodies.  FMDS, FMD and FXS exist only with 64-bit words. */
+#  define FXO(o,d,a,b,e,x)             _FXO(_jit,o,d,a,b,e,x,0)
+#  define FXO_(o,d,a,b,e,x)            _FXO(_jit,o,d,a,b,e,x,1)
+static void _FXO(jit_state_t*,int,int,int,int,int,int,int);
+#  define FDs(o,d,a,s)                 _FDs(_jit,o,d,a,s)
+static void _FDs(jit_state_t*,int,int,int,int);
+#  define FDu(o,d,a,s)                 _FDu(_jit,o,d,a,s)
+static void _FDu(jit_state_t*,int,int,int,int);
+#  define FX(o,d,a,b,x)                        _FX(_jit,o,d,a,b,x,0)
+#  define FX_(o,d,a,b,x)               _FX(_jit,o,d,a,b,x,1)
+static void _FX(jit_state_t*,int,int,int,int,int,int);
+#  define FI(o,t,a,k)                  _FI(_jit,o,t,a,k)
+static void _FI(jit_state_t*,int,int,int,int);
+#  define FB(o,bo,bi,t,a,k)            _FB(_jit,o,bo,bi,t,a,k)
+static void _FB(jit_state_t*,int,int,int,int,int,int);
+#  define FXL(o,bo,bi,x)               _FXL(_jit,o,bo,bi,x,0)
+#  define FXL_(o,bo,bi,x)              _FXL(_jit,o,bo,bi,x,1)
+static void _FXL(jit_state_t*,int,int,int,int,int);
+#  define FC(o,d,l,a,b,x)              _FC(_jit,o,d,l,a,b,x)
+static void _FC(jit_state_t*,int,int,int,int,int,int);
+#  define FCI(o,d,l,a,s)               _FCI(_jit,o,d,l,a,s)
+static void _FCI(jit_state_t*,int,int,int,int,int);
+#  define FXFX(o,s,x,f)                        _FXFX(_jit,o,s,x,f)
+static void _FXFX(jit_state_t*,int,int,int,int);
+#  define FM(o,s,a,h,b,e,r)            _FM(_jit,o,s,a,h,b,e,r)
+static void _FM(jit_state_t*,int,int,int,int,int,int,int);
+#  if __WORDSIZE == 64
+#    define FMDS(o,s,a,b,e,x)          _FMDS(_jit,o,s,a,b,e,x,0)
+#    define FMDS_(o,s,a,b,e,x)         _FMDS(_jit,o,s,a,b,e,x,1)
+static void _FMDS(jit_state_t*,int,int,int,int,int,int,int);
+#    define FMD(o,s,a,h,b,x,i)         _FMD(_jit,o,s,a,h,b,x,i,0)
+#    define FMD_(o,s,a,h,b,x,i)                _FMD(_jit,o,s,a,h,b,x,i,1)
+static void _FMD(jit_state_t*,int,int,int,int,int,int,int,int);
+#  define FXS(o,d,a,h,x,i)             _FXS(_jit,o,d,a,h,x,i,0)
+#  define FXS_(o,d,a,h,x,i)            _FXS(_jit,o,d,a,h,x,i,1)
+static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
+#  endif
+/* Condition register constants.  CR_0..CR_7 number the CR fields;
+ * CR_LT/CR_GT/CR_EQ/CR_SO are bit positions within a field, and CR_UN
+ * shares position 3 with CR_SO.  BCC_F and BCC_T are the values passed
+ * as the first operand of BC/BCCTR/BCLR by the conditional branch
+ * aliases (branch if the bit is false / true). */
+#  define CR_0                         0
+#  define CR_1                         1
+#  define CR_2                         2
+#  define CR_3                         3
+#  define CR_4                         4
+#  define CR_5                         5
+#  define CR_6                         6
+#  define CR_7                         7
+#  define CR_LT                                0
+#  define CR_GT                                1
+#  define CR_EQ                                2
+#  define CR_SO                                3
+#  define CR_UN                                3
+#  define BCC_F                                4
+#  define BCC_T                                12
+/* Integer add family.  Naming convention of the register forms: a
+ * trailing `_' selects the FXO_ emitter (final _FXO argument 1) and an
+ * `O' suffix passes 1 as the `e' argument; the plain name uses FXO with
+ * e == 0.  ADDC follows that convention too: it must go through FXO so
+ * that it is distinct from ADDC_.  The immediate forms (ADDI, ADDIC,
+ * ADDIC_, ADDIS) go through FDs; LIS is ADDIS with 0 as second operand. */
+#  define ADD(d,a,b)                   FXO(31,d,a,b,0,266)
+#  define ADD_(d,a,b)                  FXO_(31,d,a,b,0,266)
+#  define ADDO(d,a,b)                  FXO(31,d,a,b,1,266)
+#  define ADDO_(d,a,b)                 FXO_(31,d,a,b,1,266)
+#  define ADDC(d,a,b)                  FXO(31,d,a,b,0,10)
+#  define ADDC_(d,a,b)                 FXO_(31,d,a,b,0,10)
+#  define ADDCO(d,a,b)                 FXO(31,d,a,b,1,10)
+#  define ADDCO_(d,a,b)                        FXO_(31,d,a,b,1,10)
+#  define ADDE(d,a,b)                  FXO(31,d,a,b,0,138)
+#  define ADDE_(d,a,b)                 FXO_(31,d,a,b,0,138)
+#  define ADDEO(d,a,b)                 FXO(31,d,a,b,1,138)
+#  define ADDEO_(d,a,b)                        FXO_(31,d,a,b,1,138)
+#  define ADDI(d,a,s)                  FDs(14,d,a,s)
+#  define ADDIC(d,a,s)                 FDs(12,d,a,s)
+#  define ADDIC_(d,a,s)                        FDs(13,d,a,s)
+#  define ADDIS(d,a,s)                 FDs(15,d,a,s)
+#  define LIS(d,s)                     ADDIS(d,0,s)
+#  define ADDME(d,a)                   FXO(31,d,a,0,0,234)
+#  define ADDME_(d,a)                  FXO_(31,d,a,0,0,234)
+#  define ADDMEO(d,a)                  FXO(31,d,a,0,1,234)
+#  define ADDMEO_(d,a)                 FXO_(31,d,a,0,1,234)
+#  define ADDZE(d,a)                   FXO(31,d,a,0,0,202)
+#  define ADDZE_(d,a)                  FXO_(31,d,a,0,0,202)
+#  define ADDZEO(d,a)                  FXO(31,d,a,0,1,202)
+#  define ADDZEO_(d,a)                 FXO_(31,d,a,0,1,202)
+/* Bitwise AND family.  The macros take (destination, source, source or
+ * immediate) while the emitters receive the first source ahead of the
+ * destination, hence the (a,d,...) order in every expansion.  AND_ uses
+ * the same operand order as AND and differs only in going through FX_. */
+#  define AND(d,a,b)                   FX(31,a,d,b,28)
+#  define ANDC(d,a,b)                  FXO(31,a,d,b,0,60)
+#  define ANDC_(d,a,b)                 FXO_(31,a,d,b,0,60)
+#  define AND_(d,a,b)                  FX_(31,a,d,b,28)
+#  define ANDI_(d,a,u)                 FDu(28,a,d,u)
+#  define ANDIS_(d,a,u)                        FDu(29,a,d,u)
+/* Branches.  B/BA/BL/BLA and BC/BCA/BCL/BCLA enumerate the two trailing
+ * 0/1 arguments of FI and FB (presumably the AA and LK bits).  The
+ * conditional aliases pair BCC_T or BCC_F with a CR bit, e.g. BLE is
+ * "branch if CR_GT is false".  The *CTR and *LR forms go through FXL
+ * with 528 and 16 respectively; 20 is passed as first operand for the
+ * unconditional BCTR/BCTRL/BLR/BLRL forms. */
+#  define B(t)                         FI(18,t,0,0)
+#  define BA(t)                                FI(18,t,1,0)
+#  define BL(t)                                FI(18,t,0,1)
+#  define BLA(t)                       FI(18,t,1,1)
+#  define BC(o,i,t)                    FB(16,o,i,t,0,0)
+#  define BCA(o,i,t)                   FB(16,o,i,t,1,0)
+#  define BCL(o,i,t)                   FB(16,o,i,t,0,1)
+#  define BCLA(o,i,t)                  FB(16,o,i,t,1,1)
+#  define BLT(t)                       BC(BCC_T,CR_LT,t)
+#  define BLE(t)                       BC(BCC_F,CR_GT,t)
+#  define BEQ(t)                       BC(BCC_T,CR_EQ,t)
+#  define BGE(t)                       BC(BCC_F,CR_LT,t)
+#  define BGT(t)                       BC(BCC_T,CR_GT,t)
+#  define BNE(t)                       BC(BCC_F,CR_EQ,t)
+#  define BUN(t)                       BC(BCC_T,CR_UN,t)
+#  define BNU(t)                       BC(BCC_F,CR_UN,t)
+#  define BCCTR(o,i)                   FXL(19,o,i,528)
+#  define BCCTRL(o,i)                  FXL_(19,o,i,528)
+#  define BLTCTR()                     BCCTR(BCC_T,CR_LT)
+#  define BLECTR()                     BCCTR(BCC_F,CR_GT)
+#  define BEQCTR()                     BCCTR(BCC_T,CR_EQ)
+#  define BGECTR()                     BCCTR(BCC_F,CR_LT)
+#  define BGTCTR()                     BCCTR(BCC_T,CR_GT)
+#  define BNECTR()                     BCCTR(BCC_F,CR_EQ)
+#  define BCTR()                       BCCTR(20,0)
+#  define BCTRL()                      BCCTRL(20,0)
+#  define BCLR(o,i)                    FXL(19,o,i,16)
+#  define BCLRL(o,i)                   FXL_(19,o,i,16)
+#  define BLTLR()                      BCLR(BCC_T,CR_LT)
+#  define BLELR()                      BCLR(BCC_F,CR_GT)
+#  define BEQLR()                      BCLR(BCC_T,CR_EQ)
+#  define BGELR()                      BCLR(BCC_F,CR_LT)
+#  define BGTLR()                      BCLR(BCC_T,CR_GT)
+#  define BNELR()                      BCLR(BCC_F,CR_EQ)
+#  define BLR()                                BCLR(20,0)
+#  define BLRL()                       BCLRL(20,0)
+/* Compares.  The `l' argument is 0 for the W (word) forms and 1 for the
+ * D (doubleword) forms; every convenience form targets CR field 0.
+ * XCMP/XCMPI versus XCMPL/XCMPLI follow the PowerPC cmp/cmpi and
+ * cmpl/cmpli mnemonics (arithmetic versus logical comparison). */
+#  define XCMP(cr,l,a,b)               FC(31,cr,l,a,b,0)
+#  define CMPD(a,b)                    XCMP(0,1,a,b)
+#  define CMPW(a,b)                    XCMP(0,0,a,b)
+#  define XCMPI(cr,l,a,s)              FCI(11,cr,l,a,s)
+#  define CMPDI(a,s)                   XCMPI(0,1,a,s)
+#  define CMPWI(a,s)                   XCMPI(0,0,a,s)
+#  define XCMPL(cr,l,a,b)              FC(31,cr,l,a,b,32)
+#  define CMPLD(a,b)                   XCMPL(0,1,a,b)
+#  define CMPLW(a,b)                   XCMPL(0,0,a,b)
+#  define XCMPLI(cr,l,a,u)             FCI(10,cr,l,a,u)
+#  define CMPLDI(a,s)                  XCMPLI(0,1,a,s)
+#  define CMPLWI(a,s)                  XCMPLI(0,0,a,s)
+/* Count leading zeros, condition-register bit operations and data cache
+ * block operations.  CRSET, CRNOT, CRMOVE and CRCLR are shorthands that
+ * reuse CREQV, CRNOR, CROR and CRXOR with repeated operands. */
+#  define CNTLZW(a,s)                  FX(31,s,a,0,26)
+#  define CNTLZW_(a,s)                 FX_(31,s,a,0,26)
+#  define CRAND(d,a,b)                 FX(19,d,a,b,257)
+#  define CRANDC(d,a,b)                        FX(19,d,a,b,129)
+#  define CREQV(d,a,b)                 FX(19,d,a,b,289)
+#  define CRSET(d)                     CREQV(d,d,d)
+#  define CRNAND(d,a,b)                        FX(19,d,a,b,225)
+#  define CRNOR(d,a,b)                 FX(19,d,a,b,33)
+#  define CRNOT(d,a)                   CRNOR(d,a,a)
+#  define CROR(d,a,b)                  FX(19,d,a,b,449)
+#  define CRMOVE(d,a)                  CROR(d,a,a)
+#  define CRORC(d,a,b)                 FX(19,d,a,b,417)
+#  define CRXOR(d,a,b)                 FX(19,d,a,b,193)
+#  define CRCLR(d)                     CRXOR(d,d,d)
+#  define DCBA(a,b)                    FX(31,0,a,b,758)
+#  define DCBF(a,b)                    FX(31,0,a,b,86)
+#  define DCBI(a,b)                    FX(31,0,a,b,470)
+#  define DCBST(a,b)                   FX(31,0,a,b,54)
+#  define DCBT(a,b)                    FX(31,0,a,b,278)
+#  define DCBTST(a,b)                  FX(31,0,a,b,246)
+#  define DCBZ(a,b)                    FX(31,0,a,b,1014)
+/* Divide (W = word, D = doubleword, U = unsigned; `O' passes e == 1 and
+ * a trailing `_' selects the FXO_/FX_ emitter), followed by ECIWX/ECOWX,
+ * EIEIO, EQV and the EXTSB/EXTSH/EXTSW sign extensions.  As with the
+ * other logical operations, EQV and EXTS* hand the source register to FX
+ * ahead of the destination. */
+#  define DIVW(d,a,b)                  FXO(31,d,a,b,0,491)
+#  define DIVW_(d,a,b)                 FXO_(31,d,a,b,0,491)
+#  define DIVWO(d,a,b)                 FXO(31,d,a,b,1,491)
+#  define DIVWO_(d,a,b)                        FXO_(31,d,a,b,1,491)
+#  define DIVWU(d,a,b)                 FXO(31,d,a,b,0,459)
+#  define DIVWU_(d,a,b)                        FXO_(31,d,a,b,0,459)
+#  define DIVWUO(d,a,b)                        FXO(31,d,a,b,1,459)
+#  define DIVWUO_(d,a,b)               FXO_(31,d,a,b,1,459)
+#  define DIVD(d,a,b)                  FXO(31,d,a,b,0,489)
+#  define DIVD_(d,a,b)                 FXO_(31,d,a,b,0,489)
+#  define DIVDO(d,a,b)                 FXO(31,d,a,b,1,489)
+#  define DIVDO_(d,a,b)                        FXO_(31,d,a,b,1,489)
+#  define DIVDU(d,a,b)                 FXO(31,d,a,b,0,457)
+#  define DIVDU_(d,a,b)                        FXO_(31,d,a,b,0,457)
+#  define DIVDUO(d,a,b)                        FXO(31,d,a,b,1,457)
+#  define DIVDUO_(d,a,b)               FXO_(31,d,a,b,1,457)
+#  define ECIWX(d,a,b)                 FX(31,d,a,b,310)
+#  define ECOWX(s,a,b)                 FX(31,s,a,b,438)
+#  define EIEIO()                      FX(31,0,0,0,854)
+#  define EQV(d,a,b)                   FX(31,a,d,b,284)
+#  define EQV_(d,a,b)                  FX_(31,a,d,b,284)
+#  define EXTSB(d,a)                   FX(31,a,d,0,954)
+#  define EXTSB_(d,a)                  FX_(31,a,d,0,954)
+#  define EXTSH(d,a)                   FX(31,a,d,0,922)
+#  define EXTSH_(d,a)                  FX_(31,a,d,0,922)
+#  define EXTSW(d,a)                   FX(31,a,d,0,986)
+#  define EXTSW_(d,a)                  FX_(31,a,d,0,986)
+/* Instruction cache, isync and loads.  Displacement forms go through FDs
+ * and indexed forms through FX.  LA and LI are ADDI shorthands.  LWA and
+ * LD share primary opcode 58; LWA sets bit 1 of the displacement (s|2)
+ * to tell them apart.
+ * NOTE(review): ICIB and LHRBX look like transposed spellings of the
+ * PowerPC mnemonics icbi and lhbrx; the names are kept because callers
+ * are not visible from here.
+ * NOTE(review): MCRF shifts `d' without parenthesizing it, so pass a
+ * simple expression as first argument. */
+#  define ICIB(a,b)                    FX(31,0,a,b,982)
+#  define ISYNC()                      FXL(19,0,0,150)
+#  define LBZ(d,a,s)                   FDs(34,d,a,s)
+#  define LBZU(d,a,s)                  FDs(35,d,a,s)
+#  define LBZUX(d,a,b)                 FX(31,d,a,b,119)
+#  define LBZX(d,a,b)                  FX(31,d,a,b,87)
+#  define LHA(d,a,s)                   FDs(42,d,a,s)
+#  define LHAU(d,a,s)                  FDs(43,d,a,s)
+#  define LHAUX(d,a,b)                 FX(31,d,a,b,375)
+#  define LHAX(d,a,b)                  FX(31,d,a,b,343)
+#  define LHRBX(d,a,b)                 FX(31,d,a,b,790)
+#  define LHZ(d,a,s)                   FDs(40,d,a,s)
+#  define LHZU(d,a,s)                  FDs(41,d,a,s)
+#  define LHZUX(d,a,b)                 FX(31,d,a,b,311)
+#  define LHZX(d,a,b)                  FX(31,d,a,b,279)
+#  define LA(d,a,s)                    ADDI(d,a,s)
+#  define LI(d,s)                      ADDI(d,0,s)
+#  define LMW(d,a,s)                   FDs(46,d,a,s)
+#  define LSWI(d,a,n)                  FX(31,d,a,n,597)
+#  define LSWX(d,a,b)                  FX(31,d,a,b,533)
+#  define LWARX(d,a,b)                 FX(31,d,a,b,20)
+#  define LWBRX(d,a,b)                 FX(31,d,a,b,534)
+#  define LWA(d,a,s)                   FDs(58,d,a,s|2)
+#  define LWAUX(d,a,b)                 FX(31,d,a,b,373)
+#  define LWAX(d,a,b)                  FX(31,d,a,b,341)
+#  define LWZ(d,a,s)                   FDs(32,d,a,s)
+#  define LWZU(d,a,s)                  FDs(33,d,a,s)
+#  define LWZUX(d,a,b)                 FX(31,d,a,b,55)
+#  define LWZX(d,a,b)                  FX(31,d,a,b,23)
+#  define LD(d,a,s)                    FDs(58,d,a,s)
+#  define LDX(d,a,b)                   FX(31,d,a,b,21)
+#  define MCRF(d,s)                    FXL(19,d<<2,(s)<<2,0)
+#  if DEBUG
+/* In case instruction is emulated, check the kernel can handle it.
+   Will only generate it if DEBUG is enabled.
+"""
+Chapter 6. Optional Facilities and Instructions that are being
+Phased Out of the Architecture
+...
+6.1 Move To Condition Register from XER
+The mcrxr instruction is being phased out of the archi-
+tecture. Its description is included here as an aid to
+constructing operating system code to emulate it.
+
+Move to Condition Register from XER
+X-form
+mcrxr BF
+31     BF      //      ///     ///     512     /
+0      6       9       11      16      21      31
+CR(4xBF:4xBF+3) <- XER(32:35)
+XER(32:35) <- 0b0000
+The contents of XER(32:35) are copied to Condition Reg-
+ister field BF. XER(32:35) are set to zero.
+Special Registers Altered:
+CR field BF XER(32:35)
+
+Programming Note
+Warning: This instruction has been phased out of
+the architecture. Attempting to execute this
+instruction will cause the system illegal instruction
+error handler to be invoked
+"""
+ */
+#    define MCRXR(d)                   FX(31,d<<2,0,0,512)
+#  else
+/* Without DEBUG the macro calls the _MCRXR() helper rather than emitting
+   the instruction word itself.  Like the DEBUG variant it expands to a
+   plain expression with no trailing semicolon, so the call site supplies
+   the statement terminator and the macro is safe inside if/else. */
+#    define MCRXR(cr)                  _MCRXR(_jit,cr)
+static void _MCRXR(jit_state_t*, jit_int32_t);
+#  endif
+/* Moves from/to special registers.  MFSPR and MTSPR shift the register
+ * number left by 5 before handing it to FXFX; XER, LR and CTR are
+ * numbers 1, 8 and 9.  MFTB combines its two selectors as x | (y << 5).
+ * MTCR is MTCRF with the full 0xff mask.
+ * NOTE(review): several arguments (s, c, r) are shifted without being
+ * parenthesized, so pass simple expressions only. */
+#  define MFCR(d)                      FX(31,d,0,0,19)
+#  define MFMSR(d)                     FX(31,d,0,0,83)
+#  define MFSPR(d,s)                   FXFX(31,d,s<<5,339)
+#  define MFXER(d)                     MFSPR(d,1)
+#  define MFLR(d)                      MFSPR(d,8)
+#  define MFCTR(d)                     MFSPR(d,9)
+#  define MFSR(d,s)                    FX(31,d,s,0,595)
+#  define MFSRIN(d,b)                  FX(31,d,0,b,659)
+#  define MFTB(d,x,y)                  FXFX(31,d,(x)|((y)<<5),371)
+#  define MFTBL(d)                     MFTB(d,8,12)
+#  define MFTBU(d)                     MFTB(d,8,13)
+#  define MTCRF(c,s)                   FXFX(31,s,c<<1,144)
+#  define MTCR(s)                      MTCRF(0xff,s)
+#  define MTMSR(s)                     FX(31,s,0,0,146)
+#  define MTSPR(d,s)                   FXFX(31,d,s<<5,467)
+#  define MTXER(d)                     MTSPR(d,1)
+#  define MTLR(d)                      MTSPR(d,8)
+#  define MTCTR(d)                     MTSPR(d,9)
+#  define MTSR(r,s)                    FX(31,s<<1,r,0,210)
+#  define MTSRIN(r,b)                  FX(31,r<<1,0,b,242)
+/* Multiply.  MULLI is the immediate form (its opcode is written as the
+ * octal literal 07, i.e. 7).  MULH* yield the high part and MULL* the
+ * low part per the PowerPC mnemonics; W = word, D = doubleword, U =
+ * unsigned, `O' passes e == 1, trailing `_' selects FXO_. */
+#  define MULLI(d,a,s)                 FDs(07,d,a,s)
+#  define MULHW(d,a,b)                 FXO(31,d,a,b,0,75)
+#  define MULHW_(d,a,b)                        FXO_(31,d,a,b,0,75)
+#  define MULHWU(d,a,b)                        FXO(31,d,a,b,0,11)
+#  define MULHWU_(d,a,b)               FXO_(31,d,a,b,0,11)
+#  define MULLW(d,a,b)                 FXO(31,d,a,b,0,235)
+#  define MULLW_(d,a,b)                        FXO_(31,d,a,b,0,235)
+#  define MULLWO(d,a,b)                        FXO(31,d,a,b,1,235)
+#  define MULLWO_(d,a,b)               FXO_(31,d,a,b,1,235)
+#  define MULHD(d,a,b)                 FXO(31,d,a,b,0,73)
+#  define MULHD_(d,a,b)                        FXO_(31,d,a,b,0,73)
+#  define MULHDU(d,a,b)                        FXO(31,d,a,b,0,9)
+#  define MULHDU_(d,a,b)               FXO_(31,d,a,b,0,9)
+#  define MULLD(d,a,b)                 FXO(31,d,a,b,0,233)
+#  define MULLD_(d,a,b)                        FXO_(31,d,a,b,0,233)
+#  define MULLDO(d,a,b)                        FXO(31,d,a,b,1,233)
+#  define MULLDO_(d,a,b)               FXO_(31,d,a,b,1,233)
+/* NAND, NEG, NOR, OR, ORC and their immediate/derived forms.  The
+ * logical operations hand the first source to FX/FDu ahead of the
+ * destination.  NOT is NOR with both sources equal, MR is OR with both
+ * sources equal, and NOP is ORI(0,0,0). */
+#  define NAND(d,a,b)                  FX(31,a,d,b,476)
+#  define NAND_(d,a,b)                 FX_(31,a,d,b,476)
+#  define NEG(d,a)                     FXO(31,d,a,0,0,104)
+#  define NEG_(d,a)                    FXO_(31,d,a,0,0,104)
+#  define NEGO(d,a)                    FXO(31,d,a,0,1,104)
+#  define NEGO_(d,a)                   FXO_(31,d,a,0,1,104)
+#  define NOR(d,a,b)                   FX(31,a,d,b,124)
+#  define NOR_(d,a,b)                  FX_(31,a,d,b,124)
+#  define NOT(d,s)                     NOR(d,s,s)
+#  define OR(d,a,b)                    FX(31,a,d,b,444)
+#  define OR_(d,a,b)                   FX_(31,a,d,b,444)
+#  define MR(d,a)                      OR(d,a,a)
+#  define ORC(d,a,b)                   FX(31,a,d,b,412)
+#  define ORC_(d,a,b)                  FX_(31,a,d,b,412)
+#  define ORI(d,a,u)                   FDu(24,a,d,u)
+#  define NOP()                                ORI(0,0,0)
+#  define ORIS(d,a,u)                  FDu(25,a,d,u)
+#  define RFI()                                FXL(19,0,0,50)
+/* 32-bit rotates and shifts.  RLWIMI/RLWINM/RLWNM go through FM with the
+ * last argument 0, or 1 for the `_' forms.  The insert, extract, rotate,
+ * shift and clear shorthands below are all spelled in terms of RLWIMI
+ * or RLWINM with computed shift/mask bounds.
+ * NOTE(review): the shorthand arguments are used in arithmetic without
+ * parentheses (e.g. 32-b, b+n-1), so pass simple expressions only. */
+#  define RLWIMI(d,s,h,b,e)            FM(20,s,d,h,b,e,0)
+#  define RLWIMI_(d,s,h,b,e)           FM(20,s,d,h,b,e,1)
+#  define INSLWI(a,s,n,b)              RLWIMI(a,s,32-b,b,b+n-1)
+#  define INSRWI(a,s,n,b)              RLWIMI(a,s,32-(b+n),b,(b+n)-1)
+#  define RLWINM(a,s,h,b,e)            FM(21,s,a,h,b,e,0)
+#  define RLWINM_(a,s,h,b,e)           FM(21,s,a,h,b,e,1)
+#  define EXTLWI(a,s,n,b)              RLWINM(a,s,b,0,n-1)
+#  define EXTRWI(a,s,n,b)              RLWINM(a,s,b+n,32-n,31)
+#  define ROTLWI(a,s,n)                        RLWINM(a,s,n,0,31)
+#  define ROTRWI(a,s,n)                        RLWINM(a,s,32-n,0,31)
+#  define SLWI(a,s,n)                  RLWINM(a,s,n,0,31-n)
+#  define SRWI(a,s,n)                  RLWINM(a,s,32-n,n,31)
+#  define CLRLWI(a,s,n)                        RLWINM(a,s,0,n,31)
+#  define CLRRWI(a,s,n)                        RLWINM(a,s,0,0,31-n)
+#  define CLRLSWI(a,s,b,n)             RLWINM(a,s,n,b-n,31-n)
+#  define RLWNM(a,s,b,m,e)             FM(23,s,a,b,m,e,0)
+#  define RLWNM_(a,s,b,m,e)            FM(23,s,a,b,m,e,1)
+#  define ROTLW(a,s,b)                 RLWNM(a,s,b,0,31)
+#  define SC()                         FDu(17,0,0,2)
+#  define SLW(a,s,b)                   FX(31,s,a,b,24)
+#  define SLW_(a,s,b)                  FX_(31,s,a,b,24)
+#  define SRAW(a,s,b)                  FX(31,s,a,b,792)
+#  define SRAW_(a,s,b)                 FX_(31,s,a,b,792)
+#  define SRAWI(a,s,h)                 FX(31,s,a,h,824)
+#  define SRAWI_(a,s,h)                        FX_(31,s,a,h,824)
+#  define SRW(a,s,b)                   FX(31,s,a,b,536)
+#  define SRW_(a,s,b)                  FX_(31,s,a,b,536)
+/* 64-bit rotates and shifts (only with 64-bit words).  The immediate
+ * forms split the 6-bit shift count: the low five bits (h&~32) and the
+ * sixth bit (h>>5) are passed to FMD/FXS separately.  FMD's extended
+ * opcode is 0/1/2/3 for RLDICL/RLDICR/RLDIC/RLDIMI; the register-count
+ * forms go through FMDS with extended opcode 8 for RLDCL and 9 for
+ * RLDCR (Power ISA MDS-form).
+ * NOTE(review): EXTRLI looks like a misspelling of the extldi shorthand;
+ * the name is kept because callers are not visible from here. */
+#  if __WORDSIZE == 64
+#    define RLDICL(a,s,h,b)            FMD(30,s,a,h&~32,b,0,h>>5)
+#    define RLDICL_(a,s,h,b)           FMD_(30,s,a,h&~32,b,0,h>>5)
+#    define EXTRDI(x,y,n,b)            RLDICL(x,y,(b+n),(64-n))
+#    define SRDI(x,y,n)                        RLDICL(x,y,(64-n),n)
+#    define CLRLDI(x,y,n)              RLDICL(x,y,0,n)
+#    define RLDICR(a,s,h,e)            FMD(30,s,a,h&~32,e,1,h>>5)
+#    define RLDICR_(a,s,h,e)           FMD_(30,s,a,h&~32,e,1,h>>5)
+#    define EXTRLI(x,y,n,b)            RLDICR(x,y,b,(n-1))
+#    define SLDI(x,y,n)                        RLDICR(x,y,n,(63-n))
+#    define CLRRDI(x,y,n)              RLDICR(x,y,0,(63-n))
+#    define RLDIC(a,s,h,b)             FMD(30,s,a,h&~32,b,2,h>>5)
+#    define RLDIC_(a,s,h,b)            FMD_(30,s,a,h&~32,b,2,h>>5)
+#    define CLRLSLDI(x,y,b,n)          RLDIC(x,y,n,(b-n))
+#    define RLDCL(a,s,h,b)             FMDS(30,s,a,h,b,8)
+#    define RLDCL_(a,s,h,b)            FMDS_(30,s,a,h,b,8)
+#    define ROTLD(x,y,z)               RLDCL(x,y,z,0)
+#    define RLDCR(a,s,b,e)             FMDS(30,s,a,b,e,9)
+#    define RLDCR_(a,s,b,e)            FMDS_(30,s,a,b,e,9)
+#    define RLDIMI(a,s,h,b)            FMD(30,s,a,h&~32,b,3,h>>5)
+#    define RLDIMI_(a,s,h,b)           FMD_(30,s,a,h&~32,b,3,h>>5)
+#    define INSRDI(x,y,n,b)            RLDIMI(x,y,(64-(b+n)),b)
+#    define SLD(a,s,b)                 FX(31,s,a,b,27)
+#    define SLD_(a,s,b)                        FX_(31,s,a,b,27)
+#    define SRD(a,s,b)                 FX(31,s,a,b,539)
+#    define SRD_(a,s,b)                        FX_(31,s,a,b,539)
+#    define SRADI(a,s,h)               FXS(31,s,a,h&~32,413,h>>5)
+#    define SRADI_(a,s,h)              FXS_(31,s,a,h&~32,413,h>>5)
+#    define SRAD(a,s,b)                        FX(31,s,a,b,794)
+#    define SRAD_(a,s,b)               FX_(31,s,a,b,794)
+#  endif
+/* Stores.  Displacement forms go through FDs and indexed forms through
+ * FX.  STD and STDU share primary opcode 62; STDU sets bit 0 of the
+ * displacement (d|1) to tell them apart.
+ * NOTE(review): STWSI looks like a transposed spelling of the PowerPC
+ * mnemonic stswi; the name is kept because callers are not visible from
+ * here. */
+#  define STB(s,a,d)                   FDs(38,s,a,d)
+#  define STBU(s,a,d)                  FDs(39,s,a,d)
+#  define STBUX(s,a,b)                 FX(31,s,a,b,247)
+#  define STBX(s,a,b)                  FX(31,s,a,b,215)
+#  define STH(s,a,d)                   FDs(44,s,a,d)
+#  define STHBRX(s,a,b)                        FX(31,s,a,b,918)
+#  define STHU(s,a,d)                  FDs(45,s,a,d)
+#  define STHUX(s,a,b)                 FX(31,s,a,b,439)
+#  define STHX(s,a,b)                  FX(31,s,a,b,407)
+#  define STMW(s,a,d)                  FDs(47,s,a,d)
+#  define STWSI(s,a,nb)                        FX(31,s,a,nb,725)
+#  define STSWX(s,a,b)                 FX(31,s,a,b,661)
+#  define STW(s,a,d)                   FDs(36,s,a,d)
+#  define STWBRX(s,a,b)                        FX(31,s,a,b,662)
+#  define STWCX_(s,a,b)                        FX_(31,s,a,b,150)
+#  define STWU(s,a,d)                  FDs(37,s,a,d)
+#  define STWUX(s,a,b)                 FX(31,s,a,b,183)
+#  define STWX(s,a,b)                  FX(31,s,a,b,151)
+#  define STD(s,a,d)                   FDs(62,s,a,d)
+#  define STDX(s,a,b)                  FX(31,s,a,b,149)
+#  define STDU(s,a,d)                  FDs(62,s,a,d|1)
+#  define STDUX(s,a,b)                 FX(31,s,a,b,181)
+/* Subtract family.  The SUBF* ("subtract from") forms take their source
+ * operands in the opposite order from SUB/SUBO/SUBC/SUBE, which swap
+ * their two sources before forwarding.  The immediate shorthands SUBI,
+ * SUBIS, SUBIC and SUBIC_ are the matching ADD forms with the immediate
+ * negated; the argument is parenthesized so that an expression such as
+ * SUBI(d,a,x-y) negates the whole value. */
+#  define SUBF(d,a,b)                  FXO(31,d,a,b,0,40)
+#  define SUBF_(d,a,b)                 FXO_(31,d,a,b,0,40)
+#  define SUBFO(d,a,b)                 FXO(31,d,a,b,1,40)
+#  define SUBFO_(d,a,b)                        FXO_(31,d,a,b,1,40)
+#  define SUB(d,a,b)                   SUBF(d,b,a)
+#  define SUB_(d,a,b)                  SUBF_(d,b,a)
+#  define SUBO(d,a,b)                  SUBFO(d,b,a)
+#  define SUBO_(d,a,b)                 SUBFO_(d,b,a)
+#  define SUBI(d,a,s)                  ADDI(d,a,-(s))
+#  define SUBIS(d,a,s)                 ADDIS(d,a,-(s))
+#  define SUBFC(d,a,b)                 FXO(31,d,a,b,0,8)
+#  define SUBFC_(d,a,b)                        FXO_(31,d,a,b,0,8)
+#  define SUBFCO(d,a,b)                        FXO(31,d,a,b,1,8)
+#  define SUBFCO_(d,a,b)               FXO_(31,d,a,b,1,8)
+#  define SUBC(d,a,b)                  SUBFC(d,b,a)
+#  define SUBIC(d,a,s)                 ADDIC(d,a,-(s))
+#  define SUBIC_(d,a,s)                        ADDIC_(d,a,-(s))
+#  define SUBFE(d,a,b)                 FXO(31,d,a,b,0,136)
+#  define SUBFE_(d,a,b)                        FXO_(31,d,a,b,0,136)
+#  define SUBFEO(d,a,b)                        FXO(31,d,a,b,1,136)
+#  define SUBFEO_(d,a,b)               FXO_(31,d,a,b,1,136)
+#  define SUBE(d,a,b)                  SUBFE(d,b,a)
+#  define SUBFIC(d,a,s)                        FDs(8,d,a,s)
+#  define SUBFME(d,a)                  FXO(31,d,a,0,0,232)
+#  define SUBFME_(d,a)                 FXO_(31,d,a,0,0,232)
+#  define SUBFMEO(d,a)                 FXO(31,d,a,0,1,232)
+#  define SUBFMEO_(d,a)                        FXO_(31,d,a,0,1,232)
+#  define SUBFZE(d,a)                  FXO(31,d,a,0,0,200)
+#  define SUBFZE_(d,a)                 FXO_(31,d,a,0,0,200)
+#  define SUBFZEO(d,a)                 FXO(31,d,a,0,1,200)
+#  define SUBFZEO_(d,a)                        FXO_(31,d,a,0,1,200)
+/* Synchronization, TLB maintenance, traps and exclusive OR.  TWEQ and
+ * TWLGE are TW with the first operand fixed to 4 and 5, TRAP is
+ * FX(31,31,0,0,4); TWGTI and TWLLEI are TWI with 8 and 6.  XOR, XORI and
+ * XORIS hand the source register to the emitter ahead of the
+ * destination, like the other logical operations. */
+#  define SYNC()                       FX(31,0,0,0,598)
+#  define TLBIA()                      FX(31,0,0,0,370)
+#  define TLBIE(b)                     FX(31,0,0,b,306)
+#  define TLBSYNC()                    FX(31,0,0,0,566)
+#  define TW(t,a,b)                    FX(31,t,a,b,4)
+#  define TWEQ(a,b)                    FX(31,4,a,b,4)
+#  define TWLGE(a,b)                   FX(31,5,a,b,4)
+#  define TRAP()                       FX(31,31,0,0,4)
+#  define TWI(t,a,s)                   FDs(3,t,a,s)
+#  define TWGTI(a,s)                   TWI(8,a,s)
+#  define TWLLEI(a,s)                  TWI(6,a,s)
+#  define XOR(d,a,b)                   FX(31,a,d,b,316)
+#  define XOR_(d,a,b)                  FX_(31,a,d,b,316)
+#  define XORI(s,a,u)                  FDu(26,a,s,u)
+#  define XORIS(s,a,u)                 FDu(27,a,s,u)
+/* Lower-case operations start here.  Each either expands directly to
+ * one of the instruction macros above or forwards to a static helper
+ * that receives _jit as first argument.  movi_p is the only one of these
+ * helpers returning a jit_word_t.  The unsigned 8/16-bit extensions are
+ * ANDI_ with masks 0xff/0xffff; the 32-bit extensions exist only with
+ * 64-bit words. */
+#  define nop(c)                       _nop(_jit,c)
+static void _nop(jit_state_t*,jit_int32_t);
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define negr(r0,r1)                  NEG(r0,r1)
+#  define comr(r0,r1)                  NOT(r0,r1)
+#  define extr_c(r0,r1)                        EXTSB(r0,r1)
+#  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
+#  define extr_s(r0,r1)                        EXTSH(r0,r1)
+#  define extr_us(r0,r1)               ANDI_(r0,r1,0xffff)
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              EXTSW(r0,r1)
+#    define extr_ui(r0,r1)             CLRLDI(r0,r1,32)
+#  endif
+/* Host to network byte order.  On big-endian hosts no swapping is done:
+ * the conversions reduce to the matching zero extension, or to a plain
+ * move for the full word size.  Otherwise they forward to out-of-line
+ * helpers; htonr_ul exists only with 64-bit words. */
+#  if __BYTE_ORDER == __BIG_ENDIAN
+#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    if __WORDSIZE == 32
+#      define htonr_ui(r0,r1)          movr(r0,r1)
+#    else
+#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
+#      define htonr_ul(r0,r1)          movr(r0,r1)
+#    endif
+#  else
+#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#    if __WORDSIZE == 64
+#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
+static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#    endif
+#  endif
+/* Addition and subtraction.  Register forms expand directly to one
+ * instruction macro (plain, `c' = ADDC/SUBC, `x' = ADDE/SUBFE); the
+ * immediate forms are out-of-line helpers taking a jit_word_t.  subxr
+ * swaps r1 and r2 when forwarding to SUBFE, the same swap SUBE does. */
+#  define addr(r0,r1,r2)               ADD(r0,r1,r2)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              ADDC(r0,r1,r2)
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              ADDE(r0,r1,r2)
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               SUB(r0,r1,r2)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              SUBC(r0,r1,r2)
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              SUBFE(r0,r2,r1)
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Multiplication.  mulr/mullr, mulhr and mulhr_u pick the word or
+ * doubleword instruction according to the word size.  qmulr/qmuli and
+ * their _u variants share the iqmulr/iqmuli helpers, passing 1 for the
+ * plain form and 0 for the _u form as the final jit_bool_t argument. */
+#  if __WORDSIZE == 32
+#    define mulr(r0,r1,r2)             MULLW(r0,r1,r2)
+#    define mullr(r0,r1,r2)            MULLW(r0,r1,r2)
+#    define mulhr(r0,r1,r2)            MULHW(r0,r1,r2)
+#    define mulhr_u(r0,r1,r2)          MULHWU(r0,r1,r2)
+#  else
+#    define mulr(r0,r1,r2)             MULLD(r0,r1,r2)
+#    define mullr(r0,r1,r2)            MULLD(r0,r1,r2)
+#    define mulhr(r0,r1,r2)            MULHD(r0,r1,r2)
+#    define mulhr_u(r0,r1,r2)          MULHDU(r0,r1,r2)
+#  endif
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,1)
+#  define qmulr_u(r0,r1,r2,r3)         iqmulr(r0,r1,r2,r3,0)
+#  define iqmulr(r0,r1,r2,r3,cc)       _iqmulr(_jit,r0,r1,r2,r3,cc)
+static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qmuli(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,1)
+#  define qmuli_u(r0,r1,r2,i0)         iqmuli(r0,r1,r2,i0,0)
+#  define iqmuli(r0,r1,r2,i0,cc)       _iqmuli(_jit,r0,r1,r2,i0,cc)
+static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+/* Division and remainder.  divr/divr_u pick the word or doubleword
+ * divide according to the word size; the immediate forms and all
+ * remainder operations are out-of-line helpers.  qdivr/qdivi and their
+ * _u variants share the iqdivr/iqdivi helpers, passing 1 for the plain
+ * form and 0 for the _u form as the final jit_bool_t argument. */
+#  if __WORDSIZE == 32
+#    define divr(r0,r1,r2)             DIVW(r0,r1,r2)
+#  else
+#    define divr(r0,r1,r2)             DIVD(r0,r1,r2)
+#  endif
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define divr_u(r0,r1,r2)           DIVWU(r0,r1,r2)
+#  else
+#    define divr_u(r0,r1,r2)           DIVDU(r0,r1,r2)
+#  endif
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,1)
+#  define qdivr_u(r0,r1,r2,r3)         iqdivr(r0,r1,r2,r3,0)
+#  define iqdivr(r0,r1,r2,r3,cc)       _iqdivr(_jit,r0,r1,r2,r3,cc)
+static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#  define qdivi(r0,r1,r2,i0)           iqdivi(r0,r1,r2,i0,1)
+#  define qdivi_u(r0,r1,r2,i0)         iqdivi(r0,r1,r2,i0,0)
+#  define iqdivi(r0,r1,r2,i0,cc)       _iqdivi(_jit,r0,r1,r2,i0,cc)
+static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define andr(r0,r1,r2)               AND(r0,r1,r2)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        OR(r0,r1,r2)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               XOR(r0,r1,r2)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define lshr(r0,r1,r2)             SLW(r0,r1,r2)
+#  else
+#    define lshr(r0,r1,r2)             SLD(r0,r1,r2)
+#  endif
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define rshr(r0,r1,r2)             SRAW(r0,r1,r2)
+#  else
+#    define rshr(r0,r1,r2)             SRAD(r0,r1,r2)
+#  endif
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define rshr_u(r0,r1,r2)           SRW(r0,r1,r2)
+#  else
+#    define rshr_u(r0,r1,r2)           SRD(r0,r1,r2)
+#  endif
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0,r1,r2)                        _ltr(_jit,r0,r1,r2)
+static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti(r0,r1,i0)                        _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0,r1,r2)              _ltr_u(_jit,r0,r1,r2)
+static void _ltr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_u(r0,r1,i0)              _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler(r0,r1,r2)                        _ler(_jit,r0,r1,r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei(r0,r1,i0)                        _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler_u(r0,r1,r2)              _ler_u(_jit,r0,r1,r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_u(r0,r1,i0)              _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define eqr(r0,r1,r2)                        _eqr(_jit,r0,r1,r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi(r0,r1,i0)                        _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger(r0,r1,r2)                        _ger(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei(r0,r1,i0)                        _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger_u(r0,r1,r2)              _ger_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_u(r0,r1,i0)              _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0,r1,r2)                        _gtr(_jit,r0,r1,r2)
+static void _gtr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti(r0,r1,i0)                        _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0,r1,r2)              _gtr_u(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_u(r0,r1,i0)              _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ner(r0,r1,r2)                        _ner(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmsr(i0,r0,r1)                 _bmsr(_jit,i0,r0,r1)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmsi(i0,r0,i1)                 _bmsi(_jit,i0,r0,i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmcr(i0,r0,r1)                 _bmcr(_jit,i0,r0,r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmci(i0,r0,i1)                 _bmci(_jit,i0,r0,i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define boaddr(i0,r0,r1)               _boaddr(_jit,i0,r0,r1)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi(i0,r0,i1)               _boaddi(_jit,i0,r0,i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr(i0,r0,r1)               _bxaddr(_jit,i0,r0,r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi(i0,r0,i1)               _bxaddi(_jit,i0,r0,i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr(i0,r0,r1)               _bosubr(_jit,i0,r0,r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi(i0,r0,i1)               _bosubi(_jit,i0,r0,i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr(i0,r0,r1)               _bxsubr(_jit,i0,r0,r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi(i0,r0,i1)               _bxsubi(_jit,i0,r0,i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define boaddr_u(i0,r0,r1)             _boaddr_u(_jit,i0,r0,r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi_u(i0,r0,i1)             _boaddi_u(_jit,i0,r0,i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr_u(i0,r0,r1)             _bxaddr_u(_jit,i0,r0,r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi_u(i0,r0,i1)             _bxaddi_u(_jit,i0,r0,i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr_u(i0,r0,r1)             _bosubr_u(_jit,i0,r0,r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi_u(i0,r0,i1)             _bosubi_u(_jit,i0,r0,i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr_u(i0,r0,r1)             _bxsubr_u(_jit,i0,r0,r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi_u(i0,r0,i1)             _bxsubi_u(_jit,i0,r0,i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,i0)             _ldxr_c(_jit,r0,r1,i0)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        LBZX(r0, _R0_REGNO, r1)
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 LHAX(r0, _R0_REGNO, r1)
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,i0)             _ldxr_s(_jit,r0,r1,i0)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        LHZX(r0, _R0_REGNO, r1)
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,i0)            _ldxr_us(_jit,r0,r1,i0)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define ldr_i(r0,r1)               LWZX(r0, _R0_REGNO, r1)
+#  else
+#    define ldr_i(r0,r1)               LWAX(r0, _R0_REGNO, r1)
+#  endif
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,i0)             _ldxr_i(_jit,r0,r1,i0)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldr_ui(r0,r1)              LWZX(r0, _R0_REGNO, r1)
+#    define ldi_ui(r0,i0)              _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_ui(r0,r1,i0)          _ldxr_ui(_jit,r0,r1,i0)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_ui(r0,r1,i0)          _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldr_l(r0,r1)               LDX(r0, _R0_REGNO, r1)
+#    define ldi_l(r0,i0)               _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_l(r0,r1,i0)           _ldxr_l(_jit,r0,r1,i0)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+#  define str_c(r0,r1)                 STBX(r1, _R0_REGNO, r0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_s(r0,r1)                 STHX(r1, _R0_REGNO, r0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_i(r0,r1)                 STWX(r1, _R0_REGNO, r0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define str_l(r0,r1)               STDX(r1, _R0_REGNO, r0)
+#    define sti_l(i0,r0)               _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#    define stxr_l(r0,r1,r2)           _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define stxi_l(i0,r0,r1)           _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
+#  define jmpr(r0)                     _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused;
+#  if _CALL_SYSV
+#    define callr(r0,i0)               _callr(_jit,r0,i0)
+static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define calli(i0,i1)               _calli(_jit,i0,i1)
+static void _calli(jit_state_t*,jit_word_t,jit_int32_t);
+#  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
+#  else
+#    define callr(r0)                  _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#    define calli(i0)                  _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#    define calli_p(i0)                        _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#endif
+#  define prolog(node)                 _prolog(_jit, node)
+static void _prolog(jit_state_t*, jit_node_t*);
+#  define epilog(node)                 _epilog(_jit, node)
+static void _epilog(jit_state_t*, jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define patch_at(i,l)                        _patch_at(_jit,i,l)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Keep only the low 16 bits of v (the 16-bit field of the encoders below). */
+#  define _u16(v)                      ((v) & 0xffff)
+/* Keep only the low 26 bits of v (the target field assembled by _FI). */
+#  define _u26(v)                      ((v) & 0x3ffffff)
+/*
+ * Emit one 32-bit instruction word assembled from:
+ *   o (6 bits) at bit 26, d (5) at bit 21, a (5) at bit 16, b (5) at bit 11,
+ *   e (1) at bit 10, x (9) at bit 1 and r (1) at bit 0.
+ * Every field is range-checked with assert() before the word goes to ii().
+ */
+static void
+_FXO(jit_state_t *_jit, int o, int d, int a, int b, int e, int x, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 1) - 1)));
+    assert(!(x & ~((1 << 9) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    /* o may be as large as 63 and 63 << 26 does not fit in a signed int, so
+     * the shift is done on an unsigned value to stay well defined. */
+    insn = ((jit_uint32_t)o<<26)|(d<<21)|(a<<16)|(b<<11)|(e<<10)|(x<<1)|r;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * d (5) at bit 21, a (5) at bit 16 and the low 16 bits of s.
+ * s must satisfy can_sign_extend_short_p().
+ */
+static void
+_FDs(jit_state_t *_jit, int o, int d, int a, int s)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(can_sign_extend_short_p(s));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(d<<21)|(a<<16)|_u16(s);
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Same layout as _FDs (o at bit 26, d at bit 21, a at bit 16, low 16 bits of
+ * s), but s must satisfy can_zero_extend_short_p() instead.
+ */
+static void
+_FDu(jit_state_t *_jit, int o, int d, int a, int s)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(can_zero_extend_short_p(s));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(d<<21)|(a<<16)|_u16(s);
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * s (5) at bit 21, a (5) at bit 16, b (5) at bit 11, x (10) at bit 1 and
+ * r (1) at bit 0.  Every field is range-checked with assert().
+ */
+static void
+_FX(jit_state_t *_jit, int o, int s, int a, int b, int x, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(s & ~((1 <<  5) - 1)));
+    assert(!(a & ~((1 <<  5) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(r & ~((1 <<  1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(s<<21)|(a<<16)|(b<<11)|(x<<1)|r;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26, the
+ * low 26 bits of t, a (1) at bit 1 and k (1) at bit 0.
+ * t must be a multiple of 4 and satisfy can_sign_extend_jump_p().
+ */
+static void
+_FI(jit_state_t *_jit, int o, int t, int a, int k)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~(( 1 <<  6) - 1)));
+    assert(!(t & 3) && can_sign_extend_jump_p(t));
+    assert(!(a & ~(( 1 <<  1) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|_u26(t)|(a<<1)|k;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * bo (5) at bit 21, bi (5) at bit 16, the low 16 bits of t, a (1) at bit 1
+ * and k (1) at bit 0.
+ * t must be a multiple of 4 and satisfy can_sign_extend_short_p().
+ */
+static void
+_FB(jit_state_t *_jit, int o, int bo, int bi, int t, int a, int k)
+{
+    jit_uint32_t insn;
+    assert(!( o & ~((1 <<  6) - 1)));
+    assert(!(bo & ~((1 <<  5) - 1)));
+    assert(!(bi & ~((1 <<  5) - 1)));
+    assert(!(t & 3) && can_sign_extend_short_p(t));
+    assert(!(a & ~(( 1 <<  1) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(bo<<21)|(bi<<16)|_u16(t)|(a<<1)|k;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * bo (5) at bit 21, bi (5) at bit 16, x (10) at bit 1 and k (1) at bit 0.
+ * Every field is range-checked with assert().
+ */
+static void
+_FXL(jit_state_t *_jit, int o, int bo, int bi, int x, int k)
+{
+    jit_uint32_t insn;
+    assert(!( o & ~((1 <<  6) - 1)));
+    assert(!(bo & ~((1 <<  5) - 1)));
+    assert(!(bi & ~((1 <<  5) - 1)));
+    assert(!(x & ~(( 1 << 10) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(bo<<21)|(bi<<16)|(x<<1)|k;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * d (3) at bit 23, l (1) at bit 21, a (5) at bit 16, b (5) at bit 11 and
+ * x (10) at bit 1.  Every field is range-checked with assert().
+ */
+static void
+_FC(jit_state_t *_jit, int o, int d, int l, int a, int b, int x)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(d & ~((1 <<  3) - 1)));
+    assert(!(l & ~((1 <<  1) - 1)));
+    assert(!(a & ~((1 <<  5) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(d<<23)|(l<<21)|(a<<16)|(b<<11)|(x<<1);
+    ii((jit_int32_t)insn);
+}
+
+static void
+_FCI(jit_state_t *_jit, int o, int d, int l, int a, int s)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 3) - 1)));
+    assert(!(l & ~((1 << 1) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    if (o == 11)       assert(can_sign_extend_short_p(s));
+    else if (o == 10)  assert(can_zero_extend_short_p(s));
+#if DEBUG
+    else               abort();
+#endif
+    ii((o<<26)|(d<<23)|(l<<21)|(a<<16)|_u16(s));
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * d (5) at bit 21, x (10) at bit 11 and f (10) at bit 1.
+ * Every field is range-checked with assert().
+ */
+static void
+_FXFX(jit_state_t *_jit, int o, int d, int x, int f)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(d & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(f & ~((1 << 10) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(d<<21)|(x<<11)|(f<<1);
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * s (5) at bit 21, a (5) at bit 16, h (5) at bit 11, b (5) at bit 6,
+ * e (5) at bit 1 and r (1) at bit 0.  Every field is range-checked.
+ */
+static void
+_FM(jit_state_t *_jit, int o, int s, int a, int h, int b, int e, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 5) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(s<<21)|(a<<16)|(h<<11)|(b<<6)|(e<<1)|r;
+    ii((jit_int32_t)insn);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * s (5) at bit 21, a (5) at bit 16, b (5) at bit 11, a 6-bit e field at
+ * bit 5, x (4) at bit 1 and r (1) at bit 0.
+ * Before being placed, e is rotated left by one within its 6 bits, i.e. its
+ * top bit becomes the lowest bit of the encoded field.
+ */
+static void
+_FMDS(jit_state_t *_jit, int o, int s, int a, int b, int e, int x, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 6) - 1)));
+    assert(!(x & ~((1 << 4) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    e = (e >> 5) | ((e << 1) & 63);
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(s<<21)|(a<<16)|(b<<11)|(e<<5)|(x<<1)|r;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * s (5) at bit 21, a (5) at bit 16, h (5) at bit 11, a 6-bit e field at
+ * bit 5, x (3) at bit 2, i (1) at bit 1 and r (1) at bit 0.
+ * Before being placed, e is rotated left by one within its 6 bits, i.e. its
+ * top bit becomes the lowest bit of the encoded field.
+ */
+static void
+_FMD(jit_state_t *_jit, int o, int s, int a, int h, int e, int x, int i, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 6) - 1)));
+    assert(!(x & ~((1 << 3) - 1)));
+    assert(!(i & ~((1 << 1) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    e = (e >> 5) | ((e << 1) & 63);
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(s<<21)|(a<<16)|(h<<11)|(e<<5)|(x<<2)|(i<<1)|r;
+    ii((jit_int32_t)insn);
+}
+
+/*
+ * Emit one 32-bit instruction word assembled from o (6 bits) at bit 26,
+ * s (5) at bit 21, a (5) at bit 16, h (5) at bit 11, x (9) at bit 2,
+ * i (1) at bit 1 and r (1) at bit 0.  Every field is range-checked.
+ */
+static void
+_FXS(jit_state_t *_jit, int o, int s, int a, int h, int x, int i, int r)
+{
+    jit_uint32_t insn;
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(x & ~((1 << 9) - 1)));
+    assert(!(i & ~((1 << 1) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    /* Unsigned shift: o values of 32 and above would overflow a signed int
+     * when shifted left by 26, which is undefined behaviour. */
+    insn = ((jit_uint32_t)o<<26)|(s<<21)|(a<<16)|(h<<11)|(x<<2)|(i<<1)|r;
+    ii((jit_int32_t)insn);
+}
+#endif
+
+#if !DEBUG
+/*
+ * Multi-instruction sequence taken from the discussion at
+ * http://tenfourfox.blogspot.com/2011/04/attention-g5-owners-your-javascript-no.html
+ *
+ * A scratch GPR (allocated with jit_class_nospill) receives XER via MFXER,
+ * is handed to MTCRF with mask 128, is masked with RLWINM(.., 0, 0, 28) and
+ * is finally written back with MTXER.
+ * NOTE(review): the cr argument is not referenced by this sequence; confirm
+ * that callers only rely on the field selected by the MTCRF mask.
+ */
+static void
+_MCRXR(jit_state_t *_jit, jit_int32_t cr)
+{
+    jit_int32_t scratch = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    MFXER(rn(scratch));
+    MTCRF(128, rn(scratch));
+    RLWINM(rn(scratch), rn(scratch), 0, 0, 28);
+    MTXER(rn(scratch));
+
+    jit_unget_reg(scratch);
+}
+#endif
+
+/*
+ * Emit one NOP() for every 4 bytes requested in i0.
+ * Asserts that i0 was a non-negative multiple of 4.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+        NOP();
+        i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/* Copy register r1 into r0 with MR(); nothing is emitted when r0 == r1. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    MR(r0, r1);
+}
+
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0))
+       LI(r0, i0);
+    else {
+       if (can_sign_extend_int_p(i0))
+           LIS(r0, (jit_int16_t)(i0 >> 16));
+       else if (can_zero_extend_int_p(i0)) {
+           if (i0 & 0xffff0000) {
+               ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+               SLWI(r0, r0, 16);
+           }
+       }
+#  if __WORDSIZE == 64
+       else {
+           movi(r0, (jit_uint32_t)(i0 >> 32));
+           if (i0 & 0xffff0000) {
+               SLDI(r0, r0, 16);
+               ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+               SLDI(r0, r0, 16);
+           }
+           else
+               SLDI(r0, r0, 32);
+       }
+#  endif
+       if (i0 & 0xffff)
+           ORI(r0, r0, (jit_uint16_t)i0);
+    }
+}
+
+/*
+ * Load i0 into r0 with a sequence whose shape does not depend on the value
+ * of i0 (LIS+ORI on 32-bit builds; LIS, ORI, SLDI, ORI, SLDI, ORI on 64-bit
+ * builds).
+ * Returns the value of _jit->pc.w sampled before anything was emitted.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t start;
+
+    start = _jit->pc.w;
+#  if __WORDSIZE == 32
+    LIS(r0, (jit_int16_t)(i0 >> 16));
+    ORI(r0, r0, (jit_uint16_t)i0);
+#  else
+    LIS(r0, (jit_int16_t)(i0 >> 48));
+    ORI(r0, r0, (jit_uint16_t)(i0 >> 32));
+    SLDI(r0, r0, 16);
+    ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+    SLDI(r0, r0, 16);
+    ORI(r0, r0, (jit_uint16_t)i0);
+#  endif
+    return start;
+}
+
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+/*
+ * Swap the two low bytes of r1 into r0:
+ *   r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff)
+ * The shifted copy is taken into a scratch register first, so the sequence
+ * still reads the original value when r0 and r1 are the same register.
+ */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t hi = jit_get_reg(jit_class_gpr);
+
+    rshi(rn(hi), r1, 8);
+    andi(r0, r1, 0xff);
+    andi(rn(hi), rn(hi), 0xff);
+    lshi(r0, r0, 8);
+    orr(r0, r0, rn(hi));
+
+    jit_unget_reg(hi);
+}
+
+/*
+ * Byte-swap the low word of r1 into r0.  The swapped value is assembled in
+ * a scratch register with one ROTLWI and two RLWIMI inserts, then moved to
+ * r0 through CLRLDI(.., 32).
+ */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t swapped = jit_get_reg(jit_class_gpr);
+
+    ROTLWI(rn(swapped), r1, 8);
+    RLWIMI(rn(swapped), r1, 24, 0, 7);
+    RLWIMI(rn(swapped), r1, 24, 16, 23);
+    CLRLDI(r0, rn(swapped), 32);
+
+    jit_unget_reg(swapped);
+}
+
+#    if __WORDSIZE == 64
+/*
+ * Byte-swap the 64-bit value in r1 into r0 by swapping each 32-bit half with
+ * htonr_ui() and exchanging the halves.  The upper half is copied to a
+ * scratch register before r0 is written, so r0 may be the same as r1.
+ */
+static void
+_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t upper = jit_get_reg(jit_class_gpr);
+
+    rshi_u(rn(upper), r1, 32);
+    htonr_ui(r0, r1);
+    htonr_ui(rn(upper), rn(upper));
+    lshi(r0, r0, 32);
+    orr(r0, r0, rn(upper));
+
+    jit_unget_reg(upper);
+}
+#    endif
+#  endif
+
+/*
+ * Add the immediate i0 to r1, result in r0.
+ * Uses ADDI for signed 16-bit immediates, ADDIS when i0 passes
+ * can_zero_extend_int_p() and its low halfword is zero, and otherwise loads
+ * i0 into a scratch register and emits ADD.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        ADDI(r0, r1, i0);
+        return;
+    }
+    if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+        ADDIS(r0, r1, i0 >> 16);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ADD(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of the ADDC operation: ADDIC when i0 fits a signed 16-bit
+ * immediate, otherwise i0 is loaded into a scratch register and ADDC is used.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        ADDIC(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ADDC(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of the ADDE operation: i0 is always loaded into a scratch
+ * register, which is then used as the second ADDE operand.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ADDE(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Subtract the immediate i0 from r1, result in r0.
+ * When the negated immediate fits, the subtraction is emitted as an
+ * addition (ADDI, or ADDIS when the low halfword is zero); otherwise i0 is
+ * loaded into a scratch register and SUB is used.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* Negate through the unsigned type: -i0 on the signed type overflows
+     * (undefined behaviour) when i0 is the most negative word value. */
+    jit_word_t         ni0 = (jit_word_t)(0 - (jit_uword_t)i0);
+    if (can_sign_extend_short_p(ni0))
+       ADDI(r0, r1, ni0);
+    else if (can_zero_extend_int_p(ni0) && !(ni0 & 0x0000ffff))
+       ADDIS(r0, r1, ni0 >> 16);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       SUB(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Immediate form of the SUBC operation: i0 is loaded into a scratch
+ * register, which is then used as the second SUBC operand.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    SUBC(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of the SUBE operation: i0 is loaded into a scratch
+ * register, which is then used as the second SUBE operand.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    SUBE(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Reverse subtraction with an immediate: r0 = i0 - r1.
+ * Implemented as r0 = r1 - i0 followed by a negation of r0.
+ */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);
+    negr(r0, r0);
+}
+
+/*
+ * Multiply r1 by the immediate i0, result in r0.
+ * MULLI is used for signed 16-bit immediates; otherwise i0 is loaded into a
+ * scratch register and mulr() is used.
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        MULLI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Full-width multiply of r2 by r3: the low word (mullr) goes to r0 and the
+ * high word goes to r1, using mulhr when sign is set and mulhr_u otherwise.
+ * When r0 is also one of the operands, the low word is kept in a scratch
+ * register until the high word has been computed.
+ */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t low_tmp = 0;
+    jit_bool_t  overlap = (r0 == r2 || r0 == r3);
+
+    if (overlap) {
+        low_tmp = jit_get_reg(jit_class_gpr);
+        mullr(rn(low_tmp), r2, r3);
+    }
+    else {
+        mullr(r0, r2, r3);
+    }
+
+    if (sign) {
+        mulhr(r1, r2, r3);
+    }
+    else {
+        mulhr_u(r1, r2, r3);
+    }
+
+    if (overlap) {
+        movr(r0, rn(low_tmp));
+        jit_unget_reg(low_tmp);
+    }
+}
+
+/*
+ * Immediate form of iqmulr(): i0 is loaded into a scratch register, which is
+ * then passed as the second operand.
+ */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqmulr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr(): i0 is loaded into a scratch register, which is
+ * then used as the divisor.
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr_u(): i0 is loaded into a scratch register, which is
+ * then used as the divisor.
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Divide r2 by r3, producing the quotient in r0 and the remainder in r1.
+ * The quotient uses divr when sign is set and divr_u otherwise; the
+ * remainder is computed as r2 - r3 * quotient.
+ * Whenever an output register is also an input, the result is built in a
+ * scratch register and moved into place at the end.
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t quo_tmp, quo;
+    jit_int32_t rem_tmp, rem;
+
+    /* Pick where the quotient is computed. */
+    quo = r0;
+    if (r0 == r2 || r0 == r3) {
+        quo_tmp = jit_get_reg(jit_class_gpr);
+        quo = rn(quo_tmp);
+    }
+    /* Pick where the remainder is computed. */
+    rem = r1;
+    if (r1 == r2 || r1 == r3) {
+        rem_tmp = jit_get_reg(jit_class_gpr);
+        rem = rn(rem_tmp);
+    }
+
+    if (sign) {
+        divr(quo, r2, r3);
+    }
+    else {
+        divr_u(quo, r2, r3);
+    }
+    mulr(rem, r3, quo);
+    subr(rem, r2, rem);
+
+    /* Move results out of the scratch registers, quotient first. */
+    if (quo != r0) {
+        movr(r0, quo);
+        jit_unget_reg(quo_tmp);
+    }
+    if (rem != r1) {
+        movr(r1, rem);
+        jit_unget_reg(rem_tmp);
+    }
+}
+
+/*
+ * Immediate form of iqdivr(): i0 is loaded into a scratch register, which is
+ * then passed as the divisor.
+ */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqdivr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Signed remainder: r0 = r1 - r2 * (r1 / r2), using divr for the quotient.
+ * The intermediate quotient/product lives in r0 itself unless r0 is also an
+ * operand, in which case a scratch register is used.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = 0;
+    jit_int32_t work = r0;
+    jit_bool_t  overlap = (r0 == r1 || r0 == r2);
+
+    if (overlap) {
+        tmp = jit_get_reg(jit_class_gpr);
+        work = rn(tmp);
+    }
+    divr(work, r1, r2);
+    mulr(work, r2, work);
+    subr(r0, r1, work);
+    if (overlap)
+        jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of remr(): i0 is loaded into a scratch register, which is
+ * then used as the divisor.
+ */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unsigned remainder: r0 = r1 - r2 * (r1 / r2), using divr_u for the
+ * quotient.  The intermediate quotient/product lives in r0 itself unless r0
+ * is also an operand, in which case a scratch register is used.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp = 0;
+    jit_int32_t work = r0;
+    jit_bool_t  overlap = (r0 == r1 || r0 == r2);
+
+    if (overlap) {
+        tmp = jit_get_reg(jit_class_gpr);
+        work = rn(tmp);
+    }
+    divr_u(work, r1, r2);
+    mulr(work, r2, work);
+    subr(r0, r1, work);
+    if (overlap)
+        jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of remr_u(): i0 is loaded into a scratch register, which is
+ * then used as the divisor.
+ */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 & i0.  ANDI_ is used when i0 passes can_zero_extend_short_p,
+ * ANDIS_ (with i0 >> 16) when i0 passes can_zero_extend_int_p and its low
+ * 16 bits are clear; otherwise i0 goes through a scratch GPR and AND. */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                mask;
+
+    if (can_zero_extend_short_p(i0)) {
+       ANDI_(r0, r1, i0);
+       return;
+    }
+    if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+       ANDIS_(r0, r1, (jit_uword_t)i0 >> 16);
+       return;
+    }
+    mask = jit_get_reg(jit_class_gpr);
+    movi(rn(mask), i0);
+    AND(r0, r1, rn(mask));
+    jit_unget_reg(mask);
+}
+
+/* r0 = r1 | i0.  ORI is used when i0 passes can_zero_extend_short_p, ORIS
+ * (with i0 >> 16) when i0 passes can_zero_extend_int_p and its low 16
+ * bits are clear; otherwise i0 goes through a scratch GPR and OR. */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                bits;
+
+    if (can_zero_extend_short_p(i0)) {
+       ORI(r0, r1, i0);
+       return;
+    }
+    if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+       ORIS(r0, r1, (jit_uword_t)i0 >> 16);
+       return;
+    }
+    bits = jit_get_reg(jit_class_gpr);
+    movi(rn(bits), i0);
+    OR(r0, r1, rn(bits));
+    jit_unget_reg(bits);
+}
+
+/* r0 = r1 ^ i0.  XORI is used when i0 passes can_zero_extend_short_p,
+ * XORIS (with i0 >> 16) when i0 passes can_zero_extend_int_p and its low
+ * 16 bits are clear; otherwise i0 goes through a scratch GPR and XOR. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                bits;
+
+    if (can_zero_extend_short_p(i0)) {
+       XORI(r0, r1, i0);
+       return;
+    }
+    if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+       XORIS(r0, r1, (jit_uword_t)i0 >> 16);
+       return;
+    }
+    bits = jit_get_reg(jit_class_gpr);
+    movi(rn(bits), i0);
+    XOR(r0, r1, rn(bits));
+    jit_unget_reg(bits);
+}
+
+/* Left shift of r1 by the immediate i0 into r0.  A zero shift count is
+ * emitted as a plain register move; otherwise SLWI or SLDI is selected
+ * by __WORDSIZE. */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+#  if __WORDSIZE == 32
+    SLWI(r0, r1, i0);
+#  else
+    SLDI(r0, r1, i0);
+#  endif
+}
+
+/* Arithmetic right shift of r1 by the immediate i0 into r0.  A zero shift
+ * count is emitted as a plain register move; otherwise SRAWI or SRADI is
+ * selected by __WORDSIZE. */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+#  if __WORDSIZE == 32
+    SRAWI(r0, r1, i0);
+#  else
+    SRADI(r0, r1, i0);
+#  endif
+}
+
+/* Logical right shift of r1 by the immediate i0 into r0.  A zero shift
+ * count is emitted as a plain register move; otherwise SRWI or SRDI is
+ * selected by __WORDSIZE. */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+#  if __WORDSIZE == 32
+    SRWI(r0, r1, i0);
+#  else
+    SRDI(r0, r1, i0);
+#  endif
+}
+
+static void
+_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from the LT result of a signed r1/r2 compare */
+    CMPW(r1, r2);                      /* signed compare of r1 against r2 */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* keep only the single bit at CR_LT */
+}
+
+/* Immediate form of ltr: signed compare of r1 with i0, then the CR_LT bit
+ * is extracted into r0.  CMPWI is used when i0 passes
+ * can_sign_extend_short_p; otherwise i0 is loaded into a scratch GPR. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_sign_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r1, i0);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+static void
+_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from the LT result of an unsigned r1/r2 compare */
+    CMPLW(r1, r2);                     /* logical (unsigned) compare of r1 against r2 */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* keep only the single bit at CR_LT */
+}
+
+/* Immediate form of ltr_u: unsigned compare of r1 with i0, then the CR_LT
+ * bit is extracted into r0.  CMPLWI is used when i0 passes
+ * can_zero_extend_short_p; otherwise i0 is loaded into a scratch GPR. */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPLW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from "not GT" of a signed r1/r2 compare */
+    CMPW(r1, r2);                      /* signed compare of r1 against r2 */
+    CRNOT(CR_GT, CR_GT);               /* invert the GT bit in place: not-greater means <= */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* keep only the (inverted) bit at CR_GT */
+}
+
+/* Immediate form of ler: signed compare of r1 with i0, the GT bit is
+ * inverted with CRNOT and then extracted into r0.  CMPWI is used when i0
+ * passes can_sign_extend_short_p; otherwise a scratch GPR holds i0. */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_sign_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r1, i0);
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from "not GT" of an unsigned r1/r2 compare */
+    CMPLW(r1, r2);                     /* logical (unsigned) compare of r1 against r2 */
+    CRNOT(CR_GT, CR_GT);               /* invert the GT bit in place: not-greater means <= */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* keep only the (inverted) bit at CR_GT */
+}
+
+/* Immediate form of ler_u: unsigned compare of r1 with i0, the GT bit is
+ * inverted with CRNOT and then extracted into r0.  CMPLWI is used when i0
+ * passes can_zero_extend_short_p; otherwise a scratch GPR holds i0. */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPLW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from the EQ result of comparing r1 with r2 */
+    CMPW(r1, r2);                      /* compare r1 against r2 */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_EQ);          /* keep only the single bit at CR_EQ */
+}
+
+/* Immediate form of eqr: compare r1 with i0 and extract the CR_EQ bit
+ * into r0.  Since only equality matters, either immediate compare works:
+ * CMPWI when i0 passes can_sign_extend_short_p, CMPLWI when it passes
+ * can_zero_extend_short_p, and a scratch GPR with CMPW otherwise. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (can_sign_extend_short_p(i0))
+       CMPWI(r1, i0);
+    else if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from "not LT" of a signed r1/r2 compare */
+    CMPW(r1, r2);                      /* signed compare of r1 against r2 */
+    CRNOT(CR_LT, CR_LT);               /* invert the LT bit in place: not-less means >= */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* keep only the (inverted) bit at CR_LT */
+}
+
+/* Immediate form of ger: signed compare of r1 with i0, the LT bit is
+ * inverted with CRNOT and then extracted into r0.  CMPWI is used when i0
+ * passes can_sign_extend_short_p; otherwise a scratch GPR holds i0. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_sign_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r1, i0);
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from "not LT" of an unsigned r1/r2 compare */
+    CMPLW(r1, r2);                     /* logical (unsigned) compare of r1 against r2 */
+    CRNOT(CR_LT, CR_LT);               /* invert the LT bit in place: not-less means >= */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* keep only the (inverted) bit at CR_LT */
+}
+
+/* Immediate form of ger_u: unsigned compare of r1 with i0, the LT bit is
+ * inverted with CRNOT and then extracted into r0.  CMPLWI is used when i0
+ * passes can_zero_extend_short_p; otherwise a scratch GPR holds i0. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPLW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+static void
+_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from the GT result of a signed r1/r2 compare */
+    CMPW(r1, r2);                      /* signed compare of r1 against r2 */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* keep only the single bit at CR_GT */
+}
+
+/* Immediate form of gtr: signed compare of r1 with i0, then the CR_GT bit
+ * is extracted into r0.  CMPWI is used when i0 passes
+ * can_sign_extend_short_p; otherwise i0 is loaded into a scratch GPR. */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_sign_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r1, i0);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+static void
+_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from the GT result of an unsigned r1/r2 compare */
+    CMPLW(r1, r2);                     /* logical (unsigned) compare of r1 against r2 */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* keep only the single bit at CR_GT */
+}
+
+/* Immediate form of gtr_u: unsigned compare of r1 with i0, then the CR_GT
+ * bit is extracted into r0.  CMPLWI is used when i0 passes
+ * can_zero_extend_short_p; otherwise i0 is loaded into a scratch GPR. */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPLW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                                      /* set r0 from "not EQ" of comparing r1 with r2 */
+    CMPW(r1, r2);                      /* compare r1 against r2 */
+    CRNOT(CR_EQ, CR_EQ);               /* invert the EQ bit in place */
+    MFCR(r0);                          /* copy the condition register into r0 */
+    EXTRWI(r0, r0, 1, CR_EQ);          /* keep only the (inverted) bit at CR_EQ */
+}
+
+/* Immediate form of ner: compare r1 with i0, invert the EQ bit with CRNOT
+ * and extract it into r0.  CMPWI is used when i0 passes
+ * can_sign_extend_short_p, CMPLWI when it passes can_zero_extend_short_p,
+ * and a scratch GPR with CMPW otherwise. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rhs;
+
+    if (can_sign_extend_short_p(i0))
+       CMPWI(r1, i0);
+    else if (!can_zero_extend_short_p(i0)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i0);
+       CMPW(r1, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r1, i0);
+    CRNOT(CR_EQ, CR_EQ);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+/* Signed compare of r0 with r1 followed by BLT towards i0.  The return
+ * value is _jit->pc.w sampled after the compare, i.e. just before the
+ * branch is emitted; the displacement is relative to that value. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLT(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with the immediate i1 followed by BLT towards i0.
+ * CMPWI is used when i1 passes can_sign_extend_short_p; otherwise i1 is
+ * loaded into a scratch GPR.  Returns _jit->pc.w sampled just before the
+ * branch is emitted. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLT(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with r1 followed by BLT towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPLW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLT(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with the immediate i1 followed by BLT towards
+ * i0.  CMPLWI is used when i1 passes can_zero_extend_short_p; otherwise
+ * i1 is loaded into a scratch GPR.  Returns _jit->pc.w sampled just
+ * before the branch is emitted. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPLW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLT(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with r1 followed by BLE towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with the immediate i1 followed by BLE towards i0.
+ * CMPWI is used when i1 passes can_sign_extend_short_p; otherwise i1 is
+ * loaded into a scratch GPR.  Returns _jit->pc.w sampled just before the
+ * branch is emitted. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with r1 followed by BLE towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPLW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with the immediate i1 followed by BLE towards
+ * i0.  CMPLWI is used when i1 passes can_zero_extend_short_p; otherwise
+ * i1 is loaded into a scratch GPR.  Returns _jit->pc.w sampled just
+ * before the branch is emitted. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPLW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Compare r0 with r1 and BEQ towards i0.  Returns _jit->pc.w sampled
+ * just before the branch is emitted. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return branch_pc;
+}
+
+/* Compare r0 with the immediate i1 and BEQ towards i0.  CMPWI is used
+ * when i1 passes can_sign_extend_short_p, CMPLWI when it passes
+ * can_zero_extend_short_p, and a scratch GPR with CMPW otherwise.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (can_sign_extend_short_p(i1))
+       CMPWI(r0, i1);
+    else if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with r1 followed by BGE towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGE(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with the immediate i1 followed by BGE towards i0.
+ * CMPWI is used when i1 passes can_sign_extend_short_p; otherwise i1 is
+ * loaded into a scratch GPR.  Returns _jit->pc.w sampled just before the
+ * branch is emitted. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGE(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with r1 followed by BGE towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPLW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGE(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with the immediate i1 followed by BGE towards
+ * i0.  CMPLWI is used when i1 passes can_zero_extend_short_p; otherwise
+ * i1 is loaded into a scratch GPR.  Returns _jit->pc.w sampled just
+ * before the branch is emitted. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPLW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGE(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with r1 followed by BGT towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return branch_pc;
+}
+
+/* Signed compare of r0 with the immediate i1 followed by BGT towards i0.
+ * CMPWI is used when i1 passes can_sign_extend_short_p; otherwise i1 is
+ * loaded into a scratch GPR.  Returns _jit->pc.w sampled just before the
+ * branch is emitted. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with r1 followed by BGT towards i0.  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPLW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return branch_pc;
+}
+
+/* Unsigned compare of r0 with the immediate i1 followed by BGT towards
+ * i0.  CMPLWI is used when i1 passes can_zero_extend_short_p; otherwise
+ * i1 is loaded into a scratch GPR.  Returns _jit->pc.w sampled just
+ * before the branch is emitted. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPLW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return branch_pc;
+}
+
+/* Compare r0 with r1 and BNE towards i0.  Returns _jit->pc.w sampled
+ * just before the branch is emitted. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    CMPW(r0, r1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);
+    return branch_pc;
+}
+
+/* Compare r0 with the immediate i1 and BNE towards i0.  CMPWI is used
+ * when i1 passes can_sign_extend_short_p, CMPLWI when it passes
+ * can_zero_extend_short_p, and a scratch GPR with CMPW otherwise.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                rhs;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (can_sign_extend_short_p(i1))
+       CMPWI(r0, i1);
+    else if (!can_zero_extend_short_p(i1)) {
+       rhs = jit_get_reg(jit_class_gpr);
+       movi(rn(rhs), i1);
+       CMPW(r0, rn(rhs));
+       jit_unget_reg(rhs);
+    }
+    else
+       CMPLWI(r0, i1);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & r1) is non-zero: the AND is formed in a
+ * scratch GPR (requested with jit_class_nospill) and handed to bnei
+ * against 0.  Returns whatever bnei returns. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    andr(rn(masked), r0, r1);
+    branch_pc = bnei(i0, rn(masked), 0);
+    jit_unget_reg(masked);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & i1) is non-zero: the AND is formed in a
+ * scratch GPR (requested with jit_class_nospill) and handed to bnei
+ * against 0.  Returns whatever bnei returns. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    andi(rn(masked), r0, i1);
+    branch_pc = bnei(i0, rn(masked), 0);
+    jit_unget_reg(masked);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & r1) is zero: the AND is formed in a scratch
+ * GPR (requested with jit_class_nospill) and handed to beqi against 0.
+ * Returns whatever beqi returns. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    andr(rn(masked), r0, r1);
+    branch_pc = beqi(i0, rn(masked), 0);
+    jit_unget_reg(masked);
+    return branch_pc;
+}
+
+/* Branch to i0 when (r0 & i1) is zero: the AND is formed in a scratch
+ * GPR (requested with jit_class_nospill) and handed to beqi against 0.
+ * Returns whatever beqi returns. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                masked = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    andi(rn(masked), r0, i1);
+    branch_pc = beqi(i0, rn(masked), 0);
+    jit_unget_reg(masked);
+    return branch_pc;
+}
+
+/* r0 += r1 with ADDO, then branch to i0 on signed overflow.  MCRXR moves
+ * the XER flags into CR_0 so an ordinary conditional branch can test
+ * them.  Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    ADDO(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);                         /* GT = bit 1 of XER = OV */
+    return branch_pc;
+}
+
+/* Immediate form of boaddr: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the add and the
+ * branch.  Returns whatever boaddr returns. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                addend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(addend), i1);
+    branch_pc = boaddr(i0, r0, rn(addend));
+    jit_unget_reg(addend);
+    return branch_pc;
+}
+
+/* r0 += r1 with ADDO, then branch to i0 when the GT bit copied from XER
+ * by MCRXR is clear (BLE), i.e. the complement of the boaddr condition.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    ADDO(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Immediate form of bxaddr: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the add and the
+ * branch.  Returns whatever bxaddr returns. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                addend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(addend), i1);
+    branch_pc = bxaddr(i0, r0, rn(addend));
+    jit_unget_reg(addend);
+    return branch_pc;
+}
+
+/* r0 -= r1 with SUBO, then branch to i0 when the GT bit copied from XER
+ * by MCRXR is set (the same test boaddr uses for overflow).  Returns
+ * _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    SUBO(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return branch_pc;
+}
+
+/* Immediate form of bosubr: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the subtract and
+ * the branch.  Returns whatever bosubr returns. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                subtrahend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(subtrahend), i1);
+    branch_pc = bosubr(i0, r0, rn(subtrahend));
+    jit_unget_reg(subtrahend);
+    return branch_pc;
+}
+
+/* r0 -= r1 with SUBO, then branch to i0 when the GT bit copied from XER
+ * by MCRXR is clear (BLE), i.e. the complement of the bosubr condition.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    SUBO(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return branch_pc;
+}
+
+/* Immediate form of bxsubr: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the subtract and
+ * the branch.  Returns whatever bxsubr returns. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                subtrahend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(subtrahend), i1);
+    branch_pc = bxsubr(i0, r0, rn(subtrahend));
+    jit_unget_reg(subtrahend);
+    return branch_pc;
+}
+
+/* r0 += r1 with ADDC, then branch to i0 when the add carried out.  MCRXR
+ * moves the XER flags into CR_0, where the carry shows up as the EQ bit.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    ADDC(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);                         /* EQ = bit 2 of XER = CA */
+    return branch_pc;
+}
+
+/* Immediate form of boaddr_u.  When i1 passes can_sign_extend_short_p the
+ * add is emitted inline with ADDIC and the carry is tested through
+ * MCRXR/BEQ; otherwise i1 is loaded into a scratch GPR (requested with
+ * jit_class_nospill) and boaddr_u does the work.  Returns the branch
+ * position in both cases. */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                addend;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       addend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(addend), i1);
+       branch_pc = boaddr_u(i0, r0, rn(addend));
+       jit_unget_reg(addend);
+       return branch_pc;
+    }
+    ADDIC(r0, r0, i1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return branch_pc;
+}
+
+/* r0 += r1 with ADDC, then branch to i0 when the EQ bit copied from XER
+ * by MCRXR is clear (BNE), i.e. the complement of the boaddr_u condition.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    ADDC(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);
+    return branch_pc;
+}
+
+/* Immediate form of bxaddr_u.  When i1 passes can_sign_extend_short_p the
+ * add is emitted inline with ADDIC and tested through MCRXR/BNE;
+ * otherwise i1 is loaded into a scratch GPR (requested with
+ * jit_class_nospill) and bxaddr_u does the work.  Returns the branch
+ * position in both cases. */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                addend;
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    if (!can_sign_extend_short_p(i1)) {
+       addend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(addend), i1);
+       branch_pc = bxaddr_u(i0, r0, rn(addend));
+       jit_unget_reg(addend);
+       return branch_pc;
+    }
+    ADDIC(r0, r0, i1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);
+    return branch_pc;
+}
+
+/* r0 -= r1 with SUBC, then branch to i0 when the EQ bit copied from XER
+ * by MCRXR is clear (BNE).  The sense is inverted relative to boaddr_u
+ * because the flag is a carry, not a borrow.  Returns _jit->pc.w sampled
+ * just before the branch is emitted. */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    SUBC(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);                         /* PPC uses "carry" not "borrow" */
+    return branch_pc;
+}
+
+/* Immediate form of bosubr_u: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the subtract and
+ * the branch.  Returns whatever bosubr_u returns. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                subtrahend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(subtrahend), i1);
+    branch_pc = bosubr_u(i0, r0, rn(subtrahend));
+    jit_unget_reg(subtrahend);
+    return branch_pc;
+}
+
+/* r0 -= r1 with SUBC, then branch to i0 when the EQ bit copied from XER
+ * by MCRXR is set (BEQ), i.e. the complement of the bosubr_u condition.
+ * Returns _jit->pc.w sampled just before the branch is emitted. */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    SUBC(r0, r0, r1);
+    MCRXR(CR_0);
+    branch_pc = _jit->pc.w;
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return branch_pc;
+}
+
+/* Immediate form of bxsubr_u: i1 is loaded into a scratch GPR (requested
+ * with jit_class_nospill) and the register form emits the subtract and
+ * the branch.  Returns whatever bxsubr_u returns. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                subtrahend = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+    movi(rn(subtrahend), i1);
+    branch_pc = bxsubr_u(i0, r0, rn(subtrahend));
+    jit_unget_reg(subtrahend);
+    return branch_pc;
+}
+
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{                              /* signed byte load from the address in r1, built from two steps */
+    ldr_uc(r0, r1);            /* load the byte with the unsigned-char loader */
+    extr_c(r0, r0);            /* then extend r0 in place with extr_c (presumably sign extension) */
+}
+
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{                              /* signed byte load from the absolute address i0 */
+    ldi_uc(r0, i0);            /* load the byte with the unsigned-char loader */
+    extr_c(r0, r0);            /* then extend r0 in place with extr_c (presumably sign extension) */
+}
+
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{                              /* signed byte load indexed by two registers, r1 and r2 */
+    ldxr_uc(r0, r1, r2);       /* load the byte with the unsigned-char indexed loader */
+    extr_c(r0, r0);            /* then extend r0 in place with extr_c (presumably sign extension) */
+}
+
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{                              /* signed byte load from base r1 plus immediate offset i0 */
+    ldxi_uc(r0, r1, i0);       /* load the byte with the unsigned-char offset loader */
+    extr_c(r0, r0);            /* then extend r0 in place with extr_c (presumably sign extension) */
+}
+
+/* Load an unsigned byte from the absolute address i0 into r0.
+ *  - i0 passes can_sign_extend_short_p: LBZ with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LBZ with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_uc. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LBZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_uc(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LBZ(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Unsigned byte load from r1 + r2 into r0 using LBZX.  On PowerPC a base
+ * (RA) operand of register 0 reads as the constant zero, so when r1 is
+ * _R0_REGNO the operands are swapped, and when both are _R0_REGNO the
+ * base is first copied into a scratch GPR. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                base;
+
+    if (r1 != _R0_REGNO) {
+       LBZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LBZX(r0, r2, r1);
+       return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movr(rn(base), r1);
+    LBZX(r0, rn(base), r2);
+    jit_unget_reg(base);
+}
+
+/* Unsigned byte load from r1 + i0 into r0.  A zero offset falls back to
+ * ldr_uc; an offset passing can_sign_extend_short_p uses LBZ directly
+ * (copying r1 to a scratch GPR first when r1 is _R0_REGNO, which cannot
+ * serve as a base); any other offset is loaded into a scratch GPR and
+ * handled by ldxr_uc. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_uc(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_uc(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LBZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LBZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a signed halfword from the absolute address i0 into r0.
+ *  - i0 passes can_sign_extend_short_p: LHA with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LHA with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_s. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHA(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_s(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LHA(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Signed halfword load from r1 + r2 into r0 using LHAX.  On PowerPC a
+ * base (RA) operand of register 0 reads as the constant zero, so when r1
+ * is _R0_REGNO the operands are swapped, and when both are _R0_REGNO the
+ * base is first copied into a scratch GPR. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                base;
+
+    if (r1 != _R0_REGNO) {
+       LHAX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LHAX(r0, r2, r1);
+       return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movr(rn(base), r1);
+    LHAX(r0, rn(base), r2);
+    jit_unget_reg(base);
+}
+
+/* Signed halfword load from r1 + i0 into r0.  A zero offset falls back to
+ * ldr_s; an offset passing can_sign_extend_short_p uses LHA directly
+ * (copying r1 to a scratch GPR first when r1 is _R0_REGNO, which cannot
+ * serve as a base); any other offset is loaded into a scratch GPR and
+ * handled by ldxr_s. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_s(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_s(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LHA(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHA(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned halfword from the absolute address i0 into r0.
+ *  - i0 passes can_sign_extend_short_p: LHZ with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LHZ with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_us. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_us(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LHZ(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Unsigned halfword load from r1 + r2 into r0 using LHZX.  On PowerPC a
+ * base (RA) operand of register 0 reads as the constant zero, so when r1
+ * is _R0_REGNO the operands are swapped, and when both are _R0_REGNO the
+ * base is first copied into a scratch GPR. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                base;
+
+    if (r1 != _R0_REGNO) {
+       LHZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LHZX(r0, r2, r1);
+       return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movr(rn(base), r1);
+    LHZX(r0, rn(base), r2);
+    jit_unget_reg(base);
+}
+
+/* Unsigned halfword load from r1 + i0 into r0.  A zero offset falls back
+ * to ldr_us; an offset passing can_sign_extend_short_p uses LHZ directly
+ * (copying r1 to a scratch GPR first when r1 is _R0_REGNO, which cannot
+ * serve as a base); any other offset is loaded into a scratch GPR and
+ * handled by ldxr_us. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_us(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_us(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LHZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+#  if __WORDSIZE == 32
+/* 32-bit build: load a word from the absolute address i0 into r0.
+ *  - i0 passes can_sign_extend_short_p: LWZ with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LWZ with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_i. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_i(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LWZ(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* 32-bit build: word load from r1 + r2 into r0 using LWZX.  On PowerPC a
+ * base (RA) operand of register 0 reads as the constant zero, so when r1
+ * is _R0_REGNO the operands are swapped, and when both are _R0_REGNO the
+ * base is first copied into a scratch GPR. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                base;
+
+    if (r1 != _R0_REGNO) {
+       LWZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LWZX(r0, r2, r1);
+       return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movr(rn(base), r1);
+    LWZX(r0, rn(base), r2);
+    jit_unget_reg(base);
+}
+
+/* 32-bit build: word load from r1 + i0 into r0.  A zero offset falls back
+ * to ldr_i; an offset passing can_sign_extend_short_p uses LWZ directly
+ * (copying r1 to a scratch GPR first when r1 is _R0_REGNO, which cannot
+ * serve as a base); any other offset is loaded into a scratch GPR and
+ * handled by ldxr_i. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_i(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_i(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LWZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LWZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+#  else
+/* 64-bit build: load a signed 32-bit word from the absolute address i0
+ * into r0.
+ *  - i0 passes can_sign_extend_short_p: LWA with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LWA with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_i.
+ * NOTE(review): LWA is a DS-form instruction whose displacement must be
+ * a multiple of 4; confirm that the LWA macro or the callers guarantee
+ * this for i0/lo. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWA(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_i(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LWA(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* 64-bit build: load a signed 32-bit word from r1 + r2 into r0.
+ *
+ * Every path must use LWAX (load word algebraic, sign-extending to 64
+ * bits).  Two of the three paths previously emitted LWZX, which
+ * zero-extends and therefore returned the wrong value for negative
+ * words; that behaviour belongs to ldxr_ui.
+ *
+ * On PowerPC a base (RA) operand of register 0 reads as the constant
+ * zero, so when r1 is _R0_REGNO the operands are swapped, and when both
+ * are _R0_REGNO the base is first copied into a scratch GPR. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r1 == _R0_REGNO) {
+       if (r2 != _R0_REGNO)
+           LWAX(r0, r2, r1);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWAX(r0, rn(reg), r2);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       LWAX(r0, r1, r2);
+}
+
+/* 64-bit build: signed 32-bit word load from r1 + i0 into r0.  A zero
+ * offset falls back to ldr_i; an offset passing can_sign_extend_short_p
+ * uses LWA directly (copying r1 to a scratch GPR first when r1 is
+ * _R0_REGNO, which cannot serve as a base); any other offset is loaded
+ * into a scratch GPR and handled by ldxr_i.
+ * NOTE(review): LWA is a DS-form instruction whose displacement must be
+ * a multiple of 4; confirm that the LWA macro or the callers guarantee
+ * this for i0. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_i(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_i(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LWA(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LWA(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* 64-bit build: load an unsigned 32-bit word from the absolute address
+ * i0 into r0.
+ *  - i0 passes can_sign_extend_short_p: LWZ with _R0_REGNO in the base
+ *    slot and i0 as displacement;
+ *  - i0 passes can_sign_extend_int_p: LIS of the adjusted high half into
+ *    a scratch GPR, then LWZ with the signed low half as displacement;
+ *  - otherwise: the full address goes through movi and ldr_ui. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         lo, hi;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    if (!can_sign_extend_int_p(i0)) {
+       base = jit_get_reg(jit_class_gpr);
+       movi(rn(base), i0);
+       ldr_ui(r0, rn(base));
+       jit_unget_reg(base);
+       return;
+    }
+    /* hi is bumped by one when bit 15 of i0 is set, compensating for lo
+     * being used as a signed 16-bit displacement. */
+    hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lo = (jit_int16_t)(i0 - (hi << 16));
+    base = jit_get_reg(jit_class_gpr);
+    /* _R0 is not usable as the base here: if it was handed out, keep it
+     * reserved and request another register. */
+    skipped_r0 = (base == _R0);
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), hi);
+    LWZ(r0, rn(base), lo);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* 64-bit build: unsigned 32-bit word load from r1 + r2 into r0 using
+ * LWZX.  On PowerPC a base (RA) operand of register 0 reads as the
+ * constant zero, so when r1 is _R0_REGNO the operands are swapped, and
+ * when both are _R0_REGNO the base is first copied into a scratch GPR. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                base;
+
+    if (r1 != _R0_REGNO) {
+       LWZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LWZX(r0, r2, r1);
+       return;
+    }
+    base = jit_get_reg(jit_class_gpr);
+    movr(rn(base), r1);
+    LWZX(r0, rn(base), r2);
+    jit_unget_reg(base);
+}
+
+/*
+ * Load an unsigned (zero-extended) 32-bit word from r1 + i0 into r0.
+ *
+ * A zero displacement uses ldr_ui, the unsigned register-indirect load,
+ * so that this path agrees with the LWZ / ldxr_ui paths below; using
+ * the signed ldr_i here would sign-extend the value.
+ * Displacements that pass can_sign_extend_short_p are encoded directly
+ * in LWZ (through a scratch copy when the base is _R0_REGNO); anything
+ * larger is materialized in a scratch register and loaded via ldxr_ui.
+ */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       ldr_ui(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r1 == _R0_REGNO) {
+           /* _R0_REGNO is not used as the base operand; copy it. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWZ(r0, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           LWZ(r0, r1, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_ui(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Load a doubleword from the absolute address i0 into r0 with LD.
+ * Three encodings are tried in turn: a displacement off _R0_REGNO, a LIS
+ * of the upper part plus a 16-bit displacement, and finally a full movi
+ * of the address followed by ldr_l.
+ */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         upper, lower;
+    jit_int32_t                scratch;
+    jit_bool_t         got_r0;
+
+    if (can_sign_extend_short_p(i0)) {
+       LD(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       ldr_l(r0, rn(scratch));
+       jit_unget_reg(scratch);
+       return;
+    }
+
+    /* Split i0 into an upper part for LIS and a signed 16-bit
+     * displacement; upper absorbs a carry when bit 15 of i0 is set. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+
+    /* If the allocator hands out _R0, keep it reserved and take another
+     * register to serve as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    got_r0 = scratch == _R0;
+    if (got_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    LD(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Indexed doubleword load with LDX: r0 = *(r1 + r2).
+ * _R0_REGNO is kept out of the base operand slot by swapping the two
+ * address operands or, when both are r0, by copying r0 to a scratch.
+ */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r1 != _R0_REGNO) {
+       LDX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       /* Swap so that r2 becomes the base. */
+       LDX(r0, r2, r1);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LDX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a doubleword from r1 + i0 into r0.  LD is used directly when i0
+ * passes can_sign_extend_short_p; otherwise i0 is materialized in a
+ * scratch register and the load goes through ldxr_l.
+ */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    /* No displacement: plain register-indirect load. */
+    if (i0 == 0) {
+       ldr_l(r0, r1);
+       return;
+    }
+
+    /* Displacement does not fit the instruction: use the indexed form. */
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_l(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+
+    if (r1 != _R0_REGNO) {
+       LD(r0, r1, i0);
+       return;
+    }
+
+    /* _R0_REGNO is not used as the base operand; copy it first. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LD(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/*
+ * Store the low byte of r0 at the absolute address i0 with STB.
+ * Three encodings are tried in turn: a displacement off _R0_REGNO, a LIS
+ * of the upper part plus a 16-bit displacement, and finally a full movi
+ * of the address followed by str_c.
+ */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_word_t         upper, lower;
+    jit_int32_t                scratch;
+    jit_bool_t         got_r0;
+
+    if (can_sign_extend_short_p(i0)) {
+       STB(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_c(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+
+    /* Split i0 into an upper part for LIS and a signed 16-bit
+     * displacement; upper absorbs a carry when bit 15 of i0 is set. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+
+    /* If the allocator hands out _R0, keep it reserved and take another
+     * register to serve as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    got_r0 = scratch == _R0;
+    if (got_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STB(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Indexed byte store with STBX: *(r0 + r1) = r2.
+ * _R0_REGNO is kept out of the base operand slot by swapping the two
+ * address operands or, when both are r0, by copying r0 to a scratch.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STBX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* Swap so that r1 becomes the base. */
+       STBX(r2, r1, r0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STBX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the low byte of r1 at address r0 + i0.
+ *
+ * A zero displacement uses str_c.  Displacements that pass
+ * can_sign_extend_short_p are encoded directly in STB; when the base is
+ * _R0_REGNO, the base register r0 is first copied into a scratch
+ * register (movr takes a register, so it must be given r0, not the
+ * immediate i0).  Larger displacements are materialized in a scratch
+ * register and stored through stxr_c.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_c(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* _R0_REGNO is not used as the base operand; copy it. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STB(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STB(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_c(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the low halfword of r0 at the absolute address i0 with STH.
+ * Three encodings are tried in turn: a displacement off _R0_REGNO, a LIS
+ * of the upper part plus a 16-bit displacement, and finally a full movi
+ * of the address followed by str_s.
+ */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_word_t         upper, lower;
+    jit_int32_t                scratch;
+    jit_bool_t         got_r0;
+
+    if (can_sign_extend_short_p(i0)) {
+       STH(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_s(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+
+    /* Split i0 into an upper part for LIS and a signed 16-bit
+     * displacement; upper absorbs a carry when bit 15 of i0 is set. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+
+    /* If the allocator hands out _R0, keep it reserved and take another
+     * register to serve as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    got_r0 = scratch == _R0;
+    if (got_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STH(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Indexed halfword store with STHX: *(r0 + r1) = r2.
+ * _R0_REGNO is kept out of the base operand slot by swapping the two
+ * address operands or, when both are r0, by copying r0 to a scratch.
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STHX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* Swap so that r1 becomes the base. */
+       STHX(r2, r1, r0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STHX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the low halfword of r1 at address r0 + i0.
+ *
+ * A zero displacement uses str_s.  Displacements that pass
+ * can_sign_extend_short_p are encoded directly in STH; when the base is
+ * _R0_REGNO, the base register r0 is first copied into a scratch
+ * register (movr takes a register, so it must be given r0, not the
+ * immediate i0).  Larger displacements are materialized in a scratch
+ * register and stored through stxr_s.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_s(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* _R0_REGNO is not used as the base operand; copy it. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STH(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STH(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_s(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the low word of r0 at the absolute address i0 with STW.
+ * Three encodings are tried in turn: a displacement off _R0_REGNO, a LIS
+ * of the upper part plus a 16-bit displacement, and finally a full movi
+ * of the address followed by str_i.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_word_t         upper, lower;
+    jit_int32_t                scratch;
+    jit_bool_t         got_r0;
+
+    if (can_sign_extend_short_p(i0)) {
+       STW(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_i(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+
+    /* Split i0 into an upper part for LIS and a signed 16-bit
+     * displacement; upper absorbs a carry when bit 15 of i0 is set. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+
+    /* If the allocator hands out _R0, keep it reserved and take another
+     * register to serve as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    got_r0 = scratch == _R0;
+    if (got_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STW(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Indexed word store with STWX: *(r0 + r1) = r2.
+ * _R0_REGNO is kept out of the base operand slot by swapping the two
+ * address operands or, when both are r0, by copying r0 to a scratch.
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STWX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* Swap so that r1 becomes the base. */
+       STWX(r2, r1, r0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STWX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the low word of r1 at address r0 + i0.
+ *
+ * A zero displacement uses str_i.  Displacements that pass
+ * can_sign_extend_short_p are encoded directly in STW; when the base is
+ * _R0_REGNO, the base register r0 is first copied into a scratch
+ * register (movr takes a register, so it must be given r0, not the
+ * immediate i0).  Larger displacements are materialized in a scratch
+ * register and stored through stxr_i.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_i(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* _R0_REGNO is not used as the base operand; copy it. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STW(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STW(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_i(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+/*
+ * Store the doubleword in r0 at the absolute address i0 with STD.
+ * Three encodings are tried in turn: a displacement off _R0_REGNO, a LIS
+ * of the upper part plus a 16-bit displacement, and finally a full movi
+ * of the address followed by str_l.
+ */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_word_t         upper, lower;
+    jit_int32_t                scratch;
+    jit_bool_t         got_r0;
+
+    if (can_sign_extend_short_p(i0)) {
+       STD(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_l(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+
+    /* Split i0 into an upper part for LIS and a signed 16-bit
+     * displacement; upper absorbs a carry when bit 15 of i0 is set. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+
+    /* If the allocator hands out _R0, keep it reserved and take another
+     * register to serve as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    got_r0 = scratch == _R0;
+    if (got_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STD(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Indexed doubleword store with STDX: *(r0 + r1) = r2.
+ * _R0_REGNO is kept out of the base operand slot by swapping the two
+ * address operands or, when both are r0, by copying r0 to a scratch.
+ */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STDX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* Swap so that r1 becomes the base. */
+       STDX(r2, r1, r0);
+       return;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STDX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the doubleword in r1 at address r0 + i0.
+ *
+ * A zero displacement uses str_l.  Displacements that pass
+ * can_sign_extend_short_p are encoded directly in STD; when the base is
+ * _R0_REGNO, the base register r0 is first copied into a scratch
+ * register (movr takes a register, so it must be given r0, not the
+ * immediate i0).  Larger displacements are materialized in a scratch
+ * register and stored through stxr_l.
+ */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_l(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* _R0_REGNO is not used as the base operand; copy it. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STD(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STD(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_l(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+#  endif
+
+/*
+ * Indirect jump to the address held in r0: the address is moved to the
+ * count register (MTCTR) and branched to with BCTR.  The disabled
+ * alternative did the same through the link register (MTLR/BLR).
+ */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+#if 0
+    MTLR(r0);
+    BLR();
+#else
+    MTCTR(r0);
+    BCTR();
+#endif
+}
+
+/* pc relative jump
+ *
+ * Emits B(d) when the distance from the current pc to i0 passes
+ * can_sign_extend_jump_p; otherwise loads i0 into a non-spillable
+ * scratch register with movi_p and jumps through it.
+ * Returns the pc at entry for the direct branch, or the value returned
+ * by movi_p for the register form (presumably the address a later
+ * patch_at should be given -- confirm against callers).
+ */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_word_t         w, d;
+    w = _jit->pc.w;
+    /* Byte distance to the target with the two low bits cleared. */
+    d = (i0 - w) & ~3;
+    if (can_sign_extend_jump_p(d))
+       B(d);
+    else {
+       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       w = movi_p(rn(reg), i0);
+       jmpr(rn(reg));
+       jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/*
+ * Absolute jump: materialize i0 in a non-spillable scratch register
+ * with movi_p and branch through it.  Returns whatever movi_p returned.
+ */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                target;
+    jit_word_t         patch;
+
+    target = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    patch = movi_p(rn(target), i0);
+    jmpr(rn(target));
+    jit_unget_reg(target);
+
+    return (patch);
+}
+
+/*
+ * Emit an indirect call through r0 using the count register
+ * (MTCTR + BCTRL).
+ *
+ * With _CALL_AIXDESC, r0 is treated as the address of a three-word
+ * descriptor: word 0 is loaded back into r0 and used as the branch
+ * target, word 1 goes to _R2_REGNO and word 2 to _R11_REGNO.  The
+ * caller's _R2_REGNO is saved 5 words above _SP_REGNO before the call
+ * and reloaded after it.  Note that r0 is overwritten on this path.
+ *
+ * Otherwise the target is also copied to _R12_REGNO and, for _CALL_SYSV
+ * calls with varargs set, CREQV(6, 6, 6) is emitted first.
+ */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0
+#  if _CALL_SYSV
+       , jit_int32_t varargs
+#  endif
+       )
+{
+#  if _CALL_AIXDESC
+    stxi(sizeof(void*) * 5, _SP_REGNO, _R2_REGNO);
+    /* FIXME Pretend to not know about r11? */
+    if (r0 == _R0_REGNO) {
+       /* Use r11 as the base instead of _R0_REGNO; r11 is overwritten
+        * only by the last load that uses it. */
+       movr(_R11_REGNO, _R0_REGNO);
+       ldxi(_R2_REGNO, _R11_REGNO, sizeof(void*));
+       ldxi(_R11_REGNO, _R11_REGNO, sizeof(void*) * 2);
+    }
+    else {
+       ldxi(_R2_REGNO, r0, sizeof(void*));
+       ldxi(_R11_REGNO, r0, sizeof(void*) * 2);
+    }
+    /* r0 = descriptor word 0, the address that is branched to below. */
+    ldr(r0, r0);
+#  else
+#    if _CALL_SYSV
+    /* Tell double arguments were passed in registers. */
+    if (varargs)
+       CREQV(6, 6, 6);
+#    endif
+    movr(_R12_REGNO, r0);
+#  endif
+
+    MTCTR(r0);
+    BCTRL();
+
+#  if _CALL_AIXDESC
+    /* Restore the _R2_REGNO value saved before the call. */
+    ldxi(_R2_REGNO, _SP_REGNO, sizeof(void*) * 5);
+#  endif
+}
+
+/* assume fixed address or reachable address
+ *
+ * Call the function at i0.  Under _CALL_SYSV a direct BL is emitted when
+ * the distance from the current pc passes can_sign_extend_jump_p; in all
+ * other cases i0 is loaded into _R12_REGNO and the call goes through
+ * callr.  Nothing is returned, so the emitted call cannot be patched
+ * later through this entry point.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0
+#  if _CALL_SYSV
+       , jit_int32_t varargs
+#  endif
+       )
+{
+#  if _CALL_SYSV
+    jit_word_t         d;
+    d = (i0 - _jit->pc.w) & ~3;
+    if (can_sign_extend_jump_p(d))
+       BL(d);
+    else
+#  endif
+    /* This block is the "else" arm under _CALL_SYSV and the whole body
+     * otherwise. */
+    {
+       movi(_R12_REGNO, i0);
+       callr(_R12_REGNO
+#  if _CALL_SYSV
+             , varargs
+#  endif
+             );
+    }
+}
+
+/* absolute call
+ *
+ * Loads i0 into _R12_REGNO with movi_p and calls through callr.
+ * Returns the value returned by movi_p (presumably the address of the
+ * patchable load sequence -- confirm against callers).
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0
+#  if _CALL_SYSV
+       , jit_int32_t varargs
+#  endif
+        )
+{
+    jit_word_t         w;
+    w = movi_p(_R12_REGNO, i0);
+    callr(_R12_REGNO
+#  if _CALL_SYSV
+         , varargs
+#  endif
+         );
+    return (w);
+}
+
+/* order is not guaranteed to be sequential
+ *
+ * General purpose registers (r14..r31) that _prolog saves and _epilog
+ * restores when they are set in the function's regset.  The index into
+ * this table, not the register number, selects the save slot. */
+static jit_int32_t save[] = {
+    _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22,
+    _R23, _R24, _R25, _R26, _R27, _R28, _R29, _R30, _R31,
+};
+
+/*
+ * Emit the function prologue: finalize the frame size, save the return
+ * address, the save[] registers and _F14.._F21 that are set in the
+ * function regset, and the frame pointer, all relative to the incoming
+ * stack pointer; then allocate the frame and, for varargs functions,
+ * store the remaining argument registers.
+ * Nothing is emitted when the function is marked assume_frame.
+ * The node argument is not used.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned long      regno;
+    jit_word_t         offset;
+
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar) {
+       /* Reserve two more words and keep aoff a multiple of 16. */
+       _jitc->function->self.aoff -= 2 * sizeof(jit_word_t);
+       _jitc->function->self.aoff &= -16;
+    }
+    /* Total frame size, rounded up to a multiple of 16. */
+    _jitc->function->stack = ((_jitc->function->self.alen +
+                             _jitc->function->self.size -
+                             _jitc->function->self.aoff) + 15) & -16;
+
+    /* return address */
+    MFLR(_R0_REGNO);
+
+    /* params >= %r31+params_offset+(8*sizeof(jit_word_t))
+     * alloca <  %r31-80 */
+
+#if _CALL_SYSV
+    stxi(sizeof(jit_word_t), _SP_REGNO, _R0_REGNO);
+#else
+    stxi(sizeof(void*) * 2, _SP_REGNO, _R0_REGNO);
+#endif
+    /* Each save[] entry owns a fixed slot starting at -gpr_save_area;
+     * the offset advances even for registers that are not saved. */
+    offset = -gpr_save_area;
+    for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) {
+       if (jit_regset_tstbit(&_jitc->function->regset, save[regno]))
+           stxi(offset, _SP_REGNO, rn(save[regno]));
+    }
+    /* Eight float registers starting at _F14, stored below the gpr
+     * save area. */
+    for (offset = 0; offset < 8; offset++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, _F14 + offset))
+           stxi_d(-(gpr_save_area + 8 + offset * 8),
+                  _SP_REGNO, rn(_F14 + offset));
+    }
+
+    /* Save the caller's frame pointer just below the incoming sp. */
+    stxi(-(sizeof(void*)), _SP_REGNO, _FP_REGNO);
+
+    /* fp = incoming sp; then allocate the frame with a store-with-update
+     * of sp at the new stack top. */
+    movr(_FP_REGNO, _SP_REGNO);
+#if __WORDSIZE == 32
+    STWU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack);
+#else
+    STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack);
+#endif
+
+    if (_jitc->function->allocar) {
+       /* Record the current aoff in the frame at fp + aoffoff. */
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
+
+#if !_CALL_SYSV
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* Store the argument registers from index vagp onward into the
+        * parameter area. */
+       for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); ++regno)
+           stxi(params_offset + regno * sizeof(jit_word_t),
+                _FP_REGNO, rn(JIT_RA0 - regno));
+    }
+#else
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* Store the remaining gp and fp argument registers into the
+        * va_list area located at fp + vaoff. */
+       for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); ++regno)
+           stxi(_jitc->function->vaoff + first_gp_offset +
+                regno * sizeof(jit_word_t), _FP_REGNO, rn(JIT_RA0 - regno));
+       for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); ++regno)
+           stxi_d(_jitc->function->vaoff + first_fp_offset +
+                  regno * va_fp_increment, _FP_REGNO,
+                  rn(JIT_FA0 - regno));
+    }
+#endif
+}
+
+/*
+ * Emit the function epilogue, mirroring _prolog: restore the stack
+ * pointer, reload the return address, the saved save[] registers,
+ * _F14.._F21 and the frame pointer, then return with BLR.
+ * Nothing is emitted when the function is marked assume_frame.
+ * The node argument is not used.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned long      regno;
+    jit_word_t         offset;
+
+    if (_jitc->function->assume_frame)
+       return;
+    if (_jitc->function->allocar)
+       /* Reload sp from the word stored at the current stack top
+        * rather than adding the fixed frame size. */
+       ldr(_SP_REGNO, _SP_REGNO);
+    else
+       addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+#if _CALL_SYSV
+    ldxi(_R0_REGNO, _SP_REGNO, sizeof(jit_word_t));
+#else
+    ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2);
+#endif
+    /* Same slot layout as in _prolog: one slot per save[] entry. */
+    offset = -gpr_save_area;
+    for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) {
+       if (jit_regset_tstbit(&_jitc->function->regset, save[regno]))
+           ldxi(rn(save[regno]), _SP_REGNO, offset);
+    }
+    for (offset = 0; offset < 8; offset++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, _F14 + offset))
+           ldxi_d(rn(_F14 + offset), _SP_REGNO,
+                  -(gpr_save_area + 8 + offset * 8));
+    }
+
+    /* Return address back into the link register, then restore fp. */
+    MTLR(_R0_REGNO);
+    ldxi(_FP_REGNO, _SP_REGNO, -(sizeof(void*)));
+
+    BLR();
+}
+
+/*
+ * Initialize a va_list in r0 for the current varargs function.
+ * Without _CALL_SYSV, r0 is simply a pointer at fp + self.size.
+ * With _CALL_SYSV, r0 points at the jit_va_list_t stored at fp + vaoff,
+ * whose ngpr, nfpr, over and save fields are filled in here.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+#if !_CALL_SYSV
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Initialize stack pointer to the first stack argument. */
+    addi(r0, _FP_REGNO, _jitc->function->self.size);
+#else
+    jit_int32_t                reg;
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Return jit_va_list_t in the register argument */
+    addi(r0, _FP_REGNO, _jitc->function->vaoff);
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* Initialize the gp counter. */
+    movi(rn(reg), _jitc->function->vagp);
+    stxi_c(offsetof(jit_va_list_t, ngpr), r0, rn(reg));
+
+    /* Initialize the fp counter. */
+    movi(rn(reg), _jitc->function->vafp);
+    stxi_c(offsetof(jit_va_list_t, nfpr), r0, rn(reg));
+
+    /* Initialize overflow pointer to the first stack argument. */
+    addi(rn(reg), _FP_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+    /* Initialize register save area pointer. */
+    addi(rn(reg), r0, first_gp_offset);
+    stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
+
+    jit_unget_reg(reg);
+#endif
+}
+
+/*
+ * Fetch the next word-sized variadic argument into r0; r1 holds the
+ * va_list.
+ * Without _CALL_SYSV, r1 is a plain pointer that is dereferenced and
+ * advanced by one word.
+ * With _CALL_SYSV, r1 points at a jit_va_list_t: while the ngpr counter
+ * is below 8 the argument is read from the register save area and the
+ * counter is incremented, otherwise it is read from the overflow
+ * pointer, which is then advanced by va_gp_increment.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if !_CALL_SYSV
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Load argument. */
+    ldr(r0, r1);
+    /* Update va_list. */
+    addi(r1, r1, sizeof(jit_word_t));
+#else
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the gp offset in save area in the first temporary. */
+    ldxi_uc(rn(rg0), r1, offsetof(jit_va_list_t, ngpr));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei(_jit->pc.w, rn(rg0), 8);
+
+    /* Update the gp counter. */
+    addi(rn(rg1), rn(rg0), 1);
+    stxi_c(offsetof(jit_va_list_t, ngpr), r1, rn(rg1));
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Load the vararg argument in the first argument. */
+    lshi(rn(rg0), rn(rg0), va_gp_shift);
+    ldxr(r0, rn(rg1), rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg(rg1);
+
+    /* Jump over overflow code. */
+    /* B(0) is a placeholder; its target is filled in by the patch_at
+     * call on lt_code below. */
+    lt_code = _jit->pc.w;
+    B(0);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    ldr(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), va_gp_increment);
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+#endif
+}
+
+/*
+ * Fetch the next double variadic argument into float register r0; r1
+ * holds the va_list.
+ * Without _CALL_SYSV, r1 is a plain pointer that is dereferenced and
+ * advanced by sizeof(jit_float64_t).
+ * With _CALL_SYSV, r1 points at a jit_va_list_t: while the nfpr counter
+ * is below 8 the argument is read from the fp part of the register save
+ * area and the counter is incremented, otherwise it is read from the
+ * overflow pointer, which is then advanced by va_fp_increment.
+ */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if !_CALL_SYSV
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Load argument. */
+    ldr_d(r0, r1);
+    /* Update va_list. */
+    addi(r1, r1, sizeof(jit_float64_t));
+#else
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the fp offset in save area in the first temporary. */
+    ldxi_uc(rn(rg0), r1, offsetof(jit_va_list_t, nfpr));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei(_jit->pc.w, rn(rg0), 8);
+
+    /* Update the fp counter. */
+    addi(rn(rg1), rn(rg0), 1);
+    stxi_c(offsetof(jit_va_list_t, nfpr), r1, rn(rg1));
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Load the vararg argument in the first argument. */
+    /* Byte offset = index * 8 plus the distance between the fp and gp
+     * parts of jit_va_list_t, since save points at the gp part. */
+    lshi(rn(rg0), rn(rg0), 3);
+    addi(rn(rg0), rn(rg0), offsetof(jit_va_list_t, first_fp_argument) -
+        offsetof(jit_va_list_t, first_gp_argument));
+    ldxr_d(r0, rn(rg1), rn(rg0));
+
+    /* Jump over overflow code. */
+    /* B(0) is a placeholder; its target is filled in by the patch_at
+     * call on lt_code below. */
+    lt_code = _jit->pc.w;
+    B(0);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+#  if __WORDSIZE == 32
+    /* Align if required. */
+    /* Adds (ptr & 7) to ptr: rounds up to a multiple of 8 provided the
+     * pointer is already a multiple of 4 -- TODO confirm that holds. */
+    andi(rn(rg1), rn(rg0), 7);
+    addr(rn(rg0), rn(rg0), rn(rg1));
+#  endif
+
+    /* Load argument. */
+    ldr_d(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), va_fp_increment);
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+    jit_unget_reg(rg1);
+#endif
+}
+
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          d;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    switch ((u.i[0] & 0xfc000000) >> 26) {
+       case 16:                                        /* BCx */
+           d = label - instr;
+           assert(!(d & 3));
+           if (!can_sign_extend_short_p(d)) {
+               /* use absolute address */
+               assert(can_sign_extend_short_p(label));
+               d |= 2;
+           }
+           u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
+           break;
+       case 18:                                        /* Bx */
+#if _CALL_AIXDESC
+           if (_jitc->jump && (!(u.i[0] & 1))) {       /* jmpi label */
+               /* zero is used for toc and env, so, quick check
+                * if this is a "jmpi main" like initial jit
+                * instruction */
+               if (((long *)label)[1] == 0 && ((long *)label)[2] == 0) {
+                   for (d = 0; d < _jitc->prolog.offset; d++) {
+                       /* not so pretty, but hides powerpc
+                        * specific abi intrinsics and/or
+                        * implementation from user */
+                       if (_jitc->prolog.ptr[d] == label) {
+                           label += sizeof(void*) * 3;
+                           break;
+                       }
+                   }
+               }
+           }
+#endif
+           d = label - instr;
+           assert(!(d & 3));
+           if (!can_sign_extend_jump_p(d)) {
+               /* use absolute address */
+               assert(can_sign_extend_jump_p(label));
+               d |= 2;
+           }
+           u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
+           break;
+       case 15:                                        /* LI */
+#if __WORDSIZE == 32
+#  define MTCTR_OFF            2
+#  define BCTR_OFF             3
+#else
+#  define MTCTR_OFF            6
+#  define BCTR_OFF             7
+#endif
+#if _CALL_AIXDESC
+           /* movi reg label; jmpr reg */
+           if (_jitc->jump &&
+#if 0
+               /* check for MLTR(reg) */
+               (u.i[MTCTR_OFF] >> 26) == 31 &&
+               ((u.i[MTCTR_OFF] >> 16) & 0x3ff) == 8 &&
+               ((u.i[MTCTR_OFF] >> 1) & 0x3ff) == 467 &&
+               /* check for BLR */
+               u.i[BCTR_OFF] == 0x4e800020) {
+#else
+               /* check for MTCTR(reg) */
+               (u.i[MTCTR_OFF] >> 26) == 31 &&
+               ((u.i[MTCTR_OFF] >> 16) & 0x3ff) == 9 &&
+               ((u.i[MTCTR_OFF] >> 1) & 0x3ff) == 467 &&
+               /* check for BCTR */
+               u.i[BCTR_OFF] == 0x4e800420) {
+#endif
+               /* zero is used for toc and env, so, quick check
+                * if this is a "jmpi main" like initial jit
+                * instruction */
+               if (((long *)label)[1] == 0 && ((long *)label)[2] == 0) {
+                   for (d = 0; d < _jitc->prolog.offset; d++) {
+                       /* not so pretty, but hides powerpc
+                        * specific abi intrinsics and/or
+                        * implementation from user */
+                       if (_jitc->prolog.ptr[d] == label) {
+                           label += sizeof(void*) * 3;
+                           break;
+                       }
+                   }
+               }
+           }
+#endif
+#undef BCTR_OFF
+#undef MTCTR_OFF
+#if __WORDSIZE == 32
+           assert(!(u.i[0] & 0x1f0000));
+           u.i[0] = (u.i[0] & ~0xffff) | ((label >> 16) & 0xffff);
+           assert((u.i[1] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[1] >> 16) & 0x1f) == ((u.i[1] >> 21) & 0x1f));
+           u.i[1] = (u.i[1] & ~0xffff) | (label & 0xffff);
+#else
+           assert(!(u.i[0] & 0x1f0000));
+           u.i[0] = (u.i[0] & ~0xffff) | ((label >> 48) & 0xffff);
+           assert((u.i[1] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[1] >> 16) & 0x1f) == ((u.i[1] >> 21) & 0x1f));
+           u.i[1] = (u.i[1] & ~0xffff) | ((label >> 32) & 0xffff);
+           /* not fully validating SLDI */
+           assert((u.i[2] & 0xfc000000) >> 26 == 30);  /* SLDI */
+           assert(((u.i[2] >> 16) & 0x1f) == ((u.i[2] >> 21) & 0x1f));
+           assert((u.i[3] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[3] >> 16) & 0x1f) == ((u.i[3] >> 21) & 0x1f));
+           u.i[3] = (u.i[3] & ~0xffff) | ((label >> 16) & 0xffff);
+           /* not fully validating SLDI */
+           assert((u.i[4] & 0xfc000000) >> 26 == 30);  /* SLDI */
+           assert(((u.i[4] >> 16) & 0x1f) == ((u.i[4] >> 21) & 0x1f));
+           assert((u.i[5] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[5] >> 16) & 0x1f) == ((u.i[5] >> 21) & 0x1f));
+           u.i[5] = (u.i[5] & ~0xffff) | (label & 0xffff);
+#endif
+           break;
+       default:
+           assert(!"unhandled branch opcode");
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c
new file mode 100644 (file)
index 0000000..1e84f8e
--- /dev/null
@@ -0,0 +1,1182 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* A-form emitter wrappers: FA passes 0 and FA_ passes 1 as the final
+ * argument of _FA, which is OR'ed into bit 0 of the instruction word. */
+#define FA(o,d,a,b,c,x)                        _FA(_jit,o,d,a,b,c,x,0)
+#define FA_(o,d,a,b,c,x)               _FA(_jit,o,d,a,b,c,x,1)
+static void _FA(jit_state_t*,int,int,int,int,int,int,int);
+/* XFL-form emitter wrappers (used by MTFSF); the trailing underscore
+ * variant again passes 1 as the final (bit 0) argument. */
+#define FXFL(o,m,b,x)                  _FXFL(_jit,o,m,b,x,0)
+#define FXFL_(o,m,b,x)                 _FXFL(_jit,o,m,b,x,1)
+static void _FXFL(jit_state_t*,int,int,int,int,int) maybe_unused;
+/*
+ * PowerPC floating point opcode table.
+ *
+ * Each macro expands to one of the instruction-form emitters (FA, FX, FC,
+ * FDs, FXL, FXFL) with the primary opcode and extended opcode filled in.
+ * A name with a trailing underscore is the "record" variant: it must go
+ * through the underscore emitter (FA_, FX_, FXFL_) so that the low bit of
+ * the instruction word is set.  FMADD_, FMADDS_, FMSUB_, FMSUBS_, FSUB_
+ * and FSUBS_ used to expand to plain FA, which made them encode exactly
+ * the same instruction as their non-record counterparts.
+ */
+#  define FABS(d,b)                    FX(63,d,0,b,264)
+#  define FABS_(d,b)                   FX_(63,d,0,b,264)
+#  define FADD(d,a,b)                  FA(63,d,a,b,0,21)
+#  define FADD_(d,a,b)                 FA_(63,d,a,b,0,21)
+#  define FADDS(d,a,b)                 FA(59,d,a,b,0,21)
+#  define FADDS_(d,a,b)                        FA_(59,d,a,b,0,21)
+#  define FCFID(d,b)                   FX(63,d,0,b,846)
+#  define FCMPO(cr,a,b)                        FC(63,cr,0,a,b,32)
+#  define FCMPU(cr,a,b)                        FC(63,cr,0,a,b,0)
+#  define FCTIW(d,b)                   FX(63,d,0,b,14)
+#  define FCTIW_(d,b)                  FX_(63,d,0,b,14)
+#  define FCTIWZ(d,b)                  FX(63,d,0,b,15)
+#  define FCTIWZ_(d,b)                 FX_(63,d,0,b,15)
+#  define FCTID(d,b)                   FX(63,d,0,b,814)
+#  define FCTID_(d,b)                  FX_(63,d,0,b,814)
+#  define FCTIDZ(d,b)                  FX(63,d,0,b,815)
+#  define FCTIDZ_(d,b)                 FX_(63,d,0,b,815)
+#  define FDIV(d,a,b)                  FA(63,d,a,b,0,18)
+#  define FDIV_(d,a,b)                 FA_(63,d,a,b,0,18)
+#  define FDIVS(d,a,b)                 FA(59,d,a,b,0,18)
+#  define FDIVS_(d,a,b)                        FA_(59,d,a,b,0,18)
+#  define FMADD(d,a,b,c)               FA(63,d,a,b,c,29)
+#  define FMADD_(d,a,b,c)              FA_(63,d,a,b,c,29)
+#  define FMADDS(d,a,b,c)              FA(59,d,a,b,c,29)
+#  define FMADDS_(d,a,b,c)             FA_(59,d,a,b,c,29)
+#  define FMR(d,b)                     FX(63,d,0,b,72)
+#  define FMR_(d,b)                    FX_(63,d,0,b,72)
+#  define FMSUB(d,a,b,c)               FA(63,d,a,b,c,28)
+#  define FMSUB_(d,a,b,c)              FA_(63,d,a,b,c,28)
+#  define FMSUBS(d,a,b,c)              FA(59,d,a,b,c,28)
+#  define FMSUBS_(d,a,b,c)             FA_(59,d,a,b,c,28)
+#  define FMUL(d,a,c)                  FA(63,d,a,0,c,25)
+#  define FMUL_(d,a,c)                 FA_(63,d,a,0,c,25)
+#  define FMULS(d,a,c)                 FA(59,d,a,0,c,25)
+#  define FMULS_(d,a,c)                        FA_(59,d,a,0,c,25)
+#  define FNABS(d,b)                   FX(63,d,0,b,136)
+#  define FNABS_(d,b)                  FX_(63,d,0,b,136)
+#  define FNEG(d,b)                    FX(63,d,0,b,40)
+#  define FNEG_(d,b)                   FX_(63,d,0,b,40)
+#  define FNMADD(d,a,b,c)              FA(63,d,a,b,c,31)
+#  define FNMADD_(d,a,b,c)             FA_(63,d,a,b,c,31)
+#  define FNMADDS(d,a,b,c)             FA(59,d,a,b,c,31)
+#  define FNMADDS_(d,a,b,c)            FA_(59,d,a,b,c,31)
+#  define FNMSUB(d,a,b,c)              FA(63,d,a,b,c,30)
+#  define FNMSUB_(d,a,b,c)             FA_(63,d,a,b,c,30)
+#  define FNMSUBS(d,a,b,c)             FA(59,d,a,b,c,30)
+#  define FNMSUBS_(d,a,b,c)            FA_(59,d,a,b,c,30)
+#  define FRES(d,b)                    FA(59,d,0,b,0,24)
+#  define FRES_(d,b)                   FA_(59,d,0,b,0,24)
+#  define FRSP(d,b)                    FA(63,d,0,b,0,12)
+#  define FRSP_(d,b)                   FA_(63,d,0,b,0,12)
+#  define FRSQTRE(d,b)                 FA(63,d,0,b,0,26)
+#  define FRSQTRE_(d,b)                        FA_(63,d,0,b,0,26)
+#  define FSEL(d,a,b,c)                        FA(63,d,a,b,c,23)
+#  define FSEL_(d,a,b,c)               FA_(63,d,a,b,c,23)
+#  define FSQRT(d,b)                   FA(63,d,0,b,0,22)
+#  define FSQRT_(d,b)                  FA_(63,d,0,b,0,22)
+#  define FSQRTS(d,b)                  FA(59,d,0,b,0,22)
+#  define FSQRTS_(d,b)                 FA_(59,d,0,b,0,22)
+#  define FSUB(d,a,b)                  FA(63,d,a,b,0,20)
+#  define FSUB_(d,a,b)                 FA_(63,d,a,b,0,20)
+#  define FSUBS(d,a,b)                 FA(59,d,a,b,0,20)
+#  define FSUBS_(d,a,b)                        FA_(59,d,a,b,0,20)
+#  define LFD(d,a,s)                   FDs(50,d,a,s)
+#  define LFDU(d,a,s)                  FDs(51,d,a,s)
+#  define LFDUX(d,a,b)                 FX(31,d,a,b,631)
+#  define LFDX(d,a,b)                  FX(31,d,a,b,599)
+#  define LFS(d,a,s)                   FDs(48,d,a,s)
+#  define LFSU(d,a,s)                  FDs(49,d,a,s)
+#  define LFSUX(d,a,b)                 FX(31,d,a,b,567)
+#  define LFSX(d,a,b)                  FX(31,d,a,b,535)
+/* The shifted operands are parenthesised so that an expression argument
+ * (e.g. one using '|') cannot regroup around the shift. */
+#  define MCRFS(d,s)                   FXL(63,(d)<<2,(s)<<2,64)
+#  define MFFS(d)                      FX(63,d,0,0,583)
+#  define MFFS_(d)                     FX_(63,d,0,0,583)
+#  define MTFSB0(d)                    FX(63,d,0,0,70)
+#  define MTFSB0_(d)                   FX_(63,d,0,0,70)
+#  define MTFSB1(d)                    FX(63,d,0,0,38)
+#  define MTFSB1_(d)                   FX_(63,d,0,0,38)
+#  define MTFSF(m,b)                   FXFL(63,m,b,711)
+#  define MTFSF_(m,b)                  FXFL_(63,m,b,711)
+#  define MTFSFI(d,i)                  FX(63,(d)<<2,0,(i)<<1,134)
+#  define MTFSFI_(d,i)                 FX_(63,(d)<<2,0,(i)<<1,134)
+#  define STFD(s,a,d)                  FDs(54,s,a,d)
+#  define STFDU(s,a,d)                 FDs(55,s,a,d)
+#  define STFDUX(s,a,b)                        FX(31,s,a,b,759)
+#  define STFDX(s,a,b)                 FX(31,s,a,b,727)
+#  define STFIWX(s,a,b)                        FX(31,s,a,b,983)
+#  define STFS(s,a,d)                  FDs(52,s,a,d)
+#  define STFSU(s,a,d)                 FDs(53,s,a,d)
+#  define STFSUX(s,a,b)                        FX(31,s,a,b,695)
+#  define STFSX(s,a,b)                 FX(31,s,a,b,663)
+/* Register moves, immediate loads and int<->float conversions.
+ * The *_f register forms are aliases of the *_d ones; the immediate forms
+ * receive a pointer to the constant rather than the value itself. */
+#  define movr_f(r0,r1)                        movr_d(r0,r1)
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  define extr_f(r0,r1)                        extr_d(r0,r1)
+#  define extr_d(r0,r1)                        _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_f(r0,r1)              truncr_d(r0,r1)
+#  define truncr_f_i(r0,r1)            truncr_d_i(r0,r1)
+#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+/* The unsuffixed truncr_d maps to the word-sized variant: _i on 32 bit
+ * builds, _l (only declared for 64 bit) otherwise. */
+#  if __WORDSIZE == 32
+#    define truncr_d(r0,r1)            truncr_d_i(r0,r1)
+#  else
+#    define truncr_d(r0,r1)            truncr_d_l(r0,r1)
+#    define truncr_f_l(r0,r1)          truncr_d_l(r0,r1)
+#    define truncr_d_l(r0,r1)          _truncr_d_l(_jit,r0,r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+/* double -> float is a single FRSP; float -> double is a plain move. */
+#  define extr_d_f(r0,r1)              FRSP(r0,r1)
+#  define extr_f_d(r0,r1)              movr_d(r0,r1)
+/* Unary and binary arithmetic.
+ * Register forms expand directly to one opcode macro (single precision
+ * uses the S-suffixed opcode where one is listed); immediate forms are
+ * functions taking a pointer to the constant operand. */
+#  define absr_f(r0,r1)                        absr_d(r0,r1)
+#  define absr_d(r0,r1)                        FABS(r0,r1)
+#  define negr_f(r0,r1)                        negr_d(r0,r1)
+#  define negr_d(r0,r1)                        FNEG(r0,r1)
+#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1)
+#  define sqrtr_d(r0,r1)               FSQRT(r0,r1)
+#  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2)
+#  define addr_d(r0,r1,r2)             FADD(r0,r1,r2)
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define subr_f(r0,r1,r2)             FSUBS(r0,r1,r2)
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define subr_d(r0,r1,r2)             FSUB(r0,r1,r2)
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Reverse subtract: same as subr with the two source operands swapped. */
+#  define rsbr_f(r0,r1,r2)             subr_f(r0,r2,r1)
+#  define rsbi_f(r0,r1,i0)             _rsbi_f(_jit,r0,r1,i0)
+static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define rsbr_d(r0,r1,r2)             subr_d(r0,r2,r1)
+#  define rsbi_d(r0,r1,i0)             _rsbi_d(_jit,r0,r1,i0)
+static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define mulr_f(r0,r1,r2)             FMULS(r0,r1,r2)
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define mulr_d(r0,r1,r2)             FMUL(r0,r1,r2)
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define divr_f(r0,r1,r2)             FDIVS(r0,r1,r2)
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define divr_d(r0,r1,r2)             FDIV(r0,r1,r2)
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Comparisons producing a value in r0.
+ * For every predicate the single precision register form is an alias of
+ * the double precision one, so only the _d register function exists; the
+ * immediate forms exist for both widths and take a pointer to the
+ * constant.  The "un" prefixed names are the variants that also hold for
+ * unordered operands. */
+#  define ltr_f(r0,r1,r2)              ltr_d(r0,r1,r2)
+#  define ltr_d(r0,r1,r2)              _ltr_d(_jit,r0,r1,r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_f(r0,r1,i0)              _lti_f(_jit,r0,r1,i0)
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lti_d(r0,r1,i0)              _lti_d(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ler_f(r0,r1,r2)              ler_d(r0,r1,r2)
+#  define ler_d(r0,r1,r2)              _ler_d(_jit,r0,r1,r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_f(r0,r1,i0)              _lei_f(_jit,r0,r1,i0)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lei_d(r0,r1,i0)              _lei_d(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define eqr_f(r0,r1,r2)              eqr_d(r0,r1,r2)
+#  define eqr_d(r0,r1,r2)              _eqr_d(_jit,r0,r1,r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi_f(r0,r1,i0)              _eqi_f(_jit,r0,r1,i0)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define eqi_d(r0,r1,i0)              _eqi_d(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ger_f(r0,r1,r2)              ger_d(r0,r1,r2)
+#  define ger_d(r0,r1,r2)              _ger_d(_jit,r0,r1,r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_f(r0,r1,i0)              _gei_f(_jit,r0,r1,i0)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gei_d(r0,r1,i0)              _gei_d(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define gtr_f(r0,r1,r2)              gtr_d(r0,r1,r2)
+#  define gtr_d(r0,r1,r2)              _gtr_d(_jit,r0,r1,r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_f(r0,r1,i0)              _gti_f(_jit,r0,r1,i0)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gti_d(r0,r1,i0)              _gti_d(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ner_f(r0,r1,r2)              ner_d(r0,r1,r2)
+#  define ner_d(r0,r1,r2)              _ner_d(_jit,r0,r1,r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei_f(r0,r1,i0)              _nei_f(_jit,r0,r1,i0)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define nei_d(r0,r1,i0)              _nei_d(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unltr_f(r0,r1,r2)            unltr_d(r0,r1,r2)
+#  define unltr_d(r0,r1,r2)            _unltr_d(_jit,r0,r1,r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlti_f(r0,r1,i0)            _unlti_f(_jit,r0,r1,i0)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlti_d(r0,r1,i0)            _unlti_d(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unler_f(r0,r1,r2)            unler_d(r0,r1,r2)
+#  define unler_d(r0,r1,r2)            _unler_d(_jit,r0,r1,r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlei_f(r0,r1,i0)            _unlei_f(_jit,r0,r1,i0)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlei_d(r0,r1,i0)            _unlei_d(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define uneqr_f(r0,r1,r2)            uneqr_d(r0,r1,r2)
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unger_f(r0,r1,r2)            unger_d(r0,r1,r2)
+#  define unger_d(r0,r1,r2)            _unger_d(_jit,r0,r1,r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungei_f(r0,r1,i0)            _ungei_f(_jit,r0,r1,i0)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungei_d(r0,r1,i0)            _ungei_d(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ungtr_f(r0,r1,r2)            ungtr_d(r0,r1,r2)
+#  define ungtr_d(r0,r1,r2)            _ungtr_d(_jit,r0,r1,r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungti_f(r0,r1,i0)            _ungti_f(_jit,r0,r1,i0)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungti_d(r0,r1,i0)            _ungti_d(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltgtr_f(r0,r1,r2)            ltgtr_d(r0,r1,r2)
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ordr_f(r0,r1,r2)             ordr_d(r0,r1,r2)
+#  define ordr_d(r0,r1,r2)             _ordr_d(_jit,r0,r1,r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ordi_f(r0,r1,i0)             _ordi_f(_jit,r0,r1,i0)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ordi_d(r0,r1,i0)             _ordi_d(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unordr_f(r0,r1,r2)           unordr_d(r0,r1,r2)
+#  define unordr_d(r0,r1,r2)           _unordr_d(_jit,r0,r1,r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unordi_f(r0,r1,i0)           _unordi_f(_jit,r0,r1,i0)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unordi_d(r0,r1,i0)           _unordi_d(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Compare-and-branch forms.
+ * i0 is the branch operand (a jit_word_t), r0/r1 the compared registers;
+ * immediate forms take a pointer to the constant as the last argument.
+ * As with the value-producing comparisons, the _f register forms alias
+ * the _d ones.  Every function returns a jit_word_t -- presumably the
+ * location of the emitted branch so it can be patched later; confirm
+ * against the definitions. */
+#  define bltr_f(i0,r0,r1)             bltr_d(i0,r0,r1)
+#  define bltr_d(i0,r0,r1)             _bltr_d(_jit,i0,r0,r1)
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_f(i0,r0,i1)             _blti_f(_jit,i0,r0,i1)
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blti_d(i0,r0,i1)             _blti_d(_jit,i0,r0,i1)
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bler_f(i0,r0,r1)             bler_d(i0,r0,r1)
+#  define bler_d(i0,r0,r1)             _bler_d(_jit,i0,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_f(i0,r0,i1)             _blei_f(_jit,i0,r0,i1)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blei_d(i0,r0,i1)             _blei_d(_jit,i0,r0,i1)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define beqr_f(i0,r0,r1)             beqr_d(i0,r0,r1)
+#  define beqr_d(i0,r0,r1)             _beqr_d(_jit,i0,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_f(i0,r0,i1)             _beqi_f(_jit,i0,r0,i1)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define beqi_d(i0,r0,i1)             _beqi_d(_jit,i0,r0,i1)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bger_f(i0,r0,r1)             bger_d(i0,r0,r1)
+#  define bger_d(i0,r0,r1)             _bger_d(_jit,i0,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_f(i0,r0,i1)             _bgei_f(_jit,i0,r0,i1)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgei_d(i0,r0,i1)             _bgei_d(_jit,i0,r0,i1)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bgtr_f(i0,r0,r1)             bgtr_d(i0,r0,r1)
+#  define bgtr_d(i0,r0,r1)             _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_f(i0,r0,i1)             _bgti_f(_jit,i0,r0,i1)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgti_d(i0,r0,i1)             _bgti_d(_jit,i0,r0,i1)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bner_f(i0,r0,r1)             bner_d(i0,r0,r1)
+#  define bner_d(i0,r0,r1)             _bner_d(_jit,i0,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_f(i0,r0,i1)             _bnei_f(_jit,i0,r0,i1)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bnei_d(i0,r0,i1)             _bnei_d(_jit,i0,r0,i1)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunltr_f(i0,r0,r1)           bunltr_d(i0,r0,r1)
+#  define bunltr_d(i0,r0,r1)           _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_f(i0,r0,i1)           _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlti_d(i0,r0,i1)           _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunler_f(i0,r0,r1)           bunler_d(i0,r0,r1)
+#  define bunler_d(i0,r0,r1)           _bunler_d(_jit,i0,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_f(i0,r0,i1)           _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlei_d(i0,r0,i1)           _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define buneqr_f(i0,r0,r1)           buneqr_d(i0,r0,r1)
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunger_f(i0,r0,r1)           bunger_d(i0,r0,r1)
+#  define bunger_d(i0,r0,r1)           _bunger_d(_jit,i0,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_f(i0,r0,i1)           _bungei_f(_jit,i0,r0,i1)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungei_d(i0,r0,i1)           _bungei_d(_jit,i0,r0,i1)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bungtr_f(i0,r0,r1)           bungtr_d(i0,r0,r1)
+#  define bungtr_d(i0,r0,r1)           _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_f(i0,r0,i1)           _bungti_f(_jit,i0,r0,i1)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungti_d(i0,r0,i1)           _bungti_d(_jit,i0,r0,i1)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bltgtr_f(i0,r0,r1)           bltgtr_d(i0,r0,r1)
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bordr_f(i0,r0,r1)            bordr_d(i0,r0,r1)
+#  define bordr_d(i0,r0,r1)            _bordr_d(_jit,i0,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_f(i0,r0,i1)            _bordi_f(_jit,i0,r0,i1)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bordi_d(i0,r0,i1)            _bordi_d(_jit,i0,r0,i1)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunordr_f(i0,r0,r1)          bunordr_d(i0,r0,r1)
+#  define bunordr_d(i0,r0,r1)          _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_f(i0,r0,i1)          _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunordi_d(i0,r0,i1)          _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+/* Floating point loads and stores.
+ * ldr/str use the indexed opcode with _R0_REGNO in the base slot and the
+ * address register in the index slot (NOTE(review): this relies on a base
+ * field of 0 reading as zero rather than as r0 -- confirm _R0_REGNO is 0).
+ * Note the argument order: loads are (dest, address...), stores are
+ * (address..., source). */
+#  define ldr_f(r0,r1)                 LFSX(r0, _R0_REGNO, r1)
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 STFSX(r1, _R0_REGNO, r0)
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define ldr_d(r0,r1)                 LFDX(r0, _R0_REGNO, r1)
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_d(r0,r1)                 STFDX(r1, _R0_REGNO, r0)
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#endif
+
+#if CODE
+#  define _u16(v)                      ((v) & 0xffff)
+/*
+ * Emit one A-form instruction word.
+ *
+ *   o  primary opcode, 6 bits (placed at bit 26)
+ *   d  5 bit field at bit 21
+ *   a  5 bit field at bit 16
+ *   b  5 bit field at bit 11
+ *   c  5 bit field at bit 6
+ *   x  extended opcode, 5 bits at bit 1
+ *   r  single bit at bit 0 (1 for the FA_ "record" wrappers)
+ *
+ * Every field is range checked with assert().  The word is assembled in
+ * unsigned arithmetic: the primary opcodes used here (59, 63, ...) have
+ * bit 5 set, and shifting such a value left by 26 as a signed int would
+ * overflow into the sign bit, which is undefined behaviour in C.
+ */
+static void
+_FA(jit_state_t *_jit, int o, int d, int a, int b, int c, int x, int r)
+{
+    unsigned int       insn;
+
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(c & ~((1 << 5) - 1)));
+    assert(!(x & ~((1 << 5) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    insn = ((unsigned int)o << 26) | ((unsigned int)d << 21) |
+          ((unsigned int)a << 16) | ((unsigned int)b << 11) |
+          ((unsigned int)c << 6)  | ((unsigned int)x << 1)  |
+          (unsigned int)r;
+    ii(insn);
+}
+
+/*
+ * Emit one XFL-form instruction word (used by MTFSF).
+ *
+ *   o  primary opcode, 6 bits (placed at bit 26)
+ *   m  8 bit field mask at bit 17
+ *   b  5 bit register field at bit 11
+ *   x  extended opcode, 10 bits at bit 1
+ *   r  single bit at bit 0 (1 for the FXFL_ wrapper)
+ *
+ * Fields are range checked with assert().  The word is built in unsigned
+ * arithmetic because a primary opcode with bit 5 set (63 here) shifted
+ * left by 26 does not fit in a signed int.
+ */
+static void
+_FXFL(jit_state_t *_jit, int o, int m, int b, int x, int r)
+{
+    unsigned int       insn;
+
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(m & ~((1 <<  8) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(r & ~((1 <<  1) - 1)));
+    insn = ((unsigned int)o << 26) | ((unsigned int)m << 17) |
+          ((unsigned int)b << 11) | ((unsigned int)x << 1)  |
+          (unsigned int)r;
+    ii(insn);
+}
+
+/* Copy floating point register r1 into r0; emits nothing when both
+ * operands name the same register. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMR(r0, r1);
+}
+
+/*
+ * Load the single precision constant *i0 into floating point register r0.
+ *
+ * Normally the value is loaded straight from the address i0.  When
+ * _jitc->no_data is set the bit pattern is instead moved into a scratch
+ * integer register, stored to the 4 byte slot at alloca_offset - 4 off
+ * the frame pointer, and reloaded from there as a float.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_f(r0, (jit_word_t)i0);
+       return;
+    }
+
+    /* reinterpret the float as its 32 bit pattern */
+    bits.f = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i & 0xffffffff);
+    stxi_i(alloca_offset - 4, _FP_REGNO, rn(tmp));
+    jit_unget_reg(tmp);
+    ldxi_f(r0, _FP_REGNO, alloca_offset - 4);
+}
+
+/*
+ * Load the double precision constant *i0 into floating point register r0.
+ *
+ * Normally the value is loaded straight from the address i0.  When
+ * _jitc->no_data is set the 8 byte pattern is written to the scratch
+ * area at alloca_offset - 8 off the frame pointer through an integer
+ * register (two word stores on 32 bit, one on 64 bit) and then reloaded
+ * as a double.
+ */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_int32_t      i[2];
+       jit_word_t       w;
+       jit_float64_t    d;
+    } bits;
+    jit_int32_t                 tmp;
+
+    if (!_jitc->no_data) {
+       ldi_d(r0, (jit_word_t)i0);
+       return;
+    }
+
+    bits.d = *i0;
+    tmp = jit_get_reg(jit_class_gpr);
+#  if __WORDSIZE == 32
+    /* the two halves keep their in-memory order */
+    movi(rn(tmp), bits.i[0]);
+    stxi(alloca_offset - 8, _FP_REGNO, rn(tmp));
+    movi(rn(tmp), bits.i[1]);
+    stxi(alloca_offset - 4, _FP_REGNO, rn(tmp));
+#  else
+    movi(rn(tmp), bits.w);
+    stxi(alloca_offset - 8, _FP_REGNO, rn(tmp));
+#  endif
+    jit_unget_reg(tmp);
+    ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
+}
+
+/*
+ * Convert the integer in r1 to a double in floating point register r0.
+ *
+ * The integer is written to the reserved 8 byte scratch area as a 64 bit
+ * value, reloaded into r0 as a double and converted in place with FCFID.
+ * On 32 bit builds the upper word is produced by shifting r1 right by 31
+ * and the lower word is r1 itself.
+ *
+ * Should only work on newer ppc (fcfid is a ppc64 instruction).
+ */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#  if __WORDSIZE == 32
+    jit_int32_t                sign = jit_get_reg(jit_class_gpr);
+
+    rshi(rn(sign), r1, 31);
+    /* low word first, then the sign word, both in the reserved area */
+    stxi(alloca_offset - 4, _FP_REGNO, r1);
+    stxi(alloca_offset - 8, _FP_REGNO, rn(sign));
+    jit_unget_reg(sign);
+#  else
+    stxi(alloca_offset - 8, _FP_REGNO, r1);
+#  endif
+    ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
+    FCFID(r0, r0);
+}
+
+/*
+ * Truncate the double in r1 towards zero and leave the 32 bit integer
+ * result in general purpose register r0.
+ *
+ * FCTIWZ produces the integer in the low-order word of a scratch floating
+ * point register; that register is stored as 8 bytes to the reserved
+ * scratch area and the low-order word is read back with an integer load.
+ * Where that word lives depends on byte order: the second word (offset
+ * +4) on big-endian, the first word (offset +0) on little-endian.  The
+ * little-endian offset is only selected when the compiler positively
+ * reports little-endian; otherwise the original big-endian offset is
+ * used.
+ */
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    FCTIWZ(rn(reg), r1);
+    /* use reserved 8 bytes area */
+    stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg));
+#  if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+    ldxi_i(r0, _FP_REGNO, alloca_offset - 8);
+#  else
+    ldxi_i(r0, _FP_REGNO, alloca_offset - 4);
+#  endif
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * 64 bit only: truncate the double in r1 towards zero and leave the 64 bit
+ * integer result in general purpose register r0.  FCTIDZ converts into a
+ * scratch floating point register, which is bounced through the reserved
+ * 8 byte scratch area to reach the integer register file.
+ */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_fpr);
+
+    FCTIDZ(rn(tmp), r1);
+    stxi_d(alloca_offset - 8, _FP_REGNO, rn(tmp));
+    ldxi(r0, _FP_REGNO, alloca_offset - 8);
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/*
+ * fpr_opi(name, type, size) defines _<name>i_<type>(): the immediate
+ * form of a three operand floating point operation.  It takes a scratch
+ * floating point register, loads the constant *i0 into it with
+ * movi_<type>, performs <name>r_<type>(r0, r1, scratch) and releases the
+ * scratch register.
+ */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                        \
+}
+/*
+ * fpr_bopi(name, type, size) defines _b<name>i_<type>(): the immediate
+ * form of a compare-and-branch.  The constant *i1 is loaded into a
+ * scratch floating point register requested with jit_class_nospill, the
+ * register form b<name>r_<type>(i0, r0, scratch) is emitted, and its
+ * return value is passed back to the caller.
+ */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
+                                         jit_class_nospill);           \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+/* Shorthands selecting the float (f, 32) or double (d, 64) flavour. */
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+
+/* Immediate forms of the arithmetic operations: _addi_f, _addi_d, ... */
+fopi(add)
+dopi(add)
+fopi(sub)
+dopi(sub)
+fopi(rsb)
+dopi(rsb)
+fopi(mul)
+dopi(mul)
+fopi(div)
+dopi(div)
+
+/* r0 = (r1 < r2): ordered compare into CR_0, copy the condition register
+ * to r0, then extract the single CR_LT bit.  Unordered operands leave LT
+ * clear, so the result is 0. */
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+/* immediate forms _lti_f / _lti_d */
+fopi(lt)
+dopi(lt)
+
+/* r0 = (r1 <= r2): after the ordered compare, GT is replaced by
+ * GT eqv UN, which is 1 only when neither "greater" nor "unordered" was
+ * reported; that bit is then extracted into r0. */
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    CREQV(CR_GT, CR_GT, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+/* immediate forms _lei_f / _lei_d */
+fopi(le)
+dopi(le)
+
+/* r0 = (r1 == r2): ordered compare into CR_0, then extract the CR_EQ bit
+ * from a copy of the condition register. */
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+/* immediate forms _eqi_f / _eqi_d */
+fopi(eq)
+dopi(eq)
+
+/* r0 = (r1 >= r2): after the ordered compare, LT is replaced by
+ * LT eqv UN, which is 1 only when neither "less" nor "unordered" was
+ * reported; that bit is then extracted into r0. */
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    CREQV(CR_LT, CR_LT, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+/* immediate forms _gei_f / _gei_d */
+fopi(ge)
+dopi(ge)
+
+/* r0 = (r1 > r2): ordered compare into CR_0, then extract the CR_GT bit
+ * from a copy of the condition register. */
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+/* immediate forms _gti_f / _gti_d */
+fopi(gt)
+dopi(gt)
+
+/* r0 = (r1 != r2): the EQ bit is inverted after the compare and then
+ * extracted.  Since unordered operands leave EQ clear, the result is 1
+ * for them as well. */
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);
+    CRNOT(CR_EQ, CR_EQ);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+/* immediate forms _nei_f / _nei_d */
+fopi(ne)
+dopi(ne)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 is less than FPR r2 or
+ * the operands are unordered, else 0. */
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* Fold the "unordered" bit into LT before extracting it. */
+    CROR(CR_LT, CR_LT, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+/* Instantiate the fopi/dopi helpers for "unlt". */
+fopi(unlt)
+dopi(unlt)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 is less than or equal to
+ * FPR r2 or the operands are unordered, else 0. */
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* "Not greater than" covers less, equal and unordered. */
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+/* Instantiate the fopi/dopi helpers for "unle". */
+fopi(unle)
+dopi(unle)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 equals FPR r2 or the
+ * operands are unordered, else 0. */
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* Fold the "unordered" bit into EQ before extracting it. */
+    CROR(CR_EQ, CR_EQ, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+/* Instantiate the fopi/dopi helpers for "uneq". */
+fopi(uneq)
+dopi(uneq)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 is greater than or equal
+ * to FPR r2 or the operands are unordered, else 0. */
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* "Not less than" covers greater, equal and unordered. */
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+/* Instantiate the fopi/dopi helpers for "unge". */
+fopi(unge)
+dopi(unge)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 is greater than FPR r2 or
+ * the operands are unordered, else 0. */
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* Fold the "unordered" bit into GT before extracting it. */
+    CROR(CR_GT, CR_GT, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+/* Instantiate the fopi/dopi helpers for "ungt". */
+fopi(ungt)
+dopi(ungt)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 is either less than or
+ * greater than FPR r2, else 0 (0 when equal or unordered). */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* Merge LT into GT so one bit carries "less or greater". */
+    CROR(CR_GT, CR_GT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+/* Instantiate the fopi/dopi helpers for "ltgt". */
+fopi(ltgt)
+dopi(ltgt)
+
+/* Emit code that sets GPR r0 to 1 when FPR r1 and FPR r2 are ordered
+ * (the compare did not report "unordered"), else 0. */
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    /* Invert UN in place so the extracted bit means "ordered". */
+    CRNOT(CR_UN, CR_UN);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_UN);
+}
+/* Instantiate the fopi/dopi helpers for "ord". */
+fopi(ord)
+dopi(ord)
+
+/* Emit code that sets GPR r0 to 1 when the compare of FPR r1 and FPR r2
+ * reports "unordered", else 0. */
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_UN);
+}
+/* Instantiate the fopi/dopi helpers for "unord". */
+fopi(unord)
+dopi(unord)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 < r1.  Returns the code address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BLT(disp);
+    return (branch_at);
+}
+fbopi(lt)
+dbopi(lt)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 <= r1 (not taken when unordered).  Returns the code
+ * address at which the branch instruction was emitted. */
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* GT := !(GT ^ UN), i.e. neither greater than nor unordered. */
+    CREQV(CR_GT, CR_GT, CR_UN);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BGT(disp);
+    return (branch_at);
+}
+fbopi(le)
+dbopi(le)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 == r1.  Returns the code address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BEQ(disp);
+    return (branch_at);
+}
+fbopi(eq)
+dbopi(eq)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 >= r1 (not taken when unordered).  Returns the code
+ * address at which the branch instruction was emitted. */
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* LT := !(LT ^ UN), i.e. neither less than nor unordered. */
+    CREQV(CR_LT, CR_LT, CR_UN);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BLT(disp);
+    return (branch_at);
+}
+fbopi(ge)
+dbopi(ge)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 > r1.  Returns the code address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BGT(disp);
+    return (branch_at);
+}
+fbopi(gt)
+dbopi(gt)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when the "equal" bit is clear, so unordered operands also
+ * branch.  Returns the code address at which the branch instruction
+ * was emitted. */
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPO(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BNE(disp);
+    return (branch_at);
+}
+fbopi(ne)
+dbopi(ne)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 < r1 or the operands are unordered.  Returns the code
+ * address at which the branch instruction was emitted. */
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* Fold "unordered" into LT so a single bit can be tested. */
+    CROR(CR_LT, CR_LT, CR_UN);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BLT(disp);
+    return (branch_at);
+}
+fbopi(unlt)
+dbopi(unlt)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 is not greater than r1 (less, equal or unordered).
+ * Returns the code address at which the branch instruction was emitted. */
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BLE(disp);
+    return (branch_at);
+}
+fbopi(unle)
+dbopi(unle)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 == r1 or the operands are unordered.  Returns the
+ * code address at which the branch instruction was emitted. */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* Fold "unordered" into EQ so a single bit can be tested. */
+    CROR(CR_EQ, CR_EQ, CR_UN);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BEQ(disp);
+    return (branch_at);
+}
+fbopi(uneq)
+dbopi(uneq)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 is not less than r1 (greater, equal or unordered).
+ * Returns the code address at which the branch instruction was emitted. */
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BGE(disp);
+    return (branch_at);
+}
+fbopi(unge)
+dbopi(unge)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 > r1 or the operands are unordered.  Returns the code
+ * address at which the branch instruction was emitted. */
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* Fold "unordered" into GT so a single bit can be tested. */
+    CROR(CR_GT, CR_GT, CR_UN);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BGT(disp);
+    return (branch_at);
+}
+fbopi(ungt)
+dbopi(ungt)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when r0 is less than or greater than r1 (not taken when
+ * equal or unordered).  Returns the code address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* Reuse the EQ bit to hold (LT | GT), then branch on it. */
+    CROR(CR_EQ, CR_LT, CR_GT);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BEQ(disp);
+    return (branch_at);
+}
+fbopi(ltgt)
+dbopi(ltgt)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when the operands are ordered ("unordered" bit clear).
+ * Returns the code address at which the branch instruction was emitted. */
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BNU(disp);
+    return (branch_at);
+}
+fbopi(ord)
+dbopi(ord)
+
+/* Emit a compare of FPR r0 with FPR r1 followed by a branch to i0 that
+ * is taken when the operands are unordered.  Returns the code address
+ * at which the branch instruction was emitted. */
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_at;
+    jit_word_t         disp;
+    FCMPU(CR_0, r0, r1);
+    /* The displacement is relative to the branch instruction itself. */
+    branch_at = _jit->pc.w;
+    disp = (i0 - branch_at) & ~3;
+    BUN(disp);
+    return (branch_at);
+}
+fbopi(unord)
+dbopi(unord)
+
+/* Emit a load of the float at absolute address i0 into FPR r0.
+ * Three encodings are used depending on how many bits i0 needs. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         held_r0;
+    jit_int32_t                scratch;
+    jit_word_t         lower, upper;
+    /* Fits the 16-bit displacement: load with r0 in the base slot. */
+    if (can_sign_extend_short_p(i0)) {
+       LFS(r0, _R0_REGNO, i0);
+       return;
+    }
+    /* Too wide for LIS + displacement: materialize the whole address. */
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       ldr_f(r0, rn(scratch));
+       jit_unget_reg(scratch);
+       return;
+    }
+    /* Split into a high half for LIS and a signed low displacement;
+     * the high half is bumped when the low half is negative. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+    scratch = jit_get_reg(jit_class_gpr);
+    /* r0 cannot serve as the base register here; keep it reserved and
+     * ask for another one. */
+    held_r0 = scratch == _R0;
+    if (held_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    LFS(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (held_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Emit a load of the double at absolute address i0 into FPR r0.
+ * Three encodings are used depending on how many bits i0 needs. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         held_r0;
+    jit_int32_t                scratch;
+    jit_word_t         lower, upper;
+    /* Fits the 16-bit displacement: load with r0 in the base slot. */
+    if (can_sign_extend_short_p(i0)) {
+       LFD(r0, _R0_REGNO, i0);
+       return;
+    }
+    /* Too wide for LIS + displacement: materialize the whole address. */
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       ldr_d(r0, rn(scratch));
+       jit_unget_reg(scratch);
+       return;
+    }
+    /* Split into a high half for LIS and a signed low displacement;
+     * the high half is bumped when the low half is negative. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+    scratch = jit_get_reg(jit_class_gpr);
+    /* r0 cannot serve as the base register here; keep it reserved and
+     * ask for another one. */
+    held_r0 = scratch == _R0;
+    if (held_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    LFD(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (held_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Emit an indexed load of the float at address (r1 + r2) into FPR r0.
+ * r0 in the base slot of an indexed load is read as zero, so the
+ * operands are swapped or copied when r1 is r0. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    /* Common case: the base is not r0. */
+    if (r1 != _R0_REGNO) {
+       LFSX(r0, r1, r2);
+       return;
+    }
+    /* Base is r0 but the index is not: swap their roles. */
+    if (r2 != _R0_REGNO) {
+       LFSX(r0, r2, r1);
+       return;
+    }
+    /* Both are r0: copy the base into a scratch register. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    LFSX(r0, rn(scratch), r2);
+    jit_unget_reg(scratch);
+}
+
+/* Emit an indexed load of the double at address (r1 + r2) into FPR r0.
+ * r0 in the base slot of an indexed load is read as zero, so the
+ * operands are swapped or copied when r1 is r0. */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    /* Common case: the base is not r0. */
+    if (r1 != _R0_REGNO) {
+       LFDX(r0, r1, r2);
+       return;
+    }
+    /* Base is r0 but the index is not: swap their roles. */
+    if (r2 != _R0_REGNO) {
+       LFDX(r0, r2, r1);
+       return;
+    }
+    /* Both are r0: copy the base into a scratch register. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    LFDX(r0, rn(scratch), r2);
+    jit_unget_reg(scratch);
+}
+
+/* Emit a load of the float at address (r1 + i0) into FPR r0. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+    /* No offset: plain register-indirect load. */
+    if (i0 == 0) {
+       ldr_f(r0, r1);
+       return;
+    }
+    /* Offset too wide for the displacement field: put it in a register
+     * and use the indexed form. */
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       ldxr_f(r0, r1, rn(scratch));
+       jit_unget_reg(scratch);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LFS(r0, r1, i0);
+       return;
+    }
+    /* r0 in the base slot is read as zero, so copy the base first. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    LFS(r0, rn(scratch), i0);
+    jit_unget_reg(scratch);
+}
+
+/* Emit a load of the double at address (r1 + i0) into FPR r0. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+    /* No offset: plain register-indirect load. */
+    if (i0 == 0) {
+       ldr_d(r0, r1);
+       return;
+    }
+    /* Offset too wide for the displacement field: put it in a register
+     * and use the indexed form. */
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       ldxr_d(r0, r1, rn(scratch));
+       jit_unget_reg(scratch);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LFD(r0, r1, i0);
+       return;
+    }
+    /* r0 in the base slot is read as zero, so copy the base first. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    LFD(r0, rn(scratch), i0);
+    jit_unget_reg(scratch);
+}
+
+/* Emit a store of float FPR r0 to absolute address i0.
+ * Three encodings are used depending on how many bits i0 needs. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         held_r0;
+    jit_int32_t                scratch;
+    jit_word_t         lower, upper;
+    /* Fits the 16-bit displacement: store with r0 in the base slot. */
+    if (can_sign_extend_short_p(i0)) {
+       STFS(r0, _R0_REGNO, i0);
+       return;
+    }
+    /* Too wide for LIS + displacement: materialize the whole address. */
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_f(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+    /* Split into a high half for LIS and a signed low displacement;
+     * the high half is bumped when the low half is negative. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+    scratch = jit_get_reg(jit_class_gpr);
+    /* r0 cannot serve as the base register here; keep it reserved and
+     * ask for another one. */
+    held_r0 = scratch == _R0;
+    if (held_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STFS(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (held_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Emit a store of double FPR r0 to absolute address i0.
+ * Three encodings are used depending on how many bits i0 needs. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         held_r0;
+    jit_int32_t                scratch;
+    jit_word_t         lower, upper;
+    /* Fits the 16-bit displacement: store with r0 in the base slot. */
+    if (can_sign_extend_short_p(i0)) {
+       STFD(r0, _R0_REGNO, i0);
+       return;
+    }
+    /* Too wide for LIS + displacement: materialize the whole address. */
+    if (!can_sign_extend_int_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);
+       str_d(rn(scratch), r0);
+       jit_unget_reg(scratch);
+       return;
+    }
+    /* Split into a high half for LIS and a signed low displacement;
+     * the high half is bumped when the low half is negative. */
+    upper = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
+    lower = (jit_int16_t)(i0 - (upper << 16));
+    scratch = jit_get_reg(jit_class_gpr);
+    /* r0 cannot serve as the base register here; keep it reserved and
+     * ask for another one. */
+    held_r0 = scratch == _R0;
+    if (held_r0)
+       scratch = jit_get_reg(jit_class_gpr);
+    LIS(rn(scratch), upper);
+    STFD(r0, rn(scratch), lower);
+    jit_unget_reg(scratch);
+    if (held_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Emit an indexed store of float FPR r2 to address (r0 + r1).
+ * r0 in the base slot of an indexed store is read as zero, so the
+ * operands are swapped or copied when the base is r0. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    /* Common case: the base is not r0. */
+    if (r0 != _R0_REGNO) {
+       STFSX(r2, r0, r1);
+       return;
+    }
+    /* Base is r0 but the index is not: swap their roles. */
+    if (r1 != _R0_REGNO) {
+       STFSX(r2, r1, r0);
+       return;
+    }
+    /* Both are r0: copy it into a scratch register used as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    STFSX(r2, rn(scratch), r0);
+    jit_unget_reg(scratch);
+}
+
+/* Emit an indexed store of double FPR r2 to address (r0 + r1).
+ * r0 in the base slot of an indexed store is read as zero, so the
+ * operands are swapped or copied when the base is r0. */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    /* Common case: the base is not r0. */
+    if (r0 != _R0_REGNO) {
+       STFDX(r2, r0, r1);
+       return;
+    }
+    /* Base is r0 but the index is not: swap their roles. */
+    if (r1 != _R0_REGNO) {
+       STFDX(r2, r1, r0);
+       return;
+    }
+    /* Both are r0: copy it into a scratch register used as the base. */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r0);
+    STFDX(r2, rn(scratch), r1);
+    jit_unget_reg(scratch);
+}
+
+/* Emit a store of float FPR r1 to address (r0 + i0).
+ *
+ * i0 == 0 uses the plain register-indirect store, an offset that fits
+ * the 16-bit displacement uses STFS directly, and anything wider is
+ * loaded into a scratch register for the indexed form. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_f(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* r0 in the base slot of STFS is read as zero, so the base
+            * register (not the offset i0) is copied to a scratch one. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STFS(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STFS(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Emit a store of double FPR r1 to address (r0 + i0).
+ *
+ * i0 == 0 uses the plain register-indirect store, an offset that fits
+ * the 16-bit displacement uses STFD directly, and anything wider is
+ * loaded into a scratch register for the indexed form. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_d(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           /* r0 in the base slot of STFD is read as zero, so the base
+            * register (not the offset i0) is copied to a scratch one. */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STFD(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STFD(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_d(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c
new file mode 100644 (file)
index 0000000..788ac45
--- /dev/null
@@ -0,0 +1,1627 @@
+#if __WORDSIZE == 32
+#if defined(__powerpc__)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#if _CALL_SYSV
+#define JIT_INSTR_MAX 124
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    124,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    36,        /* va_start */
+    52,        /* va_arg */
+    64,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    16,        /* rsbi */
+    4, /* mulr */
+    12,        /* muli */
+    12,        /* qmulr */
+    16,        /* qmuli */
+    12,        /* qmulr_u */
+    16,        /* qmuli_u */
+    4, /* divr */
+    12,        /* divi */
+    4, /* divr_u */
+    12,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    20,        /* remi */
+    12,        /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    12,        /* ltr */
+    12,        /* lti */
+    12,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    12,        /* gtr */
+    12,        /* gti */
+    12,        /* gtr_u */
+    12,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    8, /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    8, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    8, /* ldi_uc */
+    4, /* ldr_s */
+    8, /* ldi_s */
+    4, /* ldr_us */
+    8, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    8, /* sti_c */
+    4, /* str_s */
+    8, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    12,        /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    12,        /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    16,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    16,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    16,        /* bosubi */
+    12,        /* bosubr_u */
+    16,        /* bosubi_u */
+    12,        /* bxsubr */
+    16,        /* bxsubi */
+    12,        /* bxsubr_u */
+    16,        /* bxsubi_u */
+    8, /* jmpr */
+    4, /* jmpi */
+    12,        /* callr */
+    20,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    124,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    12,        /* ltr_f */
+    24,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    12,        /* eqr_f */
+    24,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    12,        /* gtr_f */
+    24,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    12,        /* unordr_f */
+    24,        /* unordi_f */
+    12,        /* truncr_f_i */
+    0, /* truncr_f_l */
+    20,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    8, /* ldi_f */
+    4, /* ldxr_f */
+    12,        /* ldxi_f */
+    4, /* str_f */
+    8, /* sti_f */
+    4, /* stxr_f */
+    12,        /* stxi_f */
+    8, /* bltr_f */
+    20,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    8, /* beqr_f */
+    20,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    8, /* bgtr_f */
+    20,        /* bgti_f */
+    8, /* bner_f */
+    20,        /* bnei_f */
+    12,        /* bunltr_f */
+    24,        /* bunlti_f */
+    8, /* bunler_f */
+    20,        /* bunlei_f */
+    12,        /* buneqr_f */
+    24,        /* buneqi_f */
+    8, /* bunger_f */
+    20,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    8, /* bordr_f */
+    20,        /* bordi_f */
+    8, /* bunordr_f */
+    20,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    12,        /* ltr_d */
+    32,        /* lti_d */
+    16,        /* ler_d */
+    36,        /* lei_d */
+    12,        /* eqr_d */
+    32,        /* eqi_d */
+    16,        /* ger_d */
+    36,        /* gei_d */
+    12,        /* gtr_d */
+    32,        /* gti_d */
+    16,        /* ner_d */
+    36,        /* nei_d */
+    16,        /* unltr_d */
+    36,        /* unlti_d */
+    16,        /* unler_d */
+    36,        /* unlei_d */
+    16,        /* uneqr_d */
+    36,        /* uneqi_d */
+    16,        /* unger_d */
+    36,        /* ungei_d */
+    16,        /* ungtr_d */
+    36,        /* ungti_d */
+    16,        /* ltgtr_d */
+    36,        /* ltgti_d */
+    16,        /* ordr_d */
+    36,        /* ordi_d */
+    12,        /* unordr_d */
+    32,        /* unordi_d */
+    12,        /* truncr_d_i */
+    0, /* truncr_d_l */
+    20,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    8, /* ldi_d */
+    4, /* ldxr_d */
+    12,        /* ldxi_d */
+    4, /* str_d */
+    8, /* sti_d */
+    4, /* stxr_d */
+    12,        /* stxi_d */
+    8, /* bltr_d */
+    28,        /* blti_d */
+    12,        /* bler_d */
+    32,        /* blei_d */
+    8, /* beqr_d */
+    32,        /* beqi_d */
+    12,        /* bger_d */
+    32,        /* bgei_d */
+    8, /* bgtr_d */
+    28,        /* bgti_d */
+    8, /* bner_d */
+    28,        /* bnei_d */
+    12,        /* bunltr_d */
+    32,        /* bunlti_d */
+    8, /* bunler_d */
+    28,        /* bunlei_d */
+    12,        /* buneqr_d */
+    32,        /* buneqi_d */
+    8, /* bunger_d */
+    28,        /* bungei_d */
+    12,        /* bungtr_d */
+    32,        /* bungti_d */
+    12,        /* bltgtr_d */
+    32,        /* bltgti_d */
+    8, /* bordr_d */
+    28,        /* bordi_d */
+    8, /* bunordr_d */
+    28,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* _CALL_SYSV */
+#endif /* __BYTE_ORDER */
+#endif /* __powerpc__ */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 32
+#if defined(__powerpc__)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#if !_CALL_SYSV
+#define JIT_INSTR_MAX 136
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    136,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    16,        /* rsbi */
+    4, /* mulr */
+    12,        /* muli */
+    12,        /* qmulr */
+    16,        /* qmuli */
+    12,        /* qmulr_u */
+    16,        /* qmuli_u */
+    4, /* divr */
+    12,        /* divi */
+    4, /* divr_u */
+    12,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    20,        /* remi */
+    12,        /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    12,        /* ltr */
+    12,        /* lti */
+    12,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    12,        /* gtr */
+    12,        /* gti */
+    12,        /* gtr_u */
+    12,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    8, /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    8, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    8, /* ldi_uc */
+    4, /* ldr_s */
+    8, /* ldi_s */
+    4, /* ldr_us */
+    8, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    8, /* sti_c */
+    4, /* str_s */
+    8, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    12,        /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    12,        /* blei_u */
+    8, /* beqr */
+    16,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    16,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    16,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    16,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    16,        /* bosubi */
+    12,        /* bosubr_u */
+    16,        /* bosubi_u */
+    12,        /* bxsubr */
+    16,        /* bxsubi */
+    12,        /* bxsubr_u */
+    16,        /* bxsubi_u */
+    8, /* jmpr */
+    4, /* jmpi */
+    28,        /* callr */
+    40,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    124,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    12,        /* ltr_f */
+    24,        /* lti_f */
+    16,        /* ler_f */
+    28,        /* lei_f */
+    12,        /* eqr_f */
+    24,        /* eqi_f */
+    16,        /* ger_f */
+    28,        /* gei_f */
+    12,        /* gtr_f */
+    24,        /* gti_f */
+    16,        /* ner_f */
+    28,        /* nei_f */
+    16,        /* unltr_f */
+    28,        /* unlti_f */
+    16,        /* unler_f */
+    28,        /* unlei_f */
+    16,        /* uneqr_f */
+    28,        /* uneqi_f */
+    16,        /* unger_f */
+    28,        /* ungei_f */
+    16,        /* ungtr_f */
+    28,        /* ungti_f */
+    16,        /* ltgtr_f */
+    28,        /* ltgti_f */
+    16,        /* ordr_f */
+    28,        /* ordi_f */
+    12,        /* unordr_f */
+    24,        /* unordi_f */
+    12,        /* truncr_f_i */
+    0, /* truncr_f_l */
+    20,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    12,        /* movi_f */
+    4, /* ldr_f */
+    8, /* ldi_f */
+    4, /* ldxr_f */
+    12,        /* ldxi_f */
+    4, /* str_f */
+    8, /* sti_f */
+    4, /* stxr_f */
+    12,        /* stxi_f */
+    8, /* bltr_f */
+    20,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    8, /* beqr_f */
+    20,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    8, /* bgtr_f */
+    20,        /* bgti_f */
+    8, /* bner_f */
+    20,        /* bnei_f */
+    12,        /* bunltr_f */
+    24,        /* bunlti_f */
+    8, /* bunler_f */
+    20,        /* bunlei_f */
+    12,        /* buneqr_f */
+    24,        /* buneqi_f */
+    8, /* bunger_f */
+    20,        /* bungei_f */
+    12,        /* bungtr_f */
+    24,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    8, /* bordr_f */
+    20,        /* bordi_f */
+    8, /* bunordr_f */
+    20,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    12,        /* ltr_d */
+    32,        /* lti_d */
+    16,        /* ler_d */
+    36,        /* lei_d */
+    12,        /* eqr_d */
+    32,        /* eqi_d */
+    16,        /* ger_d */
+    36,        /* gei_d */
+    12,        /* gtr_d */
+    32,        /* gti_d */
+    16,        /* ner_d */
+    36,        /* nei_d */
+    16,        /* unltr_d */
+    36,        /* unlti_d */
+    16,        /* unler_d */
+    36,        /* unlei_d */
+    16,        /* uneqr_d */
+    36,        /* uneqi_d */
+    16,        /* unger_d */
+    36,        /* ungei_d */
+    16,        /* ungtr_d */
+    36,        /* ungti_d */
+    16,        /* ltgtr_d */
+    36,        /* ltgti_d */
+    16,        /* ordr_d */
+    36,        /* ordi_d */
+    12,        /* unordr_d */
+    32,        /* unordi_d */
+    12,        /* truncr_d_i */
+    0, /* truncr_d_l */
+    20,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    8, /* ldi_d */
+    4, /* ldxr_d */
+    12,        /* ldxi_d */
+    4, /* str_d */
+    8, /* sti_d */
+    4, /* stxr_d */
+    12,        /* stxi_d */
+    8, /* bltr_d */
+    28,        /* blti_d */
+    12,        /* bler_d */
+    32,        /* blei_d */
+    8, /* beqr_d */
+    32,        /* beqi_d */
+    12,        /* bger_d */
+    32,        /* bgei_d */
+    8, /* bgtr_d */
+    28,        /* bgti_d */
+    8, /* bner_d */
+    28,        /* bnei_d */
+    12,        /* bunltr_d */
+    32,        /* bunlti_d */
+    8, /* bunler_d */
+    28,        /* bunlei_d */
+    12,        /* buneqr_d */
+    32,        /* buneqi_d */
+    8, /* bunger_d */
+    28,        /* bungei_d */
+    12,        /* bungtr_d */
+    32,        /* bungti_d */
+    12,        /* bltgtr_d */
+    32,        /* bltgti_d */
+    8, /* bordr_d */
+    28,        /* bordi_d */
+    8, /* bunordr_d */
+    28,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* _CALL_AIX */
+#endif /* __BYTEORDER */
+#endif /* __powerpc__ */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+#if defined(__powerpc__)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define JIT_INSTR_MAX 148
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    148,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    28,        /* addi */
+    4, /* addcr */
+    28,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    28,        /* subi */
+    4, /* subcr */
+    28,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    44,        /* rsbi */
+    4, /* mulr */
+    28,        /* muli */
+    12,        /* qmulr */
+    28,        /* qmuli */
+    12,        /* qmulr_u */
+    28,        /* qmuli_u */
+    4, /* divr */
+    28,        /* divi */
+    4, /* divr_u */
+    28,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    36,        /* remi */
+    12,        /* remr_u */
+    36,        /* remi_u */
+    4, /* andr */
+    28,        /* andi */
+    4, /* orr */
+    28,        /* ori */
+    4, /* xorr */
+    28,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    12,        /* ltr */
+    12,        /* lti */
+    12,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    12,        /* gtr */
+    12,        /* gti */
+    12,        /* gtr_u */
+    12,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    36,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    4, /* htonr_ul */
+    8, /* ldr_c */
+    28,        /* ldi_c */
+    4, /* ldr_uc */
+    24,        /* ldi_uc */
+    4, /* ldr_s */
+    24,        /* ldi_s */
+    4, /* ldr_us */
+    24,        /* ldi_us */
+    4, /* ldr_i */
+    24,        /* ldi_i */
+    4, /* ldr_ui */
+    24,        /* ldi_ui */
+    4, /* ldr_l */
+    24,        /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    4, /* ldxr_ui */
+    12,        /* ldxi_ui */
+    4, /* ldxr_l */
+    12,        /* ldxi_l */
+    4, /* str_c */
+    24,        /* sti_c */
+    4, /* str_s */
+    24,        /* sti_s */
+    4, /* str_i */
+    24,        /* sti_i */
+    4, /* str_l */
+    24,        /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    4, /* stxr_l */
+    12,        /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    12,        /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    12,        /* blei_u */
+    8, /* beqr */
+    44,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    36,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    16,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    16,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    16,        /* bosubi */
+    12,        /* bosubr_u */
+    16,        /* bosubi_u */
+    12,        /* bxsubr */
+    16,        /* bxsubi */
+    12,        /* bxsubr_u */
+    16,        /* bxsubi_u */
+    8, /* jmpr */
+    4, /* jmpi */
+    28,        /* callr */
+    56,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    124,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    28,        /* addi_f */
+    4, /* subr_f */
+    28,        /* subi_f */
+    28,        /* rsbi_f */
+    4, /* mulr_f */
+    28,        /* muli_f */
+    4, /* divr_f */
+    28,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    12,        /* ltr_f */
+    36,        /* lti_f */
+    16,        /* ler_f */
+    40,        /* lei_f */
+    12,        /* eqr_f */
+    36,        /* eqi_f */
+    16,        /* ger_f */
+    40,        /* gei_f */
+    12,        /* gtr_f */
+    36,        /* gti_f */
+    16,        /* ner_f */
+    40,        /* nei_f */
+    16,        /* unltr_f */
+    40,        /* unlti_f */
+    16,        /* unler_f */
+    40,        /* unlei_f */
+    16,        /* uneqr_f */
+    40,        /* uneqi_f */
+    16,        /* unger_f */
+    40,        /* ungei_f */
+    16,        /* ungtr_f */
+    40,        /* ungti_f */
+    16,        /* ltgtr_f */
+    40,        /* ltgti_f */
+    16,        /* ordr_f */
+    40,        /* ordi_f */
+    12,        /* unordr_f */
+    36,        /* unordi_f */
+    12,        /* truncr_f_i */
+    12,        /* truncr_f_l */
+    12,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    24,        /* movi_f */
+    4, /* ldr_f */
+    24,        /* ldi_f */
+    4, /* ldxr_f */
+    12,        /* ldxi_f */
+    4, /* str_f */
+    24,        /* sti_f */
+    4, /* stxr_f */
+    12,        /* stxi_f */
+    8, /* bltr_f */
+    32,        /* blti_f */
+    12,        /* bler_f */
+    36,        /* blei_f */
+    8, /* beqr_f */
+    32,        /* beqi_f */
+    12,        /* bger_f */
+    36,        /* bgei_f */
+    8, /* bgtr_f */
+    32,        /* bgti_f */
+    8, /* bner_f */
+    32,        /* bnei_f */
+    12,        /* bunltr_f */
+    36,        /* bunlti_f */
+    8, /* bunler_f */
+    32,        /* bunlei_f */
+    12,        /* buneqr_f */
+    36,        /* buneqi_f */
+    8, /* bunger_f */
+    32,        /* bungei_f */
+    12,        /* bungtr_f */
+    36,        /* bungti_f */
+    12,        /* bltgtr_f */
+    36,        /* bltgti_f */
+    8, /* bordr_f */
+    32,        /* bordi_f */
+    8, /* bunordr_f */
+    32,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    28,        /* addi_d */
+    4, /* subr_d */
+    28,        /* subi_d */
+    32,        /* rsbi_d */
+    4, /* mulr_d */
+    28,        /* muli_d */
+    4, /* divr_d */
+    28,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    12,        /* ltr_d */
+    40,        /* lti_d */
+    16,        /* ler_d */
+    44,        /* lei_d */
+    12,        /* eqr_d */
+    40,        /* eqi_d */
+    16,        /* ger_d */
+    44,        /* gei_d */
+    12,        /* gtr_d */
+    40,        /* gti_d */
+    16,        /* ner_d */
+    44,        /* nei_d */
+    16,        /* unltr_d */
+    44,        /* unlti_d */
+    16,        /* unler_d */
+    44,        /* unlei_d */
+    16,        /* uneqr_d */
+    44,        /* uneqi_d */
+    16,        /* unger_d */
+    44,        /* ungei_d */
+    16,        /* ungtr_d */
+    44,        /* ungti_d */
+    16,        /* ltgtr_d */
+    44,        /* ltgti_d */
+    16,        /* ordr_d */
+    44,        /* ordi_d */
+    12,        /* unordr_d */
+    40,        /* unordi_d */
+    12,        /* truncr_d_i */
+    12,        /* truncr_d_l */
+    12,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    32,        /* movi_d */
+    4, /* ldr_d */
+    24,        /* ldi_d */
+    4, /* ldxr_d */
+    12,        /* ldxi_d */
+    4, /* str_d */
+    24,        /* sti_d */
+    4, /* stxr_d */
+    12,        /* stxi_d */
+    8, /* bltr_d */
+    32,        /* blti_d */
+    12,        /* bler_d */
+    36,        /* blei_d */
+    8, /* beqr_d */
+    40,        /* beqi_d */
+    12,        /* bger_d */
+    40,        /* bgei_d */
+    8, /* bgtr_d */
+    36,        /* bgti_d */
+    8, /* bner_d */
+    36,        /* bnei_d */
+    12,        /* bunltr_d */
+    36,        /* bunlti_d */
+    8, /* bunler_d */
+    32,        /* bunlei_d */
+    12,        /* buneqr_d */
+    36,        /* buneqi_d */
+    8, /* bunger_d */
+    36,        /* bungei_d */
+    12,        /* bungtr_d */
+    40,        /* bungti_d */
+    12,        /* bltgtr_d */
+    40,        /* bltgti_d */
+    8, /* bordr_d */
+    36,        /* bordi_d */
+    8, /* bunordr_d */
+    32,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __BYTE_ORDER */
+#endif /* __powerpc__ */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+#if defined(__powerpc__)
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define JIT_INSTR_MAX 124
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    124,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    28,        /* addi */
+    4, /* addcr */
+    28,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    28,        /* subi */
+    4, /* subcr */
+    28,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    44,        /* rsbi */
+    4, /* mulr */
+    28,        /* muli */
+    12,        /* qmulr */
+    28,        /* qmuli */
+    12,        /* qmulr_u */
+    28,        /* qmuli_u */
+    4, /* divr */
+    28,        /* divi */
+    4, /* divr_u */
+    28,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    36,        /* remi */
+    12,        /* remr_u */
+    36,        /* remi_u */
+    4, /* andr */
+    28,        /* andi */
+    4, /* orr */
+    28,        /* ori */
+    4, /* xorr */
+    28,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    12,        /* ltr */
+    12,        /* lti */
+    12,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    12,        /* gtr */
+    12,        /* gti */
+    12,        /* gtr_u */
+    12,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    36,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
+    20,        /* htonr_us */
+    16,        /* htonr_ui */
+    44,        /* htonr_ul */
+    8, /* ldr_c */
+    28,        /* ldi_c */
+    4, /* ldr_uc */
+    24,        /* ldi_uc */
+    4, /* ldr_s */
+    24,        /* ldi_s */
+    4, /* ldr_us */
+    24,        /* ldi_us */
+    4, /* ldr_i */
+    24,        /* ldi_i */
+    4, /* ldr_ui */
+    24,        /* ldi_ui */
+    4, /* ldr_l */
+    24,        /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    4, /* ldxr_uc */
+    12,        /* ldxi_uc */
+    4, /* ldxr_s */
+    12,        /* ldxi_s */
+    4, /* ldxr_us */
+    12,        /* ldxi_us */
+    4, /* ldxr_i */
+    12,        /* ldxi_i */
+    4, /* ldxr_ui */
+    12,        /* ldxi_ui */
+    4, /* ldxr_l */
+    12,        /* ldxi_l */
+    4, /* str_c */
+    24,        /* sti_c */
+    4, /* str_s */
+    24,        /* sti_s */
+    4, /* str_i */
+    24,        /* sti_i */
+    4, /* str_l */
+    24,        /* sti_l */
+    4, /* stxr_c */
+    12,        /* stxi_c */
+    4, /* stxr_s */
+    12,        /* stxi_s */
+    4, /* stxr_i */
+    12,        /* stxi_i */
+    4, /* stxr_l */
+    12,        /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    12,        /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    12,        /* blei_u */
+    8, /* beqr */
+    44,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    36,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    16,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    16,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    16,        /* bosubi */
+    12,        /* bosubr_u */
+    16,        /* bosubi_u */
+    12,        /* bxsubr */
+    16,        /* bxsubi */
+    12,        /* bxsubr_u */
+    16,        /* bxsubi_u */
+    8, /* jmpr */
+    4, /* jmpi */
+    12,        /* callr */
+    36,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    124,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    28,        /* addi_f */
+    4, /* subr_f */
+    28,        /* subi_f */
+    28,        /* rsbi_f */
+    4, /* mulr_f */
+    28,        /* muli_f */
+    4, /* divr_f */
+    28,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    12,        /* ltr_f */
+    36,        /* lti_f */
+    16,        /* ler_f */
+    40,        /* lei_f */
+    12,        /* eqr_f */
+    36,        /* eqi_f */
+    16,        /* ger_f */
+    40,        /* gei_f */
+    12,        /* gtr_f */
+    36,        /* gti_f */
+    16,        /* ner_f */
+    40,        /* nei_f */
+    16,        /* unltr_f */
+    40,        /* unlti_f */
+    16,        /* unler_f */
+    40,        /* unlei_f */
+    16,        /* uneqr_f */
+    40,        /* uneqi_f */
+    16,        /* unger_f */
+    40,        /* ungei_f */
+    16,        /* ungtr_f */
+    40,        /* ungti_f */
+    16,        /* ltgtr_f */
+    40,        /* ltgti_f */
+    16,        /* ordr_f */
+    40,        /* ordi_f */
+    12,        /* unordr_f */
+    36,        /* unordi_f */
+    12,        /* truncr_f_i */
+    12,        /* truncr_f_l */
+    12,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    24,        /* movi_f */
+    4, /* ldr_f */
+    24,        /* ldi_f */
+    4, /* ldxr_f */
+    12,        /* ldxi_f */
+    4, /* str_f */
+    24,        /* sti_f */
+    4, /* stxr_f */
+    12,        /* stxi_f */
+    8, /* bltr_f */
+    32,        /* blti_f */
+    12,        /* bler_f */
+    36,        /* blei_f */
+    8, /* beqr_f */
+    32,        /* beqi_f */
+    12,        /* bger_f */
+    36,        /* bgei_f */
+    8, /* bgtr_f */
+    32,        /* bgti_f */
+    8, /* bner_f */
+    32,        /* bnei_f */
+    12,        /* bunltr_f */
+    36,        /* bunlti_f */
+    8, /* bunler_f */
+    32,        /* bunlei_f */
+    12,        /* buneqr_f */
+    36,        /* buneqi_f */
+    8, /* bunger_f */
+    32,        /* bungei_f */
+    12,        /* bungtr_f */
+    36,        /* bungti_f */
+    12,        /* bltgtr_f */
+    36,        /* bltgti_f */
+    8, /* bordr_f */
+    32,        /* bordi_f */
+    8, /* bunordr_f */
+    32,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    28,        /* addi_d */
+    4, /* subr_d */
+    28,        /* subi_d */
+    32,        /* rsbi_d */
+    4, /* mulr_d */
+    28,        /* muli_d */
+    4, /* divr_d */
+    28,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    12,        /* ltr_d */
+    40,        /* lti_d */
+    16,        /* ler_d */
+    44,        /* lei_d */
+    12,        /* eqr_d */
+    40,        /* eqi_d */
+    16,        /* ger_d */
+    44,        /* gei_d */
+    12,        /* gtr_d */
+    40,        /* gti_d */
+    16,        /* ner_d */
+    44,        /* nei_d */
+    16,        /* unltr_d */
+    44,        /* unlti_d */
+    16,        /* unler_d */
+    44,        /* unlei_d */
+    16,        /* uneqr_d */
+    44,        /* uneqi_d */
+    16,        /* unger_d */
+    44,        /* ungei_d */
+    16,        /* ungtr_d */
+    44,        /* ungti_d */
+    16,        /* ltgtr_d */
+    44,        /* ltgti_d */
+    16,        /* ordr_d */
+    44,        /* ordi_d */
+    12,        /* unordr_d */
+    40,        /* unordi_d */
+    12,        /* truncr_d_i */
+    12,        /* truncr_d_l */
+    12,        /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    32,        /* movi_d */
+    4, /* ldr_d */
+    24,        /* ldi_d */
+    4, /* ldxr_d */
+    12,        /* ldxi_d */
+    4, /* str_d */
+    24,        /* sti_d */
+    4, /* stxr_d */
+    12,        /* stxi_d */
+    8, /* bltr_d */
+    32,        /* blti_d */
+    12,        /* bler_d */
+    36,        /* blei_d */
+    8, /* beqr_d */
+    40,        /* beqi_d */
+    12,        /* bger_d */
+    40,        /* bgei_d */
+    8, /* bgtr_d */
+    36,        /* bgti_d */
+    8, /* bner_d */
+    36,        /* bnei_d */
+    12,        /* bunltr_d */
+    36,        /* bunlti_d */
+    8, /* bunler_d */
+    32,        /* bunlei_d */
+    12,        /* buneqr_d */
+    36,        /* buneqi_d */
+    8, /* bunger_d */
+    36,        /* bungei_d */
+    12,        /* bungtr_d */
+    40,        /* bungti_d */
+    12,        /* bltgtr_d */
+    40,        /* bltgti_d */
+    8, /* bordr_d */
+    36,        /* bordi_d */
+    8, /* bunordr_d */
+    32,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __BYTE_ORDER */
+#endif /* __powerpc__ */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c
new file mode 100644 (file)
index 0000000..0826f4e
--- /dev/null
@@ -0,0 +1,1912 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* True when the integer argument index i lies in [0, 8).
+ * NOTE(review): presumably "argument i is passed in a register" --
+ * confirm against the callers. */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
+#if !_CALL_SYSV
+/* Without _CALL_SYSV the float argument index range is [0, 13). */
+#  define jit_arg_f_reg_p(i)           ((i) >= 0 && (i) < 13)
+#else
+/* With _CALL_SYSV the float argument index range is [0, 8). */
+#  define jit_arg_f_reg_p(i)           ((i) >= 0 && (i) < 8)
+/* 2 on 32-bit and 3 on 64-bit words.
+ * NOTE(review): looks like log2 of the word size in bytes -- confirm. */
+#  if __WORDSIZE == 32
+#    define va_gp_shift                        2
+#  else
+#    define va_gp_shift                        3
+#  endif
+/* Size of one word-sized slot, and the offset of the r3 member
+ * inside jit_va_list_t. */
+#  define va_gp_increment              sizeof(jit_word_t)
+#  define first_gp_argument            r3
+#  define first_gp_offset              offsetof(jit_va_list_t,         \
+                                                first_gp_argument)
+/* Size of one double-sized slot, and the offset of the f1 member
+ * inside jit_va_list_t. */
+#  define va_fp_increment              sizeof(jit_float64_t)
+#  define first_fp_argument            f1
+#  define first_fp_offset              offsetof(jit_va_list_t,         \
+                                                first_fp_argument)
+#endif
+/* Displacements for 8-, 16-, 32-bit integer and 32-bit float values:
+ * zero on little-endian, and (word size in bytes - sizeof(type)) on
+ * big-endian.
+ * NOTE(review): presumably the offset of a narrow value stored in a
+ * word-sized slot -- confirm against the users of these macros.
+ * The big-endian expansions are fully parenthesized so that they stay
+ * a single operand whatever expression they are pasted into. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define C_DISP                       0
+#  define S_DISP                       0
+#  define I_DISP                       0
+#  define F_DISP                       0
+#else
+#  define C_DISP                       ((__WORDSIZE >> 3) - sizeof(jit_int8_t))
+#  define S_DISP                       ((__WORDSIZE >> 3) - sizeof(jit_int16_t))
+#  define I_DISP                       ((__WORDSIZE >> 3) - sizeof(jit_int32_t))
+#  define F_DISP                       ((__WORDSIZE >> 3) - sizeof(jit_float32_t))
+#endif
+
+/*
+ * Types
+ */
+/* Variable-argument state.  With _CALL_SYSV it is a structure holding
+ * two byte counters, two pointers and one slot per r3..r10 and f1..f8;
+ * otherwise it is a plain pointer.  first_gp_offset/first_fp_offset
+ * are taken from the r3 and f1 members, so member order matters. */
+#if _CALL_SYSV
+typedef struct jit_va_list {
+    /* NOTE(review): presumably the number of gpr/fpr slots already
+     * consumed -- confirm against the va_start/va_arg code. */
+    jit_uint8_t                ngpr;
+    jit_uint8_t                nfpr;
+    jit_uint16_t       _pad;
+#  if __WORDSIZE == 64
+    /* Extra 32 bits of padding, only present on 64-bit builds. */
+    jit_uint32_t       _pad2;
+#  endif
+    /* NOTE(review): meaning of over/save is not visible here;
+     * presumably overflow and register-save area pointers -- confirm. */
+    jit_pointer_t      over;
+    jit_pointer_t      save;
+#  if __WORDSIZE == 32
+    /* Extra word of padding, only present on 32-bit builds. */
+    jit_word_t         _pad2;
+#  endif
+    /* Word-sized slots named after r3..r10. */
+    jit_word_t         r3;
+    jit_word_t         r4;
+    jit_word_t         r5;
+    jit_word_t         r6;
+    jit_word_t         r7;
+    jit_word_t         r8;
+    jit_word_t         r9;
+    jit_word_t         r10;
+    /* Double-sized slots named after f1..f8. */
+    jit_float64_t      f1;
+    jit_float64_t      f2;
+    jit_float64_t      f3;
+    jit_float64_t      f4;
+    jit_float64_t      f5;
+    jit_float64_t      f6;
+    jit_float64_t      f7;
+    jit_float64_t      f8;
+} jit_va_list_t;
+#else
+typedef jit_pointer_t jit_va_list_t;
+#endif
+
+/*
+ * Prototypes
+ */
+/* patch(instr, node) forwards to _patch() with the _jit state that is
+ * in scope at the point of use. */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+/* Include the cpu and fpu sources with PROTO defined as 1, then
+ * undefine it again.
+ * NOTE(review): presumably PROTO selects the declaration part of those
+ * files -- confirm in jit_ppc-cpu.c / jit_ppc-fpu.c. */
+#define PROTO                          1
+#  include "jit_ppc-cpu.c"
+#  include "jit_ppc-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register description table.  Each entry ORs rc() class flags (sav,
+ * arg, gpr, fpr) with the register number and pairs it with a name.
+ * The last entry is a _NOREG sentinel; _jit_init() sets reglen to
+ * jit_size(_rvs) - 1.
+ * f9..f13 carry the arg class only when _CALL_SYSV is not set, which
+ * matches the two limits used by jit_arg_f_reg_p().
+ * NOTE(review): entry order is presumably significant to the rest of
+ * the library -- do not reorder without checking. */
+jit_register_t         _rvs[] = {
+    { rc(sav) | 0,                     "r0" },
+    { rc(sav) | 11,                    "r11" },        /* env */
+    { rc(sav) | 12,                    "r12" },        /* exception */
+    { rc(sav) | 13,                    "r13" },        /* thread */
+    { rc(sav) | 2,                     "r2" },         /* toc */
+    { rc(sav) | rc(gpr) | 14,          "r14" },
+    { rc(sav) | rc(gpr) | 15,          "r15" },
+    { rc(sav) | rc(gpr) | 16,          "r16" },
+    { rc(sav) | rc(gpr) | 17,          "r17" },
+    { rc(sav) | rc(gpr) | 18,          "r18" },
+    { rc(sav) | rc(gpr) | 19,          "r19" },
+    { rc(sav) | rc(gpr) | 20,          "r20" },
+    { rc(sav) | rc(gpr) | 21,          "r21" },
+    { rc(sav) | rc(gpr) | 22,          "r22" },
+    { rc(sav) | rc(gpr) | 23,          "r23" },
+    { rc(sav) | rc(gpr) | 24,          "r24" },
+    { rc(sav) | rc(gpr) | 25,          "r25" },
+    { rc(sav) | rc(gpr) | 26,          "r26" },
+    { rc(sav) | rc(gpr) | 27,          "r27" },
+    { rc(sav) | rc(gpr) | 28,          "r28" },
+    { rc(sav) | rc(gpr) | 29,          "r29" },
+    { rc(sav) | rc(gpr) | 30,          "r30" },
+    { rc(sav) | 1,                     "r1" },
+    { rc(sav) | 31,                    "r31" },
+    { rc(arg) | rc(gpr) | 10,          "r10" },
+    { rc(arg) | rc(gpr) | 9,           "r9" },
+    { rc(arg) | rc(gpr) | 8,           "r8" },
+    { rc(arg) | rc(gpr) | 7,           "r7" },
+    { rc(arg) | rc(gpr) | 6,           "r6" },
+    { rc(arg) | rc(gpr) | 5,           "r5" },
+    { rc(arg) | rc(gpr) | 4,           "r4" },
+    { rc(arg) | rc(gpr) | 3,           "r3" },
+    { rc(fpr) | 0,                     "f0" },
+    { rc(sav) | rc(fpr) | 14,          "f14" },
+    { rc(sav) | rc(fpr) | 15,          "f15" },
+    { rc(sav) | rc(fpr) | 16,          "f16" },
+    { rc(sav) | rc(fpr) | 17,          "f17" },
+    { rc(sav) | rc(fpr) | 18,          "f18" },
+    { rc(sav) | rc(fpr) | 19,          "f19" },
+    { rc(sav) | rc(fpr) | 20,          "f20" },
+    { rc(sav) | rc(fpr) | 21,          "f21" },
+    { rc(sav) | rc(fpr) | 22,          "f22" },
+    { rc(sav) | rc(fpr) | 23,          "f23" },
+    { rc(sav) | rc(fpr) | 24,          "f24" },
+    { rc(sav) | rc(fpr) | 25,          "f25" },
+    { rc(sav) | rc(fpr) | 26,          "f26" },
+    { rc(sav) | rc(fpr) | 27,          "f27" },
+    { rc(sav) | rc(fpr) | 28,          "f28" },
+    { rc(sav) | rc(fpr) | 29,          "f29" },
+    { rc(sav) | rc(fpr) | 30,          "f30" },
+    { rc(sav) | rc(fpr) | 31,          "f31" },
+#if !_CALL_SYSV
+    { rc(arg) | rc(fpr) | 13,          "f13" },
+    { rc(arg) | rc(fpr) | 12,          "f12" },
+    { rc(arg) | rc(fpr) | 11,          "f11" },
+    { rc(arg) | rc(fpr) | 10,          "f10" },
+    { rc(arg) | rc(fpr) | 9,           "f9" },
+#else
+    { rc(fpr) | 13,                    "f13" },
+    { rc(fpr) | 12,                    "f12" },
+    { rc(fpr) | 11,                    "f11" },
+    { rc(fpr) | 10,                    "f10" },
+    { rc(fpr) | 9,                     "f9" },
+#endif
+    { rc(arg) | rc(fpr) | 8,           "f8" },
+    { rc(arg) | rc(fpr) | 7,           "f7" },
+    { rc(arg) | rc(fpr) | 6,           "f6" },
+    { rc(arg) | rc(fpr) | 5,           "f5" },
+    { rc(arg) | rc(fpr) | 4,           "f4" },
+    { rc(arg) | rc(fpr) | 3,           "f3" },
+    { rc(arg) | rc(fpr) | 2,           "f2" },
+    { rc(arg) | rc(fpr) | 1,           "f1" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU feature detection hook; this implementation does nothing. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Record the number of entries in _rvs, leaving out the trailing
+ * _NOREG sentinel, as the register count of this jit state. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+/* Start a new function.
+ *
+ * If a function is still open it is closed first with jit_epilog().
+ * A slot is then taken from the _jitc->functions vector (grown by 16
+ * entries when full), its argument and stack bookkeeping is reset, and
+ * the prolog and epilog nodes are created.  Only the prolog node is
+ * linked here; the epilog node is created unlinked.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 offset;
+
+    if (_jitc->function)
+       jit_epilog();
+    /* regarg must compare equal to 0 when a function starts. */
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    /* Index of the slot this function will occupy. */
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {
+       /* Vector is full: make room for 16 more jit_function_t. */
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = params_offset;
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.alen = 0;
+    /* float conversion: start aoff 8 bytes below alloca_offset */
+    _jitc->function->self.aoff = alloca_offset - 8;
+    _jitc->function->self.call = jit_call_default;
+    /* One jit_int32_t per register described in _rvs. */
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve length bytes in the current function's frame and return the
+ * resulting (updated) self.aoff value.
+ *
+ * Before the reservation aoff is rounded down to a 2-, 4- or 8-byte
+ * boundary for lengths of 2, 3..4, and anything else respectively;
+ * lengths 0 and 1 apply no rounding.  Unless the state is being
+ * realized, an allocai synthesized node is recorded as well.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t                 align;
+
+    assert(_jitc->function);
+    /* Choose the alignment from the requested length. */
+    if (length == 0 || length == 1)
+       align = 1;
+    else if (length == 2)
+       align = 2;
+    else if (length == 3 || length == 4)
+       align = 4;
+    else
+       align = 8;
+    /* Byte alignment needs no masking at all. */
+    if (align > 1)
+       _jitc->function->self.aoff &= -align;
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/* Emit the node sequence for a stack allocation whose size is held in
+ * register v.
+ *
+ * The emitted code negates v and masks it with -16, adds that amount
+ * to JIT_SP, and copies the word that was at the old JIT_SP to the new
+ * JIT_SP.  The 32-bit value kept at JIT_FP + aoffoff is adjusted by
+ * the same amount, written back, and left in register u.  The aoffoff
+ * slot is reserved with jit_allocai() the first time a function uses
+ * allocar.
+ * NOTE(review): the word copied from old to new JIT_SP is presumably
+ * the stack back-chain -- confirm against the PowerPC ABI in use.
+ */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 r0, r1;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       /* First use in this function: reserve the aoffoff slot. */
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    r0 = jit_get_reg(jit_class_gpr);
+    r1 = jit_get_reg(jit_class_gpr);
+    /* r0 = word at the current stack pointer. */
+    jit_ldr(r0, JIT_SP);
+    /* r1 = -v with the low four bits cleared. */
+    jit_negr(r1, v);
+    jit_andi(r1, r1, -16);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, r1);
+    jit_addr(JIT_SP, JIT_SP, r1);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    /* Store the saved word at the new stack pointer. */
+    jit_str(JIT_SP, r0);
+    jit_unget_reg(r1);
+    jit_unget_reg(r0);
+    jit_dec_synth();
+}
+
+/* Emit a return from the function being defined: an unconditional jump
+ * patched to land on that function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    /* the branch target is the epilog of the current function */
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the integer value held in register `u`: copy it to JIT_RET when
+ * it is not already there, mark JIT_RET live and emit the return. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET) {
+       jit_movr(JIT_RET, u);
+    }
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the integer constant `u`: load it into JIT_RET and emit the
+ * return sequence (jit_ret).  The jit_inc_synth_w/jit_dec_synth pair
+ * brackets the emitted nodes. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single precision value held in float register `u`.
+ * The value is copied to JIT_FRET unless `u` already is JIT_FRET, in
+ * which case JIT_FRET is only marked live; then the return is emitted. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    /* `u` is a float register, so it has to be compared against the float
+     * return register (as _jit_retr_d does) and not the integer JIT_RET;
+     * otherwise the jit_live() branch is never taken for JIT_FRET. */
+    if (JIT_FRET != u)
+       jit_movr_f(JIT_FRET, u);
+    else
+       jit_live(JIT_FRET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single precision constant `u`: load it into JIT_FRET and
+ * emit the return sequence (jit_ret). */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double precision value held in float register `u`.  When
+ * `u` already is JIT_FRET it is only marked live, otherwise the value is
+ * moved to JIT_FRET; then the return is emitted. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET) {
+       /* value already in place */
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_d(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double precision constant `u`: load it into JIT_FRET and
+ * emit the return sequence (jit_ret). */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Finish the function currently being defined: link its epilog node and
+ * clear _jitc->function.  Asserts that a function is open and that its
+ * epilog node has no successor yet. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    /* no function is being defined from here on */
+    _jitc->function = NULL;
+}
+
+/* Tell whether the argument described by node `u` is passed in a
+ * register.  Integer arguments (jit_code_arg) are checked with
+ * jit_arg_reg_p, float/double arguments with jit_arg_f_reg_p; any other
+ * node code trips the assertion. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+       return (jit_arg_f_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare) {
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    else {
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+#if _CALL_SYSV
+       /* Allocate va_list like object in the stack.
+        * If applicable, with enough space to save all argument
+        * registers, and use fixed offsets for them. */
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+#endif
+       _jitc->function->vagp = _jitc->function->self.argi;
+       _jitc->function->vafp = _jitc->function->self.argf;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the value in register `u` as a call argument; implemented here
+ * as a plain jit_pushargr(u) wrapped in a va_push synthesized node. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    jit_bool_t          incr = 1;
+    assert(_jitc->function);
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       offset = _jitc->function->self.argi++;
+#if _CALL_SYSV
+       incr = 0;
+#endif
+    }
+    else
+       offset = _jitc->function->self.size;
+    if (incr)
+       _jitc->function->self.size += sizeof(jit_word_t);
+    node = jit_new_node_ww(jit_code_arg, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    jit_bool_t          incr = 1;
+    assert(_jitc->function);
+    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+       offset = _jitc->function->self.argf++;
+#if _CALL_SYSV
+       incr = 0;
+#endif
+    }
+    else
+       offset = _jitc->function->self.size + F_DISP;
+#if !_CALL_SYSV
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+#  if __WORDSIZE == 32
+       _jitc->function->self.argi += 2;
+#  else
+       _jitc->function->self.argi++;
+#  endif
+    }
+#endif
+    if (incr)
+       _jitc->function->self.size += sizeof(jit_word_t);
+    node = jit_new_node_ww(jit_code_arg_f, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    jit_bool_t          incr = 1;
+    assert(_jitc->function);
+    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+       offset = _jitc->function->self.argf++;
+#if _CALL_SYSV
+       incr = 0;
+#endif
+    }
+    else {
+#if _CALL_SYSV
+       if (_jitc->function->self.size & 7)
+           _jitc->function->self.size += 4;
+#endif
+       offset = _jitc->function->self.size;
+    }
+#if !_CALL_SYSV
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+#  if __WORDSIZE == 32
+       _jitc->function->self.argi += 2;
+#  else
+       _jitc->function->self.argi++;
+#  endif
+    }
+#endif
+    if (incr)
+       _jitc->function->self.size += sizeof(jit_float64_t);
+    node = jit_new_node_ww(jit_code_arg_d, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Load integer argument `v` into register `u` as a signed char: sign
+ * extend from its argument register, or load the byte from the frame at
+ * the argument offset plus C_DISP. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_c(u, JIT_FP, v->u.w + C_DISP);
+    }
+    else {
+       /* register argument */
+       jit_extr_c(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Load integer argument `v` into register `u` as an unsigned char: zero
+ * extend from its argument register, or load the byte from the frame at
+ * the argument offset plus C_DISP. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_uc(u, JIT_FP, v->u.w + C_DISP);
+    }
+    else {
+       /* register argument */
+       jit_extr_uc(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Load integer argument `v` into register `u` as a signed short: sign
+ * extend from its argument register, or load the halfword from the
+ * frame at the argument offset plus S_DISP. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_s(u, JIT_FP, v->u.w + S_DISP);
+    }
+    else {
+       /* register argument */
+       jit_extr_s(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Load integer argument `v` into register `u` as an unsigned short:
+ * zero extend from its argument register, or load the halfword from the
+ * frame at the argument offset plus S_DISP. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_us(u, JIT_FP, v->u.w + S_DISP);
+    }
+    else {
+       /* register argument */
+       jit_extr_us(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Load integer argument `v` into register `u` as a signed int.  From a
+ * register this is a plain move on 32 bit targets and a sign extension
+ * otherwise; from the stack the word is loaded from the frame at the
+ * argument offset plus I_DISP. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_i(u, JIT_FP, v->u.w + I_DISP);
+    }
+    else {
+       /* register argument */
+#if __WORDSIZE == 32
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
+       jit_extr_i(u, JIT_RA0 - v->u.w);
+#endif
+    }
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+/* Load integer argument `v` into register `u` as an unsigned int: zero
+ * extend from its argument register, or load the word from the frame at
+ * the argument offset plus I_DISP. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_ui(u, JIT_FP, v->u.w + I_DISP);
+    }
+    else {
+       /* register argument */
+       jit_extr_ui(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Load integer argument `v` into register `u` as a long: move it from
+ * its argument register, or load it from the frame at the argument
+ * offset. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_ldxi_l(u, JIT_FP, v->u.w);
+    }
+    else {
+       /* register argument */
+       jit_movr(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+#endif
+
+/* Overwrite integer argument `v` with the value in register `u`: move
+ * into its argument register, or store to the frame at the argument
+ * offset. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument */
+       jit_stxi(v->u.w, JIT_FP, u);
+    }
+    else {
+       /* register argument */
+       jit_movr(JIT_RA0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument `v` with the constant `u`: load it into
+ * the argument register, or go through a temporary register and store
+ * it to the frame at the argument offset. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_inc_synth_wp(putargi, u, v);
+    assert(v->code == jit_code_arg);
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack argument: materialize the constant in a scratch register */
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(v->u.w, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    else {
+       /* register argument */
+       jit_movi(JIT_RA0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Load single precision argument `v` into float register `u`: move it
+ * from its float argument register (using the double move), or load it
+ * from the frame at the argument offset. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movr_d(u, JIT_FA0 - slot);
+    }
+    else {
+       /* stack argument */
+       jit_ldxi_f(u, JIT_FP, slot);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite single precision argument `v` with the value in float
+ * register `u`: move into its float argument register (using the double
+ * move), or store it to the frame at the argument offset. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movr_d(JIT_FA0 - slot, u);
+    }
+    else {
+       /* stack argument */
+       jit_stxi_f(slot, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite single precision argument `v` with the constant `u`: load
+ * it into the float argument register, or go through a temporary float
+ * register and store it to the frame at the argument offset. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movi_d(JIT_FA0 - slot, u);
+    }
+    else {
+       /* stack argument: materialize the constant in a scratch register */
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_f(slot, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Load double precision argument `v` into float register `u`: move it
+ * from its float argument register, or load it from the frame at the
+ * argument offset. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movr_d(u, JIT_FA0 - slot);
+    }
+    else {
+       /* stack argument */
+       jit_ldxi_d(u, JIT_FP, slot);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite double precision argument `v` with the value in float
+ * register `u`: move into its float argument register, or store it to
+ * the frame at the argument offset. */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movr_d(JIT_FA0 - slot, u);
+    }
+    else {
+       /* stack argument */
+       jit_stxi_d(slot, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite double precision argument `v` with the constant `u`: load
+ * it into the float argument register, or go through a temporary float
+ * register and store it to the frame at the argument offset. */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    jit_word_t          slot;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    slot = v->u.w;
+    if (jit_arg_f_reg_p(slot)) {
+       /* register argument */
+       jit_movi_d(JIT_FA0 - slot, u);
+    }
+    else {
+       /* stack argument: materialize the constant in a scratch register */
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(slot, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the value in register `u` as the next integer argument of the
+ * call being prepared: in the next argument register while one is
+ * available, otherwise in the outgoing area at
+ * call.size + params_offset from JIT_SP. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_bool_t         grow = 1;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       /* out of argument registers */
+       jit_stxi(_jitc->function->call.size + params_offset, JIT_SP, u);
+    }
+    else {
+       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+#if _CALL_SYSV
+       /* register arguments take no stack space under this ABI */
+       grow = 0;
+#endif
+    }
+    if (grow)
+       _jitc->function->call.size += sizeof(jit_word_t);
+    jit_dec_synth();
+}
+
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 regno;
+    jit_bool_t          incr = 1;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+#if _CALL_SYSV
+       incr = 0;
+#endif
+    }
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size + params_offset, JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    if (incr)
+       _jitc->function->call.size += sizeof(jit_word_t);
+    jit_dec_synth();
+}
+
+/* Pass the single precision value in float register `u` as the next
+ * argument of the call being prepared.  Three placements are possible:
+ *   - a float argument register (not used for varargs calls when
+ *     !_CALL_SYSV);
+ *   - when !_CALL_SYSV, integer argument register(s), loaded from the
+ *     value spilled as a double at alloca_offset - 8 from JIT_FP;
+ *   - the outgoing stack area at call.size + params_offset + F_DISP.
+ * call.size grows by one word unless the value went to a float register
+ * under _CALL_SYSV. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_bool_t         incr = 1;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)
+#if !_CALL_SYSV
+       && !(_jitc->function->call.call & jit_call_varargs)
+#endif
+       ) {
+       jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+#if !_CALL_SYSV
+       /* in case of excess arguments */
+       if (jit_arg_reg_p(_jitc->function->call.argi)) {
+#  if __WORDSIZE == 32
+           /* skip two integer registers, but never move more than one
+            * past the last valid register index */
+           _jitc->function->call.argi += 2;
+           if (!jit_arg_reg_p(_jitc->function->call.argi - 1))
+               --_jitc->function->call.argi;
+#  else
+           _jitc->function->call.argi++;
+#  endif
+       }
+#elif _CALL_SYSV
+       incr = 0;
+#endif
+    }
+#if !_CALL_SYSV
+    else if (jit_arg_reg_p(_jitc->function->call.argi
+#  if __WORDSIZE == 32
+                         + 1
+#  endif
+                          )) {
+       /* use reserved 8 bytes area */
+       jit_stxi_d(alloca_offset - 8, JIT_FP, u);
+       jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                alloca_offset - 8);
+       _jitc->function->call.argi++;
+#  if __WORDSIZE == 32
+       /* second half of the double goes in the following register */
+       jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                alloca_offset - 4);
+       _jitc->function->call.argi++;
+#  endif
+    }
+#endif
+    else
+       jit_stxi_f(_jitc->function->call.size + params_offset + F_DISP,
+                  JIT_SP, u);
+    if (incr)
+       _jitc->function->call.size += sizeof(jit_word_t);
+    jit_dec_synth();
+}
+
+/* Pass the single precision constant `u` as the next argument of the
+ * call being prepared.  Three placements are possible:
+ *   - a float argument register (not used for varargs calls when
+ *     !_CALL_SYSV);
+ *   - when !_CALL_SYSV, integer argument register(s), loaded from the
+ *     value spilled as a double at alloca_offset - 8 from JIT_FP;
+ *   - the outgoing stack area at call.size + params_offset + F_DISP.
+ * The last two go through a temporary float register.  call.size grows
+ * by one word unless the value went to a float register under
+ * _CALL_SYSV. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_bool_t          incr = 1;
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)
+#if !_CALL_SYSV
+       && !(_jitc->function->call.call & jit_call_varargs)
+#endif
+       ) {
+       jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+#if !_CALL_SYSV
+           /* in case of excess arguments */
+       /* NOTE(review): unlike _jit_pushargr_f, call.argi is advanced here
+        * without first testing jit_arg_reg_p(call.argi) -- confirm that
+        * the missing guard is intended. */
+#  if __WORDSIZE == 32
+       _jitc->function->call.argi += 2;
+       if (!jit_arg_reg_p(_jitc->function->call.argi - 1))
+           --_jitc->function->call.argi;
+#  else
+       _jitc->function->call.argi++;
+#  endif
+#elif _CALL_SYSV
+       incr = 0;
+#endif
+    }
+    else {
+       /* materialize the constant in a scratch float register */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+#if !_CALL_SYSV
+       if (jit_arg_reg_p(_jitc->function->call.argi
+#  if __WORDSIZE == 32
+                         + 1
+#  endif
+                         )) {
+           /* use reserved 8 bytes area */
+           jit_stxi_d(alloca_offset - 8, JIT_FP, regno);
+           jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                    alloca_offset - 8);
+           _jitc->function->call.argi++;
+#  if __WORDSIZE == 32
+           /* second half of the double goes in the following register */
+           jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                    alloca_offset - 4);
+           _jitc->function->call.argi++;
+#  endif
+       }
+       else
+#endif
+           jit_stxi_f(_jitc->function->call.size + params_offset + F_DISP,
+                      JIT_SP, regno);
+       jit_unget_reg(regno);
+    }
+    if (incr)
+       _jitc->function->call.size += sizeof(jit_word_t);
+    jit_dec_synth();
+}
+
+/* Pass the double precision value in float register `u` as the next
+ * argument of the call being prepared.  Three placements are possible:
+ *   - a float argument register (not used for varargs calls when
+ *     !_CALL_SYSV);
+ *   - when !_CALL_SYSV, integer argument register(s), loaded from the
+ *     value spilled at alloca_offset - 8 from JIT_FP;
+ *   - the outgoing stack area at call.size + params_offset (call.size
+ *     is first padded to a multiple of 8 under _CALL_SYSV).
+ * call.size grows by sizeof(jit_float64_t) unless the value went to a
+ * float register under _CALL_SYSV. */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_bool_t         incr = 1;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)
+#if !_CALL_SYSV
+       && !(_jitc->function->call.call & jit_call_varargs)
+#endif
+       ) {
+       jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+#if !_CALL_SYSV
+           /* in case of excess arguments */
+       /* NOTE(review): unlike _jit_pushargi_d, call.argi is advanced here
+        * without first testing jit_arg_reg_p(call.argi) -- confirm that
+        * the missing guard is intended. */
+#  if __WORDSIZE == 32
+       _jitc->function->call.argi += 2;
+       if (!jit_arg_reg_p(_jitc->function->call.argi - 1))
+           --_jitc->function->call.argi;
+#  else
+       _jitc->function->call.argi++;
+#  endif
+#else /* _CALL_SYSV */
+       incr = 0;
+#endif
+    }
+#if !_CALL_SYSV
+    else if (jit_arg_reg_p(_jitc->function->call.argi
+#  if __WORDSIZE == 32
+                         + 1
+#  endif
+                          )) {
+       /* use reserved 8 bytes area */
+       jit_stxi_d(alloca_offset - 8, JIT_FP, u);
+       jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                alloca_offset - 8);
+       _jitc->function->call.argi++;
+#  if __WORDSIZE == 32
+       /* second half of the double goes in the following register */
+       jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                alloca_offset - 4);
+       _jitc->function->call.argi++;
+#  endif
+    }
+    else
+#endif /* !_CALL_SYSV */
+    {
+#if _CALL_SYSV
+       /* pad so the double starts at an 8 byte boundary */
+       if (_jitc->function->call.size & 7)
+           _jitc->function->call.size += 4;
+#endif
+       jit_stxi_d(_jitc->function->call.size + params_offset, JIT_SP, u);
+#if !_CALL_SYSV && __WORDSIZE == 32
+       /* one integer register left: it also receives the first word */
+       if (jit_arg_reg_p(_jitc->function->call.argi)) {
+           jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_SP,
+                    _jitc->function->call.size + params_offset);
+           _jitc->function->call.argi++;
+       }
+#endif
+    }
+    if (incr)
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    jit_dec_synth();
+}
+
+/* Pass the double precision constant `u` as the next argument of the
+ * call being prepared.  Three placements are possible:
+ *   - a float argument register (not used for varargs calls when
+ *     !_CALL_SYSV);
+ *   - when !_CALL_SYSV, integer argument register(s), loaded from the
+ *     value spilled at alloca_offset - 8 from JIT_FP;
+ *   - the outgoing stack area at call.size + params_offset (call.size
+ *     is first padded to a multiple of 8 under _CALL_SYSV).
+ * The last two go through a temporary float register.  call.size grows
+ * by sizeof(jit_float64_t) unless the value went to a float register
+ * under _CALL_SYSV. */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                 regno;
+    jit_bool_t          incr = 1;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)
+#if !_CALL_SYSV
+       && !(_jitc->function->call.call & jit_call_varargs)
+#endif
+       ) {
+       jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+#if !_CALL_SYSV
+       /* in case of excess arguments */
+       if (jit_arg_reg_p(_jitc->function->call.argi)) {
+#  if __WORDSIZE == 32
+           /* skip two integer registers, but never move more than one
+            * past the last valid register index */
+           _jitc->function->call.argi += 2;
+           if (!jit_arg_reg_p(_jitc->function->call.argi - 1))
+               --_jitc->function->call.argi;
+#  else
+           _jitc->function->call.argi++;
+#  endif
+       }
+#else /* _CALL_SYSV */
+           incr = 0;
+#endif
+    }
+    else {
+       /* materialize the constant in a scratch float register */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+#if !_CALL_SYSV
+       if (jit_arg_reg_p(_jitc->function->call.argi
+#  if __WORDSIZE == 32
+                         + 1
+#  endif
+                         )) {
+           /* use reserved 8 bytes area */
+           jit_stxi_d(alloca_offset - 8, JIT_FP, regno);
+           jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                    alloca_offset - 8);
+           _jitc->function->call.argi++;
+#  if __WORDSIZE == 32
+           /* second half of the double goes in the following register */
+           jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
+                    alloca_offset - 4);
+           _jitc->function->call.argi++;
+#  endif
+       }
+       else
+#endif /* !_CALL_SYSV */
+       {
+#if _CALL_SYSV
+           /* pad so the double starts at an 8 byte boundary */
+           if (_jitc->function->call.size & 7)
+               _jitc->function->call.size += 4;
+#endif
+           jit_stxi_d(_jitc->function->call.size + params_offset,
+                      JIT_SP, regno);
+#if !_CALL_SYSV && __WORDSIZE == 32
+           /* one integer register left: it also receives the first word */
+           if (jit_arg_reg_p(_jitc->function->call.argi)) {
+               jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_SP,
+                        _jitc->function->call.size + params_offset);
+               _jitc->function->call.argi++;
+           }
+#endif
+       }
+       jit_unget_reg(regno);
+    }
+    if (incr)
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    jit_dec_synth();
+}
+
+/* Tell whether register `regno` carries an argument of the call
+ * described by `node`.  Only registers of class jit_class_arg qualify;
+ * the register's argument index (distance from JIT_RA0 or JIT_FA0) must
+ * be below the count stored in node->v.w (integer registers) or
+ * node->w.w (float registers). */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    jit_int32_t                index;
+    spec = jit_class(_rvs[regno].spec);
+    if (!(spec & jit_class_arg))
+       return (0);
+    if (spec & jit_class_gpr) {
+       index = JIT_RA0 - regno;
+       return (index >= 0 && index < node->v.w);
+    }
+    if (spec & jit_class_fpr) {
+       index = JIT_FA0 - regno;
+       return (index >= 0 && index < node->w.w);
+    }
+    return (0);
+}
+
+/* Finish the call being prepared with an indirect call through register
+ * `r0`.  The call node records how many integer (v.w) and float (w.w)
+ * argument registers were used, and the per-call counters are reset. */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    /* keep self.alen as the largest outgoing argument area seen */
+    if (_jitc->function->call.size > _jitc->function->self.alen)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_callr(r0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+#if _CALL_SYSV
+    /* If passing float arguments in registers */
+    if ((_jitc->function->call.call & jit_call_varargs) && node->w.w)
+       node->flag |= jit_flag_varargs;
+#endif
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/* Finish the call being prepared with a direct call to address `i0` and
+ * return the call node.  The node records how many integer (v.w) and
+ * float (w.w) argument registers were used, and the per-call counters
+ * are reset. */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    /* keep self.alen as the largest outgoing argument area seen */
+    if (_jitc->function->call.size > _jitc->function->self.alen)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_calli(i0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+#if _CALL_SYSV
+    /* varargs call with float arguments in registers */
+    if ((_jitc->function->call.call & jit_call_varargs) && call->w.w)
+       call->flag |= jit_flag_varargs;
+#endif
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (call);
+}
+
+/* Fetch the result of the last call as a signed char: sign extend
+ * JIT_RET into register `r0`. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_c);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call as an unsigned char: zero extend
+ * JIT_RET into register `r0`. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_uc);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call as a signed short: sign extend
+ * JIT_RET into register `r0`. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_s);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call as an unsigned short: zero extend
+ * JIT_RET into register `r0`. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_us);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call as a signed int into register `r0`.
+ * On 32 bit targets this is a move (skipped when `r0` is JIT_RET); on
+ * other word sizes JIT_RET is sign extended. */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_i);
+#if __WORDSIZE != 32
+    jit_extr_i(r0, JIT_RET);
+#else
+    if (JIT_RET != r0) {
+       jit_movr(r0, JIT_RET);
+    }
+#endif
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+/* Fetch the result of the last call as an unsigned int: zero extend
+ * JIT_RET into register `r0`. */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_ui);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch the result of the last call as a long: copy JIT_RET into
+ * register `r0` unless `r0` already is JIT_RET. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_l);
+    if (JIT_RET != r0) {
+       jit_movr(r0, JIT_RET);
+    }
+    jit_dec_synth();
+}
+#endif
+
+/* Fetch the single precision result of the last call into float
+ * register `r0`; implemented by delegating to jit_retval_d.
+ * NOTE(review): presumably valid because float results live in
+ * JIT_FRET in the same form as doubles -- confirm. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_f);
+    jit_retval_d(r0);
+    jit_dec_synth();
+}
+
+/* Fetch the double precision result of the last call: copy JIT_FRET
+ * into float register `r0` unless `r0` already is JIT_FRET. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth(retval_d);
+    if (JIT_FRET != r0) {
+       jit_movr_d(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_word_t       patch_offset;
+#if _CALL_AIXDESC
+       jit_word_t       prolog_offset;
+#endif
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+#if _CALL_AIXDESC
+    undo.prolog_offset = 0;
+    for (node = _jitc->head; node; node = node->next)
+       if (node->code != jit_code_label &&
+           node->code != jit_code_note &&
+           node->code != jit_code_name)
+           break;
+    if (node && (node->code != jit_code_prolog ||
+                !(_jitc->functions.ptr + node->w.w)->assume_frame)) {
+       /* code may start with a jump so add an initial function descriptor */
+       word = _jit->pc.w + sizeof(void*) * 3;
+       iw(word);                       /* addr */
+       iw(0);                          /* toc */
+       iw(0);                          /* env */
+    }
+#endif
+
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+#  if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#  endif
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+#  if __WORDSIZE == 64
+               case_rr(hton, _ul);
+#  endif
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+#  if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#  endif
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f);
+               case_rr(ext, _d_f);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d);
+               case_rr(ext, _f_d);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+#if _CALL_AIXDESC
+                   if (_jit->pc.uc == _jit->code.ptr + sizeof(void*) * 3)
+                       _jitc->jump = 1;
+#endif
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   (void)jmpi_p(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w)
+#if _CALL_SYSV
+                     , !!(node->flag & jit_flag_varargs)
+#endif
+                     );
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli_p(temp->u.w
+#if _CALL_SYSV
+                                  , !!(node->flag & jit_flag_varargs)
+#endif
+                                  );
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w
+#if _CALL_SYSV
+                         , !!(node->flag & jit_flag_varargs)
+#endif
+                         );
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+#if _CALL_AIXDESC
+               undo.prolog_offset = _jitc->prolog.offset;
+#endif
+           restart_function:
+               _jitc->again = 0;
+#if _CALL_AIXDESC
+               if (_jitc->jump && !_jitc->function->assume_frame) {
+                   /* remember prolog to hide offset adjustment for a jump
+                    * to the start of a function, what is expected to be
+                    * a common practice as first jit instruction */
+                   if (_jitc->prolog.offset >= _jitc->prolog.length) {
+                       _jitc->prolog.length += 16;
+                       jit_realloc((jit_pointer_t *)&_jitc->prolog.ptr,
+                                   (_jitc->prolog.length - 16) *
+                                   sizeof(jit_word_t),
+                                   _jitc->prolog.length * sizeof(jit_word_t));
+                   }
+                   _jitc->prolog.ptr[_jitc->prolog.offset++] = _jit->pc.w;
+                   /* function descriptor */
+                   word = _jit->pc.w + sizeof(void*) * 3;
+                   iw(word);                           /* addr */
+                   iw(0);                              /* toc */
+                   iw(0);                              /* env */
+               }
+#endif
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+#if _CALL_AIXDESC
+                   _jitc->prolog.offset = undo.prolog_offset;
+#endif
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:
+           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+#if __WORDSIZE == 64
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+#if __WORDSIZE == 64
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_ppc-cpu.c"
+#  include "jit_ppc-fpu.c"
+#undef CODE
+
+void                           /* clear the cache over the pages of [fptr, tptr) */
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t         page, first, last;
+
+    page = sysconf(_SC_PAGE_SIZE);
+    first = (jit_word_t)fptr & -page;  /* start, rounded down to a page */
+    last = ((jit_word_t)tptr + page - 1) & -page; /* end, rounded up */
+    __clear_cache((void *)first, (void *)last);
+#endif
+}
+
+void                           /* word-sized ldxi, selected by __WORDSIZE */
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+#if __WORDSIZE == 32
+    ldxi_i(rn(r0), rn(r1), i0);        /* 32-bit words: the _i variant */
+#else
+    ldxi_l(rn(r0), rn(r1), i0);        /* otherwise: the _l variant */
+#endif
+}
+
+void                           /* word-sized stxi, selected by __WORDSIZE */
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+#if __WORDSIZE == 32
+    stxi_i(i0, rn(r0), rn(r1));        /* 32-bit words: the _i variant */
+#else
+    stxi_l(i0, rn(r0), rn(r1));        /* otherwise: the _l variant */
+#endif
+}
+
+void                           /* forwards to ldxi_d with both registers mapped by rn() */
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+void                           /* forwards to stxi_d with both registers mapped by rn() */
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+static void                    /* record (instr, node) in the pending patch table */
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps the referenced node in v.n, every other code in u.n */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    /* a target already flagged jit_flag_patch must not be queued */
+    assert(!(target->flag & jit_flag_patch));
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       /* table is full: make room for 1024 more jit_patch_t entries */
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    ++_jitc->patches.offset;
+}
diff --git a/deps/lightning/lib/jit_print.c b/deps/lightning/lib/jit_print.c
new file mode 100644 (file)
index 0000000..3d7c0ac
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+#define print_chr(value)               fputc(value, stdout) /* all helpers write to stdout */
+#define print_hex(value)               fprintf(stdout, "0x%lx", value)
+#define print_dec(value)               fprintf(stdout, "%ld", value)
+#define print_flt(value)               fprintf(stdout, "%g", value)
+#define print_str(value)               fprintf(stdout, "%s", value)
+#define print_ptr(value)               fprintf(stdout, "%p", value)
+#define print_reg(value)       /* name, with '?' first if jit_regno_patch set */ \
+    do {                                                               \
+       if ((value) & jit_regno_patch)                                  \
+           print_chr('?');                                             \
+       print_str(_rvs[jit_regno(value)].name);                         \
+    } while (0)
+#define print_arg(value)       /* '#' then (value)->v.w, or "#?" if value is null */ \
+    do {                                                               \
+       print_chr('#');                                                 \
+       if (value)                                                      \
+           print_dec((value)->v.w);                                    \
+       else                                                            \
+           print_chr('?');                                             \
+    } while (0)
+
+/*
+ * Initialization
+ */
+#include "jit_names.c"
+
+/*
+ * Implementation
+ */
+void                           /* print every node of the list on stdout */
+_jit_print(jit_state_t *_jit)
+{
+    jit_node_t         *cur;
+
+    /*
+     * One node per line: each node is followed by exactly one '\n',
+     * and an empty list produces no output at all.
+     */
+    for (cur = _jitc->head; cur; cur = cur->next) {
+       jit_print_node(cur);
+       print_chr('\n');
+    }
+}
+
+void                           /* print a single node on stdout, without a newline */
+_jit_print_node(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_block_t                *block;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+
+    if (node->code == jit_code_label ||        /* "L<v.w>:" plus live registers */
+       node->code == jit_code_prolog || node->code == jit_code_epilog) {
+       print_chr('L');
+       print_dec(node->v.w);
+       print_chr(':');
+       block = _jitc->blocks.ptr + node->v.w;  /* v.w indexes the block table */
+       for (offset = 0; offset < _jitc->reglen; offset++) {
+           if (jit_regset_tstbit(&block->reglive, offset)) {
+               print_chr(' ');
+               print_reg(offset);
+           }
+       }
+       if (node->code == jit_code_prolog ||
+           node->code == jit_code_epilog) {
+           print_str(" /* ");
+           print_str(code_name[node->code]);
+           print_str(" */");
+       }
+       return;
+    }
+    value = jit_classify(node->code) &         /* keep only the operand-kind bits */
+       (jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp|
+        jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg|
+        jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg|
+        jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl);
+    if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) ||
+                                          node->code == jit_code_finishr ||
+                                          node->code == jit_code_finishi))
+       print_str("    ");      /* non-synth jumps and finishr/finishi: four spaces */
+    else
+       print_chr('\t');        /* everything else: one tab */
+    if (node->flag & jit_flag_synth)   /* synth-flagged nodes get a marker */
+       print_str(" \\__ ");
+    print_str(code_name[node->code]);
+    switch (node->code) {      /* labels before the first case are goto targets only */
+       r:                      /* one register */
+           print_chr(' ');     print_reg(node->u.w);   return;
+       w:                      /* one word, in hex */
+           print_chr(' ');     print_hex(node->u.w);   return;
+       f:                      /* one float */
+           print_chr(' ');
+           if (node->flag & jit_flag_data)     /* value is read through node u.n */
+               print_flt(*(jit_float32_t *)node->u.n->u.w);
+           else
+               print_flt(node->u.f);
+           return;
+       d:                      /* one double */
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float64_t *)node->u.n->u.w);
+           else
+               print_flt(node->u.d);
+           return;
+       n:                      /* raw pointer, or "L<n>" when jit_flag_node is set */
+           print_chr(' ');
+           if (!(node->flag & jit_flag_node))
+               print_ptr(node->u.p);
+           else {
+               print_chr('L');
+               print_dec(node->u.n->v.w);
+           }
+           return;
+       a:                      /* argument: "#<v.w>" of the node itself */
+           print_chr(' ');     print_arg(node);        return;
+       r_r:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);   return;
+       r_w:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_hex(node->v.w);   return;
+       r_f:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float32_t *)node->v.n->u.w);
+           else
+               print_flt(node->v.f);
+           return;
+       r_d:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float64_t *)node->v.n->u.w);
+           else
+               print_flt(node->v.d);
+           return;
+       r_a:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_arg(node->v.n);
+           return;
+       w_r:
+           print_chr(' ');     print_hex(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);   return;
+       w_w:
+           print_chr(' ');     print_hex(node->u.w);
+           print_chr(' ');     print_hex(node->v.w);   return;
+       w_a:
+           print_chr(' ');     print_hex(node->u.w);
+           print_chr(' ');     print_arg(node->v.n);
+           return;
+       f_a:
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float32_t *)node->u.n->u.w);
+           else
+               print_flt(node->u.f);
+           print_chr(' ');     print_arg(node->v.n);
+           return;
+       d_a:
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float64_t *)node->u.n->u.w);
+           else
+               print_flt(node->u.d);
+           print_chr(' ');     print_arg(node->v.n);
+           return;
+       r_r_r:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');     print_reg(node->w.w);   return;
+       r_r_w:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');     print_hex(node->w.w);   return;
+       q_r_r:                  /* "(u.q.l u.q.h)" register pair comes first */
+           print_str(" (");    print_reg(node->u.q.l);
+           print_chr(' ');     print_reg(node->u.q.h);
+           print_str(") ");    print_reg(node->v.w);
+           print_chr(' ');     print_reg(node->w.w);   return;
+       q_r_w:
+           print_str(" (");    print_reg(node->u.q.l);
+           print_chr(' ');     print_reg(node->u.q.h);
+           print_str(") ");    print_reg(node->v.w);
+           print_chr(' ');     print_hex(node->w.w);   return;
+       r_r_f:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float32_t *)node->w.n->u.w);
+           else
+               print_flt(node->w.f);
+           return;
+       r_r_d:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float64_t *)node->w.n->u.w);
+           else
+               print_flt(node->w.d);
+           return;
+       w_r_r:
+           print_chr(' ');     print_hex(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');     print_reg(node->w.w);   return;
+       n_r_r:
+           print_chr(' ');
+           if (!(node->flag & jit_flag_node))
+               print_ptr(node->u.p);
+           else {
+               print_chr('L');
+               print_dec(node->u.n->v.w);
+           }
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');     print_reg(node->w.w);   return;
+       n_r_w:
+           print_chr(' ');
+           if (!(node->flag & jit_flag_node))
+               print_ptr(node->u.p);
+           else {
+               print_chr('L');
+               print_dec(node->u.n->v.w);
+           }
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');     print_hex(node->w.w);   return;
+       n_r_f:
+           print_chr(' ');
+           if (!(node->flag & jit_flag_node))
+               print_ptr(node->u.p);
+           else{
+               print_chr('L');
+               print_dec(node->u.n->v.w);
+           }
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float32_t *)node->w.n->u.w);
+           else
+               print_flt(node->w.f);
+           return;
+       n_r_d:
+           print_chr(' ');
+           if (!(node->flag & jit_flag_node))
+               print_ptr(node->u.p);
+           else {
+               print_chr('L');
+               print_dec(node->u.n->v.w);
+           }
+           print_chr(' ');     print_reg(node->v.w);
+           print_chr(' ');
+           if (node->flag & jit_flag_data)
+               print_flt(*(jit_float64_t *)node->w.n->u.w);
+           else
+               print_flt(node->w.d);
+           return;
+       case jit_code_name:
+           print_chr(' ');
+           if (node->v.p && _jitc->emit)       /* string shown only when _jitc->emit is set */
+               print_str(node->v.n->u.p);
+           break;
+       case jit_code_note:     /* "<string>:<w.w>", each part only when present */
+           print_chr(' ');
+           if (node->v.p && _jitc->emit)
+               print_str(node->v.n->u.p);
+           if (node->v.p && _jitc->emit && node->w.w)
+               print_chr(':');
+           if (node->w.w)
+               print_dec(node->w.w);
+           break;
+       case jit_code_data:     /* these codes print their name only */
+       case jit_code_label:
+       case jit_code_ellipsis:
+       case jit_code_prolog:   case jit_code_epilog:
+       case jit_code_ret:      case jit_code_prepare:
+           break;
+       case jit_code_save:     case jit_code_load:
+           goto r;
+       default:                /* pick a printer from the operand kinds in value */
+           switch (value) {
+               case jit_cc_a0_reg:
+               case jit_cc_a0_reg|jit_cc_a0_chg:
+               case jit_cc_a0_reg|jit_cc_a0_jmp:
+                   goto r;
+               case jit_cc_a0_int:
+                   goto w;
+               case jit_cc_a0_flt:
+                   goto f;
+               case jit_cc_a0_dbl:
+                   goto d;
+               case jit_cc_a0_jmp:
+                   goto n;
+               case jit_cc_a0_int|jit_cc_a0_arg:
+                   goto a;
+               case jit_cc_a0_reg|jit_cc_a1_reg:
+                   goto r_r;
+               case jit_cc_a0_reg|jit_cc_a1_int:
+                   goto r_w;
+               case jit_cc_a0_reg|jit_cc_a1_flt:
+                   goto r_f;
+               case jit_cc_a0_reg|jit_cc_a1_dbl:
+                   goto r_d;
+               case jit_cc_a0_reg|jit_cc_a1_arg:
+                   goto r_a;
+               case jit_cc_a0_int|jit_cc_a1_reg:
+                   goto w_r;
+               case jit_cc_a0_int|jit_cc_a1_int:
+                   goto w_w;
+               case jit_cc_a0_int|jit_cc_a1_arg:
+                   goto w_a;
+               case jit_cc_a0_flt|jit_cc_a1_arg:
+                   goto f_a;
+               case jit_cc_a0_dbl|jit_cc_a1_arg:
+                   goto d_a;
+               case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_reg:
+                   goto r_r_r;
+               case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_int:
+                   goto r_r_w;
+               case jit_cc_a0_reg|jit_cc_a0_rlh|
+                    jit_cc_a1_reg|jit_cc_a2_reg:
+                   goto q_r_r;
+               case jit_cc_a0_reg|jit_cc_a0_rlh|
+                    jit_cc_a1_reg|jit_cc_a2_int:
+                   goto q_r_w;
+               case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt:
+                   goto r_r_f;
+               case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl:
+                   goto r_r_d;
+               case jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_reg:
+                   goto w_r_r;
+               case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_reg:
+                   goto n_r_r;
+               case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_int:
+                   goto n_r_w;
+               case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_flt:
+                   goto n_r_f;
+               case jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_dbl:
+                   goto n_r_d;
+               default:
+                   abort();    /* operand combination with no printer */
+           }
+           break;
+    }
+}
diff --git a/deps/lightning/lib/jit_rewind.c b/deps/lightning/lib/jit_rewind.c
new file mode 100644 (file)
index 0000000..5ef1be5
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2015-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+#if PROTO
+#  define free_synth_list(node)                _free_synth_list(_jit,node)
+static jit_node_t *_free_synth_list(jit_state_t*,jit_node_t*);
+#define rewind_prolog()                        _rewind_prolog(_jit)
+static void _rewind_prolog(jit_state_t*);
+#define rewind_prepare()               _rewind_prepare(_jit)
+static void _rewind_prepare(jit_state_t*);
+#endif
+
+#if CODE
+/*
+ * Implementation
+ */
+static jit_node_t *
+_free_synth_list(jit_state_t *_jit, jit_node_t *node)
+{   /* Free node plus the jit_flag_synth nodes right after it; return what follows. */
+    jit_node_t         *rest = node->next;     /* fetched before node is freed */
+    free_node(node);
+    while (rest && (rest->flag & jit_flag_synth)) {
+       node = rest;
+       rest = rest->next;
+       free_node(node);
+    }
+    return (rest);             /* first node left unfreed, or null at the end of the list */
+}
+
+static void
+_rewind_prolog(jit_state_t *_jit)
+{   /* Reset the function's argument bookkeeping and replay every node recorded after the prolog. */
+    jit_node_t         *node;          /* node currently being replayed */
+    jit_node_t         *next;          /* node the walk continues with */
+    _jitc->function->self.size = stack_framesize;      /* start again from the base frame size */
+#if __arm__
+    assert(jit_cpu.abi);
+    _jitc->function->self.size += 64;
+#endif
+#if __mips__ && NEW_ABI
+    /* Only add extra stack space if there are varargs
+     * arguments in registers. */
+    assert(jit_arg_reg_p(_jitc->function->self.argi));
+    _jitc->function->self.size += 64;
+#endif
+    _jitc->function->self.argi =                       /* argi, argf and argn all go back to zero */
+       _jitc->function->self.argf = _jitc->function->self.argn = 0;
+    _jitc->tail = _jitc->function->prolog;             /* the list now ends at the prolog node */
+    node = _jitc->tail->next;                          /* remember the detached remainder */
+    _jitc->tail->next = (jit_node_t *)0;
+    _jitc->tail->link = (jit_node_t *)0;
+    for (; node; node = next) {
+       next = node->next;              /* default successor; replaced when old nodes are freed */
+       switch (node->code) {
+           /* Argument nodes are unlinked and handed to jit_make_arg*() as-is. */
+           case jit_code_arg:
+               node->next = (jit_node_t *)0;
+               jit_make_arg(node);
+               break;
+           case jit_code_arg_f:
+               node->next = (jit_node_t *)0;
+               jit_make_arg_f(node);
+               break;
+           case jit_code_arg_d:
+               node->next = (jit_node_t *)0;
+               jit_make_arg_d(node);
+               break;
+           /* getarg/putarg: call the builder with the old node's operands, then
+            * free the old node and the synth nodes that follow it. */
+           case jit_code_getarg_c:
+               jit_getarg_c(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_uc:
+               jit_getarg_uc(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_s:
+               jit_getarg_s(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_us:
+               jit_getarg_us(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_i:
+               jit_getarg_i(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_f:
+               jit_getarg_f(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_getarg_d:
+               jit_getarg_d(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargr:
+               jit_putargr(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargi:
+               jit_putargi(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargr_f:
+               jit_putargr_f(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargi_f:
+               jit_putargi_f(node->u.f, node->v.n);    /* float immediate is read from u.f */
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargr_d:
+               jit_putargr_d(node->u.w, node->v.n);
+               next = free_synth_list(node);
+               break;
+           case jit_code_putargi_d:
+               jit_putargi_d(node->u.d, node->v.n);    /* double immediate is read from u.d */
+               next = free_synth_list(node);
+               break;
+           default:
+               node->next = (jit_node_t *)0;
+               link_node(node);        /* any other node is passed to link_node() unchanged */
+               break;
+       }
+    }
+}
+
+static void
+_rewind_prepare(jit_state_t *_jit)
+{   /* Reset the call-argument bookkeeping and replay every node recorded after the prepare node. */
+    jit_node_t         *node;          /* node currently being replayed */
+    jit_node_t         *next;          /* node the walk continues with */
+    _jitc->function->call.argi =       /* call.argi, call.argf and call.size all go back to zero */
+       _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->tail = _jitc->prepare;      /* the list now ends at the prepare node */
+    node = _jitc->tail->next;          /* remember the detached remainder */
+    _jitc->tail->next = (jit_node_t *)0;
+    _jitc->tail->link = (jit_node_t *)0;
+    for (; node; node = next) {
+       next = node->next;              /* default successor; replaced when old nodes are freed */
+       switch (node->code) {
+           /* pusharg*: call the builder with the old node's operand, then
+            * free the old node and the synth nodes that follow it. */
+           case jit_code_pushargr:
+               jit_pushargr(node->u.w);
+               next = free_synth_list(node);
+               break;
+           case jit_code_pushargi:
+               jit_pushargi(node->u.w);
+               next = free_synth_list(node);
+               break;
+           case jit_code_pushargr_f:
+               jit_pushargr_f(node->u.w);
+               next = free_synth_list(node);
+               break;
+           case jit_code_pushargi_f:
+               jit_pushargi_f(node->u.f);      /* float immediate is read from u.f */
+               next = free_synth_list(node);
+               break;
+           case jit_code_pushargr_d:
+               jit_pushargr_d(node->u.w);
+               next = free_synth_list(node);
+               break;
+           case jit_code_pushargi_d:
+               jit_pushargi_d(node->u.d);      /* double immediate is read from u.d */
+               next = free_synth_list(node);
+               break;
+           default:
+               node->next = (jit_node_t *)0;
+               link_node(node);        /* any other node is passed to link_node() unchanged */
+               break;
+       }
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_riscv-cpu.c b/deps/lightning/lib/jit_riscv-cpu.c
new file mode 100644 (file)
index 0000000..388489f
--- /dev/null
@@ -0,0 +1,2378 @@
+/*
+ * Copyright (C) 2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#define _ZERO_REGNO            0
+#define _RA_REGNO              1
+#define _SP_REGNO              2
+#define _FP_REGNO              8
+typedef union {                        /* one 32-bit instruction word, viewed through several field layouts */
+#  define ui                   jit_uint32_t    /* shorthand for the bit-field type; #undef'd below */
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui rs2          : 5;
+       ui funct7       : 7;
+    } R;                       /* rd, rs1, rs2 layout: 7+5+3+5+5+7 = 32 bits */
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui rs2          : 5;
+       ui funct2       : 2;
+       ui rs3          : 5;
+    } R4;                      /* as R, with funct7 split into funct2 + rs3: 32 bits */
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui imm11_0      : 12;
+    } I;                       /* rd, rs1 and a 12-bit immediate: 7+5+3+5+12 = 32 bits */
+#  if __WORDSIZE == 64
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui shamt        : 6;
+       ui imm6_0       : 6;
+    } IS;                      /* 6-bit shift amount plus a 6-bit upper field: 32 bits */
+#  endif
+    struct  {
+       ui opcode       : 7;
+       ui imm4_0       : 5;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui rs2          : 5;
+       ui imm11_5      : 7;
+    } S;                       /* 12-bit immediate split into imm4_0 and imm11_5: 32 bits */
+    struct  {
+       ui opcode       : 7;
+       ui imm11        : 1;
+       ui imm4_1       : 4;
+       ui funct3       : 3;
+       ui rs1          : 5;
+       ui rs2          : 5;
+       ui imm10_5      : 6;
+       ui imm12        : 1;
+    } B;                       /* immediate scattered over four fields (bits 1..12): 32 bits */
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui imm12_31     : 20;
+    } U;                       /* rd and a 20-bit immediate: 7+5+20 = 32 bits */
+    struct  {
+       ui opcode       : 7;
+       ui rd           : 5;
+       ui imm19_12     : 8;
+       ui imm11        : 1;
+       ui imm10_1      : 10;
+       ui imm20        : 1;
+    } J;                       /* immediate scattered over four fields (bits 1..20): 32 bits */
+    jit_int32_t                w;      /* the whole word as a single integer */
+#  undef ui
+} instr_t;                     /* NOTE(review): presumably relies on opcode landing in the low bits — confirm bit-field order */
+#  define ii(i)                                *_jit->pc.ui++ = i
+/* FIXME could use jit_rewind_prolog() to reserve the extra 64 bytes
+ * only for a variadic jit function that has variadic arguments in
+ * registers */
+#  define stack_framesize              (200 + 64)
+#  define ldr(r0, r1)                  ldr_l(r0, r1)
+#  define ldi(r0, im)                  ldi_l(r0, im)
+#  define ldxr(r0, r1, r2)             ldxr_l(r0, r1, r2)
+#  define ldxi(r0, r1, im)             ldxi_l(r0, r1, im)
+#  define str(r0, r1)                  str_l(r0, r1)
+#  define sti(im, r0)                  sti_l(im, r0)
+#  define stxr(r0, r1, r2)             stxr_l(r0, r1, r2)
+#  define stxi(im, r0, r1)             stxi_l(im, r0, r1)
+#  define simm6_p(im)                  ((im) <= 31 && (im) >= -32)
+#  define simm12_p(im)                 ((im) <= 2047 && (im) >= -2048)
+#  define simm20_p(im)                 ((im) <= 524287 && (im) >= -524288)
+#  define simm32_p(im)                 ((im) <= 2147483647LL && (im) >= -2147483648LL)
+
+/*
+ * RV32I Base Instruction Set
+ */
+#  define LUI(rd, imm)                 Utype(55, rd, imm)
+#  define AUIPC(rd, imm)               Utype(23, rd, imm)
+#  define JAL(rd, imm)                 Jtype(111, rd, imm)
+#  define JALR(rd, rs1, imm)           Itype(103, rd, 0, rs1, imm)
+#  define BEQ(rs1, rs2, imm)           Btype(99, 0, rs1, rs2, imm)
+#  define BNE(rs1, rs2, imm)           Btype(99, 1, rs1, rs2, imm)
+#  define BLT(rs1, rs2, imm)           Btype(99, 4, rs1, rs2, imm)
+#  define BGE(rs1, rs2, imm)           Btype(99, 5, rs1, rs2, imm)
+#  define BLTU(rs1, rs2, imm)          Btype(99, 6, rs1, rs2, imm)
+#  define BGEU(rs1, rs2, imm)          Btype(99, 7, rs1, rs2, imm)
+#  define LB(rd, rs1, imm)             Itype(3, rd, 0, rs1, imm)
+#  define LH(rd, rs1, imm)             Itype(3, rd, 1, rs1, imm)
+#  define LW(rd, rs1, imm)             Itype(3, rd, 2, rs1, imm)
+#  define LBU(rd, rs1, imm)            Itype(3, rd, 4, rs1, imm)
+#  define LHU(rd, rs1, imm)            Itype(3, rd, 5, rs1, imm)
+#  define SB(rs1, rs2, imm)            Stype(35, 0, rs1, rs2, imm)
+#  define SH(rs1, rs2, imm)            Stype(35, 1, rs1, rs2, imm)
+#  define SW(rs1, rs2, imm)            Stype(35, 2, rs1, rs2, imm)
+#  define ADDI(rd, rs1, imm)           Itype(19, rd, 0, rs1, imm)
+#  define SLTI(rd, rs1, imm)           Itype(19, rd, 2, rs1, imm)
+#  define SLTIU(rd, rs1, imm)          Itype(19, rd, 3, rs1, imm)
+#  define XORI(rd, rs1, imm)           Itype(19, rd, 4, rs1, imm)
+#  define ORI(rd, rs1, imm)            Itype(19, rd, 6, rs1, imm)
+#  define ANDI(rd, rs1, imm)           Itype(19, rd, 7, rs1, imm)
+#  if __WORDSIZE == 32
+#    define SLLI(rd, rs1, imm)         Rtype(19, rd, 1, rs1, imm, 0)
+#    define SRLI(rd, rs1, imm)         Rtype(19, rd, 5, rs1, imm, 0)
+#    define SRAI(rd, rs1, imm)         Rtype(19, rd, 5, rs1, imm, 32)
+#  endif
+#  define ADD(rd, rs1, rs2)            Rtype(51, rd, 0, rs1, rs2, 0)
+#  define SUB(rd, rs1, rs2)            Rtype(51, rd, 0, rs1, rs2, 32)
+#  define SLL(rd, rs1, rs2)            Rtype(51, rd, 1, rs1, rs2, 0)
+#  define SLT(rd, rs1, rs2)            Rtype(51, rd, 2, rs1, rs2, 0)
+#  define SLTU(rd, rs1, rs2)           Rtype(51, rd, 3, rs1, rs2, 0)
+#  define XOR(rd, rs1, rs2)            Rtype(51, rd, 4, rs1, rs2, 0)
+#  define SRL(rd, rs1, rs2)            Rtype(51, rd, 5, rs1, rs2, 0)
+#  define SRA(rd, rs1, rs2)            Rtype(51, rd, 5, rs1, rs2, 32)
+#  define OR(rd, rs1, rs2)             Rtype(51, rd, 6, rs1, rs2, 0)
+#  define AND(rd, rs1, rs2)            Rtype(51, rd, 7, rs1, rs2, 0)
+#  define FENCE(imm)                   Itype(15, 0, 0, 0, imm) /* expands its own parameter (was the undeclared `im`) */
+#  define FENCE_I(imm)                 Itype(15, 0, 1, 0, imm) /* same fix; funct3 = 1 */
+#  define ECALL()                      Itype(115, 0, 0, 0, 0)
+#  define EBREAK()                     Itype(115, 0, 0, 0, 1)
+#  define CSRRW(rd, rs1, csr)          Itype(115, rd, 1, rs1, csr)
+#  define CSRRS(rd, rs1, csr)          Itype(115, rd, 2, rs1, csr)
+#  define CSRRC(rd, rs1, csr)          Itype(115, rd, 3, rs1, csr)
+#  define CSRRWI(rd, zimm, csr)                Itype(115, rd, 5, zimm, csr)
+#  define CSRRSI(rd, zimm, csr)                Itype(115, rd, 6, zimm, csr)
+#  define CSRRCI(rd, zimm, csr)                Itype(115, rd, 7, zimm, csr)
+/*
+ * RV64I Base Instruction Set (in addition to RV32I)
+ */
+#  define LWU(rd, rs1, imm)            Itype(3, rd, 6, rs1, imm)
+#  define LD(rd, rs1, imm)             Itype(3, rd, 3, rs1, imm)
+#  define SD(rs1, rs2, imm)            Stype(35, 3, rs1, rs2, imm)
+#  if __WORDSIZE == 64
+#    define SLLI(rd, rs1, sh)          IStype(19, rd, 1, rs1, sh, 0)
+#    define SRLI(rd, rs1, sh)          IStype(19, rd, 5, rs1, sh, 0)
+#    define SRAI(rd, rs1, sh)          IStype(19, rd, 5, rs1, sh, 16)
+#  endif
+#  define ADDIW(rd, rs1, imm)          Itype(27, rd, 0, rs1, imm)
+#  define SLLIW(rd, rs1, imm)          Rtype(27, rd, 1, rs1, imm, 0)   /* funct3 = 001 */
+#  define SRLIW(rd, rs1, imm)          Rtype(27, rd, 5, rs1, imm, 0)   /* funct3 = 101 (was 3), as in SRLW */
+#  define SRAIW(rd, rs1, imm)          Rtype(27, rd, 5, rs1, imm, 32)  /* funct3 = 101 (was 3), funct7 = 32 */
+#  define ADDW(rd, rs1, rs2)           Rtype(59, rd, 0, rs1, rs2, 0)   /* third operand is a register, as in ADD */
+#  define SUBW(rd, rs1, rs2)           Rtype(59, rd, 0, rs1, rs2, 32)
+#  define SLLW(rd, rs1, rs2)           Rtype(59, rd, 1, rs1, rs2, 0)
+#  define SRLW(rd, rs1, rs2)           Rtype(59, rd, 5, rs1, rs2, 0)
+#  define SRAW(rd, rs1, rs2)           Rtype(59, rd, 5, rs1, rs2, 32)
+/*
+ * RV32M Standard Extension
+ */
+#  define MUL(rd, rs1, rs2)            Rtype(51, rd, 0, rs1, rs2, 1)
+#  define MULH(rd, rs1, rs2)           Rtype(51, rd, 1, rs1, rs2, 1)
+#  define MULHSU(rd, rs1, rs2)         Rtype(51, rd, 2, rs1, rs2, 1)
+#  define MULHU(rd, rs1, rs2)          Rtype(51, rd, 3, rs1, rs2, 1)
+#  define DIV(rd, rs1, rs2)            Rtype(51, rd, 4, rs1, rs2, 1)
+#  define DIVU(rd, rs1, rs2)           Rtype(51, rd, 5, rs1, rs2, 1)
+#  define REM(rd, rs1, rs2)            Rtype(51, rd, 6, rs1, rs2, 1)
+#  define REMU(rd, rs1, rs2)           Rtype(51, rd, 7, rs1, rs2, 1)
+/*
+ * RV64M Standard Extension (in addition to RV32M)
+ */
+#  define MULW(rd, rs1, rs2)           Rtype(59, rd, 0, rs1, rs2, 1)
+#  define DIVW(rd, rs1, rs2)           Rtype(59, rd, 4, rs1, rs2, 1)
+#  define DIVUW(rd, rs1, rs2)          Rtype(59, rd, 5, rs1, rs2, 1)
+#  define REMW(rd, rs1, rs2)           Rtype(59, rd, 6, rs1, rs2, 1)
+#  define REMUW(rd, rs1, rs2)          Rtype(59, rd, 7, rs1, rs2, 1)
+/*
+ * RV32A Standard Extension
+ */
+#  define LR_W(rd, rs1)                        R4type(47, rd, 2, rs1, 0, 0, 2)
+#  define SC_W(rd, rs1, rs2)           R4type(47, rd, 2, rs1, rs2, 0, 3)
+#  define AMOSWAP_W(rd, rs1, rs2)      R4type(47, rd, 2, rs1, rs2, 0, 1)
+#  define AMOADD_W(rd, rs1, rs2)       R4type(47, rd, 2, rs1, rs2, 0, 0)
+#  define AMOXOR_W(rd, rs1, rs2)       R4type(47, rd, 2, rs1, rs2, 0, 4)
+#  define AMOAND_W(rd, rs1, rs2)       R4type(47, rd, 2, rs1, rs2, 0, 12)
+#  define AMOOR_W(rd, rs1, rs2)                R4type(47, rd, 2, rs1, rs2, 0, 8)
+#  define AMOMIN_W(rd, rs1, rs2)       R4type(47, rd, 2, rs1, rs2, 0, 16)
+#  define AMOMAX_W(rd, rs1, rs2)       R4type(47, rd, 2, rs1, rs2, 0, 20)
+#  define AMOMINU_W(rd, rs1, rs2)      R4type(47, rd, 2, rs1, rs2, 0, 24)
+#  define AMOMAXU_W(rd, rs1, rs2)      R4type(47, rd, 2, rs1, rs2, 0, 28)
+/*
+ * RV64A Standard Extension (in addition to RV32A)
+ */
+#  define LR_D(rd, rs1)                        R4type(47, rd, 3, rs1, 0, 0, 2)
+#  define SC_D(rd, rs1, rs2)           R4type(47, rd, 3, rs1, rs2, 0, 3)
+#  define AMOSWAP_D(rd, rs1, rs2)      R4type(47, rd, 3, rs1, rs2, 0, 1)
+#  define AMOADD_D(rd, rs1, rs2)       R4type(47, rd, 3, rs1, rs2, 0, 0)
+#  define AMOXOR_D(rd, rs1, rs2)       R4type(47, rd, 3, rs1, rs2, 0, 4)
+#  define AMOAND_D(rd, rs1, rs2)       R4type(47, rd, 3, rs1, rs2, 0, 12)
+#  define AMOOR_D(rd, rs1, rs2)                R4type(47, rd, 3, rs1, rs2, 0, 8)
+#  define AMOMIN_D(rd, rs1, rs2)       R4type(47, rd, 3, rs1, rs2, 0, 16)
+#  define AMOMAX_D(rd, rs1, rs2)       R4type(47, rd, 3, rs1, rs2, 0, 20)
+#  define AMOMINU_D(rd, rs1, rs2)      R4type(47, rd, 3, rs1, rs2, 0, 24)
+#  define AMOMAXU_D(rd, rs1, rs2)      R4type(47, rd, 3, rs1, rs2, 0, 28)
+/*
+ * Pseudo Instructions
+ */
+#  define NOP()                                ADDI(_ZERO_REGNO, _ZERO_REGNO, 0)
+#  define MV(r0, r1)                   ADDI(r0, r1, 0)
+#  define NOT(r0, r1)                  XORI(r0, r1, -1)
+#  define NEG(r0, r1)                  SUB(r0, _ZERO_REGNO, r1)
+#  define NEGW(r0, r1)                 SUBW(r0, _ZERO_REGNO, r1)
+#  define SEXT_W(r0, r1)               ADDIW(r0, r1, 0)
+#  define RET()                                JALR(0, 1, 0)
+
+/*
+ * Encoding functions
+ */
+#  define Rtype(op, rd, fct, rs1, rs2, fct2)                   \
+       _Rtype(_jit, op, rd, fct, rs1, rs2, fct2)
+static void _Rtype(jit_state_t*, jit_int32_t, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define R4type(op, rd, fct, rs1,rs2,fct2,rs3)                        \
+       _R4type(_jit, op, rd, fct, rs1, rs2, fct2, rs3)
+static void _R4type(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
+                   jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define Itype(op, rd, fct, rs1, imm)                         \
+       _Itype(_jit, op, rd, fct, rs1, imm)
+static void _Itype(jit_state_t*, jit_int32_t, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 64
+#  define IStype(op, rd, fct, rs1, sh, imm)                    \
+       _IStype(_jit, op, rd, fct, rs1, sh, imm)
+static void _IStype(jit_state_t*, jit_int32_t, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t,jit_int32_t);
+#  endif
+#  define Stype(op, fct, rs1, rs2, imm)                                \
+       _Stype(_jit, op, fct, rs1, rs2, imm)
+static void _Stype(jit_state_t*, jit_int32_t, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t);
+#  define Btype(op, fct, rs1, rs2, imm)                                \
+       _Btype(_jit, op, fct, rs1, rs2, imm)
+static void _Btype(jit_state_t*, jit_int32_t, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t);
+#  define Utype(op, rd, imm)           _Utype(_jit, op, rd, imm)
+static void _Utype(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define Jtype(op, rd, imm)           _Jtype(_jit, op, rd, imm)
+static void _Jtype(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+/*
+ * Lightning instructions
+ */
+#  define nop(im)                      _nop(_jit, im)
+static void _nop(jit_state_t*, jit_int32_t);
+#  define addr(r0, r1, r2)             ADD(r0, r1, r2)
+#  define addi(r0, r1, im)             _addi(_jit, r0, r1, im)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0, r1, r2)            _addcr(_jit, r0, r1, r2)
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addci(r0, r1, im)            _addci(_jit, r0, r1, im)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0, r1, im)            _addxi(_jit, r0, r1, im)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0, r1, r2)             SUB(r0, r1, r2)
+#  define subi(r0, r1, im)             _subi(_jit, r0, r1, im)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0, r1, r2)            _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0, r1, im)            _subci(_jit, r0, r1, im)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0, r1, r2)            _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0, r1, im)            _subxi(_jit, r0, r1, im)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, im)             _rsbi(_jit, r0, r1, im)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0, r1, r2)             MUL(r0, r1, r2)
+#  define muli(r0, r1, im)             _muli(_jit, r0, r1, im)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0, r1, r2)             DIV(r0, r1, r2)
+#  define divi(r0, r1, im)             _divi(_jit, r0, r1, im)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0, r1, r2)           DIVU(r0, r1, r2)
+#  define divi_u(r0, r1, im)           _divi_u(_jit, r0, r1, im)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0, r1, r2)             REM(r0, r1, r2)
+#  define remi(r0, r1, im)             _remi(_jit, r0, r1, im)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0, r1, r2)           REMU(r0, r1, r2)
+#  define remi_u(r0, r1, im)           _remi_u(_jit, r0, r1, im)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0, r1, r2, r3)                _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli(r0, r1, r2, i0)                _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr_u(r0, r1, r2, r3)      _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0, r1, r2, i0)      _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+/* Shared worker behind qdivr and qdivr_u; the jit_bool_t argument is
+ * passed as 1 by qdivr and as 0 by qdivr_u.  Declared once only. */
+static void _iqdivr(jit_state_t*,jit_bool_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivr(r0,r1,r2,r3)           _iqdivr(_jit,1,r0,r1,r2,r3)
+#  define qdivr_u(r0,r1,r2,r3)         _iqdivr(_jit,0,r0,r1,r2,r3)
+#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0, r1, r2)             SLL(r0, r1, r2)
+#  define lshi(r0, r1, im)             _lshi(_jit, r0, r1, im)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0, r1, r2)             SRA(r0, r1, r2)
+#  define rshi(r0, r1, im)             _rshi(_jit, r0, r1, im)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0, r1, r2)           SRL(r0, r1, r2)
+#  define rshi_u(r0, r1, im)           _rshi_u(_jit, r0, r1, im)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define negr(r0, r1)                 NEG(r0, r1)
+#  define comr(r0, r1)                 NOT(r0, r1)
+#  define andr(r0, r1, r2)             AND(r0, r1, r2)
+#  define andi(r0, r1, im)             _andi(_jit, r0, r1, im)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0, r1, r2)              OR(r0, r1, r2)
+#  define ori(r0, r1, im)              _ori(_jit, r0, r1, im)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0, r1, r2)             XOR(r0, r1, r2)
+#  define xori(r0, r1, im)             _xori(_jit, r0, r1, im)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_c(r0, r1)                        LB(r0, r1, 0)
+#  define ldi_c(r0, im)                        _ldi_c(_jit, r0, im)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0, r1)               LBU(r0, r1, 0)
+#  define ldi_uc(r0, im)               _ldi_uc(_jit, r0, im)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0, r1)                        LH(r0, r1, 0)
+#  define ldi_s(r0, im)                        _ldi_s(_jit, r0, im)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0, r1)               LHU(r0, r1, 0)
+#  define ldi_us(r0, im)               _ldi_us(_jit, r0, im)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0, r1)                        LW(r0, r1, 0)
+#  define ldi_i(r0, im)                        _ldi_i(_jit, r0, im)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_ui(r0, r1)               LWU(r0, r1, 0)
+#  define ldi_ui(r0, im)               _ldi_ui(_jit, r0, im)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_l(r0, r1)                        LD(r0, r1, 0)
+#  define ldi_l(r0, im)                        _ldi_l(_jit, r0, im)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0, r1, r2)           _ldxr_c(_jit, r0, r1, r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0, r1, im)           _ldxi_c(_jit, r0, r1, im)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0, r1, r2)          _ldxr_uc(_jit, r0, r1, r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0, r1, im)          _ldxi_uc(_jit, r0, r1, im)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0, r1, r2)           _ldxr_s(_jit, r0, r1, r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0, r1, im)           _ldxi_s(_jit, r0, r1, im)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0, r1, r2)          _ldxr_us(_jit, r0, r1, r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0, r1, im)          _ldxi_us(_jit, r0, r1, im)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0, r1, r2)           _ldxr_i(_jit, r0, r1, r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0, r1, im)           _ldxi_i(_jit, r0, r1, im)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_ui(r0, r1, r2)          _ldxr_ui(_jit, r0, r1, r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_ui(r0, r1, im)          _ldxi_ui(_jit, r0, r1, im)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_l(r0, r1, r2)           _ldxr_l(_jit, r0, r1, r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_l(r0, r1, im)           _ldxi_l(_jit, r0, r1, im)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_c(r0, r1)                        SB(r0, r1, 0)
+#  define sti_c(im, r0)                        _sti_c(_jit, im, r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_s(r0, r1)                        SH(r0, r1, 0)
+#  define sti_s(im, r0)                        _sti_s(_jit, im, r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0, r1)                        SW(r0, r1, 0)
+#  define sti_i(im, r0)                        _sti_i(_jit, im, r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_l(r0, r1)                        SD(r0, r1, 0)
+#  define sti_l(im, r0)                        _sti_l(_jit, im, r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0, r1, r2)           _stxr_c(_jit, r0, r1, r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0, r0, r1)           _stxi_c(_jit, i0, r0, r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0, r1, r2)           _stxr_s(_jit, r0, r1, r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0, r0, r1)           _stxi_s(_jit, i0, r0, r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0, r1, r2)           _stxr_i(_jit, r0, r1, r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_l(r0, r1, r2)           _stxr_l(_jit, r0, r1, r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_l(i0, r0, r1)           _stxi_l(_jit, i0, r0, r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define htonr_us(r0, r1)             _htonr_us(_jit, r0, r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ui(r0, r1)             _htonr_ui(_jit, r0, r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ul(r0, r1)             _htonr_ul(_jit, r0, r1)
+static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_c(r0, r1)               _extr_c(_jit, r0, r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0, r1)              andi(r0, r1, 0xff)
+#  define extr_s(r0, r1)               _extr_s(_jit, r0, r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0, r1)              _extr_us(_jit, r0, r1)
+static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_i(r0, r1)               SEXT_W(r0, r1)
+#  define extr_ui(r0, r1)              _extr_ui(_jit, r0, r1)
+static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movr(r0, r1)                 MV(r0, r1)
+#  define movi(r0, im)                 _movi(_jit, r0, im)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0, im)               _movi_p(_jit, r0, im)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+/*
+ * Comparisons producing 0/1 in r0.  ltr/gtr (and the _u variants) map
+ * directly to SLT/SLTU, gtr by swapping the source operands; the other
+ * forms are out-of-line helpers.  The third argument of every *i macro is
+ * an immediate (jit_word_t) and is consistently named `im`.
+ */
+#  define ltr(r0, r1, r2)              SLT(r0, r1, r2)
+#  define lti(r0, r1, im)              _lti(_jit, r0, r1, im)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0, r1, r2)            SLTU(r0, r1, r2)
+#  define lti_u(r0, r1, im)            _lti_u(_jit, r0, r1, im)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler(r0, r1, r2)              _ler(_jit, r0, r1, r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei(r0, r1, im)              _lei(_jit, r0, r1, im)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler_u(r0, r1, r2)            _ler_u(_jit, r0, r1, r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_u(r0, r1, im)            _lei_u(_jit, r0, r1, im)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define eqr(r0, r1, r2)              _eqr(_jit, r0, r1, r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi(r0, r1, im)              _eqi(_jit, r0, r1, im)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger(r0, r1, r2)              _ger(_jit, r0, r1, r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei(r0, r1, im)              _gei(_jit, r0, r1, im)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger_u(r0, r1, r2)            _ger_u(_jit, r0, r1, r2)
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_u(r0, r1, im)            _gei_u(_jit, r0, r1, im)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0, r1, r2)              SLT(r0, r2, r1)
+#  define gti(r0, r1, im)              _gti(_jit, r0, r1, im)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0, r1, r2)            SLTU(r0, r2, r1)
+#  define gti_u(r0, r1, im)            _gti_u(_jit, r0, r1, im)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ner(r0, r1, r2)              _ner(_jit, r0, r1, r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0, r1, im)              _nei(_jit, r0, r1, im)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/*
+ * Conditional branches.  Every helper takes a jit_word_t `br` plus two
+ * operands (register/register or register/immediate) and returns a
+ * jit_word_t.
+ */
+#  define bltr(br, r0, r1)             _bltr(_jit, br, r0, r1)
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti(br, r0, im)             _blti(_jit, br, r0, im)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bltr_u(br, r0, r1)           _bltr_u(_jit, br, r0, r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_u(br, r0, im)           _blti_u(_jit, br, r0, im)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bler(br, r0, r1)             _bler(_jit, br, r0, r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei(br, r0, im)             _blei(_jit, br, r0, im)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bler_u(br, r0, r1)           _bler_u(_jit, br, r0, r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_u(br, r0, im)           _blei_u(_jit, br, r0, im)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define beqr(br, r0, r1)             _beqr(_jit, br, r0, r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi(br, r0, im)             _beqi(_jit, br, r0, im)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger(br, r0, r1)             _bger(_jit, br, r0, r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei(br, r0, im)             _bgei(_jit, br, r0, im)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger_u(br, r0, r1)           _bger_u(_jit, br, r0, r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_u(br, r0, im)           _bgei_u(_jit, br, r0, im)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bgtr(br, r0, r1)             _bgtr(_jit, br, r0, r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti(br, r0, im)             _bgti(_jit, br, r0, im)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bgtr_u(br, r0, r1)           _bgtr_u(_jit, br, r0, r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_u(br, r0, im)           _bgti_u(_jit, br, r0, im)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bner(br, r0, r1)             _bner(_jit, br, r0, r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei(br, r0, im)             _bnei(_jit, br, r0, im)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+/* Add/subtract-and-branch families (bo*, bx*), signed and _u variants. */
+#  define boaddr(br, r0, r1)           _boaddr(_jit, br, r0, r1)
+#  define boaddi(br, r0, im)           _boaddi(_jit, br, r0, im)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr_u(br, r0, r1)         _boaddr_u(_jit, br, r0, r1)
+#  define boaddi_u(br, r0, im)         _boaddi_u(_jit, br, r0, im)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr(br, r0, r1)           _bxaddr(_jit, br, r0, r1)
+#  define bxaddi(br, r0, im)           _bxaddi(_jit, br, r0, im)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr_u(br, r0, r1)         _bxaddr_u(_jit, br, r0, r1)
+#  define bxaddi_u(br, r0, im)         _bxaddi_u(_jit, br, r0, im)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(br, r0, r1)           _bosubr(_jit, br, r0, r1)
+#  define bosubi(br, r0, im)           _bosubi(_jit, br, r0, im)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr_u(br, r0, r1)         _bosubr_u(_jit, br, r0, r1)
+#  define bosubi_u(br, r0, im)         _bosubi_u(_jit, br, r0, im)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr(br, r0, r1)           _bxsubr(_jit, br, r0, r1)
+#  define bxsubi(br, r0, im)           _bxsubi(_jit, br, r0, im)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr_u(br, r0, r1)         _bxsubr_u(_jit, br, r0, r1)
+#  define bxsubi_u(br, r0, im)         _bxsubi_u(_jit, br, r0, im)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+/* Bit-mask branches (bms*, bmc*). */
+#  define bmsr(br, r0, r1)             _bmsr(_jit, br, r0, r1)
+#  define bmsi(br, r0, im)             _bmsi(_jit, br, r0, im)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmcr(br, r0, r1)             _bmcr(_jit, br, r0, r1)
+#  define bmci(br, r0, im)             _bmci(_jit, br, r0, im)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+/*
+ * Jumps and calls.  The register forms are a single JALR whose link
+ * register is _ZERO_REGNO (jmpr) or _RA_REGNO (callr).
+ */
+#  define jmpr(r0)                     JALR(_ZERO_REGNO, r0, 0)
+#  define jmpi(im)                     _jmpi(_jit, im)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(im)                   _jmpi_p(_jit, im)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#  define callr(r0)                    JALR(_RA_REGNO, r0, 0)
+#  define calli(im)                    _calli(_jit, im)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(im)          _calli_p(_jit, im)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* Function prolog/epilog and varargs support. */
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+/* patch_abs and patch_at both expand to the same _patch_at helper. */
+#define patch_abs(instr,label)         _patch_at(_jit,instr,label)
+#define patch_at(instr,label)          _patch_at(_jit,instr,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif         /* PROTO */
+
+#if CODE
+/*
+ * Emit one R-type instruction word assembled from opcode, rd, funct3,
+ * rs1, rs2 and funct7.  Every field is asserted to fit its bit width
+ * before the word is written with ii().
+ */
+static void
+_Rtype(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+       jit_int32_t fct, jit_int32_t rs1, jit_int32_t rs2, jit_int32_t fct2)
+{
+    instr_t    insn;
+
+    /* 7-bit opcode/funct7, 5-bit register numbers, 3-bit funct3. */
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(!(rs2 & ~0x1f));
+    assert(!(fct2 & ~0x7f));
+
+    insn.R.opcode = op;
+    insn.R.rd = rd;
+    insn.R.funct3 = fct;
+    insn.R.rs1 = rs1;
+    insn.R.rs2 = rs2;
+    insn.R.funct7 = fct2;
+    ii(insn.w);
+}
+
+/*
+ * Emit one R4-type instruction word: like R-type but with a third source
+ * register (rs3) and a 2-bit funct2 field.
+ */
+static void
+_R4type(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t fct,
+       jit_int32_t rs1, jit_int32_t rs2, jit_int32_t fct2, jit_int32_t rs3)
+{
+    instr_t    insn;
+
+    /* Reject any operand that does not fit its field. */
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(!(rs2 & ~0x1f));
+    assert(!(fct2 & ~0x03));
+    assert(!(rs3 & ~0x1f));
+
+    insn.R4.opcode = op;
+    insn.R4.rd = rd;
+    insn.R4.funct3 = fct;
+    insn.R4.rs1 = rs1;
+    insn.R4.rs2 = rs2;
+    insn.R4.funct2 = fct2;
+    insn.R4.rs3 = rs3;
+    ii(insn.w);
+}
+
+/*
+ * Emit one I-type instruction word: opcode, rd, funct3, rs1 and an
+ * immediate that must satisfy simm12_p().
+ */
+static void
+_Itype(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+       jit_int32_t fct, jit_int32_t rs1, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(simm12_p(imm));
+
+    insn.I.opcode = op;
+    insn.I.rd = rd;
+    insn.I.funct3 = fct;
+    insn.I.rs1 = rs1;
+    insn.I.imm11_0 = imm;
+    ii(insn.w);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * Emit one shift-immediate instruction word (IS layout): a 6-bit shift
+ * amount plus a small immediate checked with simm6_p().  Only compiled
+ * when __WORDSIZE is 64.
+ */
+static void
+_IStype(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+       jit_int32_t fct, jit_int32_t rs1, jit_int32_t sh, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(!(sh & ~0x3f));
+    assert(simm6_p(imm));
+
+    insn.IS.opcode = op;
+    insn.IS.rd = rd;
+    insn.IS.funct3 = fct;
+    insn.IS.rs1 = rs1;
+    insn.IS.shamt = sh;
+    insn.IS.imm6_0 = imm;
+    ii(insn.w);
+}
+#  endif
+
+/*
+ * Emit one S-type instruction word.  The immediate (checked with
+ * simm12_p) is split into its low 5 bits and the following 7 bits.
+ */
+static void
+_Stype(jit_state_t *_jit, jit_int32_t op, jit_int32_t fct,
+       jit_int32_t rs1, jit_int32_t rs2, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(!(rs2 & ~0x1f));
+    assert(simm12_p(imm));
+
+    insn.S.opcode = op;
+    insn.S.imm4_0 = imm & 0x1f;                /* imm[4:0]  */
+    insn.S.funct3 = fct;
+    insn.S.rs1 = rs1;
+    insn.S.rs2 = rs2;
+    insn.S.imm11_5 = (imm >> 5) & 0x7f;        /* imm[11:5] */
+    ii(insn.w);
+}
+
+/*
+ * Emit one B-type (branch) instruction word.  The offset must be even
+ * and satisfy simm12_p(); its bits 12, 11, 10:5 and 4:1 are scattered
+ * into the corresponding instruction fields.
+ */
+static void
+_Btype(jit_state_t *_jit, jit_int32_t op, jit_int32_t fct,
+       jit_int32_t rs1, jit_int32_t rs2, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(fct & ~0x07));
+    assert(!(rs1 & ~0x1f));
+    assert(!(rs2 & ~0x1f));
+    assert(!(imm & 1) && simm12_p(imm));
+
+    insn.B.opcode = op;
+    insn.B.imm11 = (imm >> 11) & 0x1;
+    insn.B.imm4_1 = (imm >> 1) & 0xf;
+    insn.B.funct3 = fct;
+    insn.B.rs1 = rs1;
+    insn.B.rs2 = rs2;
+    insn.B.imm10_5 = (imm >> 5) & 0x3f;
+    insn.B.imm12 = (imm >> 12) & 0x1;
+    ii(insn.w);
+}
+
+/*
+ * Emit one U-type instruction word: opcode, rd and an upper immediate
+ * that must satisfy simm20_p().
+ */
+static void
+_Utype(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(simm20_p(imm));
+
+    insn.U.opcode = op;
+    insn.U.rd = rd;
+    insn.U.imm12_31 = imm;
+    ii(insn.w);
+}
+
+/*
+ * Emit one J-type (jump) instruction word.  The offset must be even and
+ * lie in [-1048576, 1048575]; bits 20, 19:12, 11 and 10:1 are placed in
+ * their respective fields.
+ */
+static void
+_Jtype(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t imm)
+{
+    instr_t    insn;
+
+    assert(!(op & ~0x7f));
+    assert(!(rd & ~0x1f));
+    assert(!(imm & 1) && imm <= 1048575 && imm >= -1048576);
+
+    insn.J.opcode = op;
+    insn.J.rd = rd;
+    insn.J.imm19_12 = (imm >> 12) & 0xff;
+    insn.J.imm11 = (imm >> 11) & 0x1;
+    insn.J.imm10_1 = (imm >> 1) & 0x3ff;
+    insn.J.imm20 = (imm >> 20) & 0x1;
+    ii(insn.w);
+}
+
+/*
+ * Emit NOPs covering `im` bytes, one NOP per 4 bytes.  A positive `im`
+ * that is not a multiple of 4 trips the final assertion.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t im)
+{
+    while (im > 0) {
+       NOP();
+       im -= 4;
+    }
+    assert(im == 0);
+}
+
+/*
+ * r0 = r1 + i0.  Uses a single ADDI when the immediate satisfies
+ * simm12_p(); otherwise the constant is loaded into a scratch register.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (simm12_p(i0)) {
+       ADDI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + r2, recording the carry in the jit_carry register (allocated
+ * on first use).  The carry is computed as (sum < r1) unsigned, so when
+ * r0 aliases r1 the sum goes through a scratch register first.
+ */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t        sum;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addr(r0, r1, r2);
+       SLTU(rn(jit_carry), r0, r1);
+    }
+    else {
+       sum = jit_get_reg(jit_class_gpr);
+       addr(rn(sum), r1, r2);
+       SLTU(rn(jit_carry), rn(sum), r1);
+       movr(r0, rn(sum));
+       jit_unget_reg(sum);
+    }
+}
+
+/*
+ * r0 = r1 + i0, recording the carry in jit_carry (allocated on first
+ * use) as (sum < r1) unsigned.  A scratch register holds the sum when r0
+ * aliases r1 so that r1 is still intact for the comparison.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        sum;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, i0);
+       SLTU(rn(jit_carry), r0, r1);
+    }
+    else {
+       sum = jit_get_reg(jit_class_gpr);
+       addi(rn(sum), r1, i0);
+       SLTU(rn(jit_carry), rn(sum), r1);
+       movr(r0, rn(sum));
+       jit_unget_reg(sum);
+    }
+}
+
+/*
+ * r0 = r1 + r2 + carry-in.  The incoming carry is copied to a scratch
+ * register because addcr rewrites jit_carry.
+ * NOTE(review): the carry produced by the first addcr is overwritten by
+ * the second one -- confirm that this is the intended carry-out.
+ */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t        carry_in;
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    movr(rn(carry_in), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 + i0 + carry-in.  The incoming carry is saved in a scratch
+ * register before addci/addcr rewrite jit_carry.
+ * NOTE(review): the carry produced by addci is overwritten by the
+ * following addcr -- confirm that this is the intended carry-out.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        carry_in;
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    movr(rn(carry_in), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 - i0.  When -i0 satisfies simm12_p() this is an ADDI of the
+ * negated immediate; otherwise i0 is loaded into a scratch register and
+ * subtracted.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (simm12_p(-i0)) {
+       ADDI(r0, r1, -i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - r2, recording the borrow in jit_carry (allocated on first
+ * use) as (r1 < difference) unsigned.  When r0 aliases r1 the difference
+ * is built in a scratch register so r1 survives for the comparison.
+ */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t        diff;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       SLTU(rn(jit_carry), r1, r0);
+    }
+    else {
+       diff = jit_get_reg(jit_class_gpr);
+       subr(rn(diff), r1, r2);
+       SLTU(rn(jit_carry), r1, rn(diff));
+       movr(r0, rn(diff));
+       jit_unget_reg(diff);
+    }
+}
+
+/*
+ * r0 = r1 - i0, recording the borrow in jit_carry (allocated on first
+ * use) as (r1 < difference) unsigned.  A scratch register is used for
+ * the difference when r0 aliases r1.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        diff;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subi(r0, r1, i0);
+       SLTU(rn(jit_carry), r1, r0);
+    }
+    else {
+       diff = jit_get_reg(jit_class_gpr);
+       subi(rn(diff), r1, i0);
+       SLTU(rn(jit_carry), r1, rn(diff));
+       movr(r0, rn(diff));
+       jit_unget_reg(diff);
+    }
+}
+
+/*
+ * r0 = r1 - r2 - borrow-in.  The incoming borrow is saved in a scratch
+ * register because subcr rewrites jit_carry.
+ * NOTE(review): the borrow produced by the first subcr is overwritten by
+ * the second one -- confirm that this is the intended borrow-out.
+ */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t        borrow_in;
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    movr(rn(borrow_in), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/*
+ * r0 = r1 - i0 - borrow-in.  The incoming borrow is saved in a scratch
+ * register before subci/subcr rewrite jit_carry.
+ * NOTE(review): the borrow produced by subci is overwritten by the
+ * following subcr -- confirm that this is the intended borrow-out.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        borrow_in;
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    movr(rn(borrow_in), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/* Reverse subtract: r0 = i0 - r1, computed as -(r1 - i0). */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);          /* r0 = r1 - i0 */
+    negr(r0, r0);              /* r0 = -r0     */
+}
+
+/* r0 = r1 * i0; the constant is always loaded into a scratch register. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 / i0 via divr, with the divisor held in a scratch register. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 / i0 via divr_u, with the divisor held in a scratch register. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 % i0 via remr, with the divisor held in a scratch register. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 % i0 via remr_u, with the divisor held in a scratch register. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Double-width multiply: r0 receives mulr(r2, r3) and r1 receives
+ * MULH(r2, r3).  If r0 aliases a source operand the mulr result is kept
+ * in a scratch register until MULH has read both sources.
+ */
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t        tmp;
+    jit_int32_t        lo;
+    jit_int32_t        clash = (r0 == r2 || r0 == r3);
+    if (clash) {
+       tmp = jit_get_reg(jit_class_gpr);
+       lo = rn(tmp);
+    }
+    else
+       lo = r0;
+    mulr(lo, r2, r3);
+    MULH(r1, r2, r3);
+    if (clash) {
+       movr(r0, lo);
+       jit_unget_reg(tmp);
+    }
+}
+
+/* qmulr with an immediate multiplier loaded into a scratch register. */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Unsigned double-width multiply: r0 receives mulr(r2, r3) and r1
+ * receives MULHU(r2, r3).  If r0 aliases a source operand the mulr result
+ * is kept in a scratch register until MULHU has read both sources.
+ */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t        tmp;
+    jit_int32_t        lo;
+    jit_int32_t        clash = (r0 == r2 || r0 == r3);
+    if (clash) {
+       tmp = jit_get_reg(jit_class_gpr);
+       lo = rn(tmp);
+    }
+    else
+       lo = r0;
+    mulr(lo, r2, r3);
+    MULHU(r1, r2, r3);
+    if (clash) {
+       movr(r0, lo);
+       jit_unget_reg(tmp);
+    }
+}
+
+/* qmulr_u with an immediate multiplier loaded into a scratch register. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Combined quotient/remainder: r0 = r2 / r3 (divr when `sign` is set,
+ * divr_u otherwise) and r1 = r2 - r3 * quotient.  An output register
+ * that aliases either input is replaced by a scratch register while the
+ * inputs are still needed, then copied back.
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_bool_t sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t        qsave, qreg;
+    jit_int32_t        rsave, rreg;
+
+    /* Pick the register that will receive the quotient. */
+    if (r0 != r2 && r0 != r3)
+       qreg = r0;
+    else {
+       qsave = jit_get_reg(jit_class_gpr);
+       qreg = rn(qsave);
+    }
+    /* Pick the register that will receive the remainder. */
+    if (r1 != r2 && r1 != r3)
+       rreg = r1;
+    else {
+       rsave = jit_get_reg(jit_class_gpr);
+       rreg = rn(rsave);
+    }
+
+    if (sign)
+       divr(qreg, r2, r3);
+    else
+       divr_u(qreg, r2, r3);
+    mulr(rreg, r3, qreg);
+    subr(rreg, r2, rreg);
+
+    /* Copy results back and release any scratch registers. */
+    if (qreg != r0) {
+       movr(r0, qreg);
+       jit_unget_reg(qsave);
+    }
+    if (rreg != r1) {
+       movr(r1, rreg);
+       jit_unget_reg(rsave);
+    }
+}
+
+/* qdivr with an immediate divisor loaded into a scratch register. */
+static void
+_qdivi(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qdivr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* qdivr_u with an immediate divisor loaded into a scratch register. */
+static void
+_qdivi_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t        tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qdivr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 << i0.  A zero shift degenerates to a register move; any other
+ * amount must lie in 1..63 and is emitted as SLLI.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    SLLI(r0, r1, i0);
+}
+
+/*
+ * r0 = r1 >> i0 using SRAI.  A zero shift degenerates to a register
+ * move; any other amount must lie in 1..63.
+ */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    SRAI(r0, r1, i0);
+}
+
+/*
+ * r0 = r1 >> i0 using SRLI.  A zero shift degenerates to a register
+ * move; any other amount must lie in 1..63.
+ */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    assert(i0 > 0 && i0 < 64);
+    SRLI(r0, r1, i0);
+}
+
+/*
+ * r0 = r1 & i0.  Uses ANDI when the immediate satisfies simm12_p();
+ * otherwise the mask is loaded into a scratch register.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (simm12_p(i0)) {
+       ANDI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    andr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 | i0.  Uses ORI when the immediate satisfies simm12_p();
+ * otherwise the constant is loaded into a scratch register.
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (simm12_p(i0)) {
+       ORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    orr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 ^ i0.  Uses XORI when the immediate satisfies simm12_p();
+ * otherwise the constant is loaded into a scratch register.
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (simm12_p(i0)) {
+       XORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * DEFLD(T, O) generates the three non-trivial load helpers for type
+ * suffix T using the load instruction L##O:
+ *   _ldi_T  (r0, i0)     load from absolute address i0; uses L##O off
+ *                        _ZERO_REGNO when i0 satisfies simm12_p(),
+ *                        otherwise loads the address into a scratch
+ *                        register and calls ldr_T.
+ *   _ldxr_T (r0, r1, r2) load from r1 + r2; the sum is always formed in
+ *                        a scratch register.
+ *   _ldxi_T (r0, r1, i0) load from r1 + i0; uses the instruction's
+ *                        offset field when i0 satisfies simm12_p(),
+ *                        otherwise forms the address in a scratch
+ *                        register.
+ */
+#  define DEFLD(T,O)                                                   \
+static void                                                            \
+_ldi_##T(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)             \
+{                                                                      \
+    if (simm12_p(i0))                                                  \
+       L##O(r0, _ZERO_REGNO, i0);                                      \
+    else {                                                             \
+       jit_int32_t     t0;                                             \
+       t0 = jit_get_reg(jit_class_gpr);                                \
+       movi(rn(t0), i0);                                               \
+       ldr_##T(r0, rn(t0));                                            \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+}                                                                      \
+                                                                       \
+static void                                                            \
+_ldxr_##T(jit_state_t *_jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)\
+{                                                                      \
+    jit_int32_t        t0;                                                     \
+    t0 = jit_get_reg(jit_class_gpr);                                   \
+    addr(rn(t0), r1, r2);                                              \
+    ldr_##T(r0, rn(t0));                                               \
+    jit_unget_reg(t0);                                                 \
+}                                                                      \
+                                                                       \
+static void                                                            \
+_ldxi_##T(jit_state_t *_jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)\
+{                                                                      \
+    if (simm12_p(i0))                                                  \
+       L##O(r0, r1, i0);                                               \
+    else {                                                             \
+       jit_int32_t     t0;                                             \
+       t0 = jit_get_reg(jit_class_gpr);                                \
+       addi(rn(t0), r1, i0);                                           \
+       ldr_##T(r0, rn(t0));                                            \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+}
+
+/* Instantiations: c->LB, uc->LBU, s->LH, us->LHU, i->LW, ui->LWU, l->LD. */
+DEFLD(c,B)
+DEFLD(uc,BU)
+DEFLD(s,H)
+DEFLD(us,HU)
+DEFLD(i,W)
+DEFLD(ui,WU)
+DEFLD(l,D)
+
+/*
+ * DEFST(T, O) generates the three non-trivial store helpers for type
+ * suffix T using the store instruction S##O:
+ *   _sti_T  (i0, r0)     store r0 at absolute address i0; uses S##O off
+ *                        _ZERO_REGNO when i0 satisfies simm12_p(),
+ *                        otherwise loads the address into a scratch
+ *                        register and calls str_T.
+ *   _stxr_T (r0, r1, r2) store r2 at r0 + r1; the sum is always formed
+ *                        in a scratch register.
+ *   _stxi_T (i0, r0, r1) store r1 at r0 + i0; uses the instruction's
+ *                        offset field when i0 satisfies simm12_p(),
+ *                        otherwise forms the address in a scratch
+ *                        register.
+ */
+#  define DEFST(T, O)                                                  \
+static void                                                            \
+_sti_##T(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)             \
+{                                                                      \
+    if (simm12_p(i0))                                                  \
+       S##O(_ZERO_REGNO, r0, i0);                                      \
+    else {                                                             \
+       jit_int32_t     t0;                                             \
+       t0 = jit_get_reg(jit_class_gpr);                                \
+       movi(rn(t0), i0);                                               \
+       str_##T(rn(t0), r0);                                            \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+}                                                                      \
+                                                                       \
+static void                                                            \
+_stxr_##T(jit_state_t *_jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)\
+{                                                                      \
+    jit_int32_t        t0;                                                     \
+    t0 = jit_get_reg(jit_class_gpr);                                   \
+    addr(rn(t0), r0, r1);                                              \
+    str_##T(rn(t0), r2);                                               \
+    jit_unget_reg(t0);                                                 \
+}                                                                      \
+                                                                       \
+static void                                                            \
+_stxi_##T(jit_state_t *_jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)\
+{                                                                      \
+    if (simm12_p(i0))                                                  \
+       S##O(r0, r1, i0);                                               \
+    else {                                                             \
+       jit_int32_t     t0;                                             \
+       t0 = jit_get_reg(jit_class_gpr);                                \
+       addi(rn(t0), r0, i0);                                           \
+       str_##T(rn(t0), r1);                                            \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+}
+
+/* Instantiations: c->SB, s->SH, i->SW, l->SD. */
+DEFST(c, B)
+DEFST(s, H)
+DEFST(i, W)
+DEFST(l, D)
+
+/*
+ * Swap the two low bytes of r1 into r0: the result is
+ * ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff).  The high byte is extracted
+ * into a scratch register before r0 is written, so r0 may alias r1.
+ */
+static void
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t        hibyte = jit_get_reg(jit_class_gpr);
+    rshi(rn(hibyte), r1, 8);
+    andi(r0, r1, 0xff);
+    andi(rn(hibyte), rn(hibyte), 0xff);
+    lshi(r0, r0, 8);
+    orr(r0, r0, rn(hibyte));
+    jit_unget_reg(hibyte);
+}
+
+/*
+ * Reverse the four low bytes of r1 into r0.  Bytes 3, 2 and 1 of r1 are
+ * first isolated in three scratch registers; only then is r0 written, so
+ * the sequence still works when r0 and r1 are the same register.
+ */
+static void
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* t0 = byte 3, t1 = byte 2, t2 = byte 1 of the source. */
+    rshi(rn(t0), r1, 24);
+    rshi(rn(t1), r1, 16);
+    rshi(rn(t2), r1,  8);
+    andi(rn(t0), rn(t0), 0xff);
+    andi(rn(t1), rn(t1), 0xff);
+    andi(rn(t2), rn(t2), 0xff);
+    /* Last read of r1: byte 0 moves to bits 31:24 of the result. */
+    andi(r0, r1, 0xff);
+    lshi(r0, r0, 24);
+    /* Byte 2 goes to bits 15:8, byte 3 to bits 7:0, byte 1 to 23:16. */
+    lshi(rn(t1), rn(t1), 8);
+    orr(r0, r0, rn(t0));
+    lshi(rn(t2), rn(t2), 16);
+    orr(r0, r0, rn(t1));
+    orr(r0, r0, rn(t2));
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Reverse the eight bytes of r1 into r0 by byte-swapping each 32-bit
+ * half with htonr_ui and exchanging the halves.  The upper half is
+ * copied to a scratch register before r0 is written.
+ */
+static void
+_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t        upper = jit_get_reg(jit_class_gpr);
+    rshi_u(rn(upper), r1, 32);
+    htonr_ui(r0, r1);
+    htonr_ui(rn(upper), rn(upper));
+    lshi(r0, r0, 32);
+    orr(r0, r0, rn(upper));
+    jit_unget_reg(upper);
+}
+
+/* Shift r1 left by 56 into r0, then shift back with rshi. */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = 56;
+    lshi(r0, r1, shift);
+    rshi(r0, r0, shift);
+}
+
+/* Shift r1 left by 48 into r0, then shift back with rshi. */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = 48;
+    lshi(r0, r1, shift);
+    rshi(r0, r0, shift);
+}
+
+/* Shift r1 left by 48 into r0, then shift back with rshi_u. */
+static void
+_extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = 48;
+    lshi(r0, r1, shift);
+    rshi_u(r0, r0, shift);
+}
+
+/* Shift r1 left by 32 into r0, then shift back with rshi_u. */
+static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = 32;
+    lshi(r0, r1, shift);
+    rshi_u(r0, r0, shift);
+}
+
+/*
+ * Load the constant i0 into r0.
+ *
+ * Values accepted by simm32_p() are split into a sign-extended low
+ * 12-bit part (lo) and the remainder (hi): LUI supplies hi, ADDIW adds
+ * lo, and either instruction is omitted when its part is zero (a zero
+ * constant is still emitted as ADDIW from _ZERO_REGNO).
+ *
+ * Other values are split into two 32-bit halves, each loaded with a
+ * recursive movi; the upper half goes through a scratch register, is
+ * shifted left by 32 and added to r0.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (simm32_p(i0)) {
+       /* lo: low 12 bits of i0, sign extended; hi: what LUI must supply. */
+       jit_int32_t     lo = (jit_int32_t)i0 << 20 >> 20;
+       jit_int32_t     hi = i0 - lo;
+       if (hi) {
+           LUI(r0, hi >> 12);
+           if (lo)
+               ADDIW(r0, r0, lo);
+       }
+       else
+           ADDIW(r0, _ZERO_REGNO, lo);
+    }
+    else {
+       /* lo: low 32 bits of i0, sign extended; hi compensates for that. */
+       jit_int32_t     lo = i0 << 32 >> 32;
+       jit_word_t      hi = i0 - lo;
+       jit_int32_t     t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), (jit_int32_t)(hi >> 32));
+       movi(r0, lo);
+       lshi(rn(t0), rn(t0), 32);
+       addr(r0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+/*
+ * Load i0 into r0 with a fixed six-instruction sequence
+ * (LUI, ADDIW, LUI, ADDIW, SLLI, ADD) that is emitted unconditionally,
+ * whatever the value of i0.  Returns the code address (_jit->pc.w) at
+ * which the sequence starts -- presumably so the constant can be patched
+ * later; confirm against _patch_at.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    /* Low 32 bits of i0, split into LUI (hi) and ADDIW (lo) parts. */
+    jit_int32_t                ww = i0 << 32 >> 32;
+    jit_int32_t                lo = ww << 20 >> 20;
+    jit_int32_t                hi = ww - lo;
+    w = _jit->pc.w;
+    t0 = jit_get_reg(jit_class_gpr);
+    LUI(r0, hi >> 12);
+    ADDIW(r0, r0, lo);
+    /* High 32 bits of i0, built in the scratch register the same way. */
+    ww = i0 >> 32;
+    lo = ww << 20 >> 20;
+    hi = ww - lo;
+    LUI(rn(t0), hi >> 12);
+    ADDIW(rn(t0), rn(t0), lo);
+    /* Combine: r0 += scratch << 32. */
+    SLLI(rn(t0), rn(t0), 32);
+    ADD(r0, r0, rn(t0));
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/*
+ * r0 = (r1 < i0), signed.  Uses SLTI when the immediate satisfies
+ * simm12_p(); otherwise the constant is loaded into a scratch register
+ * and compared with ltr.
+ *
+ * The constant must go into the scratch register, not r0: loading it
+ * into r0 would leave the scratch operand of the compare uninitialised
+ * and would destroy r1 whenever r0 and r1 are the same register.
+ */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (simm12_p(i0))
+       SLTI(r0, r1, i0);
+    else {
+       jit_int32_t     t0;
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i0);
+       ltr(r0, r1, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+/*
+ * r0 = (r1 < i0), unsigned.  Uses SLTIU when the immediate satisfies
+ * simm12_p(); otherwise the constant is loaded into a scratch register
+ * and compared with ltr_u.
+ *
+ * The constant must go into the scratch register, not r0: loading it
+ * into r0 would leave the scratch operand of the compare uninitialised
+ * and would destroy r1 whenever r0 and r1 are the same register.
+ */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (simm12_p(i0))
+       SLTIU(r0, r1, i0);
+    else {
+       jit_int32_t     t0;
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i0);
+       ltr_u(r0, r1, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+/* r0 = (r1 <= r2), signed: computed as the inverse of (r2 < r1). */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r2, r1);           /* r0 = r2 < r1 */
+    XORI(r0, r0, 1);           /* flip 0 <-> 1 */
+}
+
+/*
+ * r0 = (r1 <= i0), signed.  A zero immediate compares against
+ * _ZERO_REGNO directly; any other value is loaded into a scratch
+ * register and handed to ler.
+ */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        tmp;
+    if (i0 != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ler(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SLT(r0, _ZERO_REGNO, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= r2), unsigned: computed as the inverse of (r2 < r1). */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r2, r1);          /* r0 = r2 < r1 */
+    XORI(r0, r0, 1);           /* flip 0 <-> 1 */
+}
+
+/*
+ * Emit r0 = (r1 <= i0), unsigned.  A zero immediate compares directly
+ * against the zero register; any other value goes through a scratch
+ * register and ler_u.
+ */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+       jit_int32_t     tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ler_u(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else {
+       /* !(0 < r1) */
+       SLTU(r0, _ZERO_REGNO, r1);
+       XORI(r0, r0, 1);
+    }
+}
+
+/* Emit r0 = (r1 == r2): the difference is tested for non-zero, then the
+ * resulting bit is inverted. */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    /* r0 = (0 < r0) unsigned, i.e. r0 != 0 */
+    SLTU(r0, _ZERO_REGNO, r0);
+    XORI(r0, r0, 1);
+}
+
+/*
+ * Emit r0 = (r1 == i0).  For a non-zero immediate the difference r1 - i0
+ * is computed in r0 first; the tested value is then checked for non-zero
+ * and the resulting bit inverted.
+ */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* register whose value is compared against zero */
+    jit_int32_t                src = r1;
+    if (i0) {
+       subi(r0, r1, i0);
+       src = r0;
+    }
+    SLTU(r0, _ZERO_REGNO, src);
+    XORI(r0, r0, 1);
+}
+
+/* Emit r0 = (r1 >= r2), signed: compute r1 < r2, then flip bit 0. */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r1, r2);
+    XORI(r0, r0, 1);
+}
+
+/* Emit r0 = (r1 >= i0), signed, by loading i0 into a scratch register
+ * and deferring to ger. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ger(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit r0 = (r1 >= r2), unsigned: compute r1 < r2, then flip bit 0. */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r1, r2);
+    XORI(r0, r0, 1);
+}
+
+/* Emit r0 = (r1 >= i0), unsigned, by loading i0 into a scratch register
+ * and deferring to ger_u. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ger_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit r0 = (r1 > i0), signed: i0 is loaded into a scratch register and
+ * the comparison is done as (i0 < r1). */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    /* The immediate must go in the scratch register: loading it in r0
+     * would leave t0 undefined and clobber r1 when r0 == r1. */
+    movi(rn(t0), i0);
+    ltr(r0, rn(t0), r1);
+    jit_unget_reg(t0);
+}
+
+/* Emit r0 = (r1 > i0), unsigned: i0 is loaded into a scratch register and
+ * the comparison is done as (i0 < r1). */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t        t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    /* The immediate must go in the scratch register: loading it in r0
+     * would leave t0 undefined and clobber r1 when r0 == r1. */
+    movi(rn(t0), i0);
+    ltr_u(r0, rn(t0), r1);
+    jit_unget_reg(t0);
+}
+
+/* Emit r0 = (r1 != r2): the difference is tested for non-zero. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    /* r0 = (0 < r0) unsigned, i.e. r0 != 0 */
+    SLTU(r0, _ZERO_REGNO, r0);
+}
+
+/*
+ * Emit r0 = (r1 != i0).  For a non-zero immediate the difference r1 - i0
+ * is computed in r0 first; the tested value is then checked for non-zero.
+ */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* register whose value is compared against zero */
+    jit_int32_t                src = r1;
+    if (i0) {
+       subi(r0, r1, i0);
+       src = r0;
+    }
+    SLTU(r0, _ZERO_REGNO, src);
+}
+
+/* Emit a signed "branch to br if r0 < r1"; returns the code address of
+ * the branch instruction. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BLT(r0, r1, br - at);
+    return (at);
+}
+
+/* Immediate form of bltr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bltr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit an unsigned "branch to br if r0 < r1"; returns the code address
+ * of the branch instruction. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BLTU(r0, r1, br - at);
+    return (at);
+}
+
+/* Immediate form of bltr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bltr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit a signed "branch to br if r0 <= r1", encoded as r1 >= r0;
+ * returns the code address of the branch instruction. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BGE(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of bler: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bler(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit an unsigned "branch to br if r0 <= r1", encoded as r1 >= r0;
+ * returns the code address of the branch instruction. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BGEU(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of bler_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bler_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit "branch to br if r0 == r1"; returns the code address of the
+ * branch instruction. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BEQ(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of beqr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = beqr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit a signed "branch to br if r0 >= r1"; returns the code address of
+ * the branch instruction. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BGE(r0, r1, br - at);
+    return (at);
+}
+
+/* Immediate form of bger: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bger(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit an unsigned "branch to br if r0 >= r1"; returns the code address
+ * of the branch instruction. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BGEU(r0, r1, br - at);
+    return (at);
+}
+
+/* Immediate form of bger_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bger_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit a signed "branch to br if r0 > r1", encoded as r1 < r0; returns
+ * the code address of the branch instruction. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BLT(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of bgtr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bgtr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit an unsigned "branch to br if r0 > r1", encoded as r1 < r0;
+ * returns the code address of the branch instruction. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BLTU(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of bgtr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bgtr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit "branch to br if r0 != r1"; returns the code address of the
+ * branch instruction. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    /* displacement is relative to the branch instruction itself */
+    jit_word_t         at = _jit->pc.w;
+    BNE(r1, r0, br - at);
+    return (at);
+}
+
+/* Immediate form of bner: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_reg_t          tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bner(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 + r1 followed by a branch to br taken when the signed
+ * addition overflowed.  Returns the code address of that final branch.
+ */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         sel, skip, at;
+    jit_int32_t                neg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                old = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* neg = r1 < 0 */
+    SLT(rn(neg), r1, _ZERO_REGNO);
+    /* old = r0 before the addition */
+    movr(rn(old), r0);
+    addr(r0, r0, r1);
+    /* overflow = r1 < 0 ? old < r0 : r0 < old */
+    sel = _jit->pc.w;
+    BNE(rn(neg), _ZERO_REGNO, 0);
+    /* r1 >= 0: the sum must not be below the old value */
+    SLT(rn(old), r0, rn(old));
+    skip = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);
+    /* r1 < 0: the sum must not be above the old value */
+    patch_at(sel, _jit->pc.w);
+    SLT(rn(old), rn(old), r0);
+    /* both paths join here with the overflow flag in old */
+    patch_at(skip, _jit->pc.w);
+    at = _jit->pc.w;
+    BNE(rn(old), _ZERO_REGNO, br - at);
+    jit_unget_reg(old);
+    jit_unget_reg(neg);
+    return (at);
+}
+
+/* Immediate form of boaddr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = boaddr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 + r1 followed by a branch to br taken when the unsigned
+ * addition carried (the sum is below the original r0).  Returns the code
+ * address of the branch instruction.
+ */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                sum = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                cry = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* compute in a scratch so the original r0 is still available */
+    addr(rn(sum), r0, r1);
+    SLTU(rn(cry), rn(sum), r0);
+    movr(r0, rn(sum));
+    at = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(cry), br - at);
+    jit_unget_reg(cry);
+    jit_unget_reg(sum);
+    return (at);
+}
+
+/* Immediate form of boaddr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = boaddr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 + r1 followed by a branch to br taken when the signed
+ * addition did NOT overflow.  Returns the code address of that final
+ * branch.
+ */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         sel, skip, at;
+    jit_int32_t                neg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                old = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* neg = r1 < 0 */
+    SLT(rn(neg), r1, _ZERO_REGNO);
+    /* old = r0 before the addition */
+    movr(rn(old), r0);
+    addr(r0, r0, r1);
+    /* overflow = r1 < 0 ? old < r0 : r0 < old */
+    sel = _jit->pc.w;
+    BNE(rn(neg), _ZERO_REGNO, 0);
+    /* r1 >= 0: the sum must not be below the old value */
+    SLT(rn(old), r0, rn(old));
+    skip = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);
+    /* r1 < 0: the sum must not be above the old value */
+    patch_at(sel, _jit->pc.w);
+    SLT(rn(old), rn(old), r0);
+    /* both paths join here with the overflow flag in old */
+    patch_at(skip, _jit->pc.w);
+    at = _jit->pc.w;
+    BEQ(rn(old), _ZERO_REGNO, br - at);
+    jit_unget_reg(old);
+    jit_unget_reg(neg);
+    return (at);
+}
+
+/* Immediate form of bxaddr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bxaddr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 + r1 followed by a branch to br taken when the unsigned
+ * addition did NOT carry.  Returns the code address of the branch
+ * instruction.
+ */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                sum = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                cry = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* compute in a scratch so the original r0 is still available */
+    addr(rn(sum), r0, r1);
+    SLTU(rn(cry), rn(sum), r0);
+    movr(r0, rn(sum));
+    at = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(cry), br - at);
+    jit_unget_reg(cry);
+    jit_unget_reg(sum);
+    return (at);
+}
+
+/* Immediate form of bxaddr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bxaddr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 - r1 followed by a branch to br taken when the signed
+ * subtraction overflowed.  Returns the code address of that final branch.
+ */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         sel, skip, at;
+    jit_int32_t                pos = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                old = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* pos = 0 < r1 */
+    SLT(rn(pos), _ZERO_REGNO, r1);
+    /* old = r0 before the subtraction */
+    movr(rn(old), r0);
+    subr(r0, r0, r1);
+    /* overflow = 0 < r1 ? old < r0 : r0 < old */
+    sel = _jit->pc.w;
+    BNE(rn(pos), _ZERO_REGNO, 0);
+    /* r1 <= 0: the difference must not be below the old value */
+    SLT(rn(old), r0, rn(old));
+    skip = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);
+    /* r1 > 0: the difference must not be above the old value */
+    patch_at(sel, _jit->pc.w);
+    SLT(rn(old), rn(old), r0);
+    /* both paths join here with the overflow flag in old */
+    patch_at(skip, _jit->pc.w);
+    at = _jit->pc.w;
+    BNE(rn(old), _ZERO_REGNO, br - at);
+    jit_unget_reg(old);
+    jit_unget_reg(pos);
+    return (at);
+}
+
+/* Immediate form of bosubr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bosubr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 - r1 followed by a branch to br taken when the unsigned
+ * subtraction borrowed (the difference is above the original r0).
+ * Returns the code address of the branch instruction.
+ */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                dif = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                bor = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* compute in a scratch so the original r0 is still available */
+    subr(rn(dif), r0, r1);
+    SLTU(rn(bor), r0, rn(dif));
+    movr(r0, rn(dif));
+    at = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(bor), br - at);
+    jit_unget_reg(bor);
+    jit_unget_reg(dif);
+    return (at);
+}
+
+/* Immediate form of bosubr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bosubr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 - r1 followed by a branch to br taken when the signed
+ * subtraction did NOT overflow.  Returns the code address of that final
+ * branch.
+ */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         sel, skip, at;
+    jit_int32_t                pos = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                old = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* pos = 0 < r1 */
+    SLT(rn(pos), _ZERO_REGNO, r1);
+    /* old = r0 before the subtraction */
+    movr(rn(old), r0);
+    subr(r0, r0, r1);
+    /* overflow = 0 < r1 ? old < r0 : r0 < old */
+    sel = _jit->pc.w;
+    BNE(rn(pos), _ZERO_REGNO, 0);
+    /* r1 <= 0: the difference must not be below the old value */
+    SLT(rn(old), r0, rn(old));
+    skip = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);
+    /* r1 > 0: the difference must not be above the old value */
+    patch_at(sel, _jit->pc.w);
+    SLT(rn(old), rn(old), r0);
+    /* both paths join here with the overflow flag in old */
+    patch_at(skip, _jit->pc.w);
+    at = _jit->pc.w;
+    BEQ(rn(old), _ZERO_REGNO, br - at);
+    jit_unget_reg(old);
+    jit_unget_reg(pos);
+    return (at);
+}
+
+/* Immediate form of bxsubr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bxsubr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit r0 = r0 - r1 followed by a branch to br taken when the unsigned
+ * subtraction did NOT borrow.  Returns the code address of the branch
+ * instruction.
+ */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                dif = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                bor = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    /* compute in a scratch so the original r0 is still available */
+    subr(rn(dif), r0, r1);
+    SLTU(rn(bor), r0, rn(dif));
+    movr(r0, rn(dif));
+    at = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(bor), br - at);
+    jit_unget_reg(bor);
+    jit_unget_reg(dif);
+    return (at);
+}
+
+/* Immediate form of bxsubr_u: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bxsubr_u(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit "branch to br if (r0 & r1) != 0"; returns the code address of the
+ * branch instruction. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(bits), r0, r1);
+    at = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(bits), br - at);
+    jit_unget_reg(bits);
+    return (at);
+}
+
+/* Immediate form of bmsr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bmsr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/* Emit "branch to br if (r0 & r1) == 0"; returns the code address of the
+ * branch instruction. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    AND(rn(bits), r0, r1);
+    at = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(bits), br - at);
+    jit_unget_reg(bits);
+    return (at);
+}
+
+/* Immediate form of bmcr: i0 is loaded into a scratch register first.
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i0);
+    at = bmcr(br, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit an unconditional jump to the absolute address i0: a single JAL
+ * (discarding the link) when the pc-relative displacement passes
+ * simm20_p, otherwise the address is loaded into a scratch register and
+ * jumped through.
+ */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         dsp = i0 - _jit->pc.w;
+    if (!simm20_p(dsp)) {
+       jit_int32_t     tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(tmp), i0);
+       jmpr(rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       JAL(_ZERO_REGNO, dsp);
+}
+
+/* Emit a jump through a scratch register loaded with movi_p; returns the
+ * address movi_p reported for its load sequence. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    at = movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit a call to the absolute address i0: a single JAL linking in RA
+ * when the pc-relative displacement passes simm20_p, otherwise the
+ * address is loaded into a scratch register and called through.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         dsp = i0 - _jit->pc.w;
+    if (!simm20_p(dsp)) {
+       jit_int32_t     tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(tmp), i0);
+       callr(rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       JAL(_RA_REGNO, dsp);
+}
+
+/* Emit a call through a scratch register loaded with movi_p; returns the
+ * address movi_p reported for its load sequence. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    at = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Emit the function prologue: finalize the frame sizes recorded in
+ * _jitc->function, reserve stack_framesize bytes, store RA, FP and every
+ * callee-save register flagged in function->regset at fixed offsets from
+ * SP, set FP, reserve the local stack area and, for varargs functions,
+ * spill the remaining argument registers.  The offsets used here are the
+ * ones _epilog reloads from.  The node argument is not used.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       /* with assume_frame no prologue code is emitted at all */
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             /* align stack at 16 bytes */
+                             _jitc->function->self.aoff) + 15) & -16;
+    /* fixed save area: RA at 0, FP at 8, then the flagged registers */
+    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    stxi(0, _SP_REGNO, _RA_REGNO);
+    stxi(8, _SP_REGNO, _FP_REGNO);
+    /* integer registers 9 and 18..27, only when flagged in regset */
+    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
+       stxi(16, _SP_REGNO, 9);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
+       stxi(24, _SP_REGNO, 18);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
+       stxi(32, _SP_REGNO, 19);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
+       stxi(40, _SP_REGNO, 20);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
+       stxi(48, _SP_REGNO, 21);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
+       stxi(56, _SP_REGNO, 22);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
+       stxi(64, _SP_REGNO, 23);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
+       stxi(72, _SP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
+       stxi(80, _SP_REGNO, 25);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
+       stxi(88, _SP_REGNO, 26);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
+       stxi(96, _SP_REGNO, 27);
+    /* floating-point registers 8, 9 and 18..27, stored as doubles */
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
+       stxi_d(104, _SP_REGNO, 8);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
+       stxi_d(112, _SP_REGNO, 9);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
+       stxi_d(120, _SP_REGNO, 18);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
+       stxi_d(128, _SP_REGNO, 19);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
+       stxi_d(136, _SP_REGNO, 20);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
+       stxi_d(144, _SP_REGNO, 21);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
+       stxi_d(152, _SP_REGNO, 22);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
+       stxi_d(160, _SP_REGNO, 23);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
+       stxi_d(168, _SP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
+       stxi_d(176, _SP_REGNO, 25);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
+       stxi_d(184, _SP_REGNO, 26);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
+       stxi_d(192, _SP_REGNO, 27);
+    /* FP points at the base of the save area; locals live below it */
+    movr(_FP_REGNO, _SP_REGNO);
+    if (_jitc->function->stack)
+       subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       /* store the current aoff value in the frame slot at aoffoff */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
+    if (_jitc->function->self.call & jit_call_varargs) {
+       /* spill argument registers from index vagp upward into the last
+        * 8-byte slots of the save area */
+       for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
+           stxi(stack_framesize - ((8 - reg) * 8),
+                _FP_REGNO, rn(JIT_RA0 - reg));
+    }
+}
+
+/*
+ * Emit the function epilogue: restore SP from FP, reload RA, FP and every
+ * callee-save register flagged in function->regset from the offsets
+ * _prolog stored them at, release the stack_framesize save area and
+ * return.  Nothing is emitted when assume_frame is set.  The node
+ * argument is not used.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    /* drop the local stack area; SP is back at the save area base */
+    movr(_SP_REGNO, _FP_REGNO);
+    ldxi(_RA_REGNO, _SP_REGNO, 0);
+    ldxi(_FP_REGNO, _SP_REGNO, 8);
+    /* integer registers 9 and 18..27, only when flagged in regset */
+    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
+       ldxi(9, _SP_REGNO, 16);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
+       ldxi(18, _SP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
+       ldxi(19, _SP_REGNO, 32);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
+       ldxi(20, _SP_REGNO, 40);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
+       ldxi(21, _SP_REGNO, 48);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
+       ldxi(22, _SP_REGNO, 56);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
+       ldxi(23, _SP_REGNO, 64);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
+       ldxi(24, _SP_REGNO, 72);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
+       ldxi(25, _SP_REGNO, 80);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
+       ldxi(26, _SP_REGNO, 88);
+    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
+       ldxi(27, _SP_REGNO, 96);
+    /* floating-point registers 8, 9 and 18..27, loaded as doubles */
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
+       ldxi_d(8, _SP_REGNO, 104);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
+       ldxi_d(9, _SP_REGNO, 112);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
+       ldxi_d(18, _SP_REGNO, 120);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
+       ldxi_d(19, _SP_REGNO, 128);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
+       ldxi_d(20, _SP_REGNO, 136);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
+       ldxi_d(21, _SP_REGNO, 144);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
+       ldxi_d(22, _SP_REGNO, 152);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
+       ldxi_d(23, _SP_REGNO, 160);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
+       ldxi_d(24, _SP_REGNO, 168);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
+       ldxi_d(25, _SP_REGNO, 176);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
+       ldxi_d(26, _SP_REGNO, 184);
+    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
+       ldxi_d(27, _SP_REGNO, 192);
+    /* release the save area reserved by the prologue */
+    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    RET();
+}
+
+/*
+ * Emit code initializing the va_list in r0.  When argument registers are
+ * still available at index vagp, r0 points at the slot inside the save
+ * area that corresponds to vagp; otherwise it points at FP + self.size.
+ * Only valid inside a varargs function.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_word_t         off;
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Offset from FP of the first variadic argument. */
+    if (jit_arg_reg_p(_jitc->function->vagp))
+       off = stack_framesize - ((8 - _jitc->function->vagp) * 8);
+    else
+       off = _jitc->function->self.size;
+    addi(r0, _FP_REGNO, off);
+}
+
+/*
+ * Emit code fetching the next word-sized variadic argument: r0 receives
+ * the word r1 points at, then r1 advances by sizeof(jit_word_t).  Only
+ * valid inside a varargs function.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+    /* Load argument. */
+    ldr(r0, r1);
+    /* Update va_list. */
+    addi(r1, r1, sizeof(jit_word_t));
+}
+
+/*
+ * Rewrite the already emitted instruction(s) at address instr so they
+ * refer to label.  Three shapes are recognized by the opcode of the
+ * first instruction:
+ *   - LUI (55): the six-instruction movi_p sequence
+ *     LUI/ADDIW/LUI/ADDIW/SLLI/ADD; the immediates of both LUI/ADDIW
+ *     pairs are re-encoded for the absolute value label;
+ *   - conditional branch (99): the pc-relative displacement is rewritten;
+ *   - JAL (111): the pc-relative displacement is rewritten.
+ * Anything else aborts.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    instr_t             i;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    i.w = u.i[0];
+    /* movi_p? */
+    if (i.U.opcode == 55) {                                    /* LUI */
+       jit_int32_t     ww = label << 32 >> 32;
+       jit_int32_t     lo = ww << 20 >> 20;
+       jit_int32_t     hi = ww - lo;
+       i.U.imm12_31 = hi >> 12;
+       u.i[0] = i.w;
+       i.w = u.i[1];
+       if (i.I.opcode == 27 && i.I.funct3 == 0) {              /* ADDIW */
+           i.I.imm11_0 = lo & 0xfff;
+           u.i[1] = i.w;
+           i.w = u.i[2];
+           if (i.U.opcode == 55) {                             /* LUI */
+               /* The low word is sign extended at run time; when its
+                * bit 31 is set the final ADD effectively subtracts 1
+                * from the high word, so add that carry here. */
+               ww = (label >> 32) + (ww < 0);
+               lo = ww << 20 >> 20;
+               hi = ww - lo;
+               i.U.imm12_31 = hi >> 12;
+               u.i[2] = i.w;
+               i.w = u.i[3];
+               if (i.I.opcode == 27 && i.I.funct3 == 0) {      /* ADDIW */
+                   i.I.imm11_0 = lo & 0xfff;
+                   u.i[3] = i.w;
+                   /* the tail of the sequence is only sanity checked */
+                   i.w = u.i[4];
+                   assert(i.IS.opcode == 19);                  /* SLLI */
+                   assert(i.IS.shamt == 32);
+                   i.w = u.i[5];
+                   assert(i.R.opcode == 51);                   /* ADD */
+               }
+               else
+                   abort();
+           }
+           else
+               abort();
+       }
+       else
+           abort();
+    }
+    /* b{lt,le,eq,ge,gt,ne}{,_u}? */
+    else if (i.B.opcode == 99) {               /* B{EQ,NE,LT,GE,LTU,GEU} */
+       jit_word_t jmp = label - instr;
+       assert(simm12_p(jmp));
+       /* scatter the displacement over the B-type immediate fields */
+       i.B.imm11       = (jmp >> 11) & 0x1;
+       i.B.imm4_1      = (jmp >> 1) & 0xf;
+       i.B.imm10_5     = (jmp >> 5) & 0x3f;
+       i.B.imm12       = (jmp >> 12) & 0x1;
+       u.i[0] = i.w;
+    }
+    else if (i.J.opcode == 111) {              /* JAL */
+       jit_word_t jmp = label - instr;
+       /* scatter the displacement over the J-type immediate fields */
+       i.J.imm19_12    = (jmp >> 12) &  0xff;
+       i.J.imm11       = (jmp >> 11) &   0x1;
+       i.J.imm10_1     = (jmp >>  1) & 0x3ff;
+       i.J.imm20       = (jmp >> 20) &   0x1;
+       u.i[0] = i.w;
+    }
+    else
+       abort();
+}
+#endif         /* CODE */
diff --git a/deps/lightning/lib/jit_riscv-fpu.c b/deps/lightning/lib/jit_riscv-fpu.c
new file mode 100644 (file)
index 0000000..367975e
--- /dev/null
@@ -0,0 +1,1271 @@
+/*
+ * Copyright (C) 2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* RV32F Standard Extension (single precision).
+ * Each macro forwards its operands plus fixed numeric fields to Itype/Stype/Rtype/R4type, which are defined elsewhere.
+ */
+#  define FLW(rd, rs1, im)             Itype(7, rd, 2, rs1, im)
+#  define FSW(rs1, rs2, imm)           Stype(39, 2, rs1, rs2, imm)  /* str_f passes the address register as rs1 and the value as rs2 */
+#  define FMADD_S(rd, rs1, rs2, rs3)   R4type(67, rd, 0, rs1, rs2, 0, rs3)
+#  define FMSUB_S(rd, rs1, rs2, rs3)   R4type(71, rd, 0, rs1, rs2, 0, rs3)
+#  define FNMSUB_S(rd, rs1, rs2, rs3)  R4type(75, rd, 0, rs1, rs2, 0, rs3)
+#  define FNMADD_S(rd, rs1, rs2, rs3)  R4type(79, rd, 0, rs1, rs2, 0, rs3)
+#  define FADD_S(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 0)  /* Rtype argument order as used here: op, rd, 3rd field, rs1, rs2, last field */
+#  define FSUB_S(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 4)
+#  define FMUL_S(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 8)
+#  define FDIV_S(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 12)
+#  define FSQRT_S(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 44)
+#  define FSGNJ_S(rd, rs1, rs2)                Rtype(83, rd, 0, rs1, rs2, 16)
+#  define FSGNJN_S(rd, rs1, rs2)       Rtype(83, rd, 1, rs1, rs2, 16)
+#  define FSGNJX_S(rd, rs1, rs2)       Rtype(83, rd, 2, rs1, rs2, 16)
+#  define FMIN_S(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 20)
+#  define FMAX_S(rd, rs1, rs2)         Rtype(83, rd, 1, rs1, rs2, 20)
+#  define FCVT_W_S(rd, rs1)            Rtype(83, rd, 0, rs1, 0, 96)
+#  define FCVT_WU_S(rd, rs1)           Rtype(83, rd, 1, rs1, 1, 96)  /* NOTE(review): 3rd field is 1 here but 0 in FCVT_W_S and FCVT_WU_D -- confirm this is intended */
+#  define FMV_X_W(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 112)
+#  define FEQ_S(rd, rs1, rs2)          Rtype(83, rd, 2, rs1, rs2, 80)
+#  define FLT_S(rd, rs1, rs2)          Rtype(83, rd, 1, rs1, rs2, 80)
+#  define FLE_S(rd, rs1, rs2)          Rtype(83, rd, 0, rs1, rs2, 80)
+#  define FCLASS_S(rd, rs1)            Rtype(83, rd, 1, rs1, 0, 112)  /* same last field as FMV_X_W; only the 3rd field differs */
+#  define FCVT_S_W(rd, rs1)            Rtype(83, rd, 0, rs1, 0, 104)
+#  define FCVT_S_WU(rd, rs1)           Rtype(83, rd, 0, rs1, 1, 104)
+#  define FMV_W_X(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 120)
+/* RV64F Standard Extension (in addition to RV32F).
+ * Same fields as FCVT_W_S / FCVT_S_W, except that the rs2 slot holds 2 (L) or 3 (LU) instead of 0 or 1.
+ */
+#  define FCVT_L_S(rd, rs1)            Rtype(83, rd, 0, rs1, 2, 96)
+#  define FCVT_LU_S(rd, rs1)           Rtype(83, rd, 0, rs1, 3, 96)
+#  define FCVT_S_L(rd, rs1)            Rtype(83, rd, 0, rs1, 2, 104)  /* used by extr_f */
+#  define FCVT_S_LU(rd, rs1)           Rtype(83, rd, 0, rs1, 3, 104)
+/* RV32D Standard Extension (double precision).
+ * Mirrors the RV32F table: load/store use 3 instead of 2, R4type's sixth argument is 1, and most Rtype last fields are the _S value plus 1.
+ */
+#  define FLD(rd, rs1, im)             Itype(7, rd, 3, rs1, im)
+#  define FSD(rs1, rs2, imm)           Stype(39, 3, rs1, rs2, imm)  /* str_d passes the address register as rs1 and the value as rs2 */
+#  define FMADD_D(rd, rs1, rs2, rs3)   R4type(67, rd, 0, rs1, rs2, 1, rs3)
+#  define FMSUB_D(rd, rs1, rs2, rs3)   R4type(71, rd, 0, rs1, rs2, 1, rs3)
+#  define FNMSUB_D(rd, rs1, rs2, rs3)  R4type(75, rd, 0, rs1, rs2, 1, rs3)
+#  define FNMADD_D(rd, rs1, rs2, rs3)  R4type(79, rd, 0, rs1, rs2, 1, rs3)
+#  define FADD_D(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 1)
+#  define FSUB_D(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 5)
+#  define FMUL_D(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 9)
+#  define FDIV_D(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 13)
+#  define FSQRT_D(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 45)
+#  define FSGNJ_D(rd, rs1, rs2)                Rtype(83, rd, 0, rs1, rs2, 17)
+#  define FSGNJN_D(rd, rs1, rs2)       Rtype(83, rd, 1, rs1, rs2, 17)
+#  define FSGNJX_D(rd, rs1, rs2)       Rtype(83, rd, 2, rs1, rs2, 17)
+#  define FMIN_D(rd, rs1, rs2)         Rtype(83, rd, 0, rs1, rs2, 21)
+#  define FMAX_D(rd, rs1, rs2)         Rtype(83, rd, 1, rs1, rs2, 21)
+#  define FCVT_S_D(rd, rs1)            Rtype(83, rd, 0, rs1, 1, 32)  /* double -> single; used by extr_d_f */
+#  define FCVT_D_S(rd, rs1)            Rtype(83, rd, 0, rs1, 0, 33)  /* single -> double; used by extr_f_d */
+#  define FEQ_D(rd, rs1, rs2)          Rtype(83, rd, 2, rs1, rs2, 81)
+#  define FLT_D(rd, rs1, rs2)          Rtype(83, rd, 1, rs1, rs2, 81)
+#  define FLE_D(rd, rs1, rs2)          Rtype(83, rd, 0, rs1, rs2, 81)
+#  define FCLASS_D(rd, rs1)            Rtype(83, rd, 1, rs1, 0, 113)
+#  define FCVT_W_D(rd, rs1)            Rtype(83, rd, 0, rs1, 0, 97)
+#  define FCVT_WU_D(rd, rs1)           Rtype(83, rd, 0, rs1, 1, 97)
+#  define FCVT_D_W(rd, rs1)            Rtype(83, rd, 0, rs1, 0, 105)
+#  define FCVT_D_WU(rd, rs1)           Rtype(83, rd, 0, rs1, 1, 105)
+/* RV64D Standard Extension (in addition to RV32D).
+ * 64-bit integer conversions (rs2 slot 2 or 3) plus the FMV_X_D / FMV_D_X moves used by movr_d_w / movr_w_d.
+ */
+#  define FCVT_L_D(rd, rs1)            Rtype(83, rd, 0, rs1, 2, 97)
+#  define FCVT_LU_D(rd, rs1)           Rtype(83, rd, 0, rs1, 3, 97)
+#  define FMV_X_D(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 113)  /* same last field as FCLASS_D; only the 3rd field differs */
+#  define FCVT_D_L(rd, rs1)            Rtype(83, rd, 0, rs1, 2, 105)  /* used by extr_d */
+#  define FCVT_D_LU(rd, rs1)           Rtype(83, rd, 0, rs1, 3, 105)
+#  define FMV_D_X(rd, rs1)             Rtype(83, rd, 0, rs1, 0, 121)
+/* Pseudo instructions.
+ * Move, absolute value and negate are the three sign-injection encodings issued with both source operands set to r1.
+ */
+#  define FMV_S(r0, r1)                        FSGNJ_S(r0, r1, r1)
+#  define FABS_S(r0, r1)               FSGNJX_S(r0, r1, r1)
+#  define FNEG_S(r0, r1)               FSGNJN_S(r0, r1, r1)
+#  define FMV_D(r0, r1)                        FSGNJ_D(r0, r1, r1)
+#  define FABS_D(r0, r1)               FSGNJX_D(r0, r1, r1)
+#  define FNEG_D(r0, r1)               FSGNJN_D(r0, r1, r1)
+
+/* Lightning instructions.
+ * truncr_* must truncate, so they encode the rounding-mode field as 1 (RTZ) themselves; the raw FCVT_{W,L}_{S,D} macros carry 0 (RNE) and would round to nearest.
+ */
+#  define truncr_f_i(r0, r1)           Rtype(83, r0, 1, r1, 0, 96)  /* fcvt.w.s r0, r1, rtz */
+#  define truncr_d_i(r0, r1)           Rtype(83, r0, 1, r1, 0, 97)  /* fcvt.w.d r0, r1, rtz */
+#  define truncr_f_l(r0, r1)           Rtype(83, r0, 1, r1, 2, 96)  /* fcvt.l.s r0, r1, rtz */
+#  define truncr_d_l(r0, r1)           Rtype(83, r0, 1, r1, 2, 97)  /* fcvt.l.d r0, r1, rtz */
+#  define addr_f(r0, r1, r2)           FADD_S(r0, r1, r2)  /* single-precision arithmetic: register forms map to one instruction */
+#  define addi_f(r0, r1, im)           _addi_f(_jit, r0, r1, im)  /* immediate forms call helpers generated by fopi() in the CODE section */
+static void _addi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define subr_f(r0, r1, r2)           FSUB_S(r0, r1, r2)
+#  define subi_f(r0, r1, im)           _subi_f(_jit, r0, r1, im)
+static void _subi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define rsbr_f(r0, r1, r2)           FSUB_S(r0, r2, r1)  /* reverse subtract: same instruction as subr_f with the sources swapped */
+#  define rsbi_f(r0, r1, im)           _rsbi_f(_jit, r0, r1, im)
+static void _rsbi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define mulr_f(r0, r1, r2)           FMUL_S(r0, r1, r2)
+#  define muli_f(r0, r1, im)           _muli_f(_jit, r0, r1, im)
+static void _muli_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define divr_f(r0, r1, r2)           FDIV_S(r0, r1, r2)
+#  define divi_f(r0, r1, im)           _divi_f(_jit, r0, r1, im)
+static void _divi_f(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define absr_f(r0, r1)               FABS_S(r0, r1)
+#  define negr_f(r0, r1)               FNEG_S(r0, r1)
+#  define sqrtr_f(r0, r1)              FSQRT_S(r0, r1)
+#  define extr_f(r0, r1)               FCVT_S_L(r0, r1)  /* integer -> float uses the RV64F 64-bit source form */
+#  define ldr_f(r0, r1)                        FLW(r0, r1, 0)  /* r0 = float destination, r1 = address register, zero offset */
+#  define ldi_f(r0, im)                        _ldi_f(_jit, r0, im)  /* im = absolute address */
+static void _ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldxr_f(r0, r1, r2)           _ldxr_f(_jit, r0, r1, r2)  /* address = r1 + r2 */
+static void _ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_f(r0, r1, i0)           _ldxi_f(_jit, r0, r1, i0)  /* address = r1 + i0 */
+static void _ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define str_f(r0, r1)                        FSW(r0, r1, 0)  /* r0 = address register, r1 = float value, zero offset */
+#  define sti_f(im, r0)                        _sti_f(_jit, im, r0)  /* im = absolute address, r0 = float value */
+static void _sti_f(jit_state_t*, jit_word_t, jit_int32_t);
+#  define stxr_f(r0, r1, r2)           _stxr_f(_jit, r0, r1, r2)  /* address = r0 + r1, value = r2 */
+static void _stxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxi_f(im, r0, r1)           _stxi_f(_jit, im, r0, r1)  /* address = r0 + im, value = r1 */
+static void _stxi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define movr_f(r0, r1)               FMV_S(r0, r1)
+#  define movi_f(r0, im)               _movi_f(_jit, r0, im)
+static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t);
+#  define movr_f_w(r0, r1)             FMV_X_W(r0, r1)  /* float register -> integer register (FMV_X_W) */
+#  define movi_f_w(r0, im)             _movi_f_w(_jit, r0, im)
+static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t);
+#  define movr_w_f(r0, r1)             FMV_W_X(r0, r1)  /* integer register -> float register (FMV_W_X) */
+#  define extr_d_f(r0, r1)             FCVT_S_D(r0, r1)  /* double -> single */
+#  define ltr_f(r0, r1, r2)            FLT_S(r0, r1, r2)  /* single-precision compares: r0 is the integer result register */
+#  define lti_f(r0, r1, im)            _lti_f(_jit, r0, r1, im)  /* immediate forms are generated by fopi() in the CODE section */
+static void _lti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ler_f(r0, r1, r2)            FLE_S(r0, r1, r2)
+#  define lei_f(r0, r1, im)            _lei_f(_jit, r0, r1, im)
+static void _lei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define eqr_f(r0, r1, r2)            FEQ_S(r0, r1, r2)
+#  define eqi_f(r0, r1, im)            _eqi_f(_jit, r0, r1, im)
+static void _eqi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ger_f(r0, r1, r2)            FLE_S(r0, r2, r1)  /* >= is emitted as <= with the sources swapped */
+#  define gei_f(r0, r1, im)            _gei_f(_jit, r0, r1, im)
+static void _gei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define gtr_f(r0, r1, r2)            FLT_S(r0, r2, r1)  /* > is emitted as < with the sources swapped */
+#  define gti_f(r0, r1, im)            _gti_f(_jit, r0, r1, im)
+static void _gti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ner_f(r0, r1, r2)            _ner_f(_jit, r0, r1, r2)  /* no single instruction: helper inverts eqr_f */
+static void _ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define nei_f(r0, r1, im)            _nei_f(_jit, r0, r1, im)
+static void _nei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define unltr_f(r0, r1, r2)          _unltr_f(_jit, r0, r1, r2)  /* un* = unordered variants, built from the unop() macro in the CODE section */
+static void _unltr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unlti_f(r0, r1, im)          _unlti_f(_jit, r0, r1, im)
+static void _unlti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define unler_f(r0, r1, r2)          _unler_f(_jit, r0, r1, r2)
+static void _unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unlei_f(r0, r1, im)          _unlei_f(_jit, r0, r1, im)
+static void _unlei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define uneqr_f(r0, r1, r2)          _uneqr_f(_jit, r0, r1, r2)
+static void _uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define uneqi_f(r0, r1, im)          _uneqi_f(_jit, r0, r1, im)
+static void _uneqi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define unger_f(r0, r1, r2)          _unger_f(_jit, r0, r1, r2)
+static void _unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ungei_f(r0, r1, im)          _ungei_f(_jit, r0, r1, im)
+static void _ungei_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ungtr_f(r0, r1, r2)          _ungtr_f(_jit, r0, r1, r2)
+static void _ungtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ungti_f(r0, r1, im)          _ungti_f(_jit, r0, r1, im)
+static void _ungti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ltgtr_f(r0, r1, r2)          _ltgtr_f(_jit, r0, r1, r2)  /* helper yields 0 when an operand is NaN, else the ner_f result */
+static void _ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ltgti_f(r0, r1, im)          _ltgti_f(_jit, r0, r1, im)
+static void _ltgti_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define ordr_f(r0, r1, r2)           _ordr_f(_jit, r0, r1, r2)
+static void _ordr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ordi_f(r0, r1, im)           _ordi_f(_jit, r0, r1, im)
+static void _ordi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define unordr_f(r0, r1, r2)         _unordr_f(_jit, r0, r1, r2)
+static void _unordr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unordi_f(r0, r1, im)         _unordi_f(_jit, r0, r1, im)
+static void _unordi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_float32_t);
+#  define bltr_f(br, r0, r1)           _bltr_f(_jit,br,r0,r1)  /* single-precision branches: br is a jit_word_t (presumably the target -- confirm), r0/r1 are the compared registers */
+static jit_word_t _bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);  /* every branch helper returns a jit_word_t */
+#  define blti_f(br, r0, im)           _blti_f(_jit,br,r0,im)  /* *i_f forms compare r0 against a jit_float32_t immediate */
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bler_f(br, r0, r1)           _bler_f(_jit,br,r0,r1)
+static jit_word_t _bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_f(br, r0, im)           _blei_f(_jit,br,r0,im)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define beqr_f(br, r0, r1)           _beqr_f(_jit,br,r0,r1)
+static jit_word_t _beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_f(br, r0, im)           _beqi_f(_jit,br,r0,im)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bger_f(br, r0, r1)           _bger_f(_jit,br,r0,r1)
+static jit_word_t _bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_f(br, r0, im)           _bgei_f(_jit,br,r0,im)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bgtr_f(br, r0, r1)           _bgtr_f(_jit,br,r0,r1)
+static jit_word_t _bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_f(br, r0, im)           _bgti_f(_jit,br,r0,im)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bner_f(br, r0, r1)           _bner_f(_jit,br,r0,r1)
+static jit_word_t _bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_f(br, r0, im)           _bnei_f(_jit,br,r0,im)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunltr_f(br, r0, r1)         _bunltr_f(_jit,br,r0,r1)
+static jit_word_t _bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_f(br, r0, im)         _bunlti_f(_jit,br,r0,im)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunler_f(br, r0, r1)         _bunler_f(_jit,br,r0,r1)
+static jit_word_t _bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_f(br, r0, im)         _bunlei_f(_jit,br,r0,im)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define buneqr_f(br, r0, r1)         _buneqr_f(_jit,br,r0,r1)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(br, r0, im)         _buneqi_f(_jit,br,r0,im)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunger_f(br, r0, r1)         _bunger_f(_jit,br,r0,r1)
+static jit_word_t _bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_f(br, r0, im)         _bungei_f(_jit,br,r0,im)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bungtr_f(br, r0, r1)         _bungtr_f(_jit,br,r0,r1)
+static jit_word_t _bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_f(br, r0, im)         _bungti_f(_jit,br,r0,im)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bltgtr_f(br, r0, r1)         _bltgtr_f(_jit,br,r0,r1)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(br, r0, im)         _bltgti_f(_jit,br,r0,im)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bordr_f(br, r0, r1)          _bordr_f(_jit,br,r0,r1)
+static jit_word_t _bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_f(br, r0, im)          _bordi_f(_jit,br,r0,im)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunordr_f(br, r0, r1)                _bunordr_f(_jit,br,r0,r1)
+static jit_word_t _bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_f(br, r0, im)                _bunordi_f(_jit,br,r0,im)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define addr_d(r0, r1, r2)           FADD_D(r0, r1, r2)  /* double-precision arithmetic: same layout as the _f group, using the _D encodings */
+#  define addi_d(r0, r1, im)           _addi_d(_jit, r0, r1, im)  /* immediate forms take a jit_float64_t */
+static void _addi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define subr_d(r0, r1, r2)           FSUB_D(r0, r1, r2)
+#  define subi_d(r0, r1, im)           _subi_d(_jit, r0, r1, im)
+static void _subi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define rsbr_d(r0, r1, r2)           FSUB_D(r0, r2, r1)  /* reverse subtract: same instruction as subr_d with the sources swapped */
+#  define rsbi_d(r0, r1, im)           _rsbi_d(_jit, r0, r1, im)
+static void _rsbi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define mulr_d(r0, r1, r2)           FMUL_D(r0, r1, r2)
+#  define muli_d(r0, r1, im)           _muli_d(_jit, r0, r1, im)
+static void _muli_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define divr_d(r0, r1, r2)           FDIV_D(r0, r1, r2)
+#  define divi_d(r0, r1, im)           _divi_d(_jit, r0, r1, im)
+static void _divi_d(jit_state_t *_jit,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define absr_d(r0, r1)               FABS_D(r0, r1)
+#  define negr_d(r0, r1)               FNEG_D(r0, r1)
+#  define sqrtr_d(r0, r1)              FSQRT_D(r0, r1)
+#  define extr_d(r0, r1)               FCVT_D_L(r0, r1)  /* integer -> double uses the RV64D 64-bit source form */
+#  define ldr_d(r0, r1)                        FLD(r0, r1, 0)  /* r0 = double destination, r1 = address register, zero offset */
+#  define ldi_d(r0, im)                        _ldi_d(_jit, r0, im)  /* im = absolute address */
+static void _ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldxr_d(r0, r1, r2)           _ldxr_d(_jit, r0, r1, r2)
+static void _ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_d(r0, r1, i0)           _ldxi_d(_jit, r0, r1, i0)
+static void _ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define str_d(r0, r1)                        FSD(r0, r1, 0)  /* r0 = address register, r1 = double value, zero offset */
+#  define sti_d(im, r0)                        _sti_d(_jit, im, r0)
+static void _sti_d(jit_state_t*, jit_word_t, jit_int32_t);
+#  define stxr_d(r0, r1, r2)           _stxr_d(_jit, r0, r1, r2)
+static void _stxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxi_d(im, r0, r1)           _stxi_d(_jit, im, r0, r1)
+static void _stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define movr_d(r0, r1)               FMV_D(r0, r1)
+#  define movi_d(r0, im)               _movi_d(_jit, r0, im)
+static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t);
+#  define movr_d_w(r0, r1)             FMV_X_D(r0, r1)  /* double register -> integer register (FMV_X_D) */
+#  define movi_d_w(r0, im)             _movi_d_w(_jit, r0, im)
+static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t);
+#  define movr_w_d(r0, r1)             FMV_D_X(r0, r1)  /* integer register -> double register (FMV_D_X) */
+#  define extr_f_d(r0, r1)             FCVT_D_S(r0, r1)  /* single -> double */
+#  define ltr_d(r0, r1, r2)            FLT_D(r0, r1, r2)  /* double-precision compares: r0 is the integer result register */
+#  define lti_d(r0, r1, r2)            _lti_d(_jit, r0, r1, r2)  /* here the macro's third parameter, named r2, is the jit_float64_t immediate (see prototype) */
+static void _lti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ler_d(r0, r1, r2)            FLE_D(r0, r1, r2)
+#  define lei_d(r0, r1, r2)            _lei_d(_jit, r0, r1, r2)
+static void _lei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define eqr_d(r0, r1, r2)            FEQ_D(r0, r1, r2)
+#  define eqi_d(r0, r1, r2)            _eqi_d(_jit, r0, r1, r2)
+static void _eqi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ger_d(r0, r1, r2)            FLE_D(r0, r2, r1)  /* >= is emitted as <= with the sources swapped */
+#  define gei_d(r0, r1, r2)            _gei_d(_jit, r0, r1, r2)
+static void _gei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define gtr_d(r0, r1, r2)            FLT_D(r0, r2, r1)  /* > is emitted as < with the sources swapped */
+#  define gti_d(r0, r1, r2)            _gti_d(_jit, r0, r1, r2)
+static void _gti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ner_d(r0, r1, r2)            _ner_d(_jit, r0, r1, r2)
+static void _ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define nei_d(r0, r1, r2)            _nei_d(_jit, r0, r1, r2)
+static void _nei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define unltr_d(r0, r1, r2)          _unltr_d(_jit, r0, r1, r2)  /* un* = unordered variants, same scheme as the _f group */
+static void _unltr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unlti_d(r0, r1, im)          _unlti_d(_jit, r0, r1, im)
+static void _unlti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define unler_d(r0, r1, r2)          _unler_d(_jit, r0, r1, r2)
+static void _unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unlei_d(r0, r1, im)          _unlei_d(_jit, r0, r1, im)
+static void _unlei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define uneqr_d(r0, r1, r2)          _uneqr_d(_jit, r0, r1, r2)
+static void _uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define uneqi_d(r0, r1, im)          _uneqi_d(_jit, r0, r1, im)
+static void _uneqi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define unger_d(r0, r1, r2)          _unger_d(_jit, r0, r1, r2)
+static void _unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ungei_d(r0, r1, im)          _ungei_d(_jit, r0, r1, im)
+static void _ungei_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ungtr_d(r0, r1, r2)          _ungtr_d(_jit, r0, r1, r2)
+static void _ungtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ungti_d(r0, r1, im)          _ungti_d(_jit, r0, r1, im)
+static void _ungti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ltgtr_d(r0, r1, r2)          _ltgtr_d(_jit, r0, r1, r2)
+static void _ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ltgti_d(r0, r1, im)          _ltgti_d(_jit, r0, r1, im)
+static void _ltgti_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define ordr_d(r0, r1, r2)           _ordr_d(_jit, r0, r1, r2)
+static void _ordr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ordi_d(r0, r1, im)           _ordi_d(_jit, r0, r1, im)
+static void _ordi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define unordr_d(r0, r1, r2)         _unordr_d(_jit, r0, r1, r2)
+static void _unordr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define unordi_d(r0, r1, im)         _unordi_d(_jit, r0, r1, im)
+static void _unordi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#  define bltr_d(br, r0, r1)           _bltr_d(_jit,br,r0,r1)  /* double-precision branches: br is a jit_word_t (presumably the target -- confirm), r0/r1 are the compared registers */
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);  /* every branch helper returns a jit_word_t */
+#  define blti_d(br, r0, im)           _blti_d(_jit,br,r0,im)  /* *i_d forms compare r0 against a jit_float64_t immediate */
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bler_d(br, r0, r1)           _bler_d(_jit,br,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_d(br, r0, im)           _blei_d(_jit,br,r0,im)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define beqr_d(br, r0, r1)           _beqr_d(_jit,br,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_d(br, r0, im)           _beqi_d(_jit,br,r0,im)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bger_d(br, r0, r1)           _bger_d(_jit,br,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_d(br, r0, im)           _bgei_d(_jit,br,r0,im)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bgtr_d(br, r0, r1)           _bgtr_d(_jit,br,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_d(br, r0, im)           _bgti_d(_jit,br,r0,im)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bner_d(br, r0, r1)           _bner_d(_jit,br,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_d(br, r0, im)           _bnei_d(_jit,br,r0,im)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunltr_d(br, r0, r1)         _bunltr_d(_jit,br,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_d(br, r0, im)         _bunlti_d(_jit,br,r0,im)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunler_d(br, r0, r1)         _bunler_d(_jit,br,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_d(br, r0, im)         _bunlei_d(_jit,br,r0,im)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define buneqr_d(br, r0, r1)         _buneqr_d(_jit,br,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_d(br, r0, im)         _buneqi_d(_jit,br,r0,im)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunger_d(br, r0, r1)         _bunger_d(_jit,br,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_d(br, r0, im)         _bungei_d(_jit,br,r0,im)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bungtr_d(br, r0, r1)         _bungtr_d(_jit,br,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_d(br, r0, im)         _bungti_d(_jit,br,r0,im)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bltgtr_d(br, r0, r1)         _bltgtr_d(_jit,br,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_d(br, r0, im)         _bltgti_d(_jit,br,r0,im)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bordr_d(br, r0, r1)          _bordr_d(_jit,br,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_d(br, r0, im)          _bordi_d(_jit,br,r0,im)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunordr_d(br, r0, r1)                _bunordr_d(_jit,br,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_d(br, r0, im)                _bunordi_d(_jit,br,r0,im)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)  /* NOTE(review): presumably fetches the next double variadic argument; the definition is outside this section */
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif /* PROTO */
+
+#if CODE
+#  define fpr_opi(name, type, size)  /* defines _<name>i_<type>: the immediate form of <name>r_<type> */ \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t i0)                               \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);  /* scratch fpr for the immediate */ \
+    movi_##type(rn(reg), i0);  /* load i0 into the scratch register */ \
+    name##r_##type(r0, r1, rn(reg));  /* then reuse the register-register form */ \
+    jit_unget_reg(reg);                                                        \
+}
+#  define fopi(name)                   fpr_opi(name, f, 32)  /* single precision: jit_float32_t immediate */
+#  define dopi(name)                   fpr_opi(name, d, 64)  /* double precision: jit_float64_t immediate */
+
+fopi(add)  /* expands to _addi_f */
+fopi(sub)  /* expands to _subi_f */
+fopi(rsb)  /* expands to _rsbi_f */
+fopi(mul)  /* expands to _muli_f */
+fopi(div)  /* expands to _divi_f */
+
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)  /* load a float from absolute address i0 into r0 */
+{
+    jit_int32_t                t0;
+    if (simm12_p(i0))  /* i0 passes simm12_p (presumably: fits the 12-bit signed immediate) */
+       FLW(r0, _ZERO_REGNO, i0);  /* single load with the zero register as base */
+    else {
+       t0 = jit_get_reg(jit_class_gpr);  /* otherwise build the address in a scratch gpr */
+       movi(rn(t0), i0);
+       ldr_f(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)  /* load a float from address r1 + r2 into r0 */
+{
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);  /* scratch gpr holds the summed address */
+    addr(rn(t0), r1, r2);
+    ldr_f(r0, rn(t0));
+    jit_unget_reg(t0);
+}
+
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)  /* load a float from address r1 + i0 into r0 */
+{
+    jit_int32_t                t0;
+    if (simm12_p(i0))  /* offset passes simm12_p: use it directly as the FLW immediate */
+       FLW(r0, r1, i0);
+    else {
+       t0 = jit_get_reg(jit_class_gpr);  /* otherwise compute r1 + i0 into a scratch gpr first */
+       addi(rn(t0), r1, i0);
+       ldr_f(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)  /* store float register r0 at absolute address i0 */
+{
+    jit_int32_t                t0;
+    if (simm12_p(i0))  /* i0 passes simm12_p: it can be used directly as the FSW immediate */
+       FSW(_ZERO_REGNO, r0, i0);  /* FSW takes (base, value, imm) as in str_f/stxi_f: base is the zero register, r0 the value */
+    else {
+       t0 = jit_get_reg(jit_class_gpr);  /* otherwise build the address in a scratch gpr */
+       movi(rn(t0), i0);
+       str_f(rn(t0), r0);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)  /* store float register r2 at address r0 + r1 */
+{
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);  /* scratch gpr holds the summed address */
+    addr(rn(t0), r0, r1);
+    str_f(rn(t0), r2);
+    jit_unget_reg(t0);
+}
+
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)  /* store float register r1 at address r0 + i0 */
+{
+    jit_int32_t                t0;
+    if (simm12_p(i0))  /* offset passes simm12_p: use it directly as the FSW immediate */
+       FSW(r0, r1, i0);  /* r0 is the base register, r1 the value */
+    else {
+       t0 = jit_get_reg(jit_class_gpr);  /* otherwise compute r0 + i0 into a scratch gpr first */
+       addi(rn(t0), r0, i0);
+       str_f(rn(t0), r1);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)  /* load float constant i0 into float register r0 */
+{
+    union {  /* reinterpret the float's bits as a 32-bit integer */
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } data;
+    jit_int32_t                 reg;
+    data.f = i0;
+    if (data.i == 0)  /* all-zero bit pattern: no scratch register needed */
+       movr_w_f(r0, _ZERO_REGNO);  /* move straight from the zero register */
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), data.i);  /* put the bit pattern in a scratch gpr ... */
+       movr_w_f(r0, rn(reg));  /* ... and move it into the float register (FMV_W_X) */
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)  /* load the bit pattern of float i0 into integer register r0 */
+{
+    union {  /* reinterpret the float's bits as a 32-bit integer */
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    data.f = i0;
+    movi(r0, data.i);  /* data.i is a jit_int32_t passed to the integer movi */
+}
+
+fopi(lt)  /* expands to _lti_f */
+fopi(le)  /* expands to _lei_f */
+fopi(eq)  /* expands to _eqi_f */
+fopi(ge)  /* expands to _gei_f */
+fopi(gt)  /* expands to _gti_f */
+
+static void
+_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)  /* r0 = (r1 != r2) for single-precision registers */
+{
+    eqr_f(r0, r1, r2);  /* r0 = equality result (FEQ_S) */
+    xori(r0, r0, 1);  /* flip bit 0 to invert it; assumes FEQ_S writes only 0 or 1 */
+}
+fopi(ne)  /* expands to _nei_f */
+
+#  define fpr_bopi(name, type, size)  /* defines _b<name>i_<type>: the immediate form of b<name>r_<type> */ \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t i1)                               \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr|  /* scratch fpr, requested with jit_class_nospill */ \
+                                         jit_class_nospill);           \
+    movi_##type(rn(reg), i1);  /* load the immediate operand */        \
+    word = b##name##r_##type(i0, r0, rn(reg));  /* emit the register-register branch */ \
+    jit_unget_reg(reg);                                                        \
+    return (word);  /* pass the register form's return value through */ \
+}
+#  define fbopi(name)                  fpr_bopi(name, f, 32)  /* single precision: jit_float32_t immediate */
+#  define dbopi(name)                  fpr_bopi(name, d, 64)  /* double precision: jit_float64_t immediate */
+
+#  define unop(CLASS, OP)  /* function body for unordered compares: r0 = 1 if r1 or r2 is NaN, else OP(r0, r1, r2) */ \
+    jit_word_t         w;                                              \
+    jit_int32_t                t0, t1;                                         \
+    t0 = jit_get_reg(jit_class_gpr);                                   \
+    FCLASS_##CLASS(rn(t0), r1);  /* class mask of the first operand */ \
+    t1 = jit_get_reg(jit_class_gpr);                                   \
+    FCLASS_##CLASS(rn(t1), r2);  /* class mask of the second operand */ \
+    orr(rn(t0), rn(t0), rn(t1));  /* merge both masks */               \
+    jit_unget_reg(t1);                                                 \
+    rshi(rn(t0), rn(t0), 8);  /* keep bits 8 and up: the NaN classes per the RISC-V FCLASS definition */ \
+    ltr(r0, _ZERO_REGNO, rn(t0));  /* r0 = (0 < t0) */                 \
+    jit_unget_reg(t0);                                                 \
+    w = _jit->pc.w;  /* remember where the branch is emitted */        \
+    BLT(_ZERO_REGNO, r0, 0);  /* placeholder offset; taken when 0 < r0, skipping OP */ \
+    OP(r0, r1, r2);  /* ordered case: the plain compare overwrites r0 */ \
+    patch_at(w, _jit->pc.w)
+
+static void
+_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered or less than": unop sets r0 from the FCLASS_S test and runs ltr_f(r0, r1, r2) only when that test yields 0. */
+    unop(S, ltr_f);
+}
+fopi(unlt)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered or less-or-equal": unop sets r0 from the FCLASS_S test and runs ler_f(r0, r1, r2) only when that test yields 0. */
+    unop(S, ler_f);
+}
+fopi(unle)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered or equal": unop sets r0 from the FCLASS_S test and runs eqr_f(r0, r1, r2) only when that test yields 0. */
+    unop(S, eqr_f);
+}
+fopi(uneq)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered or greater-or-equal": unop sets r0 from the FCLASS_S test and runs ger_f(r0, r1, r2) only when that test yields 0. */
+    unop(S, ger_f);
+}
+fopi(unge)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered or greater than": unop sets r0 from the FCLASS_S test and runs gtr_f(r0, r1, r2) only when that test yields 0. */
+    unop(S, gtr_f);
+}
+fopi(ungt)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "less or greater": r0 = 0 when the FCLASS_S test flags either operand, otherwise r0 = ner_f(r1, r2). */
+    jit_word_t         w0, w1;   /* addresses of the two forward jumps patched below */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    ltr(r0, _ZERO_REGNO, rn(t0));   /* r0 = (0 < t0) */
+    jit_unget_reg(t0);
+    w0 = _jit->pc.w;
+    BEQ(_ZERO_REGNO, r0, 0);   /* r0 == 0: jump ahead to the ner_f comparison (offset patched below) */
+    movr(r0, _ZERO_REGNO);   /* flagged path: result is 0 */
+    w1 = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);   /* jump over the comparison; offset patched below */
+    patch_at(w0, _jit->pc.w);
+    ner_f(r0, r1, r2);
+    patch_at(w1, _jit->pc.w);
+}
+fopi(ltgt)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "ordered": r0 = (0 == t0), where t0 is the OR of both FCLASS_S masks shifted right by 8. */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    eqr(r0, _ZERO_REGNO, rn(t0));
+    jit_unget_reg(t0);
+}
+fopi(ord)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float "unordered": r0 = (0 < t0), where t0 is the OR of both FCLASS_S masks shifted right by 8. */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_S(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    ltr(r0, _ZERO_REGNO, rn(t0));
+    jit_unget_reg(t0);
+}
+fopi(unord)   /* fopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static jit_word_t
+_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ltr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ltr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(lt)   /* expands fpr_bopi into the immediate form _blti_f */
+
+static jit_word_t
+_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ler_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ler_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(le)   /* expands fpr_bopi into the immediate form _blei_f */
+
+static jit_word_t
+_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the eqr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    eqr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(eq)   /* expands fpr_bopi into the immediate form _beqi_f */
+
+static jit_word_t
+_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ger_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ger_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(ge)   /* expands fpr_bopi into the immediate form _bgei_f */
+
+static jit_word_t
+_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the gtr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    gtr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(gt)   /* expands fpr_bopi into the immediate form _bgti_f */
+
+static jit_word_t
+_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch on "not equal": put the eqr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag equals zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    eqr_f(rn(t0), r1, r2);
+    w = beqr(i0, _ZERO_REGNO, rn(t0));   /* beqr, not bner: the equality flag is tested for zero */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(ne)   /* expands fpr_bopi into the immediate form _bnei_f */
+
+static jit_word_t
+_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the unltr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unltr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(unlt)   /* expands fpr_bopi into the immediate form _bunlti_f */
+
+static jit_word_t
+_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the unler_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unler_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(unle)   /* expands fpr_bopi into the immediate form _bunlei_f */
+
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the uneqr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    uneqr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(uneq)   /* expands fpr_bopi into the immediate form _buneqi_f */
+
+static jit_word_t
+_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the unger_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unger_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(unge)   /* expands fpr_bopi into the immediate form _bungei_f */
+
+static jit_word_t
+_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ungtr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ungtr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(ungt)   /* expands fpr_bopi into the immediate form _bungti_f */
+
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ltgtr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ltgtr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(ltgt)   /* expands fpr_bopi into the immediate form _bltgti_f */
+
+static jit_word_t
+_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the ordr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ordr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(ord)   /* expands fpr_bopi into the immediate form _bordi_f */
+
+static jit_word_t
+_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Float branch: put the unordr_f(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unordr_f(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+fbopi(unord)   /* expands fpr_bopi into the immediate form _bunordi_f */
+
+dopi(add)   /* dopi is defined outside this view; presumably each use generates the immediate-operand variant of the named double operation -- confirm */
+dopi(sub)
+dopi(rsb)
+dopi(mul)
+dopi(div)
+
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{   /* Load a double into r0 from the absolute address i0. */
+    jit_int32_t                t0;
+    if (simm12_p(i0))   /* short form: i0 is used directly as the offset from the zero register */
+       FLD(r0, _ZERO_REGNO, i0);
+    else {   /* otherwise put the address in a scratch GPR and use the register form */
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i0);
+       ldr_d(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Load a double into r0 from the address r1 + r2. */
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);   /* scratch GPR for the summed address */
+    addr(rn(t0), r1, r2);
+    ldr_d(r0, rn(t0));
+    jit_unget_reg(t0);
+}
+
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* Load a double into r0 from the address r1 + i0. */
+    jit_int32_t                t0;
+    if (simm12_p(i0))   /* short form: i0 is used directly as the FLD offset from r1 */
+       FLD(r0, r1, i0);
+    else {   /* otherwise compute r1 + i0 into a scratch GPR first */
+       t0 = jit_get_reg(jit_class_gpr);
+       addi(rn(t0), r1, i0);
+       ldr_d(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{   /* Store the double held in r0 to the absolute address i0. */
+    jit_int32_t                t0;
+    if (simm12_p(i0))   /* short form: zero register is the base and r0 the value, the same (base, value, offset) order FSD gets in _stxi_d */
+       FSD(_ZERO_REGNO, r0, i0);
+    else {   /* otherwise put the address in a scratch GPR and use the register form */
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i0);
+       str_d(rn(t0), r0);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Store the double held in r2 to the address r0 + r1. */
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);   /* scratch GPR for the summed address */
+    addr(rn(t0), r0, r1);
+    str_d(rn(t0), r2);
+    jit_unget_reg(t0);
+}
+
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Store the double held in r1 to the address r0 + i0. */
+    jit_int32_t                t0;
+    if (simm12_p(i0))   /* short form: i0 is used directly as the FSD offset from base r0 */
+       FSD(r0, r1, i0);
+    else {   /* otherwise compute r0 + i0 into a scratch GPR first */
+       t0 = jit_get_reg(jit_class_gpr);
+       addi(rn(t0), r0, i0);
+       str_d(rn(t0), r1);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{   /* Load the double constant i0 into r0 by moving its bit pattern through an integer register. */
+    union {   /* views the same storage as a word and as a double */
+       jit_word_t       w;
+       jit_float64_t    d;
+    } data;
+    jit_int32_t                 reg;
+    data.d = i0;
+    if (data.w == 0)   /* all-zero bit pattern: move straight from the zero register, no scratch needed */
+       movr_w_d(r0, _ZERO_REGNO);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), data.w);   /* integer image of the constant */
+       movr_w_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{   /* Load the 64-bit integer image of the double constant i0 into r0 with movi. */
+    union {   /* views the same storage as a 64-bit integer and as a double */
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } data;
+    data.d = i0;
+    movi(r0, data.l);
+}
+
+dopi(lt)   /* dopi is defined outside this view; presumably each use generates the immediate-operand variant of the named double comparison -- confirm */
+dopi(le)
+dopi(eq)
+dopi(ge)
+dopi(gt)
+
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "not equal": emit eqr_d into r0, then invert bit 0 of the result. */
+    eqr_d(r0, r1, r2);
+    xori(r0, r0, 1);   /* flips 0 <-> 1, assuming eqr_d leaves 0 or 1 in r0 */
+}
+dopi(ne)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered or less than": unop sets r0 from the FCLASS_D test and runs ltr_d(r0, r1, r2) only when that test yields 0. */
+    unop(D, ltr_d);
+}
+dopi(unlt)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered or less-or-equal": unop sets r0 from the FCLASS_D test and runs ler_d(r0, r1, r2) only when that test yields 0. */
+    unop(D, ler_d);
+}
+dopi(unle)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered or equal": unop sets r0 from the FCLASS_D test and runs eqr_d(r0, r1, r2) only when that test yields 0. */
+    unop(D, eqr_d);
+}
+dopi(uneq)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered or greater-or-equal": unop sets r0 from the FCLASS_D test and runs ger_d(r0, r1, r2) only when that test yields 0. */
+    unop(D, ger_d);
+}
+dopi(unge)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered or greater than": unop sets r0 from the FCLASS_D test and runs gtr_d(r0, r1, r2) only when that test yields 0. */
+    unop(D, gtr_d);
+}
+dopi(ungt)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "less or greater": r0 = 0 when the FCLASS_D test flags either operand, otherwise r0 = ner_d(r1, r2). */
+    jit_word_t         w0, w1;   /* addresses of the two forward jumps patched below */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    ltr(r0, _ZERO_REGNO, rn(t0));   /* r0 = (0 < t0) */
+    jit_unget_reg(t0);
+    w0 = _jit->pc.w;
+    BEQ(_ZERO_REGNO, r0, 0);   /* r0 == 0: jump ahead to the ner_d comparison (offset patched below) */
+    movr(r0, _ZERO_REGNO);   /* flagged path: result is 0 */
+    w1 = _jit->pc.w;
+    JAL(_ZERO_REGNO, 0);   /* jump over the comparison; offset patched below */
+    patch_at(w0, _jit->pc.w);
+    ner_d(r0, r1, r2);
+    patch_at(w1, _jit->pc.w);
+}
+dopi(ltgt)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "ordered": r0 = (0 == t0), where t0 is the OR of both FCLASS_D masks shifted right by 8. */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    eqr(r0, _ZERO_REGNO, rn(t0));
+    jit_unget_reg(t0);
+}
+dopi(ord)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double "unordered": r0 = (0 < t0), where t0 is the OR of both FCLASS_D masks shifted right by 8. */
+    jit_int32_t                t0, t1;   /* scratch GPRs for the two class masks */
+    t0 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t0), r1);
+    t1 = jit_get_reg(jit_class_gpr);
+    FCLASS_D(rn(t1), r2);
+    orr(rn(t0), rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    rshi(rn(t0), rn(t0), 8);   /* keep mask bits 8 and up; presumably the NaN classes of RISC-V fclass -- confirm */
+    ltr(r0, _ZERO_REGNO, rn(t0));
+    jit_unget_reg(t0);
+}
+dopi(unord)   /* dopi is defined outside this view; presumably generates the immediate form -- confirm */
+
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ltr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ltr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(lt)   /* expands fpr_bopi into the immediate form _blti_d */
+
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ler_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ler_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(le)   /* expands fpr_bopi into the immediate form _blei_d */
+
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the eqr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    eqr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(eq)   /* expands fpr_bopi into the immediate form _beqi_d */
+
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ger_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ger_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(ge)   /* expands fpr_bopi into the immediate form _bgei_d */
+
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the gtr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    gtr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(gt)   /* expands fpr_bopi into the immediate form _bgti_d */
+
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch on "not equal": put the eqr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag equals zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    eqr_d(rn(t0), r1, r2);
+    w = beqr(i0, _ZERO_REGNO, rn(t0));   /* beqr, not bner: the equality flag is tested for zero */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(ne)   /* expands fpr_bopi into the immediate form _bnei_d */
+
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the unltr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unltr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(unlt)   /* expands fpr_bopi into the immediate form _bunlti_d */
+
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the unler_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unler_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(unle)   /* expands fpr_bopi into the immediate form _bunlei_d */
+
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the uneqr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    uneqr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(uneq)   /* expands fpr_bopi into the immediate form _buneqi_d */
+
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the unger_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unger_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(unge)   /* expands fpr_bopi into the immediate form _bungei_d */
+
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ungtr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ungtr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(ungt)   /* expands fpr_bopi into the immediate form _bungti_d */
+
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ltgtr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ltgtr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(ltgt)   /* expands fpr_bopi into the immediate form _bltgti_d */
+
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the ordr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    ordr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(ord)   /* expands fpr_bopi into the immediate form _bordi_d */
+
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double branch: put the unordr_d(r1, r2) flag in a scratch GPR, then branch to i0 when the flag differs from zero. */
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);   /* scratch GPR for the flag */
+    unordr_d(rn(t0), r1, r2);
+    w = bner(i0, _ZERO_REGNO, rn(t0));   /* bner's return value is handed back to the caller */
+    jit_unget_reg(t0);
+    return (w);
+}
+dbopi(unord)   /* expands fpr_bopi into the immediate form _bunordi_d */
+
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Fetch the next double variadic argument: r1 holds the va_list pointer, r0 receives the value. */
+    assert(_jitc->function->self.call & jit_call_varargs);   /* only valid inside a function flagged as varargs */
+    /* Load argument. */
+    ldr_d(r0, r1);
+    /* Update va_list. */
+    addi(r1, r1, sizeof(jit_float64_t));   /* step past the double just read */
+}
+
+#endif /* CODE */
diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c
new file mode 100644 (file)
index 0000000..2f1d725
--- /dev/null
@@ -0,0 +1,401 @@
+#if __WORDSIZE == 64
+#define JIT_INSTR_MAX 116      /* equals the largest entry in the list below (116, htonr_ul) */
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    112,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    20,        /* addi */
+    12,        /* addcr */
+    28,        /* addci */
+    28,        /* addxr */
+    28,        /* addxi */
+    4, /* subr */
+    20,        /* subi */
+    12,        /* subcr */
+    28,        /* subci */
+    28,        /* subxr */
+    28,        /* subxi */
+    28,        /* rsbi */
+    4, /* mulr */
+    20,        /* muli */
+    12,        /* qmulr */
+    24,        /* qmuli */
+    12,        /* qmulr_u */
+    24,        /* qmuli_u */
+    4, /* divr */
+    20,        /* divi */
+    4, /* divr_u */
+    20,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    4, /* remr */
+    20,        /* remi */
+    4, /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    20,        /* andi */
+    4, /* orr */
+    20,        /* ori */
+    4, /* xorr */
+    20,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    4, /* lti_u */
+    8, /* ler */
+    12,        /* lei */
+    8, /* ler_u */
+    12,        /* lei_u */
+    12,        /* eqr */
+    12,        /* eqi */
+    8, /* ger */
+    12,        /* gei */
+    8, /* ger_u */
+    12,        /* gei_u */
+    4, /* gtr */
+    8, /* gti */
+    4, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    24,        /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    4, /* extr_i */
+    8, /* extr_ui */
+    20,        /* htonr_us */
+    52,        /* htonr_ui */
+    116,       /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    4, /* ldr_ui */
+    12,        /* ldi_ui */
+    4, /* ldr_l */
+    12,        /* ldi_l */
+    8, /* ldxr_c */
+    16,        /* ldxi_c */
+    8, /* ldxr_uc */
+    16,        /* ldxi_uc */
+    8, /* ldxr_s */
+    16,        /* ldxi_s */
+    8, /* ldxr_us */
+    16,        /* ldxi_us */
+    8, /* ldxr_i */
+    16,        /* ldxi_i */
+    8, /* ldxr_ui */
+    16,        /* ldxi_ui */
+    8, /* ldxr_l */
+    16,        /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    4, /* str_l */
+    12,        /* sti_l */
+    8, /* stxr_c */
+    16,        /* stxi_c */
+    8, /* stxr_s */
+    16,        /* stxi_s */
+    8, /* stxr_i */
+    16,        /* stxi_i */
+    8, /* stxr_l */
+    16,        /* stxi_l */
+    4, /* bltr */
+    8, /* blti */
+    4, /* bltr_u */
+    8, /* blti_u */
+    4, /* bler */
+    8, /* blei */
+    4, /* bler_u */
+    8, /* blei_u */
+    4, /* beqr */
+    28,        /* beqi */
+    4, /* bger */
+    8, /* bgei */
+    4, /* bger_u */
+    8, /* bgei_u */
+    4, /* bgtr */
+    8, /* bgti */
+    4, /* bgtr_u */
+    8, /* bgti_u */
+    4, /* bner */
+    20,        /* bnei */
+    8, /* bmsr */
+    12,        /* bmsi */
+    8, /* bmcr */
+    12,        /* bmci */
+    32,        /* boaddr */
+    36,        /* boaddi */
+    16,        /* boaddr_u */
+    20,        /* boaddi_u */
+    32,        /* bxaddr */
+    36,        /* bxaddi */
+    16,        /* bxaddr_u */
+    20,        /* bxaddi_u */
+    32,        /* bosubr */
+    36,        /* bosubi */
+    16,        /* bosubr_u */
+    20,        /* bosubi_u */
+    32,        /* bxsubr */
+    36,        /* bxsubi */
+    16,        /* bxsubr_u */
+    20,        /* bxsubi_u */
+    4, /* jmpr */
+    28,        /* jmpi */
+    4, /* callr */
+    28,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    112,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    12,        /* addi_f */
+    4, /* subr_f */
+    12,        /* subi_f */
+    12,        /* rsbi_f */
+    4, /* mulr_f */
+    12,        /* muli_f */
+    4, /* divr_f */
+    12,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    4, /* ltr_f */
+    12,        /* lti_f */
+    4, /* ler_f */
+    12,        /* lei_f */
+    4, /* eqr_f */
+    12,        /* eqi_f */
+    4, /* ger_f */
+    12,        /* gei_f */
+    4, /* gtr_f */
+    12,        /* gti_f */
+    8, /* ner_f */
+    16,        /* nei_f */
+    28,        /* unltr_f */
+    36,        /* unlti_f */
+    28,        /* unler_f */
+    36,        /* unlei_f */
+    28,        /* uneqr_f */
+    36,        /* uneqi_f */
+    28,        /* unger_f */
+    36,        /* ungei_f */
+    28,        /* ungtr_f */
+    36,        /* ungti_f */
+    40,        /* ltgtr_f */
+    48,        /* ltgti_f */
+    28,        /* ordr_f */
+    36,        /* ordi_f */
+    20,        /* unordr_f */
+    28,        /* unordi_f */
+    4, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    8, /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    16,        /* stxi_f */
+    8, /* bltr_f */
+    16,        /* blti_f */
+    8, /* bler_f */
+    16,        /* blei_f */
+    8, /* beqr_f */
+    16,        /* beqi_f */
+    8, /* bger_f */
+    16,        /* bgei_f */
+    8, /* bgtr_f */
+    16,        /* bgti_f */
+    8, /* bner_f */
+    16,        /* bnei_f */
+    32,        /* bunltr_f */
+    40,        /* bunlti_f */
+    32,        /* bunler_f */
+    40,        /* bunlei_f */
+    32,        /* buneqr_f */
+    40,        /* buneqi_f */
+    32,        /* bunger_f */
+    40,        /* bungei_f */
+    32,        /* bungtr_f */
+    40,        /* bungti_f */
+    44,        /* bltgtr_f */
+    52,        /* bltgti_f */
+    32,        /* bordr_f */
+    40,        /* bordi_f */
+    24,        /* bunordr_f */
+    32,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    4, /* ltr_d */
+    24,        /* lti_d */
+    4, /* ler_d */
+    24,        /* lei_d */
+    4, /* eqr_d */
+    24,        /* eqi_d */
+    4, /* ger_d */
+    24,        /* gei_d */
+    4, /* gtr_d */
+    24,        /* gti_d */
+    8, /* ner_d */
+    28,        /* nei_d */
+    28,        /* unltr_d */
+    48,        /* unlti_d */
+    28,        /* unler_d */
+    48,        /* unlei_d */
+    28,        /* uneqr_d */
+    48,        /* uneqi_d */
+    28,        /* unger_d */
+    48,        /* ungei_d */
+    28,        /* ungtr_d */
+    48,        /* ungti_d */
+    40,        /* ltgtr_d */
+    60,        /* ltgti_d */
+    28,        /* ordr_d */
+    48,        /* ordi_d */
+    20,        /* unordr_d */
+    40,        /* unordi_d */
+    4, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    20,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    8, /* ldxr_d */
+    16,        /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    16,        /* stxi_d */
+    8, /* bltr_d */
+    28,        /* blti_d */
+    8, /* bler_d */
+    28,        /* blei_d */
+    8, /* beqr_d */
+    28,        /* beqi_d */
+    8, /* bger_d */
+    28,        /* bgei_d */
+    8, /* bgtr_d */
+    28,        /* bgti_d */
+    8, /* bner_d */
+    28,        /* bnei_d */
+    32,        /* bunltr_d */
+    52,        /* bunlti_d */
+    32,        /* bunler_d */
+    52,        /* bunlei_d */
+    32,        /* buneqr_d */
+    52,        /* buneqi_d */
+    32,        /* bunger_d */
+    52,        /* bungei_d */
+    32,        /* bungtr_d */
+    52,        /* bungti_d */
+    44,        /* bltgtr_d */
+    64,        /* bltgti_d */
+    32,        /* bordr_d */
+    52,        /* bordi_d */
+    24,        /* bunordr_d */
+    44,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    4, /* movr_w_f */
+    0, /* movr_ww_d */
+    4, /* movr_w_d */
+    0, /* movr_f_w */
+    4, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    4, /* movr_d_w */
+    16,        /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c
new file mode 100644 (file)
index 0000000..55b2391
--- /dev/null
@@ -0,0 +1,1615 @@
+/*
+ * Copyright (C) 2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* True when argument slot index i lies in [0, 8), i.e. it is one of the
+ * eight slots this file maps onto an integer argument register
+ * (addressed as JIT_RA0 - i). */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
+/* Same range test for the float argument slots (addressed as
+ * JIT_FA0 - i in this file). */
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
+
+/*
+ * Types
+ */
+/* In this backend a va_list is represented as a plain jit_pointer_t. */
+typedef jit_pointer_t jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* Shorthand that passes the implicit _jit state to _patch(); used by the
+ * branch cases of _emit_code() when the target label is not yet defined. */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* Include the cpu/fpu sources with PROTO defined.
+ * NOTE(review): presumably PROTO selects only the declaration/macro part
+ * of those files at this point -- confirm in jit_riscv-cpu.c/-fpu.c. */
+#define PROTO                          1
+#  include "jit_riscv-cpu.c"
+#  include "jit_riscv-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register description table.  Each entry pairs a spec (register class
+ * flags or'ed with the hardware register number) with the register name.
+ * Entries without rc(gpr)/rc(fpr) (zero, ra, sp, gp, fp) carry no class
+ * flags.  The argument registers are listed in descending order (a7..a0,
+ * fa7..fa0), which matches the "JIT_RA0 - n" / "JIT_FA0 - n" arithmetic
+ * used throughout this file (assuming JIT_RA0/JIT_FA0 name the a0/fa0
+ * entries; they are defined elsewhere).  The trailing _NOREG entry is a
+ * terminator and is excluded from reglen by _jit_init(). */
+jit_register_t         _rvs[] = {
+    { 0x00,                            "zero" },
+    { 0x01,                            "ra" },
+    { 0x02,                            "sp" },
+    { 0x03,                            "gp" },
+#if 0          /* Pretend it does not exist, so _NOREG can be used in
+                * a 64 bit bitmask */
+    { 0x04,                            "tp" },
+#endif
+    { rc(gpr) | 0x05,                  "t0" },
+    { rc(gpr) | 0x06,                  "t1" },
+    { rc(gpr) | 0x07,                  "t2" },
+    { rc(gpr) | 0x1c,                  "t3" },
+    { rc(gpr) | 0x1d,                  "t4" },
+    { rc(gpr) | 0x1e,                  "t5" },
+    { rc(gpr) | 0x1f,                  "t6" },
+    { 0x08,                            "fp" },
+    { rc(sav) | rc(gpr) | 0x09,                "s1" },
+    { rc(sav) | rc(gpr) | 0x12,                "s2" },
+    { rc(sav) | rc(gpr) | 0x13,                "s3" },
+    { rc(sav) | rc(gpr) | 0x14,                "s4" },
+    { rc(sav) | rc(gpr) | 0x15,                "s5" },
+    { rc(sav) | rc(gpr) | 0x16,                "s6" },
+    { rc(sav) | rc(gpr) | 0x17,                "s7" },
+    { rc(sav) | rc(gpr) | 0x18,                "s8" },
+    { rc(sav) | rc(gpr) | 0x19,                "s9" },
+    { rc(sav) | rc(gpr) | 0x1a,                "s10" },
+    { rc(sav) | rc(gpr) | 0x1b,                "s11" },
+    { rc(arg) | rc(gpr) | 0x11,                "a7" },
+    { rc(arg) | rc(gpr) | 0x10,                "a6" },
+    { rc(arg) | rc(gpr) | 0x0f,                "a5" },
+    { rc(arg) | rc(gpr) | 0x0e,                "a4" },
+    { rc(arg) | rc(gpr) | 0x0d,                "a3" },
+    { rc(arg) | rc(gpr) | 0x0c,                "a2" },
+    { rc(arg) | rc(gpr) | 0x0b,                "a1" },
+    { rc(arg) | rc(gpr) | 0x0a,                "a0" },
+    { rc(fpr) | 0x00,                  "ft0" },
+    { rc(fpr) | 0x01,                  "ft1" },
+    { rc(fpr) | 0x02,                  "ft2" },
+    { rc(fpr) | 0x03,                  "ft3" },
+    { rc(fpr) | 0x04,                  "ft4" },
+    { rc(fpr) | 0x05,                  "ft5" },
+    { rc(fpr) | 0x06,                  "ft6" },
+    { rc(fpr) | 0x07,                  "ft7" },
+    { rc(fpr) | 0x1c,                  "ft8" },
+    { rc(fpr) | 0x1d,                  "ft9" },
+    { rc(fpr) | 0x1e,                  "ft10" },
+    { rc(fpr) | 0x1f,                  "ft11" },
+    { rc(sav) | rc(fpr) | 0x08,                "fs0" },
+    { rc(sav) | rc(fpr) | 0x09,                "fs1" },
+    { rc(sav) | rc(fpr) | 0x12,                "fs2" },
+    { rc(sav) | rc(fpr) | 0x13,                "fs3" },
+    { rc(sav) | rc(fpr) | 0x14,                "fs4" },
+    { rc(sav) | rc(fpr) | 0x15,                "fs5" },
+    { rc(sav) | rc(fpr) | 0x16,                "fs6" },
+    { rc(sav) | rc(fpr) | 0x17,                "fs7" },
+    { rc(sav) | rc(fpr) | 0x18,                "fs8" },
+    { rc(sav) | rc(fpr) | 0x19,                "fs9" },
+    { rc(sav) | rc(fpr) | 0x1a,                "fs10" },
+    { rc(sav) | rc(fpr) | 0x1b,                "fs11" },
+    { rc(arg) | rc(fpr) | 0x11,                "fa7" },
+    { rc(arg) | rc(fpr) | 0x10,                "fa6" },
+    { rc(arg) | rc(fpr) | 0x0f,                "fa5" },
+    { rc(arg) | rc(fpr) | 0x0e,                "fa4" },
+    { rc(arg) | rc(fpr) | 0x0d,                "fa3" },
+    { rc(arg) | rc(fpr) | 0x0c,                "fa2" },
+    { rc(arg) | rc(fpr) | 0x0b,                "fa1" },
+    { rc(arg) | rc(fpr) | 0x0a,                "fa0" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU feature detection hook; intentionally empty for this backend. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Per-state initialisation: no carry register is assigned yet, and the
+ * usable register count is the table size minus the _NOREG terminator. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    jit_carry = _NOREG;
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+/* Start a new function: close any function still open, reserve an entry
+ * in the functions vector, reset its argument/stack bookkeeping and create
+ * the prolog and epilog nodes (only the prolog is linked here). */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+    jit_function_t     *function;
+
+    if (_jitc->function != NULL)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+
+    /* Grow the functions vector by 16 entries whenever it is full. */
+    index = _jitc->functions.offset;
+    if (index >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    function = _jitc->functions.ptr + index;
+    ++_jitc->functions.offset;
+    _jitc->function = function;
+
+    /* Fresh frame: no arguments seen, no outgoing area, no locals. */
+    function->self.size = stack_framesize;
+    function->self.alen = 0;
+    function->self.argf = 0;
+    function->self.argi = 0;
+    function->self.aoff = 0;
+    function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(function->prolog);
+    function->prolog->w.w = index;
+
+    /* Epilog node fields:
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    function->epilog = jit_new_node_no_link(jit_code_epilog);
+    function->epilog->w.w = index;
+
+    jit_regset_new(&function->regset);
+}
+
+/* Reserve `length` bytes of local storage and return its (decreasing)
+ * offset.  Before the reservation the running offset is aligned down to
+ * 2, 4 or 8 bytes depending on the requested length; lengths 0 and 1 get
+ * no alignment. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+    if (length == 2)
+       _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       _jitc->function->self.aoff &= -8;
+    _jitc->function->self.aoff -= length;
+    /* Only record the synthesized node while still building the code. */
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+/* Dynamic stack allocation: v holds the requested size, u receives the
+ * updated allocation offset.  The size is negated and masked with -16,
+ * then added both to the saved offset (kept in a frame slot created on
+ * first use) and to the stack pointer. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 delta;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_ww(allocar, u, v);
+    if (!fn->allocar) {
+       /* First allocar in this function: create the slot holding the
+        * running offset. */
+       fn->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       fn->allocar = 1;
+    }
+    delta = jit_get_reg(jit_class_gpr);
+    jit_negr(delta, v);
+    jit_andi(delta, delta, -16);
+    jit_ldxi_i(u, JIT_FP, fn->aoffoff);
+    jit_addr(u, u, delta);
+    jit_addr(JIT_SP, JIT_SP, delta);
+    jit_stxi_i(fn->aoffoff, JIT_FP, u);
+    jit_unget_reg(delta);
+    jit_dec_synth();
+}
+
+/* Return from the current function by jumping to its epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the integer value held in register u: move it to JIT_RET when
+ * it is not already there, mark JIT_RET live, then return. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET) {
+       jit_movr(JIT_RET, u);
+    }
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate integer value u: load it into JIT_RET and
+ * return. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single-precision value in register u.  When u already is
+ * JIT_FRET it is only marked live; otherwise it is moved there. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_f(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate single-precision value u: load it into JIT_FRET
+ * and return. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double-precision value in register u.  When u already is
+ * JIT_FRET it is only marked live; otherwise it is moved there. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_d(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate double-precision value u: load it into JIT_FRET
+ * and return. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Close the current function: link its (so far unlinked) epilog node
+ * into the node list and clear the "current function" pointer. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function != NULL);
+    /* The epilog must not have been linked already. */
+    assert(!_jitc->function->epilog->next);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether argument node u has a slot index in the register range:
+ * the integer range test for jit_code_arg, the float range test for
+ * jit_code_arg_f / jit_code_arg_d. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+       return (jit_arg_f_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Mark a variadic boundary.  Outside a prepare/finish sequence this flags
+ * the function being defined as varargs and remembers how many integer
+ * argument registers were named; inside one it flags the pending call. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare) {
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+       _jitc->function->vagp = _jitc->function->self.argi;
+    }
+    else {
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the value in register u (a va_list) as a call argument; it is
+ * pushed exactly like an ordinary integer register argument. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the function being defined.
+ * The node's first value is either a register slot index (0..7) or, once
+ * the registers are used up, a frame offset taken from self.size. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 slot;
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+    if (!jit_arg_reg_p(_jitc->function->self.argi)) {
+       /* No integer register left: the argument lives on the stack. */
+       slot = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+       slot = _jitc->function->self.argi++;
+    arg = jit_new_node_ww(jit_code_arg, slot,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next single-precision argument.  Slot encoding:
+ *   0..7   float argument register index
+ *   8..15  integer argument register index + 8 (float registers used up)
+ *   other  frame offset taken from self.size */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 slot;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    assert(!(fn->self.call & jit_call_varargs));
+    if (jit_arg_f_reg_p(fn->self.argf))
+       slot = fn->self.argf++;
+    else if (jit_arg_reg_p(fn->self.argi))
+       slot = 8 + fn->self.argi++;
+    else {
+       slot = fn->self.size;
+       fn->self.size += sizeof(jit_word_t);
+    }
+    arg = jit_new_node_ww(jit_code_arg_f, slot, ++fn->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next double-precision argument.  Slot encoding:
+ *   0..7   float argument register index
+ *   8..15  integer argument register index + 8 (float registers used up)
+ *   other  frame offset taken from self.size */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t         *arg;
+    jit_int32_t                 slot;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    assert(!(fn->self.call & jit_call_varargs));
+    if (jit_arg_f_reg_p(fn->self.argf))
+       slot = fn->self.argf++;
+    else if (jit_arg_reg_p(fn->self.argi))
+       slot = 8 + fn->self.argi++;
+    else {
+       slot = fn->self.size;
+       fn->self.size += sizeof(jit_word_t);
+    }
+    arg = jit_new_node_ww(jit_code_arg_d, slot, ++fn->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Fetch integer argument v into register u as a signed char
+ * (jit_extr_c from its argument register, or jit_ldxi_c from the
+ * frame). */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_c(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_c(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u as an unsigned char
+ * (jit_extr_uc from its argument register, or jit_ldxi_uc from the
+ * frame). */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_uc(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_uc(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u as a signed short
+ * (jit_extr_s from its argument register, or jit_ldxi_s from the
+ * frame). */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_s(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_s(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u as an unsigned short
+ * (jit_extr_us from its argument register, or jit_ldxi_us from the
+ * frame). */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_us(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_us(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u as a signed int
+ * (jit_extr_i from its argument register, or jit_ldxi_i from the
+ * frame). */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_i(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_i(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u as an unsigned int
+ * (jit_extr_ui from its argument register, or jit_ldxi_ui from the
+ * frame). */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_ui(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_extr_ui(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into register u at full word width
+ * (plain register move, or jit_ldxi_l from the frame). */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_ldxi_l(u, JIT_FP, v->u.w);
+    }
+    else {
+       jit_movr(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the value of register u, writing
+ * either its argument register or its frame slot. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       jit_stxi(v->u.w, JIT_FP, u);
+    }
+    else {
+       jit_movr(JIT_RA0 - v->u.w, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the immediate u.  A stack-resident
+ * argument needs a temporary register to hold the immediate before it is
+ * stored. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(v->u.w, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    else
+       jit_movi(JIT_RA0 - v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Fetch single-precision argument v into float register u.  The slot is
+ * a float register (0..7), an integer register (8..15, value moved with
+ * jit_movr_w_f) or a frame offset. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    /* The two register ranges are disjoint, so the test order is free. */
+    if (jit_arg_reg_p(v->u.w - 8)) {
+       jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
+    }
+    else if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movr_f(u, JIT_FA0 - v->u.w);
+    }
+    else {
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite single-precision argument v with float register u.  The slot
+ * is a float register (0..7), an integer register (8..15, value moved
+ * with jit_movr_f_w) or a frame offset. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    /* The two register ranges are disjoint, so the test order is free. */
+    if (jit_arg_reg_p(v->u.w - 8)) {
+       jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
+    }
+    else if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movr_f(JIT_FA0 - v->u.w, u);
+    }
+    else {
+       jit_stxi_f(v->u.w, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite single-precision argument v with the immediate u.  For an
+ * argument held in an integer register the float's bit pattern is loaded
+ * as an integer immediate; a stack argument goes through a temporary
+ * float register. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+    union {
+       jit_float32_t   f;
+       jit_int32_t     i;
+    } bits;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movi_f(JIT_FA0 - v->u.w, u);
+    }
+    else if (jit_arg_reg_p(v->u.w - 8)) {
+       bits.f = u;
+       jit_movi(JIT_RA0 - (v->u.w - 8), bits.i);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_stxi_f(v->u.w, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch double-precision argument v into float register u.  The slot is
+ * a float register (0..7), an integer register (8..15, value moved with
+ * jit_movr_w_d) or a frame offset. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    /* The two register ranges are disjoint, so the test order is free. */
+    if (jit_arg_reg_p(v->u.w - 8)) {
+       jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
+    }
+    else if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movr_d(u, JIT_FA0 - v->u.w);
+    }
+    else {
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite double-precision argument v with float register u.  The slot
+ * is a float register (0..7), an integer register (8..15, value moved
+ * with jit_movr_d_w) or a frame offset.
+ *
+ * The first test uses jit_arg_f_reg_p(), like getarg_d/putargr_f, since
+ * the branch addresses a float argument register (JIT_FA0 - n). */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(JIT_FA0 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
+    else
+       jit_stxi_d(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite double-precision argument v with the immediate u.  For an
+ * argument held in an integer register the double's bit pattern is loaded
+ * as an integer immediate; a stack argument goes through a temporary
+ * float register.
+ *
+ * The first test uses jit_arg_f_reg_p(), like getarg_d/putargi_f, since
+ * the branch addresses a float argument register (JIT_FA0 - n). */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_d(JIT_FA0 - v->u.w, u);
+    else if (jit_arg_reg_p(v->u.w - 8)) {
+       union {
+           jit_float64_t       d;
+           jit_int64_t         w;
+       } uu;
+       uu.d = u;
+       jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register u as the next integer argument of the call being
+ * prepared: in the next free integer argument register, or in the
+ * outgoing stack area (relative to JIT_SP) once those are used up. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_stxi(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next integer argument of the call being
+ * prepared.  A stack-passed immediate is first loaded into a temporary
+ * register. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 tmp;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(_jitc->function->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    jit_dec_synth();
+}
+
+/* Pass float register u as the next single-precision call argument.
+ * Float argument registers are used only for non-variadic calls; after
+ * that the value goes to an integer argument register, then the stack. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    if (!(fn->call.call & jit_call_varargs) &&
+       jit_arg_f_reg_p(fn->call.argf)) {
+       jit_movr_f(JIT_FA0 - fn->call.argf, u);
+       ++fn->call.argf;
+    }
+    else if (jit_arg_reg_p(fn->call.argi)) {
+       jit_movr_f_w(JIT_RA0 - fn->call.argi, u);
+       ++fn->call.argi;
+    }
+    else {
+       jit_stxi_f(fn->call.size, JIT_SP, u);
+       fn->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next single-precision call argument.
+ * Float argument registers are used only for non-variadic calls; after
+ * that the value goes to an integer argument register, then the stack
+ * (through a temporary float register). */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                tmp;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    if (!(fn->call.call & jit_call_varargs) &&
+       jit_arg_f_reg_p(fn->call.argf)) {
+       jit_movi_f(JIT_FA0 - fn->call.argf, u);
+       ++fn->call.argf;
+    }
+    else if (jit_arg_reg_p(fn->call.argi)) {
+       jit_movi_f_w(JIT_RA0 - fn->call.argi, u);
+       ++fn->call.argi;
+    }
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_stxi_f(fn->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       fn->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass float register u as the next double-precision call argument.
+ * Float argument registers are used only for non-variadic calls; after
+ * that the value goes to an integer argument register, then the stack. */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (!(fn->call.call & jit_call_varargs) &&
+       jit_arg_f_reg_p(fn->call.argf)) {
+       jit_movr_d(JIT_FA0 - fn->call.argf, u);
+       ++fn->call.argf;
+    }
+    else if (jit_arg_reg_p(fn->call.argi)) {
+       jit_movr_d_w(JIT_RA0 - fn->call.argi, u);
+       ++fn->call.argi;
+    }
+    else {
+       jit_stxi_d(fn->call.size, JIT_SP, u);
+       fn->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next double-precision call argument.
+ * Float argument registers are used only for non-variadic calls; after
+ * that the value goes to an integer argument register, then the stack
+ * (through a temporary float register). */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                tmp;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (!(fn->call.call & jit_call_varargs) &&
+       jit_arg_f_reg_p(fn->call.argf)) {
+       jit_movi_d(JIT_FA0 - fn->call.argf, u);
+       ++fn->call.argf;
+    }
+    else if (jit_arg_reg_p(fn->call.argi)) {
+       jit_movi_d_w(JIT_RA0 - fn->call.argi, u);
+       ++fn->call.argi;
+    }
+    else {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(fn->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       fn->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/* Tell whether register `regno` (an index into _rvs) carries an argument
+ * of the call described by `node`, where node->v.w is the number of
+ * integer argument registers used and node->w.w the number of float
+ * argument registers used (as stored by _jit_finishi()).
+ *
+ * The argument index is computed into a separate variable for each
+ * register file: overwriting `regno` with "JIT_RA0 - regno" before the
+ * float test made "JIT_FA0 - regno" operate on an already transformed
+ * value, so float argument registers were never recognised. */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    jit_int32_t                index;
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+       index = JIT_RA0 - regno;
+       if (index >= 0 && index < node->v.w)
+           return (1);
+       if (spec & jit_class_fpr) {
+           index = JIT_FA0 - regno;
+           if (index >= 0 && index < node->w.w)
+               return (1);
+       }
+    }
+
+    return (0);
+}
+
+/* Emit the indirect call (through register r0) that ends a
+ * prepare/pusharg sequence.  The outgoing stack area of the enclosing
+ * function is grown to fit this call, the call node records how many
+ * integer (v.w) and float (w.w) argument registers the call uses, and the
+ * per-call counters are reset.
+ *
+ * v.w is taken from call.argi, the count for the call being emitted, as
+ * _jit_finishi() does; self.argi is the enclosing function's own incoming
+ * argument count and is unrelated to this call. */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_callr(r0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/* Emit the direct call to address i0 that ends a prepare/pusharg
+ * sequence and return the call node.  The outgoing stack area of the
+ * enclosing function is grown to fit this call, the node records the
+ * number of integer (v.w) and float (w.w) argument registers used, and
+ * the per-call counters are reset. */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+    jit_function_t     *fn;
+    assert(_jitc->function);
+    fn = _jitc->function;
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (fn->call.size > fn->self.alen)
+       fn->self.alen = fn->call.size;
+    call = jit_calli(i0);
+    call->v.w = fn->call.argi;
+    call->w.w = fn->call.argf;
+    fn->call.size = 0;
+    fn->call.argf = 0;
+    fn->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (call);
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_c. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_uc. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_s. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_us. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_i. */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+    jit_extr_i(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's integer result from JIT_RET into r0 using
+ * jit_extr_ui. */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's full-width integer result into r0; no move is emitted
+ * when r0 already is JIT_RET. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    if (JIT_RET != r0) {
+       jit_movr(r0, JIT_RET);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch a call's single-precision result into r0; no move is emitted
+ * when r0 already is JIT_FRET. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    if (JIT_FRET != r0) {
+       jit_movr_f(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch a call's double-precision result into r0; no move is emitted
+ * when r0 already is JIT_FRET. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    if (JIT_FRET != r0) {
+       jit_movr_d(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_word_t          value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.const_offset = undo.patch_offset = 0;
+#  define assert_data(node)            /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
+               break
+#define case_rrd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_brf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
+               else {                                                  \
+                   word = name##i_f(_jit->pc.w, rn(node->v.w),         \
+                               node->w.f);                             \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
+               else {                                                  \
+                   word = name##i_d(_jit->pc.w, rn(node->v.w),         \
+                               node->w.d);                             \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(hton, _ul);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add);
+               case_rrr(sub, _f);
+               case_rrf(sub);
+               case_rrf(rsb);
+               case_rrr(mul, _f);
+               case_rrf(mul);
+               case_rrr(div, _f);
+               case_rrf(div);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt);
+               case_rrr(le, _f);
+               case_rrf(le);
+               case_rrr(eq, _f);
+               case_rrf(eq);
+               case_rrr(ge, _f);
+               case_rrf(ge);
+               case_rrr(gt, _f);
+               case_rrf(gt);
+               case_rrr(ne, _f);
+               case_rrf(ne);
+               case_rrr(unlt, _f);
+               case_rrf(unlt);
+               case_rrr(unle, _f);
+               case_rrf(unle);
+               case_rrr(uneq, _f);
+               case_rrf(uneq);
+               case_rrr(unge, _f);
+               case_rrf(unge);
+               case_rrr(ungt, _f);
+               case_rrf(ungt);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt);
+               case_rrr(ord, _f);
+               case_rrf(ord);
+               case_rrr(unord, _f);
+               case_rrf(unord);
+               case_brr(blt, _f);
+               case_brf(blt);
+               case_brr(ble, _f);
+               case_brf(ble);
+               case_brr(beq, _f);
+               case_brf(beq);
+               case_brr(bge, _f);
+               case_brf(bge);
+               case_brr(bgt, _f);
+               case_brf(bgt);
+               case_brr(bne, _f);
+               case_brf(bne);
+               case_brr(bunlt, _f);
+               case_brf(bunlt);
+               case_brr(bunle, _f);
+               case_brf(bunle);
+               case_brr(buneq, _f);
+               case_brf(buneq);
+               case_brr(bunge, _f);
+               case_brf(bunge);
+               case_brr(bungt, _f);
+               case_brf(bungt);
+               case_brr(bltgt, _f);
+               case_brf(bltgt);
+               case_brr(bord, _f);
+               case_brf(bord);
+               case_brr(bunord, _f);
+               case_brf(bunord);
+               case_rrr(add, _d);
+               case_rrd(add);
+               case_rrr(sub, _d);
+               case_rrd(sub);
+               case_rrd(rsb);
+               case_rrr(mul, _d);
+               case_rrd(mul);
+               case_rrr(div, _d);
+               case_rrd(div);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrd(lt);
+               case_rrr(le, _d);
+               case_rrd(le);
+               case_rrr(eq, _d);
+               case_rrd(eq);
+               case_rrr(ge, _d);
+               case_rrd(ge);
+               case_rrr(gt, _d);
+               case_rrd(gt);
+               case_rrr(ne, _d);
+               case_rrd(ne);
+               case_rrr(unlt, _d);
+               case_rrd(unlt);
+               case_rrr(unle, _d);
+               case_rrd(unle);
+               case_rrr(uneq, _d);
+               case_rrd(uneq);
+               case_rrr(unge, _d);
+               case_rrd(unge);
+               case_rrr(ungt, _d);
+               case_rrd(ungt);
+               case_rrr(ltgt, _d);
+               case_rrd(ltgt);
+               case_rrr(ord, _d);
+               case_rrd(ord);
+               case_rrr(unord, _d);
+               case_rrd(unord);
+               case_brr(blt, _d);
+               case_brd(blt);
+               case_brr(ble, _d);
+               case_brd(ble);
+               case_brr(beq, _d);
+               case_brd(beq);
+               case_brr(bge, _d);
+               case_brd(bge);
+               case_brr(bgt, _d);
+               case_brd(bgt);
+               case_brr(bne, _d);
+               case_brd(bne);
+               case_brr(bunlt, _d);
+               case_brd(bunlt);
+               case_brr(bunle, _d);
+               case_brd(bunle);
+               case_brr(buneq, _d);
+               case_brd(buneq);
+               case_brr(bunge, _d);
+               case_brd(bunge);
+               case_brr(bungt, _d);
+               case_brd(bungt);
+               case_brr(bltgt, _d);
+               case_brd(bltgt);
+               case_brr(bord, _d);
+               case_brd(bord);
+               case_brr(bunord, _d);
+               case_brd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_movr_w_f:
+               movr_w_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_f_w:
+               movr_f_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_f_w:
+               assert_data(node);
+               movi_f_w(rn(node->u.w), node->v.f);
+               break;
+           case jit_code_movr_w_d:
+               movr_w_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_d_w:
+               movr_d_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_d_w:
+               assert_data(node);
+               movi_d_w(rn(node->u.w), node->v.d);
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:             case jit_code_getarg_ui:
+           case jit_code_getarg_l:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_ui:            case jit_code_retval_l:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 ||
+              (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
+       assert(_jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = _jitc->patches.ptr[offset].inst;
+       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(word, value);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_riscv-cpu.c"
+#  include "jit_riscv-fpu.c"
+#undef CODE
+
+/*
+ * Hand the code range [fptr, tptr) to the compiler's __clear_cache()
+ * builtin after emission.
+ *
+ * The range is widened to whole pages first: the start is rounded down
+ * and the end rounded up to the size reported by sysconf().  When
+ * __GNUC__ is not defined the function body is empty.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t         f, t, s;
+
+    f = (jit_word_t)fptr;
+    t = (jit_word_t)tptr;
+    s = sysconf(_SC_PAGE_SIZE);
+    /* sysconf() returns -1 on failure; masking with -s would then
+     * collapse both bounds to 0 or 1, so only align when s is usable
+     * and otherwise flush exactly the range that was given */
+    if (s > 0) {
+       f &= -s;
+       t = (t + s - 1) & -s;
+    }
+    __clear_cache((void *)f, (void *)t);
+#endif
+}
+
+void   /* non-static wrapper that forwards to ldxi() */
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);  /* r0 and r1 go through rn(); i0 is passed as given */
+}
+
+void   /* non-static wrapper that forwards to stxi() */
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));  /* i0 is passed as given; r0 and r1 go through rn() */
+}
+
+void   /* non-static wrapper that forwards to ldxi_d() */
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);        /* r0 and r1 go through rn(); i0 is passed as given */
+}
+
+void   /* non-static wrapper that forwards to stxi_d() */
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));        /* i0 is passed as given; r0 and r1 go through rn() */
+}
+
+/*
+ * Queue a pending fixup: the instruction at `instr`, emitted for `node`,
+ * refers to a target that has not been flagged as defined yet.  The
+ * (inst, node) pairs stored here are the ones the patch_at() loop reads
+ * back once emission is finished.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_word_t          slot;
+
+    assert(node->flag & jit_flag_node);
+    /* movi carries its target node in v, every other code in u */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    /* a target already flagged as defined must not be queued */
+    assert(!(target->flag & jit_flag_patch));
+
+    /* table full: grow it by another 1024 entries */
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    slot = _jitc->patches.offset++;
+    _jitc->patches.ptr[slot].inst = instr;
+    _jitc->patches.ptr[slot].node = node;
+}
diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c
new file mode 100644 (file)
index 0000000..02cac60
--- /dev/null
@@ -0,0 +1,3848 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  if __WORDSIZE == 32        /* unsuffixed load/store names resolve to the _i variants */
+#    define ldr(r0,r1)                 ldr_i(r0,r1)
+#    define ldxr(r0,r1,r2)             ldxr_i(r0,r1,r2)
+#    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
+#  else                       /* any other word size: the _l variants */
+#    define ldr(r0,r1)                 ldr_l(r0,r1)
+#    define ldxr(r0,r1,r2)             ldxr_l(r0,r1,r2)
+#    define ldxi(r0,r1,i0)             ldxi_l(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_l(i0,r0,r1)
+#  endif
+#  define is(i)                                *_jit->pc.us++ = i      /* store i through the pc.us cursor, then advance it */
+#  if __WORDSIZE == 32        /* stack_framesize: 96 when __WORDSIZE is 32, 160 otherwise */
+#    define stack_framesize            96
+#  else
+#    define stack_framesize            160
+#  endif
+#  define _R0_REGNO                    0       /* numeric register ids used by the code below */
+#  define _R1_REGNO                    1
+#  define _R7_REGNO                    7
+#  define _R13_REGNO                   13
+#  define _FP_REGNO                    _R13_REGNO      /* _FP_REGNO is an alias for register 13 */
+#  define _R14_REGNO                   14
+#  define _R15_REGNO                   15
+/*
+ * Immediate helpers: uNN_p()/sNN_p() test whether a value lies in the
+ * unsigned/signed NN-bit range (both bounds inclusive), xNN() keeps the
+ * low NN bits of a value.
+ */
+#  define u12_p(i0)                    ((i0) >= 0 && (i0) <= 4095)
+#  define s16_p(i0)                    ((i0) >= -32768 && (i0) <= 32767)
+#  define x16(i0)                      ((i0) & 0xffff)
+#  define s20_p(i0)                    ((i0) >= -524288 && (i0) <= 524287)
+#  define x20(i0)                      ((i0) & 0xfffff)
+#  if __WORDSIZE == 32
+#    define s32_p(i0)                  1
+#  else
+/* upper bound is inclusive, as in s16_p and s20_p: 2147483647 is the
+ * largest signed 32-bit value and must be accepted */
+#    define s32_p(i0)                                                  \
+    ((i0) >= -2147483648L && (i0) <= 2147483647L)
+#  endif
+
+/*
+       Condition Code          Instruction     (Mask) Bit Mask Value
+       0                       8               8
+       1                       9               4
+       2                       10              2
+       3                       11              1
+
+AGR:
+       0       Zero
+       1       < zero
+       2       > zero
+       3       Overflow
+--
+1      ->      overflow                CC_O
+14     ->      no overflow             CC_NO
+
+ALGR:
+       0       Zero, no carry
+       1       Not zero, no carry
+       2       Zero, carry
+       3       Not zero, carry
+--
+2|1    ->      carry                   CC_NLE
+8|4    ->      no carry                CC_LE
+
+SGR:
+       0       Zero
+       1       < zero
+       2       > zero
+       3       Overflow
+--
+1      ->      overflow                CC_O
+14     ->      no overflow             CC_NO
+
+SLGR:
+       0       --
+       1       Not zero, borrow
+       2       Zero, no borrow
+       3       Not zero, no borrow
+--
+4      ->      borrow                  CC_L
+11     ->      no borrow               CC_NL
+  */
+
+#  define CC_NV                                0x0     /* 4-bit masks; per the table above 8 = cc 0, 4 = cc 1, 2 = cc 2, 1 = cc 3; no bit set here */
+#  define CC_O                         0x1     /* cc 3 only */
+#  define CC_H                         0x2     /* cc 2 only */
+#  define CC_NLE                       0x3     /* cc 2 or cc 3 */
+#  define CC_L                         0x4     /* cc 1 only */
+#  define CC_NHE                       0x5
+#  define CC_LH                                0x6
+#  define CC_NE                                0x7     /* every code except cc 0 */
+#  define CC_E                         0x8     /* cc 0 only */
+#  define CC_NLH                       0x9
+#  define CC_HE                                0xA
+#  define CC_NL                                0xB     /* every code except cc 1 */
+#  define CC_LE                                0xC     /* cc 0 or cc 1 */
+#  define CC_NH                                0xD     /* every code except cc 2 */
+#  define CC_NO                                0xE     /* every code except cc 3 */
+#  define CC_AL                                0xF     /* all four bits set */
+#  define _us                          jit_uint16_t    /* _us/_ui: type shorthands, removed again by the #undef lines below */
+#  define _ui                          jit_uint32_t    /* the only shorthand the prototypes in this list actually use */
+#  define E_(Op)                       _E(_jit,Op)     /* each NAME_() macro supplies _jit as first argument to the static _NAME() */
+static void _E(jit_state_t*,_ui);
+#  define I_(Op,I)                     _I(_jit,Op,I)
+static void _I(jit_state_t*,_ui,_ui);
+#  define RR_(Op,R1,R2)                        _RR(_jit,Op,R1,R2)
+static void _RR(jit_state_t*,_ui,_ui,_ui);
+#  define RRE_(Op,R1,R2)               _RRE(_jit,Op,R1,R2)
+static void _RRE(jit_state_t*,_ui,_ui,_ui);
+#  define RRF_(Op,R3,M4,R1,R2)         _RRF(_jit,Op,R3,M4,R1,R2)
+static void _RRF(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define RX_(Op,R1,X2,B2,D2)          _RX(_jit,Op,R1,X2,B2,D2)
+static void _RX(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define RXE_(Op,R1,X2,B2,D2,Op2)     _RXE(_jit,Op,R1,X2,B2,D2,Op2)
+static void _RXE(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui);
+#  define RXF_(Op,R3,X2,B2,D2,R1,Op2)  _RXF(_jit,Op,R3,X2,B2,D2,R1,Op2)
+static void _RXF(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui);
+#  define RXY_(Op,R1,X2,B2,D2,Op2)     _RXY(_jit,Op,R1,X2,B2,D2,Op2)
+static void _RXY(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui);
+#  define RS_(Op,R1,R3,B2,D2)          _RS(_jit,Op,R1,R3,B2,D2)
+static void _RS(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define RSY_(Op,R1,R3,B2,D2,Op2)     RXY_(Op,R1,R3,B2,D2,Op2)        /* no emitter of its own: reuses RXY_ with R3 in the X2 slot */
+#  define RSL_(Op,L1,B1,D1,Op2)                _RSL(_jit,Op,L1,B1,D1,Op2)
+static void _RSL(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define RSI_(Op,R1,R3,I2)            _RSI(_jit,Op,R1,R3,I2)
+static void _RSI(jit_state_t*,_ui,_ui,_ui,_ui);
+#  define RI_(Op,R1,Op2,I2)            RSI_(Op,R1,Op2,I2)      /* no emitter of its own: reuses RSI_ with Op2 in the R3 slot */
+#  define RIE_(Op,R1,R3,I2,Op2)                _RIE(_jit,Op,R1,R3,I2,Op2)
+static void _RIE(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define RIL_(Op,R1,Op2,I2)           _RIL(_jit,Op,R1,Op2,I2)
+static void _RIL(jit_state_t*,_ui,_ui,_ui,_ui);
+#  define SI_(Op,I2,B1,D1)             _SI(_jit,Op,I2,B1,D1)
+static void _SI(jit_state_t*,_ui,_ui,_ui,_ui);
+#  define SIY_(Op,I2,B1,D1,Op2)                _SIY(_jit,Op,I2,B1,D1,Op2)
+static void _SIY(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  define S_(Op,B2,D2)                 _S(_jit,Op,B2,D2)
+static void _S(jit_state_t*,_ui,_ui,_ui);
+#  define SSL_(Op,L,B1,D1,B2,D2)       SS_(Op,(L)>>4,(L)&0xF,B1,D1,B2,D2)      /* passes (L)>>4 and (L)&0xF as the LL and LH arguments of SS_ */
+#  define SS_(Op,LL,LH,B1,D1,B2,D2)    _SS(_jit,Op,LL,LH,B1,D1,B2,D2)
+static void _SS(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui);
+#  define SSE_(Op,B1,D1,B2,D2)         _SSE(_jit,Op,B1,D1,B2,D2)
+static void _SSE(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+#  undef _us
+#  undef _ui
+#  define nop(c)                       _nop(_jit,c)    /* same calling pattern, with a jit_int32_t argument */
+static void _nop(jit_state_t*,jit_int32_t);
+#  if __WORDSIZE == 32        /* generic NAME_() operations map to the instruction macros without a G in their name */
+#    define ADD_(r0,r1)                        AR(r0,r1)
+#    define ADDI_(r0,i0)               AHI(r0,i0)
+#    define ADDC_(r0,r1)               ALR(r0,r1)
+#    define ADDX_(r0,r1)               ALCR(r0,r1)
+#    define AND_(r0,r1)                        NR(r0,r1)
+#    define CMP_(r0,r1)                        CR(r0,r1)
+#    define CMPU_(r0,r1)               CLR(r0,r1)
+#    define DIVREM_(r0,r1)             DR(r0,r1)
+#    define DIVREMU_(r0,r1)            DLR(r0,r1)
+#    define OR_(r0,r1)                 OR(r0,r1)
+#    define MUL_(r0,r1)                        MSR(r0,r1)
+#    define MULI_(r0,i0)               MHI(r0,i0)
+#    define MULU_(r0,r1)               MLR(r0,r1)
+#    define SUB_(r0,r1)                        SR(r0,r1)
+#    define SUBC_(r0,r1)               SLR(r0,r1)
+#    define SUBX_(r0,r1)               SLBR(r0,r1)
+#    define TEST_(r0,r1)               LTR(r0,r1)
+#    define XOR_(r0,r1)                        XR(r0,r1)
+#  else                       /* any other word size: the same operation names map to the G-named instruction macros */
+#    define ADD_(r0,r1)                        AGR(r0,r1)
+#    define ADDI_(r0,i0)               AGHI(r0,i0)
+#    define ADDC_(r0,r1)               ALGR(r0,r1)
+#    define ADDX_(r0,r1)               ALCGR(r0,r1)
+#    define AND_(r0,r1)                        NGR(r0,r1)
+#    define CMP_(r0,r1)                        CGR(r0,r1)
+#    define CMPU_(r0,r1)               CLGR(r0,r1)
+#    define DIVREM_(r0,r1)             DSGR(r0,r1)
+#    define DIVREMU_(r0,r1)            DLGR(r0,r1)
+#    define MUL_(r0,r1)                        MSGR(r0,r1)
+#    define MULI_(r0,i0)               MGHI(r0,i0)
+#    define MULU_(r0,r1)               MLGR(r0,r1)
+#    define OR_(r0,r1)                 OGR(r0,r1)
+#    define SUB_(r0,r1)                        SGR(r0,r1)
+#    define SUBC_(r0,r1)               SLGR(r0,r1)
+#    define SUBX_(r0,r1)               SLBGR(r0,r1)
+#    define TEST_(r0,r1)               LTGR(r0,r1)
+#    define XOR_(r0,r1)                        XGR(r0,r1)
+#  endif
+/****************************************************************
+ * General Instructions                                                *
+ ****************************************************************/
+/* ADD */
+#  define AR(R1,R2)                    RR_(0x1A,R1,R2)
+#  define AGR(R1,R2)                   RRE_(0xB908,R1,R2)
+#  define AGFR(R1,R2)                  RRE_(0xB918,R1,R2)
+#  define A(R1,D2,X2,B2)               RX_(0x5A,R1,X2,B2,D2)
+#  define AY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x5A)
+#  define AG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x08)
+#  define AGF(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x18)
+/* ADD HALFWORD */
+#  define AH(R1,D2,X2,B2)              RX_(0x4A,R1,X2,B2,D2)
+#  define AHY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x7A)
+/* ADD HALFWORD IMMEDIATE */
+#  define AHI(R1,I2)                   RI_(0xA7,R1,0xA,I2)
+#  define AGHI(R1,I2)                  RI_(0xA7,R1,0xB,I2)
+/* ADD LOGICAL */
+#  define ALR(R1,R2)                   RR_(0x1E,R1,R2)
+#  define ALGR(R1,R2)                  RRE_(0xB90A,R1,R2)
+#  define ALGFR(R1,R2)                 RRE_(0xB91A,R1,R2)
+#  define AL(R1,D2,X2,B2)              RX_(0x5E,R1,X2,B2,D2)
+#  define ALY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x5E)
+#  define ALG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x0A)
+#  define ALGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x1A)
+/* ADD LOGICAL WITH CARRY */
+#  define ALCR(R1,R2)                  RRE_(0xB998,R1,R2)
+#  define ALCGR(R1,R2)                 RRE_(0xB988,R1,R2)
+#  define ALC(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x98)
+#  define ALCG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x88)
+/* AND */
+#  define NR(R1,R2)                    RR_(0x14,R1,R2)
+#  define NGR(R1,R2)                   RRE_(0xB980,R1,R2)
+#  define N(R1,D2,X2,B2)               RX_(0x54,R1,X2,B2,D2)
+#  define NY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x54)
+#  define NG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x80)
+#  define NI(D1,B1,I2)                 SI_(0x94,I2,B1,D1)
+#  define NIY(D1,B1,I2)                        SIY_(0xEB,I2,B1,D1,0x54)
+#  define NC(D1,L,B1,D2,B2)            SSL_(0xD4,L,B1,D1,B2,D2)
+/* AND IMMEDIATE */
+#  define NIHH(R1,I2)                  RI_(0xA5,R1,0x4,I2)
+#  define NIHL(R1,I2)                  RI_(0xA5,R1,0x5,I2)
+#  define NILH(R1,I2)                  RI_(0xA5,R1,0x6,I2)
+#  define NILL(R1,I2)                  RI_(0xA5,R1,0x7,I2)
+/* BRANCH AND LINK */
+#  define BALR(R1,R2)                  RR_(0x05,R1,R2)
+#  define BAL(R1,D2,X2,B2)             RX_(0x45,R1,X2,B2,D2)
+/* BRANCH AND SAVE */
+#  define BASR(R1,R2)                  RR_(0x0D,R1,R2)
+#  define BAS(R1,D2,X2,B2)             RX_(0x4D,R1,X2,B2,D2)
+/* BRANCH AND SAVE AND SET MODE */
+#  define BASSM(R1,R2)                 RR_(0x0C,R1,R2)
+/* BRANCH AND SET MODE */
+#  define BSM(R1,R2)                   RR_(0x0B,R1,R2)
+/* BRANCH ON CONDITION */
+#  define BCR(M1,R2)                   RR_(0x07,M1,R2)
+#  define BR(R2)                       BCR(CC_AL,R2)
+#  define NOPR(R2)                     BCR(CC_NV,R2)
+#  define BC(M1,D2,X2,B2)              RX_(0x47,M1,X2,B2,D2)
+/* BRANCH ON COUNT */
+#  define BCTR(R1,R2)                  RR_(0x06,R1,R2)
+#  define BCTGR(R1,R2)                 RRE_(0xB946,R1,R2)
+#  define BCT(R1,D2,X2,B2)             RX_(0x46,R1,X2,B2,D2)
+#  define BCTG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x46)
+/* BRANCH ON INDEX HIGH -- NOTE: BXHG takes (R1,R3,B2,D2), i.e. base before displacement, unlike BXH */
+#  define BXH(R1,R3,D2,B2)             RS_(0x86,R1,R3,B2,D2)
+#  define BXHG(R1,R3,B2,D2)            RSY_(0xEB,R1,R3,B2,D2,0x44)
+/* BRANCH ON INDEX LOW OR EQUAL -- NOTE: BXLEG takes (R1,R3,B2,D2), i.e. base before displacement, unlike BXLE */
+#  define BXLE(R1,R3,D2,B2)            RS_(0x87,R1,R3,B2,D2)
+#  define BXLEG(R1,R3,B2,D2)           RSY_(0xEB,R1,R3,B2,D2,0x45)
+/* BRANCH RELATIVE AND SAVE */
+#  define BRAS(R1,I2)                  RI_(0xA7,R1,0x5,I2)
+/* BRANCH RELATIVE AND SAVE LONG */
+#  define BRASL(R1,I2)                 RIL_(0xC0,R1,0x5,I2)
+/* BRANCH RELATIVE ON CONDITION */
+#  define BRC(M1,I2)                   RI_(0xA7,M1,0x4,I2)
+#  define J(I2)                                BRC(CC_AL,I2)
+/* BRANCH RELATIVE ON CONDITION LONG */
+#  define BRCL(M1,I2)                  RIL_(0xC0,M1,0x4,I2)
+#  define BRL(I2)                      BRCL(CC_AL,I2)
+/* BRANCH RELATIVE ON COUNT */
+#  define BRCT(M1,I2)                  RI_(0xA7,M1,0x6,I2)
+#  define BRCTG(M1,I2)                 RI_(0xA7,M1,0x7,I2)
+/* BRANCH RELATIVE ON INDEX HIGH */
+#  define BRXH(R1,R3,I2)               RSI_(0x84,R1,R3,I2)
+#  define BRXHG(R1,R3,I2)              RIE_(0xEC,R1,R3,I2,0x44)
+/* BRANCH RELATIVE ON INDEX LOW OR EQUAL */
+#  define BRXLE(R1,R3,I2)              RSI_(0x85,R1,R3,I2)
+#  define BRXLEG(R1,R3,I2)             RIE_(0xEC,R1,R3,I2,0x45)
+/* CHECKSUM */
+#  define CKSUM(R1,R2)                 RRE_(0xB241,R1,R2)
+/* CIPHER MESSAGE (KM) */
+#  define KM(R1,R2)                    RRE_(0xB92E,R1,R2)
+/* CIPHER MESSAGE WITH CHAINING (KMC) */
+#  define KMC(R1,R2)                   RRE_(0xB92F,R1,R2)
+/* COMPARE */
+#  define CR(R1,R2)                    RR_(0x19,R1,R2)
+#  define CGR(R1,R2)                   RRE_(0xB920,R1,R2)
+#  define CGFR(R1,R2)                  RRE_(0xB930,R1,R2)
+#  define C(R1,D2,X2,B2)               RX_(0x59,R1,X2,B2,D2)
+#  define CY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x59)
+#  define CG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x20)
+#  define CGF(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x30)
+/* COMPARE AND FORM CODEWORD */
+#  define CFC(D2,B2)                   S_(0xB21A,B2,D2)
+/* COMPARE AND SWAP */
+#  define CS(R1,R3,D2,B2)              RS_(0xBA,R1,R3,B2,D2)
+#  define CSY(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x14)
+#  define CSG(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x30)
+/* COMPARE DOUBLE AND SWAP -- NOTE(review): CSDY/CSDG appear to be the architecture's CDSY/CDSG mnemonics (transposed letters); confirm before renaming */
+#  define CDS(R1,R3,D2,B2)             RS_(0xBB,R1,R3,B2,D2)
+#  define CSDY(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x31)
+#  define CSDG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x3E)
+/* COMPARE HALFWORD */
+#  define CH(R1,D2,X2,B2)              RX_(0x49,R1,X2,B2,D2)
+#  define CHY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x79)
+/* COMPARE HALFWORD IMMEDIATE */
+#  define CHI(R1,I2)                   RI_(0xA7,R1,0xE,I2)
+#  define CGHI(R1,I2)                  RI_(0xA7,R1,0xF,I2)
+/* COMPARE LOGICAL */
+#  define CLR(R1,R2)                   RR_(0x15,R1,R2)
+#  define CLGR(R1,R2)                  RRE_(0xB921,R1,R2)
+#  define CLGFR(R1,R2)                 RRE_(0xB931,R1,R2)
+#  define CL(R1,D2,X2,B2)              RX_(0x55,R1,X2,B2,D2)
+#  define CLY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x55)
+#  define CLG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x21)
+#  define CLGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x31)
+#  define CLI(D1,B1,I2)                        SI_(0x95,I2,B1,D1)
+#  define CLIY(D1,B1,I2)               SIY_(0xEB,I2,B1,D1,0x55)
+#  define CLC(D1,L,B1,D2,B2)           SSL_(0xD5,L,B1,D1,B2,D2)
+/* COMPARE LOGICAL CHARACTERS UNDER MASK */
+#  define CLM(R1,M3,D2,B2)             RS_(0xBD,R1,M3,B2,D2)
+#  define CLMY(R1,M3,D2,B2)            RSY_(0xEB,R1,M3,B2,D2,0x21)
+#  define CLMH(R1,M3,D2,B2)            RSY_(0xEB,R1,M3,B2,D2,0x20)
+/* COMPARE LOGICAL LONG */
+#  define CLCL(R1,R2)                  RR_(0x0F,R1,R2)
+/* COMPARE LOGICAL LONG EXTENDED */
+#  define CLCLE(R1,R3,D2,B2)           RS_(0xA9,R1,R3,B2,D2)
+/* COMPARE LOGICAL LONG UNICODE */
+#  define CLCLU(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x8F)
+/* COMPARE LOGICAL STRING */
+#  define CLST(R1,R2)                  RRE_(0xB25D,R1,R2)
+/* COMPARE UNTIL SUBSTRING EQUAL */
+#  define CUSE(R1,R2)                  RRE_(0xB257,R1,R2)
+/* COMPRESSION CALL */
+#  define CMPSC(R1,R2)                 RRE_(0xB263,R1,R2)
+/* COMPUTE INTERMEDIATE MESSAGE DIGEST (KIMD) */
+#  define KIMD(R1,R2)                  RRE_(0xB93E,R1,R2)
+/* COMPUTE LAST MESSAGE DIGEST (KLMD) */
+#  define KLMD(R1,R2)                  RRE_(0xB93F,R1,R2)
+/* COMPUTE MESSAGE AUTHENTICATION CODE (KMAC) */
+#  define KMAC(R1,R2)                  RRE_(0xB91E,R1,R2)
+/* CONVERT TO BINARY */
+#  define CVB(R1,D2,X2,B2)             RX_(0x4F,R1,X2,B2,D2)
+#  define CVBY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x06)
+#  define CVBG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x0e)
+/* CONVERT TO DECIMAL */
+#  define CVD(R1,D2,X2,B2)             RX_(0x4E,R1,X2,B2,D2)
+#  define CVDY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x26)
+#  define CVDG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x2E)
+/* CONVERT UNICODE TO UTF-8 */
+#  define CUUTF(R1,R2)                 RRE_(0xB2A6,R1,R2)
+/* CONVERT UTF-8 TO UNICODE */
+#  define CUTFU(R1,R2)                 RRE_(0xB2A7,R1,R2)
+/* COPY ACCESS */
+#  define CPYA(R1,R2)                  RRE_(0xB24D,R1,R2)
+/* DIVIDE */
+#  define DR(R1,R2)                    RR_(0x1D,R1,R2)
+#  define D(R1,D2,X2,B2)               RX_(0x5D,R1,X2,B2,D2)
+/* DIVIDE LOGICAL */
+#  define DLR(R1,R2)                   RRE_(0xB997,R1,R2)
+#  define DLGR(R1,R2)                  RRE_(0xB987,R1,R2)
+#  define DL(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x97)
+#  define DLG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x87)
+/* DIVIDE SINGLE */
+#  define DSGR(R1,R2)                  RRE_(0xB90D,R1,R2)
+#  define DSGFR(R1,R2)                 RRE_(0xB91D,R1,R2)
+#  define DSG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x0D)
+#  define DSGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x1D)
+/* EXCLUSIVE OR */
+#  define XR(R1,R2)                    RR_(0x17,R1,R2)
+#  define XGR(R1,R2)                   RRE_(0xB982,R1,R2)
+#  define X(R1,D2,X2,B2)               RX_(0x57,R1,X2,B2,D2)
+#  define XY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x57)
+#  define XG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x82)
+#  define XI(D1,B1,I2)                 SI_(0x97,I2,B1,D1)
+#  define XIY(D1,B1,I2)                        SIY_(0xEB,I2,B1,D1,0x57)
+#  define XC(D1,L,B1,D2,B2)            SSL_(0xD7,L,B1,D1,B2,D2)
+/* EXECUTE */
+#  define EX(R1,D2,X2,B2)              RX_(0x44,R1,X2,B2,D2)
+/* EXTRACT ACCESS */
+#  define EAR(R1,R2)                   RRE_(0xB24F,R1,R2)
+/* EXTRACT PSW */
+#  define EPSW(R1,R2)                  RRE_(0xB98D,R1,R2)
+/* INSERT CHARACTER */
+#  define IC(R1,D2,X2,B2)              RX_(0x43,R1,X2,B2,D2)
+#  define ICY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x73)
+/* INSERT CHARACTERS UNDER MASK */
+#  define ICM(R1,M3,D2,B2)             RS_(0xBF,R1,M3,B2,D2)
+#  define ICMY(R1,M3,D2,B2)            RSY_(0xEB,R1,M3,B2,D2,0x81)
+#  define ICMH(R1,M3,D2,B2)            RSY_(0xEB,R1,M3,B2,D2,0x80)
+/* INSERT IMMEDIATE */
+#  define IIHH(R1,I2)                  RI_(0xA5,R1,0x0,I2)
+#  define IIHL(R1,I2)                  RI_(0xA5,R1,0x1,I2)
+#  define IILH(R1,I2)                  RI_(0xA5,R1,0x2,I2)
+#  define IILL(R1,I2)                  RI_(0xA5,R1,0x3,I2)
+/* INSERT PROGRAM MASK */
+#  define IPM(R1)                      RRE_(0xB222,R1,0)
+/* LOAD */
+#  define LR(R1,R2)                    RR_(0x18,R1,R2)
+#  define LGR(R1,R2)                   RRE_(0xB904,R1,R2)
+#  define LGFR(R1,R2)                  RRE_(0xB914,R1,R2)
+#  define L(R1,D2,X2,B2)               RX_(0x58,R1,X2,B2,D2)
+#  define LY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x58)
+#  define LG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x04)
+#  define LGF(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x14)
+/* LOAD ACCESS MULTIPLE */
+#  define LAM(R1,R3,D2,B2)             RS_(0x9A,R1,R3,B2,D2)
+#  define LAMY(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x9A)
+/* LOAD ADDRESS */
+#  define LA(R1,D2,X2,B2)              RX_(0x41,R1,X2,B2,D2)
+#  define LAY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x71)
+/* LOAD ADDRESS EXTENDED */
+#  define LAE(R1,D2,X2,B2)             RX_(0x51,R1,X2,B2,D2)
+/* LOAD ADDRESS RELATIVE LONG */
+#  define LARL(R1,I2)                  RIL_(0xC0,R1,0x0,I2)
+/* LOAD AND TEST */
+#  define LTR(R1,R2)                   RR_(0x12,R1,R2)
+#  define LTGR(R1,R2)                  RRE_(0xB902,R1,R2)
+#  define LTGFR(R1,R2)                 RRE_(0xB912,R1,R2)
+/* LOAD BYTE */
+#  define LGBR(R1,R2)                  RRE_(0xB906,R1,R2)      /* disasm */
+#  define LB(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x76)
+#  define LGB(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x77)
+/* LOAD COMPLEMENT */
+#  define LCR(R1,R2)                   RR_(0x13,R1,R2)
+#  define LCGR(R1,R2)                  RRE_(0xB903,R1,R2)
+#  define LCGFR(R1,R2)                 RRE_(0xB913,R1,R2)
+/* LOAD HALFWORD */
+#  define LH(R1,D2,X2,B2)              RX_(0x48,R1,X2,B2,D2)
+#  define LHY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x78)
+#  define LGHR(R1,R2)                  RRE_(0xB907,R1,R2)      /* disasm */
+#  define LGH(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x15)
+/* LOAD HALFWORD IMMEDIATE */
+#  define LHI(R1,I2)                   RI_(0xA7,R1,0x8,I2)
+#  define LGHI(R1,I2)                  RI_(0xA7,R1,0x9,I2)
+/* LOAD LOGICAL */
+#  define LLGFR(R1,R2)                 RRE_(0xB916,R1,R2)
+#  define LLGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x16)
+/* LOAD LOGICAL CHARACTER */
+#  define LLGCR(R1,R2)                 RRE_(0xB984,R1,R2)      /* disasm */
+#  define LLGC(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x90)
+/* LOAD LOGICAL HALFWORD */
+#  define LLGHR(R1,R2)                 RRE_(0xB985,R1,R2)      /* disasm */
+#  define LLGH(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x91)
+/* LOAD LOGICAL IMMEDIATE */
+#  define LLIHH(R1,I2)                 RI_(0xA5,R1,0xC,I2)
+#  define LLIHL(R1,I2)                 RI_(0xA5,R1,0xD,I2)
+#  define LLILH(R1,I2)                 RI_(0xA5,R1,0xE,I2)
+#  define LLILL(R1,I2)                 RI_(0xA5,R1,0xF,I2)
+/* LOAD LOGICAL THIRTY ONE BITS */
+#  define LLGTR(R1,R2)                 RRE_(0xB917,R1,R2)
+#  define LLGT(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x17)
+/* LOAD MULTIPLE */
+#  define LM(R1,R3,D2,B2)              RS_(0x98,R1,R3,B2,D2)
+#  define LMY(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x98)
+#  define LMG(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x04)
+/* LOAD MULTIPLE DISJOINT */
+#  define LMD(R1,R3,D2,B2,D4,B4)       SS_(0xEF,R1,R3,B2,D2,B4,D4)
+/* LOAD MULTIPLE HIGH */
+#  define LMH(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x96)
+/* LOAD NEGATIVE */
+#  define LNR(R1,R2)                   RR_(0x11,R1,R2)
+#  define LNGR(R1,R2)                  RRE_(0xB901,R1,R2)
+#  define LNGFR(R1,R2)                 RRE_(0xB911,R1,R2)
+/* LOAD PAIR FROM QUADWORD */
+#  define LPQ(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x8F)
+/* LOAD POSITIVE */
+#  define LPR(R1,R2)                   RR_(0x10,R1,R2)
+#  define LPGR(R1,R2)                  RRE_(0xB900,R1,R2)
+#  define LPGFR(R1,R2)                 RRE_(0xB910,R1,R2)
+/* LOAD REVERSED */
+#  define LRVR(R1,R2)                  RRE_(0xB91F,R1,R2)
+#  define LRVGR(R1,R2)                 RRE_(0xB90F,R1,R2)
+#  define LRVH(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x1F)
+#  define LRV(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x1E)
+#  define LRVG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x0F)
+/* MONITOR CALL */
+#  define MC(D1,B1,I2)                 SI_(0xAF,I2,B1,D1)
+/* MOVE */
+#  define MVI(D1,B1,I2)                        SI_(0x92,I2,B1,D1)
+#  define MVIY(D1,B1,I2)               SIY_(0xEB,I2,B1,D1,0x52)
+#  define MVC(D1,L,B1,D2,B2)           SSL_(0xD2,L,B1,D1,B2,D2)
+/* MOVE INVERSE */
+#  define MVCIN(D1,L,B1,D2,B2)         SSL_(0xE8,L,B1,D1,B2,D2)
+/* MOVE LONG */
+#  define MVCL(R1,R2)                  RR_(0x0E,R1,R2)
+/* MOVE LONG EXTENDED */
+#  define MVCLE(R1,R3,D2,B2)           RS_(0xA8,R1,R3,B2,D2)
+/* MOVE LONG UNICODE */
+#  define MVCLU(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x8E)
+/* MOVE NUMERICS */
+#  define MVN(D1,L,B1,D2,B2)           SSL_(0xD1,L,B1,D1,B2,D2)
+/* MOVE STRING */
+#  define MVST(R1,R2)                  RRE_(0xB255,R1,R2)
+/* MOVE WITH OFFSET */
+#  define MVO(D1,L1,B1,D2,L2,B2)       SS_(0xF1,L1,L2,B1,D1,B2,D2)
+/* MOVE ZONES */
+#  define MVZ(D1,L,B1,D2,B2)           SSL_(0xD3,L,B1,D1,B2,D2)
+/* MULTIPLY */
+#  define MR(R1,R2)                    RR_(0x1C,R1,R2)
+#  define M(R1,D2,X2,B2)               RX_(0x5C,R1,X2,B2,D2)
+/* MULTIPLY HALFWORD */
+#  define MH(R1,D2,X2,B2)              RX_(0x4C,R1,X2,B2,D2)
+/* MULTIPLY HALFWORD IMMEDIATE */
+#  define MHI(R1,I2)                   RI_(0xA7,R1,0xC,I2)
+#  define MGHI(R1,I2)                  RI_(0xA7,R1,0xD,I2)
+/* MULTIPLY LOGICAL */
+#  define MLR(R1,R2)                   RRE_(0xB996,R1,R2)
+#  define MLGR(R1,R2)                  RRE_(0xB986,R1,R2)
+#  define ML(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x96)
+#  define MLG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x86)
+/* MULTIPLY SINGLE */
+#  define MSR(R1,R2)                   RRE_(0xB252,R1,R2)
+#  define MSGR(R1,R2)                  RRE_(0xB90C,R1,R2)
+#  define MSGFR(R1,R2)                 RRE_(0xB91C,R1,R2)
+#  define MS(R1,D2,X2,B2)              RX_(0x71,R1,X2,B2,D2)
+#  define MSY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x51)
+#  define MSG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x0C)
+#  define MSGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x1C)
+/* OR */
+#  define OR(R1,R2)                    RR_(0x16,R1,R2)
+#  define OGR(R1,R2)                   RRE_(0xB981,R1,R2)
+#  define O(R1,D2,X2,B2)               RX_(0x56,R1,X2,B2,D2)
+#  define OY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x56)
+#  define OG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x81)
+#  define OI(D1,B1,I2)                 SI_(0x96,I2,B1,D1)
+#  define OIY(D1,B1,I2)                        SIY_(0xEB,I2,B1,D1,0x56)
+#  define OC(D1,L,B1,D2,B2)            SSL_(0xD6,L,B1,D1,B2,D2)
+/* OR IMMEDIATE */
+#  define OIHH(R1,I2)                  RI_(0xA5,R1,0x8,I2)
+#  define OIHL(R1,I2)                  RI_(0xA5,R1,0x9,I2)
+#  define OILH(R1,I2)                  RI_(0xA5,R1,0xA,I2)
+#  define OILL(R1,I2)                  RI_(0xA5,R1,0xB,I2)
+/* PACK */
+#  define PACK(D1,L1,B1,D2,L2,B2)      SS_(0xF2,L1,L2,B1,D1,B2,D2)
+/* PACK ASCII */
+#  define PKA(D1,B1,D2,L2,B2)          SSL_(0xE9,L2,B1,D1,B2,D2)
+/* PACK UNICODE */
+#  define PKU(D1,B1,D2,L2,B2)          SSL_(0xE1,L2,B1,D1,B2,D2)
+/* PERFORM LOCKED OPERATION */
+#  define PLO(R1,D2,B2,R3,D4,B4)       SS_(0xEE,R1,R3,B2,D2,B4,D4)
+/* ROTATE LEFT SINGLE LOGICAL */
+#  define RLL(R1,R3,D2,B2)             RSY_(0xEB,R1,R3,B2,D2,0x1D)
+#  define RLLG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x1C)
+/* SEARCH STRING */
+#  define SRST(R1,R2)                  RRE_(0xB25E,R1,R2)
+/* SET ACCESS */
+#  define SAR(R1,R2)                   RRE_(0xB24E,R1,R2)
+/* SET ADDRESSING MODE */
+#  define SAM24()                      E_(0x10C)
+#  define SAM31()                      E_(0x10D)
+#  define SAM64()                      E_(0x10E)
+/* SET PROGRAM MASK */
+#  define SPM(R1)                      RR_(0x04,R1,0)
+/* SHIFT LEFT DOUBLE */
+#  define SLDA(R1,D2,B2)               RS_(0x8F,R1,0,B2,D2)
+/* SHIFT LEFT DOUBLE LOGICAL */
+#  define SLDL(R1,D2,B2)               RS_(0x8D,R1,0,B2,D2)
+/* SHIFT LEFT SINGLE */
+#  define SLA(R1,D2,B2)                        RS_(0x8B,R1,0,B2,D2)
+#  define SLAG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x0B)
+/* SHIFT LEFT SINGLE LOGICAL */
+#  define SLL(R1,D2,B2)                        RS_(0x89,R1,0,B2,D2)
+#  define SLLG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x0D)
+/* SHIFT RIGHT DOUBLE */
+#  define SRDA(R1,D2,B2)               RS_(0x8E,R1,0,B2,D2)
+/* SHIFT RIGHT DOUBLE LOGICAL */
+#  define SRDL(R1,D2,B2)               RS_(0x8C,R1,0,B2,D2)
+/* SHIFT RIGHT SINGLE */
+#  define SRA(R1,D2,B2)                        RS_(0x8A,R1,0,B2,D2)
+#  define SRAG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x0A)
+/* SHIFT RIGHT SINGLE LOGICAL */
+#  define SRL(R1,D2,B2)                        RS_(0x88,R1,0,B2,D2)
+#  define SRLG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x0C)
+/* STORE */
+#  define ST(R1,D2,X2,B2)              RX_(0x50,R1,X2,B2,D2)
+#  define STY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x50)
+#  define STG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x24)
+/* STORE ACCESS MULTIPLE */
+#  define STAM(R1,R3,D2,B2)            RS_(0x9B,R1,R3,B2,D2)
+#  define STAMY(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x9B)
+/* STORE CHARACTER */
+#  define STC(R1,D2,X2,B2)             RX_(0x42,R1,X2,B2,D2)
+#  define STCY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x72)
+/* STORE CHARACTERS UNDER MASK */
+#  define STCM(R1,M3,D2,B2)            RS_(0xBE,R1,M3,B2,D2)
+#  define STCMY(R1,M3,D2,B2)           RSY_(0xEB,R1,M3,B2,D2,0x2D)
+#  define STCMH(R1,M3,D2,B2)           RSY_(0xEB,R1,M3,B2,D2,0x2C)
+/* STORE CLOCK */
+#  define STCK(D2,B2)                  S_(0xB205,B2,D2)
+/* STORE CLOCK EXTENDED */
+#  define STCKE(D2,B2)                 S_(0xB278,B2,D2)
+/* STORE HALFWORD */
+#  define STH(R1,D2,X2,B2)             RX_(0x40,R1,X2,B2,D2)
+#  define STHY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x70)
+/* STORE MULTIPLE */
+#  define STM(R1,R3,D2,B2)             RS_(0x90,R1,R3,B2,D2)
+#  define STMY(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x90)
+#  define STMG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x24)
+/* STORE MULTIPLE HIGH */
+#  define STMH(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x26)
+/* STORE PAIR TO QUADWORD */
+#  define STPQ(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x8E)
+/* STORE REVERSED */
+#  define STRVH(R1,D2,X2,B2)           RXY_(0xE3,R1,X2,B2,D2,0x3F)
+#  define STRV(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x3E)
+#  define STRVG(R1,D2,X2,B2)           RXY_(0xE3,R1,X2,B2,D2,0x2F)
+/* SUBTRACT */
+#  define SR(R1,R2)                    RR_(0x1B,R1,R2)
+#  define SGR(R1,R2)                   RRE_(0xB909,R1,R2)
+#  define SGFR(R1,R2)                  RRE_(0xB919,R1,R2)
+#  define S(R1,D2,X2,B2)               RX_(0x5B,R1,X2,B2,D2)
+#  define SY(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x5B)
+#  define SG(R1,D2,X2,B2)              RXY_(0xE3,R1,X2,B2,D2,0x09)
+#  define SGF(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x19)
+/* SUBTRACT HALFWORD */
+#  define SH(R1,D2,X2,B2)              RX_(0x4B,R1,X2,B2,D2)
+#  define SHY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x7B)
+/* SUBTRACT LOGICAL */
+#  define SLR(R1,R2)                   RR_(0x1F,R1,R2)
+#  define SLGR(R1,R2)                  RRE_(0xB90B,R1,R2)
+#  define SLGFR(R1,R2)                 RRE_(0xB91B,R1,R2)
+#  define SL(R1,D2,X2,B2)              RX_(0x5F,R1,X2,B2,D2)
+#  define SLY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x5F)
+#  define SLG(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x0B)
+#  define SLGF(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x1B)
+/* SUBTRACT LOGICAL WITH BORROW */
+#  define SLBR(R1,R2)                  RRE_(0xB999,R1,R2)
+#  define SLBGR(R1,R2)                 RRE_(0xB989,R1,R2)
+#  define SLB(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x99)
+#  define SLBG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x89)
+/* SUPERVISOR CALL */
+#  define SVC(I)                       I_(0xA,I)
+/* TEST ADDRESSING MODE */
+#  define TAM()                                E_(0x10B)
+/* TEST AND SET */
+#  define TS(D2,B2)                    RS_(0x93,0,0,B2,D2)
+/* TEST UNDER MASK (TEST UNDER MASK HIGH, TEST UNDER MASK LOW) */
+#  define TM(D1,B1,I2)                 SI_(0x91,I2,B1,D1)
+#  define TMY(D1,B1,I2)                        SIY_(0xEB,I2,B1,D1,0x51)
+#  define TMHH(R1,I2)                  RI_(0xA7,R1,0x2,I2)
+#  define TMHL(R1,I2)                  RI_(0xA7,R1,0x3,I2)
+#  define TMLH(R1,I2)                  RI_(0xA7,R1,0x0,I2)
+#  define TMH(R1,I2)                   TMLH(R1,I2)
+#  define TMLL(R1,I2)                  RI_(0xA7,R1,0x1,I2)
+#  define TML(R1,I2)                   TMLL(R1,I2)
+/* TRANSLATE */
+#  define TR(D1,L,B1,D2,B2)            SSL_(0xDC,L,B1,D1,B2,D2)
+/* TRANSLATE AND TEST */
+#  define TRT(D1,L,B1,D2,B2)           SSL_(0xDD,L,B1,D1,B2,D2)
+/* TRANSLATE EXTENDED */
+#  define TRE(R1,R2)                   RRE_(0xB2A5,R1,R2)
+/* TRANSLATE ONE TO ONE */
+#  define TROO(R1,R2)                  RRE_(0xB993,R1,R2)
+/* TRANSLATE ONE TO TWO */
+#  define TROT(R1,R2)                  RRE_(0xB992,R1,R2)
+/* TRANSLATE TWO TO ONE */
+#  define TRTO(R1,R2)                  RRE_(0xB991,R1,R2)
+/* TRANSLATE TWO TO TWO */
+#  define TRTT(R1,R2)                  RRE_(0xB990,R1,R2)
+/* UNPACK */
+#  define UNPK(D1,L1,B1,D2,L2,B2)      SS_(0xF3,L1,L2,B1,D1,B2,D2)
+/* UNPACK ASCII */
+#  define UNPKA(D1,L1,B1,D2,L2,B2)     SS_(0xEA,L1,L2,B1,D1,B2,D2)
+/* UNPACK UNICODE */
+#  define UNPKU(D1,L1,B1,D2,L2,B2)     SS_(0xE2,L1,L2,B1,D1,B2,D2)
+/* UPDATE TREE */
+#  define UPT()                                E_(0x0102)
+/****************************************************************
+ * Decimal Instructions                                                *
+ ****************************************************************/
+/* ADD DECIMAL */
+#  define AP(D1,L1,B1,D2,L2,B2)                SS_(0xFA,L1,L2,B1,D1,B2,D2)
+/* COMPARE DECIMAL */
+#  define CP(D1,L1,B1,D2,L2,B2)                SS_(0xF9,L1,L2,B1,D1,B2,D2)
+/* DIVIDE DECIMAL */
+#  define DP(D1,L1,B1,D2,L2,B2)                SS_(0xFD,L1,L2,B1,D1,B2,D2)
+/* EDIT */
+#  define ED(D1,L,B1,D2,B2)            SSL_(0xDE,L,B1,D1,B2,D2)
+/* EDIT AND MARK: SS-format (single length L) opcode 0xDF; 0xDE is EDIT (ED) and must not be reused here */
+#  define EDMK(D1,L,B1,D2,B2)          SSL_(0xDF,L,B1,D1,B2,D2)
+/* MULTIPLY DECIMAL */
+#  define MP(D1,L1,B1,D2,L2,B2)                SS_(0xFC,L1,L2,B1,D1,B2,D2)
+/* SHIFT AND ROUND DECIMAL */
+#  define SRP(D1,L1,B1,D2,L2,B2)       SS_(0xF0,L1,L2,B1,D1,B2,D2)
+/* SUBTRACT DECIMAL */
+#  define SP(D1,L1,B1,D2,L2,B2)                SS_(0xFB,L1,L2,B1,D1,B2,D2)
+/* TEST DECIMAL */
+#  define TP(D1,L1,B1)                 RSL_(0xEB,L1,B1,D1,0xC0)
+/* ZERO AND ADD */
+#  define ZAP(D1,L1,B1,D2,L2,B2)       SS_(0xF8,L1,L2,B1,D1,B2,D2)
+/****************************************************************
+ * Control Instructions                                                *
+ ****************************************************************/
+/* BRANCH AND SET AUTHORITY */
+#  define BSA(R1,R2)                   RRE_(0xB25A,R1,R2)
+/* BRANCH AND STACK */
+#  define BAKR(R1,R2)                  RRE_(0xB240,R1,R2)
+/* BRANCH IN SUBSPACE GROUP */
+#  define BSG(R1,R2)                   RRE_(0xB258,R1,R2)
+/* COMPARE AND SWAP AND PURGE */
+#  define CSP(R1,R2)                   RRE_(0xB250,R1,R2)
+#  define CSPG(R1,R2)                  RRE_(0xB98A,R1,R2)
+/* DIAGNOSE */
+#  define DIAG()                       SI_(0x83,0,0,0)
+/* EXTRACT AND SET EXTENDED AUTHORITY */
+#  define ESEA(R1)                     RRE_(0xB99D,R1,0)
+/* EXTRACT PRIMARY ASN */
+#  define EPAR(R1)                     RRE_(0xB226,R1,0)
+/* EXTRACT SECONDARY ASN */
+#  define ESAR(R1)                     RRE_(0xB227,R1,0)
+/* EXTRACT STACKED REGISTERS */
+#  define EREG(R1,R2)                  RRE_(0xB249,R1,R2)
+#  define EREGG(R1,R2)                 RRE_(0xB90E,R1,R2)
+/* EXTRACT STACKED STATE */
+#  define ESTA(R1,R2)                  RRE_(0xB24A,R1,R2)
+/* INSERT ADDRESS SPACE CONTROL */
+#  define IAC(R1)                      RRE_(0xB224,R1,0)
+/* INSERT PSW KEY */
+#  define IPK()                                S_(0xB20B,0,0)
+/* INSERT STORAGE KEY EXTENDED */
+#  define ISKE(R1,R2)                  RRE_(0xB229,R1,R2)
+/* INSERT VIRTUAL STORAGE KEY */
+#  define IVSK(R1,R2)                  RRE_(0xB223,R1,R2)
+/* INVALIDATE DAT TABLE ENTRY */
+#  define IDTE(R1,R2,R3)               RRF_(0xB98E,R3,0,R1,R2)
+/* INVALIDATE PAGE TABLE ENTRY */
+#  define IPTE(R1,R2)                  RRE_(0xB221,R1,R2)
+/* LOAD ADDRESS SPACE PARAMETER */
+#  define LASP(D1,B1,D2,B2)            SSE_(0xE500,B1,D1,B2,D2)
+/* LOAD CONTROL */
+#  define LCTL(R1,R3,D2,B2)            RS_(0xB7,R1,R3,B2,D2)
+#  define LCTLG(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x2F)
+/* LOAD PSW */
+#  define LPSW(D2,B2)                  SI_(0x82,0,B2,D2)
+/* LOAD PSW EXTENDED */
+#  define LPSWE(D2,B2)                 S_(0xB2B2,B2,D2)
+/* LOAD REAL ADDRESS */
+#  define LRA(R1,D2,X2,B2)             RX_(0xB1,R1,X2,B2,D2)
+#  define LRAY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x13)
+#  define LRAG(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x03)
+/* LOAD USING REAL ADDRESS */
+#  define LURA(R1,R2)                  RRE_(0xB24B,R1,R2)
+#  define LURAG(R1,R2)                 RRE_(0xB905,R1,R2)
+/* MODIFY STACKED STATE */
+#  define MSTA(R1)                     RRE_(0xB247,R1,0)
+/* MOVE PAGE */
+#  define MVPG(R1,R2)                  RRE_(0xB254,R1,R2)
+/* MOVE TO PRIMARY */
+#  define MVCP(D1,R1,B1,D2,B2,R3)      SS_(0xDA,R1,R3,B1,D1,B2,D2)
+/* MOVE TO SECONDARY */
+#  define MVCS(D1,R1,B1,D2,B2,R3)      SS_(0xDB,R1,R3,B1,D1,B2,D2)
+/* MOVE WITH DESTINATION KEY */
+#  define MVCDK(D1,B1,D2,B2)           SSE_(0xE50F,B1,D1,B2,D2)
+/* MOVE WITH KEY */
+#  define MVCK(D1,R1,B1,D2,B2,R3)      SS_(0xD9,R1,R3,B1,D1,B2,D2)
+/* MOVE WITH SOURCE KEY */
+#  define MVCSK(D1,B1,D2,B2)           SSE_(0xE50E,B1,D1,B2,D2)
+/* PAGE IN */
+#  define PGIN(R1,R2)                  RRE_(0xB22E,R1,R2)
+/* PAGE OUT */
+#  define PGOUT(R1,R2)                 RRE_(0xB22F,R1,R2)
+/* PROGRAM CALL */
+#  define PC(D2,B2)                    S_(0xB218,B2,D2)
+/* PROGRAM RETURN */
+#  define PR()                         E_(0x0101)
+/* PROGRAM TRANSFER */
+#  define PT(R1,R2)                    RRE_(0xB228,R1,R2)
+/* PURGE ALB */
+#  define PALB()                       RRE_(0xB248,0,0)
+/* PURGE TLB */
+#  define PTLB()                       S_(0xB20D,0,0)
+/* RESET REFERENCE BIT EXTENDED */
+#  define RRBE(R1,R2)                  RRE_(0xB22A,R1,R2)
+/* RESUME PROGRAM */
+#  define RP(D2,B2)                    S_(0xB277,B2,D2)
+/* SET ADDRESS SPACE CONTROL */
+#  define SAC(D2,B2)                   S_(0xB219,B2,D2)
+/* SET ADDRESS SPACE CONTROL FAST */
+#  define SACF(D2,B2)                  S_(0xB279,B2,D2)
+/* SET CLOCK */
+#  define SCK(D2,B2)                   S_(0xB204,B2,D2)
+/* SET CLOCK COMPARATOR */
+#  define SCKC(D2,B2)                  S_(0xB206,B2,D2)
+/* SET CLOCK PROGRAMMABLE FIELD */
+#  define SCKPF()                      E_(0x0107)
+/* SET CPU TIMER */
+#  define SPT(D2,B2)                   S_(0xB208,B2,D2)
+/* SET PREFIX */
+#  define SPX(D2,B2)                   S_(0xB210,B2,D2)
+/* SET PSW KEY FROM ADDRESS */
+#  define SPKA(D2,B2)                  S_(0xB20A,B2,D2)
+/* SET SECONDARY ASN */
+#  define SSAR(R1)                     RRE_(0xB225,R1,0)
+/* SET STORAGE KEY EXTENDED */
+#  define SSKE(R1,R2)                  RRE_(0xB22B,R1,R2)
+/* SET SYSTEM MASK */
+#  define SSM(D2,B2)                   SI_(0x80,0,B2,D2)
+/* SIGNAL PROCESSOR */
+#  define SIGP(R1,R3,D2,B2)            RS_(0xAE,R1,R3,B2,D2)
+/* STORE CLOCK COMPARATOR */
+#  define STCKC(D2,B2)                 S_(0xB207,B2,D2)
+/* STORE CONTROL */
+#  define STCTL(R1,R3,D2,B2)           RS_(0xB6,R1,R3,B2,D2)
+#  define STCTG(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x25)
+/* STORE CPU ADDRESS */
+#  define STAP(D2,B2)                  S_(0xB212,B2,D2)
+/* STORE CPU ID */
+#  define STIDP(D2,B2)                 S_(0xB202,B2,D2)
+/* STORE CPU TIMER */
+#  define STPT(D2,B2)                  S_(0xB209,B2,D2)
+/* STORE FACILITY LIST */
+#  define STFL(D2,B2)                  S_(0xB2B1,B2,D2)
+/* STORE PREFIX */
+#  define STPX(D2,B2)                  S_(0xB211,B2,D2)
+/* STORE REAL ADDRESS */
+#  define STRAG(D1,B1,D2,B2)           SSE_(0xE502,B1,D1,B2,D2)
+/* STORE SYSTEM INFORMATION */
+#  define STSI(D2,B2)                  S_(0xB27D,B2,D2)
+/* STORE THEN AND SYSTEM MASK */
+#  define STNSM(D1,B1,I2)              SI_(0xAC,I2,B1,D1)
+/* STORE THEN OR SYSTEM MASK */
+#  define STOSM(D1,B1,I2)              SI_(0xAD,I2,B1,D1)
+/* STORE USING REAL ADDRESS */
+#  define STURA(R1,R2)                 RRE_(0xB246,R1,R2)
+#  define STURG(R1,R2)                 RRE_(0xB925,R1,R2)
+/* TEST ACCESS */
+#  define TAR(R1,R2)                   RRE_(0xB24C,R1,R2)
+/* TEST BLOCK */
+#  define TB(R1,R2)                    RRE_(0xB22C,R1,R2)
+/* TEST PROTECTION */
+#  define TPROT(D1,B1,D2,B2)           SSE_(0xE501,B1,D1,B2,D2)
+/* TRACE */
+#  define TRACE(R1,R3,D2,B2)           RS_(0x99,R1,R3,B2,D2)
+#  define TRACG(R1,R3,D2,B2)           RSY_(0xEB,R1,R3,B2,D2,0x0F)
+/* TRAP */
+#  define TRAP2()                      E_(0x01FF)
+#  define TRAP4(D2,B2)                 S_(0xB2FF,B2,D2)
+/****************************************************************
+ * I/O Instructions                                            *
+ ****************************************************************/
+/* Every macro in this section expands to S_(opcode,B2,D2).  The macros that
+ * take no parameters pass 0 for both B2 and D2. */
+/* CANCEL SUBCHANNEL */
+#  define XSCH()                       S_(0xB276,0,0)
+/* CLEAR SUBCHANNEL */
+#  define CSCH()                       S_(0xB230,0,0)
+/* HALT SUBCHANNEL */
+#  define HSCH()                       S_(0xB231,0,0)
+/* MODIFY SUBCHANNEL */
+#  define MSCH(D2,B2)                  S_(0xB232,B2,D2)
+/* RESET CHANNEL PATH */
+#  define RCHP()                       S_(0xB23B,0,0)
+/* RESUME SUBCHANNEL */
+#  define RSCH()                       S_(0xB238,0,0)
+/* SET ADDRESS LIMIT */
+#  define SAL()                                S_(0xB237,0,0)
+/* SET CHANNEL MONITOR */
+#  define SCHM()                       S_(0xB23C,0,0)
+/* START SUBCHANNEL */
+#  define SSCH(D2,B2)                  S_(0xB233,B2,D2)
+/* STORE CHANNEL PATH STATUS */
+#  define STCPS(D2,B2)                 S_(0xB23A,B2,D2)
+/* STORE CHANNEL REPORT WORD */
+#  define STCRW(D2,B2)                 S_(0xB239,B2,D2)
+/* STORE SUBCHANNEL */
+#  define STSCH(D2,B2)                 S_(0xB234,B2,D2)
+/* TEST PENDING INTERRUPTION */
+#  define TPI(D2,B2)                   S_(0xB236,B2,D2)
+/* TEST SUBCHANNEL */
+#  define TSCH(D2,B2)                  S_(0xB235,B2,D2)
+/* Each wrapper macro below forwards to a static function of the same name
+ * with a leading underscore, adding the ambient _jit state as first argument. */
+/* Division helpers.  Unlike divr/remr they return a jit_int32_t.
+ * NOTE(review): presumably the register holding the result - confirm against
+ * the definitions. */
+#  define xdivr(r0,r1)                 _xdivr(_jit,r0,r1)
+static jit_int32_t _xdivr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define xdivr_u(r0,r1)               _xdivr_u(_jit,r0,r1)
+static jit_int32_t _xdivr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define xdivi(r0,i0)                 _xdivi(_jit,r0,i0)
+static jit_int32_t _xdivi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define xdivi_u(r0,i0)               _xdivi_u(_jit,r0,i0)
+static jit_int32_t _xdivi_u(jit_state_t*,jit_int32_t,jit_word_t);
+/* Generic compare helpers taking the condition code as first argument; the
+ * ltr/lei/eqr/... macros of this header expand to them with a fixed CC_*. */
+#  define crr(cc,r0,r1,r2)             _crr(_jit,cc,r0,r1,r2)
+static void _crr(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cri(cc,r0,r1,i0)             _cri(_jit,cc,r0,r1,i0)
+static void _cri(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define crr_u(cc,r0,r1,r2)           _crr_u(_jit,cc,r0,r1,r2)
+static void _crr_u(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cri_u(cc,r0,r1,i0)           _cri_u(_jit,cc,r0,r1,i0)
+static void _cri_u(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+/* Generic compare-and-branch helpers behind bltr/blei/beqr/...  The _p
+ * variants return a jit_word_t, the plain variants return nothing.
+ * NOTE(review): the returned word is presumably the location to hand to
+ * patch_at later - confirm against the definitions. */
+#  define brr(cc,i0,r0,r1)             _brr(_jit,cc,i0,r0,r1)
+static void _brr(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_int32_t);
+#  define brr_p(cc,i0,r0,r1)           _brr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _brr_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_int32_t);
+#  define bri(cc,i0,r0,i1)             _bri(_jit,cc,i0,r0,i1)
+static void _bri(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_word_t);
+#  define bri_p(cc,i0,r0,i1)           _bri_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bri_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_word_t);
+#  define brr_u(cc,i0,r0,r1)           _brr_u(_jit,cc,i0,r0,r1)
+static void _brr_u(jit_state_t*,jit_int32_t,
+                  jit_word_t,jit_int32_t,jit_int32_t);
+#  define brr_u_p(cc,i0,r0,r1)         _brr_u_p(_jit,cc,i0,r0,r1)
+static jit_word_t _brr_u_p(jit_state_t*,jit_int32_t,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define bri_u(cc,i0,r0,i1)           _bri_u(_jit,cc,i0,r0,i1)
+static void _bri_u(jit_state_t*,jit_int32_t,
+                  jit_word_t,jit_int32_t,jit_word_t);
+#  define bri_u_p(cc,i0,r0,i1)         _bri_u_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bri_u_p(jit_state_t*,jit_int32_t,
+                          jit_word_t,jit_int32_t,jit_word_t);
+/* Add/subtract-and-branch helpers: c is the condition code and s a
+ * jit_bool_t flag.  The boadd/bxadd/bosub/bxsub wrappers of this header pass
+ * s=1 for their plain forms and s=0 for their _u forms. */
+#  define baddr(c,s,i0,r0,r1)          _baddr(_jit,c,s,i0,r0,r1)
+static void _baddr(jit_state_t*,jit_int32_t,jit_bool_t,
+                  jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddr_p(c,s,i0,r0,r1)                _baddr_p(_jit,c,s,i0,r0,r1)
+static jit_word_t _baddr_p(jit_state_t*,jit_int32_t,jit_bool_t,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddi(c,s,i0,r0,i1)          _baddi(_jit,c,s,i0,r0,i1)
+static void _baddi(jit_state_t*,jit_int32_t,jit_bool_t,
+                  jit_word_t,jit_int32_t,jit_word_t);
+#  define baddi_p(c,s,i0,r0,i1)                _baddi_p(_jit,c,s,i0,r0,i1)
+static jit_word_t _baddi_p(jit_state_t*,jit_int32_t,jit_bool_t,
+                          jit_word_t,jit_int32_t,jit_word_t);
+#  define bsubr(c,s,i0,r0,r1)          _bsubr(_jit,c,s,i0,r0,r1)
+static void _bsubr(jit_state_t*,jit_int32_t,jit_bool_t,
+                  jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubr_p(c,s,i0,r0,r1)                _bsubr_p(_jit,c,s,i0,r0,r1)
+static jit_word_t _bsubr_p(jit_state_t*,jit_int32_t,jit_bool_t,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubi(c,s,i0,r0,i1)          _bsubi(_jit,c,s,i0,r0,i1)
+static void _bsubi(jit_state_t*,jit_int32_t,jit_bool_t,
+                  jit_word_t,jit_int32_t,jit_word_t);
+#  define bsubi_p(c,s,i0,r0,i1)                _bsubi_p(_jit,c,s,i0,r0,i1)
+static jit_word_t _bsubi_p(jit_state_t*,jit_int32_t,jit_bool_t,
+                          jit_word_t,jit_int32_t,jit_word_t);
+/* Helpers behind bmsr/bmsi (invoked with CC_NE) and bmcr/bmci (CC_E). */
+#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1)
+static void _bmxr(jit_state_t*,jit_int32_t,
+                 jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxr_p(cc,i0,r0,r1)          _bmxr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _bmxr_p(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxi(cc,i0,r0,i1)            _bmxi(_jit,cc,i0,r0,i1)
+static void _bmxi(jit_state_t*,jit_int32_t,
+                 jit_word_t,jit_int32_t,jit_word_t);
+#  define bmxi_p(cc,i0,r0,i1)          _bmxi_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bmxi_p(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_word_t);
+/* Moves.  movi_p returns a jit_word_t whereas movi returns nothing. */
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+/* Integer arithmetic.  Names ending in r take a register (jit_int32_t) as
+ * last operand, names ending in i take a jit_word_t immediate instead. */
+#  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)
+static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              _addcr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               _subr(_jit,r0,r1,r2)
+static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Four-operand multiply forms.
+ * NOTE(review): r0/r1 are presumably the low/high halves of the product -
+ * confirm against the definitions. */
+#  define qmulr(r0,r1,r2,r3)           _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr_u(r0,r1,r2,r3)         _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Four-operand divide forms.
+ * NOTE(review): r0/r1 are presumably quotient and remainder - confirm
+ * against the definitions. */
+#  define qdivr(r0,r1,r2,r3)           _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr_u(r0,r1,r2,r3)         _qdivr_u(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+/* Register-count shifts: an out-of-line helper when __WORDSIZE is 32,
+ * otherwise a direct SLLG/SRAG/SRLG.  The immediate-count forms always use
+ * a helper. */
+#  if __WORDSIZE == 32
+#    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
+static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
+#  endif
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
+#  endif
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define rshr_u(r0,r1,r2)           SRLG(r0,r1,0,r2)
+#  endif
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Negation maps to LCR or LCGR depending on the word size. */
+#  if __WORDSIZE == 32
+#    define negr(r0,r1)                        LCR(r0,r1)
+#  else
+#    define negr(r0,r1)                        LCGR(r0,r1)
+#  endif
+#  define comr(r0,r1)                  _comr(_jit,r0,r1)
+static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)
+static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        _orr(_jit,r0,r1,r2)
+static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               _xorr(_jit,r0,r1,r2)
+static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* The host-to-network conversions expand to an extension or a plain move;
+ * no byte-swapping instruction is involved in these expansions. */
+#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#  else
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
+/* Register-to-register extensions; the 32-bit-wide variants exist only
+ * when __WORDSIZE is 64. */
+#  define extr_c(r0,r1)                        LGBR(r0,r1)
+#  define extr_uc(r0,r1)               LLGCR(r0,r1)
+#  define extr_s(r0,r1)                        LGHR(r0,r1)
+#  define extr_us(r0,r1)               LLGHR(r0,r1)
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              LGFR(r0,r1)
+#    define extr_ui(r0,r1)             LLGFR(r0,r1)
+#  endif
+/* Loads.  Each ldr_* form expands to a single instruction macro called as
+ * (r0,0,0,r1); the ldi_*, ldxr_* and ldxi_* forms go through helpers.
+ * NOTE(review): the two zero operands are presumably displacement and index -
+ * confirm against the instruction macro definitions. */
+#  define ldr_c(r0,r1)                 LGB(r0,0,0,r1)
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        LLGC(r0,0,0,r1)
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define ldr_s(r0,r1)               LH(r0,0,0,r1)
+#  else
+#    define ldr_s(r0,r1)               LGH(r0,0,0,r1)
+#  endif
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        LLGH(r0,0,0,r1)
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* NOTE(review): with __WORDSIZE == 32, ldr_i uses LLGF, the same instruction
+ * the 64-bit build uses for ldr_ui, whereas the 32-bit ldr_s uses LH rather
+ * than an LLG* form - confirm this is intended. */
+#  if __WORDSIZE == 32
+#    define ldr_i(r0,r1)               LLGF(r0,0,0,r1)
+#  else
+#    define ldr_i(r0,r1)               LGF(r0,0,0,r1)
+#  endif
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* The _ui and _l loads are only declared when __WORDSIZE is 64. */
+#  if __WORDSIZE == 64
+#    define ldr_ui(r0,r1)              LLGF(r0,0,0,r1)
+#    define ldi_ui(r0,i0)              _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_ui(r0,r1,r2)          _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_ui(r0,r1,i0)          _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldr_l(r0,r1)               LG(r0,0,0,r1)
+#    define ldi_l(r0,i0)               _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_l(r0,r1,r2)           _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+/* Stores.  Note the swapped register order in the str_* expansions:
+ * str_X(r0,r1) invokes the instruction macro as (r1,0,0,r0).  The sti_* and
+ * stxi_* helpers take their jit_word_t operand first. */
+#  define str_c(r0,r1)                 STC(r1,0,0,r0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_s(r0,r1)                 STH(r1,0,0,r0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_i(r0,r1)                 ST(r1,0,0,r0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* The _l stores are only declared when __WORDSIZE is 64. */
+#  if __WORDSIZE == 64
+#    define str_l(r0,r1)               STG(r1,0,0,r0)
+#    define sti_l(i0,r0)               _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_l(r0,r1,r2)             _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
+/* Comparisons: each macro fixes the condition code and defers to crr/cri
+ * or, for the _u names, to crr_u/cri_u.  eq and ne have no _u forms. */
+#  define ltr(r0,r1,r2)                        crr(CC_L,r0,r1,r2)
+#  define lti(r0,r1,i0)                        cri(CC_L,r0,r1,i0)
+#  define ltr_u(r0,r1,r2)              crr_u(CC_L,r0,r1,r2)
+#  define lti_u(r0,r1,i0)              cri_u(CC_L,r0,r1,i0)
+#  define ler(r0,r1,r2)                        crr(CC_LE,r0,r1,r2)
+#  define lei(r0,r1,i0)                        cri(CC_LE,r0,r1,i0)
+#  define ler_u(r0,r1,r2)              crr_u(CC_LE,r0,r1,r2)
+#  define lei_u(r0,r1,i0)              cri_u(CC_LE,r0,r1,i0)
+#  define eqr(r0,r1,r2)                        crr(CC_E,r0,r1,r2)
+#  define eqi(r0,r1,i0)                        cri(CC_E,r0,r1,i0)
+#  define ger(r0,r1,r2)                        crr(CC_HE,r0,r1,r2)
+#  define gei(r0,r1,i0)                        cri(CC_HE,r0,r1,i0)
+#  define ger_u(r0,r1,r2)              crr_u(CC_HE,r0,r1,r2)
+#  define gei_u(r0,r1,i0)              cri_u(CC_HE,r0,r1,i0)
+#  define gtr(r0,r1,r2)                        crr(CC_H,r0,r1,r2)
+#  define gti(r0,r1,i0)                        cri(CC_H,r0,r1,i0)
+#  define gtr_u(r0,r1,r2)              crr_u(CC_H,r0,r1,r2)
+#  define gti_u(r0,r1,i0)              cri_u(CC_H,r0,r1,i0)
+#  define ner(r0,r1,r2)                        crr(CC_NE,r0,r1,r2)
+#  define nei(r0,r1,i0)                        cri(CC_NE,r0,r1,i0)
+/* Compare-and-branch: same scheme on top of brr/bri and brr_u/bri_u; every
+ * branch also has a _p twin built on the corresponding _p helper. */
+#  define bltr(i0,r0,r1)               brr(CC_L,i0,r0,r1)
+#  define bltr_p(i0,r0,r1)             brr_p(CC_L,i0,r0,r1)
+#  define blti(i0,r0,i1)               bri(CC_L,i0,r0,i1)
+#  define blti_p(i0,r0,i1)             bri_p(CC_L,i0,r0,i1)
+#  define bltr_u(i0,r0,r1)             brr_u(CC_L,i0,r0,r1)
+#  define bltr_u_p(i0,r0,r1)           brr_u_p(CC_L,i0,r0,r1)
+#  define blti_u(i0,r0,i1)             bri_u(CC_L,i0,r0,i1)
+#  define blti_u_p(i0,r0,i1)           bri_u_p(CC_L,i0,r0,i1)
+#  define bler(i0,r0,r1)               brr(CC_LE,i0,r0,r1)
+#  define bler_p(i0,r0,r1)             brr_p(CC_LE,i0,r0,r1)
+#  define blei(i0,r0,i1)               bri(CC_LE,i0,r0,i1)
+#  define blei_p(i0,r0,i1)             bri_p(CC_LE,i0,r0,i1)
+#  define bler_u(i0,r0,r1)             brr_u(CC_LE,i0,r0,r1)
+#  define bler_u_p(i0,r0,r1)           brr_u_p(CC_LE,i0,r0,r1)
+#  define blei_u(i0,r0,i1)             bri_u(CC_LE,i0,r0,i1)
+#  define blei_u_p(i0,r0,i1)           bri_u_p(CC_LE,i0,r0,i1)
+#  define beqr(i0,r0,r1)               brr(CC_E,i0,r0,r1)
+#  define beqr_p(i0,r0,r1)             brr_p(CC_E,i0,r0,r1)
+#  define beqi(i0,r0,i1)               bri(CC_E,i0,r0,i1)
+#  define beqi_p(i0,r0,i1)             bri_p(CC_E,i0,r0,i1)
+#  define bger(i0,r0,r1)               brr(CC_HE,i0,r0,r1)
+#  define bger_p(i0,r0,r1)             brr_p(CC_HE,i0,r0,r1)
+#  define bgei(i0,r0,i1)               bri(CC_HE,i0,r0,i1)
+#  define bgei_p(i0,r0,i1)             bri_p(CC_HE,i0,r0,i1)
+#  define bger_u(i0,r0,r1)             brr_u(CC_HE,i0,r0,r1)
+#  define bger_u_p(i0,r0,r1)           brr_u_p(CC_HE,i0,r0,r1)
+#  define bgei_u(i0,r0,i1)             bri_u(CC_HE,i0,r0,i1)
+#  define bgei_u_p(i0,r0,i1)           bri_u_p(CC_HE,i0,r0,i1)
+#  define bgtr(i0,r0,r1)               brr(CC_H,i0,r0,r1)
+#  define bgtr_p(i0,r0,r1)             brr_p(CC_H,i0,r0,r1)
+#  define bgti(i0,r0,i1)               bri(CC_H,i0,r0,i1)
+#  define bgti_p(i0,r0,i1)             bri_p(CC_H,i0,r0,i1)
+#  define bgtr_u(i0,r0,r1)             brr_u(CC_H,i0,r0,r1)
+#  define bgtr_u_p(i0,r0,r1)           brr_u_p(CC_H,i0,r0,r1)
+#  define bgti_u(i0,r0,i1)             bri_u(CC_H,i0,r0,i1)
+#  define bgti_u_p(i0,r0,i1)           bri_u_p(CC_H,i0,r0,i1)
+#  define bner(i0,r0,r1)               brr(CC_NE,i0,r0,r1)
+#  define bner_p(i0,r0,r1)             brr_p(CC_NE,i0,r0,r1)
+#  define bnei(i0,r0,i1)               bri(CC_NE,i0,r0,i1)
+#  define bnei_p(i0,r0,i1)             bri_p(CC_NE,i0,r0,i1)
+/* Add/subtract-and-branch wrappers.  The second argument handed to
+ * baddr/baddi/bsubr/bsubi is 1 for the plain forms and 0 for the _u forms;
+ * the bo* names use CC_O, CC_NLE or CC_L, the bx* names CC_NO, CC_LE or
+ * CC_NL. */
+#  define boaddr(i0,r0,r1)             baddr(CC_O,1,i0,r0,r1)
+#  define boaddr_p(i0,r0,r1)           baddr_p(CC_O,1,i0,r0,r1)
+#  define boaddi(i0,r0,i1)             baddi(CC_O,1,i0,r0,i1)
+#  define boaddi_p(i0,r0,i1)           baddi_p(CC_O,1,i0,r0,i1)
+#  define boaddr_u(i0,r0,r1)           baddr(CC_NLE,0,i0,r0,r1)
+#  define boaddr_u_p(i0,r0,r1)         baddr_p(CC_NLE,0,i0,r0,r1)
+#  define boaddi_u(i0,r0,i1)           baddi(CC_NLE,0,i0,r0,i1)
+#  define boaddi_u_p(i0,r0,i1)         baddi_p(CC_NLE,0,i0,r0,i1)
+#  define bxaddr(i0,r0,r1)             baddr(CC_NO,1,i0,r0,r1)
+#  define bxaddr_p(i0,r0,r1)           baddr_p(CC_NO,1,i0,r0,r1)
+#  define bxaddi(i0,r0,i1)             baddi(CC_NO,1,i0,r0,i1)
+#  define bxaddi_p(i0,r0,i1)           baddi_p(CC_NO,1,i0,r0,i1)
+#  define bxaddr_u(i0,r0,r1)           baddr(CC_LE,0,i0,r0,r1)
+#  define bxaddr_u_p(i0,r0,r1)         baddr_p(CC_LE,0,i0,r0,r1)
+#  define bxaddi_u(i0,r0,i1)           baddi(CC_LE,0,i0,r0,i1)
+#  define bxaddi_u_p(i0,r0,i1)         baddi_p(CC_LE,0,i0,r0,i1)
+#  define bosubr(i0,r0,r1)             bsubr(CC_O,1,i0,r0,r1)
+#  define bosubr_p(i0,r0,r1)           bsubr_p(CC_O,1,i0,r0,r1)
+#  define bosubi(i0,r0,i1)             bsubi(CC_O,1,i0,r0,i1)
+#  define bosubi_p(i0,r0,i1)           bsubi_p(CC_O,1,i0,r0,i1)
+#  define bosubr_u(i0,r0,r1)           bsubr(CC_L,0,i0,r0,r1)
+#  define bosubr_u_p(i0,r0,r1)         bsubr_p(CC_L,0,i0,r0,r1)
+#  define bosubi_u(i0,r0,i1)           bsubi(CC_L,0,i0,r0,i1)
+#  define bosubi_u_p(i0,r0,i1)         bsubi_p(CC_L,0,i0,r0,i1)
+#  define bxsubr(i0,r0,r1)             bsubr(CC_NO,1,i0,r0,r1)
+#  define bxsubr_p(i0,r0,r1)           bsubr_p(CC_NO,1,i0,r0,r1)
+#  define bxsubi(i0,r0,i1)             bsubi(CC_NO,1,i0,r0,i1)
+#  define bxsubi_p(i0,r0,i1)           bsubi_p(CC_NO,1,i0,r0,i1)
+#  define bxsubr_u(i0,r0,r1)           bsubr(CC_NL,0,i0,r0,r1)
+#  define bxsubr_u_p(i0,r0,r1)         bsubr_p(CC_NL,0,i0,r0,r1)
+#  define bxsubi_u(i0,r0,i1)           bsubi(CC_NL,0,i0,r0,i1)
+#  define bxsubi_u_p(i0,r0,i1)         bsubi_p(CC_NL,0,i0,r0,i1)
+/* Mask branches: the bms* names use CC_NE, the bmc* names CC_E. */
+#  define bmsr(i0,r0,r1)               bmxr(CC_NE,i0,r0,r1)
+#  define bmsr_p(i0,r0,r1)             bmxr_p(CC_NE,i0,r0,r1)
+#  define bmsi(i0,r0,i1)               bmxi(CC_NE,i0,r0,i1)
+#  define bmsi_p(i0,r0,i1)             bmxi_p(CC_NE,i0,r0,i1)
+#  define bmcr(i0,r0,r1)               bmxr(CC_E,i0,r0,r1)
+#  define bmcr_p(i0,r0,r1)             bmxr_p(CC_E,i0,r0,r1)
+#  define bmci(i0,r0,i1)               bmxi(CC_E,i0,r0,i1)
+#  define bmci_p(i0,r0,i1)             bmxi_p(CC_E,i0,r0,i1)
+/* Jumps and calls.  The register forms are single instructions: BR for
+ * jmpr, and BALR with _R14_REGNO as first operand for callr.  The immediate
+ * forms use helpers; their _p variants return a jit_word_t. */
+#  define jmpr(r0)                     BR(r0)
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#  define callr(r0)                    BALR(_R14_REGNO,r0)
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* Function prolog/epilog; both take a jit_node_t pointer. */
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+/* Variadic-argument support. */
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+/* patch_at takes two jit_word_t values named instr and label.
+ * NOTE(review): presumably it rewrites the instruction at instr to target
+ * label - confirm against the definition. */
+#  define patch_at(instr,label)                _patch_at(_jit,instr,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Shorthand types for the emitters below: _us is the type of each halfword
+ * and of its bit-fields, _ui the type of the incoming operand values. */
+#  define _us                          jit_uint16_t
+#  define _ui                          jit_uint32_t
+/*
+ * Build one halfword that holds nothing but Op and hand it to is().
+ * The assert fires when Op does not fit in a _us.
+ */
+static void
+_E(jit_state_t *_jit, _ui Op)
+{
+    union {
+        _us raw;
+        struct {
+            _us op;
+        } bits;
+    } h0;
+
+    h0.bits.op = Op;
+    assert(h0.bits.op == Op);
+    is(h0.raw);
+}
+
+/*
+ * Build one halfword from two 8-bit fields, declared in the order op, i,
+ * and hand it to is().  Where each field lands inside the halfword is
+ * decided by the compiler's bit-field allocation order.
+ * The asserts fire when Op or I does not fit in its field.
+ */
+static void
+_I(jit_state_t *_jit, _ui Op, _ui I)
+{
+    union {
+        _us raw;
+        struct {
+            _us op : 8;
+            _us i  : 8;
+        } bits;
+    } h0;
+
+    h0.bits.op = Op;
+    h0.bits.i  = I;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.i  == I);
+    is(h0.raw);
+}
+
+/*
+ * Build one halfword from op (8 bits), r1 (4) and r2 (4), declared in that
+ * order, and hand it to is().
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RR(jit_state_t *_jit, _ui Op, _ui R1, _ui R2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us r2 : 4;
+        } bits;
+    } h0;
+
+    h0.bits.op = Op;
+    h0.bits.r1 = R1;
+    h0.bits.r2 = R2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.r1 == R1);
+    assert(h0.bits.r2 == R2);
+    is(h0.raw);
+}
+
+/*
+ * Build two halfwords and hand them to is() in order:
+ *   first:  Op, occupying the whole halfword
+ *   second: 8 bits forced to zero, then r1 (4) and r2 (4)
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RRE(jit_state_t *_jit, _ui Op, _ui R1, _ui R2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us _  : 8;
+            _us r1 : 4;
+            _us r2 : 4;
+        } bits;
+    } h1;
+
+    h0.bits.op = Op;
+    h1.bits._  = 0;
+    h1.bits.r1 = R1;
+    h1.bits.r2 = R2;
+    assert(h0.bits.op == Op);
+    assert(h1.bits.r1 == R1);
+    assert(h1.bits.r2 == R2);
+    is(h0.raw);
+    is(h1.raw);
+}
+
+/*
+ * Build two halfwords and hand them to is() in order:
+ *   first:  Op, occupying the whole halfword
+ *   second: r3, m4, r1, r2, four bits each
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RRF(jit_state_t *_jit, _ui Op, _ui R3, _ui M4, _ui R1, _ui R2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us r3 : 4;
+            _us m4 : 4;
+            _us r1 : 4;
+            _us r2 : 4;
+        } bits;
+    } h1;
+
+    h0.bits.op = Op;
+    h1.bits.r3 = R3;
+    h1.bits.m4 = M4;
+    h1.bits.r1 = R1;
+    h1.bits.r2 = R2;
+    assert(h0.bits.op == Op);
+    assert(h1.bits.r3 == R3);
+    assert(h1.bits.m4 == M4);
+    assert(h1.bits.r1 == R1);
+    assert(h1.bits.r2 == R2);
+    is(h0.raw);
+    is(h1.raw);
+}
+
+/*
+ * Build two halfwords and hand them to is() in order:
+ *   first:  op (8 bits), r1 (4), x2 (4)
+ *   second: b2 (4), d2 (12)
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field, e.g. a D2
+ * that needs more than 12 bits.
+ */
+static void
+_RX(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op :  8;
+            _us r1 :  4;
+            _us x2 :  4;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } bits;
+    } h1;
+
+    h0.bits.op = Op;
+    h0.bits.r1 = R1;
+    h0.bits.x2 = X2;
+    h1.bits.b2 = B2;
+    h1.bits.d2 = D2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.r1 == R1);
+    assert(h0.bits.x2 == X2);
+    assert(h1.bits.b2 == B2);
+    assert(h1.bits.d2 == D2);
+    is(h0.raw);
+    is(h1.raw);
+}
+
+/*
+ * Build three halfwords and hand them to is() in order:
+ *   first:  op (8 bits), r1 (4), x2 (4)
+ *   second: b2 (4), d2 (12)
+ *   third:  8 bits forced to zero, then Op2 (8)
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field.
+ *
+ * The raw view of every halfword must be a _us: a wider member would overlay
+ * bytes that the bit-fields never write, so the value handed to is() could
+ * carry indeterminate data instead of the b2/d2 bits.
+ */
+static void
+_RXE(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2)
+{
+    union {
+       struct {
+           _us op :  8;
+           _us r1 :  4;
+           _us x2 :  4;
+       } b;
+       _us     s;
+    } i0;
+    union {
+       struct {
+           _us b2 :  4;
+           _us d2 : 12;
+       } b;
+       _us     s;
+    } i1;
+    union {
+       struct {
+           _us _  :  8;
+           _us op :  8;
+       } b;
+       _us     s;
+    } i2;
+    i2.b._ = 0;
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.x2 = X2;
+    i1.b.b2 = B2;
+    i1.b.d2 = D2;
+    i2.b.op = Op2;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.x2 == X2);
+    assert(i1.b.b2 == B2);
+    assert(i1.b.d2 == D2);
+    assert(i2.b.op == Op2);
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Build three halfwords and hand them to is() in order:
+ *   first:  op (8 bits), r3 (4), x2 (4)
+ *   second: b2 (4), d2 (12)
+ *   third:  r1 (4), 4 bits forced to zero, Op2 (8)
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RXF(jit_state_t *_jit, _ui Op, _ui R3, _ui X2, _ui B2, _ui D2, _ui R1, _ui Op2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op :  8;
+            _us r3 :  4;
+            _us x2 :  4;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } bits;
+    } h1;
+    union {
+        _us raw;
+        struct {
+            _us r1 :  4;
+            _us _  :  4;
+            _us op :  8;
+        } bits;
+    } h2;
+
+    h2.bits._  = 0;
+    h0.bits.op = Op;
+    h0.bits.r3 = R3;
+    h0.bits.x2 = X2;
+    h1.bits.b2 = B2;
+    h1.bits.d2 = D2;
+    h2.bits.r1 = R1;
+    h2.bits.op = Op2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.r3 == R3);
+    assert(h0.bits.x2 == X2);
+    assert(h1.bits.b2 == B2);
+    assert(h1.bits.d2 == D2);
+    assert(h2.bits.r1 == R1);
+    assert(h2.bits.op == Op2);
+    is(h0.raw);
+    is(h1.raw);
+    is(h2.raw);
+}
+
+/*
+ * Build three halfwords and hand them to is() in order:
+ *   first:  op (8 bits), r1 (4), x2 (4)
+ *   second: b2 (4), dl (12) = low 12 bits of D2
+ *   third:  dh (8) = D2 >> 12, then Op2 (8)
+ * (fields listed in declaration order).
+ * All three halfwords are cleared before the fields are filled in.
+ * The dh assert fires when D2 >> 12 needs more than 8 bits, i.e. when D2
+ * is wider than 20 bits; dl is masked and therefore not asserted.
+ */
+static void
+_RXY(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op :  8;
+            _us r1 :  4;
+            _us x2 :  4;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us b2 :  4;
+            _us dl : 12;
+        } bits;
+    } h1;
+    union {
+        _us raw;
+        struct {
+            _us dh :  8;
+            _us op :  8;
+        } bits;
+    } h2;
+
+    h2.raw = 0;
+    h1.raw = 0;
+    h0.raw = 0;
+    h0.bits.op = Op;
+    h0.bits.r1 = R1;
+    h0.bits.x2 = X2;
+    h1.bits.b2 = B2;
+    h1.bits.dl = D2 & 0xfff;
+    h2.bits.dh = D2 >> 12;
+    h2.bits.op = Op2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.r1 == R1);
+    assert(h0.bits.x2 == X2);
+    assert(h1.bits.b2 == B2);
+    assert(h2.bits.dh == D2 >> 12);
+    assert(h2.bits.op == Op2);
+    is(h0.raw);
+    is(h1.raw);
+    is(h2.raw);
+}
+
+/*
+ * Build two halfwords and hand them to is() in order:
+ *   first:  op (8 bits), r1 (4), r3 (4)
+ *   second: b2 (4), d2 (12)
+ * (fields listed in declaration order).
+ * Both halfwords are cleared before the fields are filled in.
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RS(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui B2, _ui D2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op :  8;
+            _us r1 :  4;
+            _us r3 :  4;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } bits;
+    } h1;
+
+    h1.raw = 0;
+    h0.raw = 0;
+    h0.bits.op = Op;
+    h0.bits.r1 = R1;
+    h0.bits.r3 = R3;
+    h1.bits.b2 = B2;
+    h1.bits.d2 = D2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.r1 == R1);
+    assert(h0.bits.r3 == R3);
+    assert(h1.bits.b2 == B2);
+    assert(h1.bits.d2 == D2);
+    is(h0.raw);
+    is(h1.raw);
+}
+
+/*
+ * Build three halfwords and hand them to is() in order:
+ *   first:  op (8 bits), l1 (4), 4 bits forced to zero
+ *   second: b1 (4), d1 (12)
+ *   third:  8 bits forced to zero, then Op2 (8)
+ * (fields listed in declaration order).
+ * The asserts fire when an argument does not fit in its field.
+ */
+static void
+_RSL(jit_state_t *_jit, _ui Op, _ui L1, _ui B1, _ui D1, _ui Op2)
+{
+    union {
+        _us raw;
+        struct {
+            _us op :  8;
+            _us l1 :  4;
+            _us _  :  4;
+        } bits;
+    } h0;
+    union {
+        _us raw;
+        struct {
+            _us b1 :  4;
+            _us d1 : 12;
+        } bits;
+    } h1;
+    union {
+        _us raw;
+        struct {
+            _us _  :  8;
+            _us op :  8;
+        } bits;
+    } h2;
+
+    h0.bits._  = 0;
+    h2.bits._  = 0;
+    h0.bits.op = Op;
+    h0.bits.l1 = L1;
+    h1.bits.b1 = B1;
+    h1.bits.d1 = D1;
+    h2.bits.op = Op2;
+    assert(h0.bits.op == Op);
+    assert(h0.bits.l1 == L1);
+    assert(h1.bits.b1 == B1);
+    assert(h1.bits.d1 == D1);
+    assert(h2.bits.op == Op2);
+    is(h0.raw);
+    is(h1.raw);
+    is(h2.raw);
+}
+
+/* Encode an RSI-format instruction (op, r1, r3 / i2) and append it to
+ * the code buffer as two units via is(); i2 occupies a whole `_us'. */
+static void
+_RSI(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2)
+{
+    union {
+        struct {
+            _us op :  8;
+            _us r1 :  4;
+            _us r3 :  4;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us i2;
+        } b;
+        _us     s;
+    } hw1;
+    /* Read-back assertions reject operands that do not fit their field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw0.b.r1 = R1;
+    assert(hw0.b.r1 == R1);
+    hw0.b.r3 = R3;
+    assert(hw0.b.r3 == R3);
+    hw1.b.i2 = I2;
+    assert(hw1.b.i2 == I2);
+    is(hw0.s);
+    is(hw1.s);
+}
+
+/* Encode an RIE-format instruction (op, r1, r3 / i2 / op2) and append it
+ * to the code buffer as three units via is().  The padding field of the
+ * last unit is cleared explicitly. */
+static void
+_RIE(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2, _ui Op2)
+{
+    union {
+        struct {
+            _us op :  8;
+            _us r1 :  4;
+            _us r3 :  4;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us i2;
+        } b;
+        _us     s;
+    } hw1;
+    union {
+        struct {
+            _us _  :  8;
+            _us op :  8;
+        } b;
+        _us     s;
+    } hw2;
+    hw2.b._ = 0;
+    /* Read-back assertions reject operands that do not fit their field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw0.b.r1 = R1;
+    assert(hw0.b.r1 == R1);
+    hw0.b.r3 = R3;
+    assert(hw0.b.r3 == R3);
+    hw1.b.i2 = I2;
+    assert(hw1.b.i2 == I2);
+    hw2.b.op = Op2;
+    assert(hw2.b.op == Op2);
+    is(hw0.s);
+    is(hw1.s);
+    is(hw2.s);
+}
+
+/* Encode an RIL-format instruction: one unit holding Op, R1 and the
+ * 4-bit Op2, followed by the immediate I2 emitted as its `ih' and `il'
+ * 16-bit bit-fields, in that order. */
+static void
+_RIL(jit_state_t *_jit, _ui Op, _ui R1, _ui Op2, _ui I2)
+{
+    union {
+        struct {
+            _us o1 :  8;
+            _us r1 :  4;
+            _us o2 :  4;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _ui ih : 16;
+            _ui il : 16;
+        } b;
+        _ui     i;
+    } imm;
+    /* Read-back assertions reject operands wider than their bit-field. */
+    hw0.b.o1 = Op;
+    assert(hw0.b.o1 == Op);
+    hw0.b.r1 = R1;
+    assert(hw0.b.r1 == R1);
+    hw0.b.o2 = Op2;
+    assert(hw0.b.o2 == Op2);
+    /* The immediate is stored whole and read back through the halves. */
+    imm.i = I2;
+    is(hw0.s);
+    is(imm.b.ih);
+    is(imm.b.il);
+}
+
+/* Encode an SI-format instruction (op, i2 / b1, d1) and append it to
+ * the code buffer as two 16-bit units via is(). */
+static void
+_SI(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1)
+{
+    union {
+        struct {
+            _us op :  8;
+            _us i2 :  8;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us b1 :  4;
+            _us d1 : 12;
+        } b;
+        _us     s;
+    } hw1;
+    /* Read-back assertions reject operands wider than their bit-field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw0.b.i2 = I2;
+    assert(hw0.b.i2 == I2);
+    hw1.b.b1 = B1;
+    assert(hw1.b.b1 == B1);
+    hw1.b.d1 = D1;
+    assert(hw1.b.d1 == D1);
+    is(hw0.s);
+    is(hw1.s);
+}
+
+/* Encode an SIY-format instruction (op, i2 / b1, dl / dh, op2) and append
+ * it to the code buffer as three 16-bit units via is().
+ *
+ * D1 is a 20-bit displacement split across two fields: dl takes the low
+ * 12 bits and dh the 8 bits above them, so dh must be D1 >> 12 (exactly
+ * as _RXY splits its D2).  Shifting by 8 instead would duplicate bits
+ * 8..11 into dh and encode the wrong displacement for any D1 > 0xff. */
+static void
+_SIY(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1, _ui Op2)
+{
+    union {
+       struct {
+           _us op :  8;
+           _us i2 :  8;
+       } b;
+       _us     s;
+    } i0;
+    union {
+       struct {
+           _us b1 :  4;
+           _us dl : 12;
+       } b;
+       _us     s;
+    } i1;
+    union {
+       struct {
+           _us dh :  8;
+           _us op :  8;
+       } b;
+       _us     s;
+    } i2;
+    i0.b.op = Op;
+    i0.b.i2 = I2;
+    i1.b.b1 = B1;
+    i1.b.dl = D1 & 0xfff;
+    /* High part starts right above the 12 bits already stored in dl. */
+    i2.b.dh = D1 >> 12;
+    i2.b.op = Op2;
+    /* Read-back assertions reject operands wider than their bit-field. */
+    assert(i0.b.op == Op);
+    assert(i0.b.i2 == I2);
+    assert(i1.b.b1 == B1);
+    assert(i2.b.dh == D1 >> 12);
+    assert(i2.b.op == Op2);
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/* Encode an S-format instruction: a full-unit opcode followed by a unit
+ * holding b2 and d2, both appended to the code buffer via is(). */
+static void
+_S(jit_state_t *_jit, _ui Op, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } b;
+        _us     s;
+    } hw1;
+    /* Read-back assertions reject operands that do not fit their field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw1.b.b2 = B2;
+    assert(hw1.b.b2 == B2);
+    hw1.b.d2 = D2;
+    assert(hw1.b.d2 == D2);
+    is(hw0.s);
+    is(hw1.s);
+}
+
+/* Encode an SS-format instruction (op, ll, lh / b1, d1 / b2, d2) and
+ * append it to the code buffer as three 16-bit units via is(). */
+static void
+_SS(jit_state_t *_jit, _ui Op, _ui LL, _ui LH, _ui B1, _ui D1, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op :  8;
+            _us ll :  4;
+            _us lh :  4;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us b1 :  4;
+            _us d1 : 12;
+        } b;
+        _us     s;
+    } hw1;
+    union {
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } b;
+        _us     s;
+    } hw2;
+    /* Read-back assertions reject operands wider than their bit-field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw0.b.ll = LL;
+    assert(hw0.b.ll == LL);
+    hw0.b.lh = LH;
+    assert(hw0.b.lh == LH);
+    hw1.b.b1 = B1;
+    assert(hw1.b.b1 == B1);
+    hw1.b.d1 = D1;
+    assert(hw1.b.d1 == D1);
+    hw2.b.b2 = B2;
+    assert(hw2.b.b2 == B2);
+    hw2.b.d2 = D2;
+    assert(hw2.b.d2 == D2);
+    is(hw0.s);
+    is(hw1.s);
+    is(hw2.s);
+}
+
+/* Encode an SSE-format instruction: a full-unit opcode followed by two
+ * base/displacement units (b1, d1 and b2, d2), appended via is(). */
+static void
+_SSE(jit_state_t *_jit, _ui Op, _ui B1, _ui D1, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us     s;
+    } hw0;
+    union {
+        struct {
+            _us b1 :  4;
+            _us d1 : 12;
+        } b;
+        _us     s;
+    } hw1;
+    union {
+        struct {
+            _us b2 :  4;
+            _us d2 : 12;
+        } b;
+        _us     s;
+    } hw2;
+    /* Read-back assertions reject operands that do not fit their field. */
+    hw0.b.op = Op;
+    assert(hw0.b.op == Op);
+    hw1.b.b1 = B1;
+    assert(hw1.b.b1 == B1);
+    hw1.b.d1 = D1;
+    assert(hw1.b.d1 == D1);
+    hw2.b.b2 = B2;
+    assert(hw2.b.b2 == B2);
+    hw2.b.d2 = D2;
+    assert(hw2.b.d2 == D2);
+    is(hw0.s);
+    is(hw1.s);
+    is(hw2.s);
+}
+#  undef _us
+#  undef _ui
+
+/* Emit padding for a count of c, which must be non-negative and even:
+ * one NOPR(_R7_REGNO) is emitted for every 2 counted down from c. */
+static void
+_nop(jit_state_t *_jit, jit_int32_t c)
+{
+    assert(c >= 0 && !(c & 1));
+    for (; c; c -= 2) {
+        NOPR(_R7_REGNO);
+    }
+}
+
+/* Shared helper for the register divide/remainder operations using
+ * DIVREM_: places r0 in a freshly acquired register pair, emits
+ * DIVREM_ by r1 and returns the pair handle.  The pair is released
+ * before returning; callers read rn(pair) and rn(pair) + 1 right away. */
+static jit_int32_t
+_xdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    /* Load the dividend into the first register, then shift the pair
+     * right by 32 with SRDA. */
+    movr(rn(pair), r0);
+    SRDA(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    DIVREM_(rn(pair), r1);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Shared helper for the register divide/remainder operations using
+ * DIVREMU_: places r0 in the second register of a fresh pair, zeroes
+ * the first, emits DIVREMU_ by r1 and returns the (already released)
+ * pair handle. */
+static jit_int32_t
+_xdivr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDL(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    /* First register of the pair is cleared before the divide. */
+    movi(rn(pair), 0);
+    DIVREMU_(rn(pair), r1);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Immediate-divisor variant of the DIVREM_ helper: the divisor i0 is
+ * materialised in a scratch register, r0 is placed in a fresh register
+ * pair, DIVREM_ is emitted and the (already released) pair handle is
+ * returned. */
+static jit_int32_t
+_xdivi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+    jit_int32_t divisor = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDA(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    movi(rn(divisor), i0);
+    DIVREM_(rn(pair), rn(divisor));
+    jit_unget_reg(divisor);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Immediate-divisor variant of the DIVREMU_ helper.  The divisor gets
+ * its own scratch register: it cannot overlap the pair because the
+ * dividend operand is 128-bit.  Returns the (already released) pair. */
+static jit_int32_t
+_xdivi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+    jit_int32_t divisor = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDL(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    /* First register of the pair is cleared before the divide. */
+    movi(rn(pair), 0);
+    movi(rn(divisor), i0);
+    DIVREMU_(rn(pair), rn(divisor));
+    jit_unget_reg(divisor);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Set r0 to 1 or 0 from a CMP_(r1, r2) comparison: the result register
+ * is preloaded with 1, then a BRC on cc, patched to land right after the
+ * following movi, skips the instruction that would reset it to 0.  When
+ * r0 aliases an input the value is built in a scratch register first. */
+static void
+_crr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          branch;
+    jit_int32_t         scratch = 0, dst;
+    jit_int32_t         aliased = (r0 == r1 || r0 == r2);
+    if (aliased) {
+        scratch = jit_get_reg(jit_class_gpr);
+        dst = rn(scratch);
+    }
+    else {
+        dst = r0;
+    }
+    movi(dst, 1);
+    CMP_(r1, r2);
+    branch = _jit->pc.w;
+    BRC(cc, 0);
+    movi(dst, 0);
+    patch_at(branch, _jit->pc.w);
+    if (aliased) {
+        movr(r0, dst);
+        jit_unget_reg(scratch);
+    }
+}
+
+/* Immediate form of crr: loads i0 into a scratch register and defers
+ * to crr(cc, r0, r1, scratch). */
+static void
+_cri(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    crr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Same scheme as _crr but comparing with CMPU_(r1, r2): preload 1,
+ * compare, and let a patched BRC on cc skip the movi that stores 0.
+ * A scratch register holds the result while r0 aliases an input. */
+static void
+_crr_u(jit_state_t *_jit, jit_int32_t cc,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          branch;
+    jit_int32_t         scratch = 0, dst;
+    jit_int32_t         aliased = (r0 == r1 || r0 == r2);
+    if (aliased) {
+        scratch = jit_get_reg(jit_class_gpr);
+        dst = rn(scratch);
+    }
+    else {
+        dst = r0;
+    }
+    movi(dst, 1);
+    CMPU_(r1, r2);
+    branch = _jit->pc.w;
+    BRC(cc, 0);
+    movi(dst, 0);
+    patch_at(branch, _jit->pc.w);
+    if (aliased) {
+        movr(r0, dst);
+        jit_unget_reg(scratch);
+    }
+}
+
+/* Immediate form of crr_u: loads i0 into a scratch register and defers
+ * to crr_u(cc, r0, r1, scratch). */
+static void
+_cri_u(jit_state_t *_jit, jit_int32_t cc,
+       jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    crr_u(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit CMP_(r0, r1) followed by a conditional branch on cc to address
+ * i0.  The distance is measured from the branch instruction in 2-byte
+ * steps; BRC is used when it passes s16_p(), otherwise BRCL. */
+static void
+_brr(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t dist;
+    CMP_(r0, r1);
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(cc, dist);
+        return;
+    }
+    BRC(cc, x16(dist));
+}
+
+/* Patchable variant of _brr: emits CMP_(r0, r1) and a BRCL on cc with a
+ * zero placeholder, returning the address of the BRCL.  i0 is not used
+ * here. */
+static jit_word_t
+_brr_p(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t where;
+    CMP_(r0, r1);
+    where = _jit->pc.w;
+    BRCL(cc, 0);
+    return where;
+}
+
+/* Immediate form of brr: i1 is loaded into a non-spillable scratch
+ * register which then serves as the second compare operand. */
+static void
+_bri(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    brr(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of brr_p: i1 is loaded into a non-spillable scratch
+ * register and the address returned by brr_p is passed through. */
+static jit_word_t
+_bri_p(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    where = brr_p(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return where;
+}
+
+/* Emit CMPU_(r0, r1) followed by a conditional branch on cc to address
+ * i0: BRC when the 2-byte-step distance passes s16_p(), else BRCL. */
+static void
+_brr_u(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t dist;
+    CMPU_(r0, r1);
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(cc, dist);
+        return;
+    }
+    BRC(cc, x16(dist));
+}
+
+/* Patchable variant of _brr_u: emits CMPU_(r0, r1) and a BRCL on cc
+ * with a zero placeholder, returning the address of the BRCL.  i0 is
+ * not used here. */
+static jit_word_t
+_brr_u_p(jit_state_t *_jit, jit_int32_t cc,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t where;
+    CMPU_(r0, r1);
+    where = _jit->pc.w;
+    BRCL(cc, 0);
+    return where;
+}
+
+/* Immediate form of brr_u: i1 is loaded into a non-spillable scratch
+ * register which then serves as the second compare operand. */
+static void
+_bri_u(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    brr_u(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of brr_u_p: i1 is loaded into a non-spillable scratch
+ * register and the address returned by brr_u_p is passed through. */
+static jit_word_t
+_bri_u_p(jit_state_t *_jit, jit_int32_t cc,
+        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    where = brr_u_p(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return where;
+}
+
+/* Add r1 into r0 and branch to i0 on condition mask c.  The flag s
+ * selects addr() when non-zero and addcr() otherwise.  BRC is used when
+ * the 2-byte-step distance passes s16_p(), else BRCL. */
+static void
+_baddr(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t dist;
+    if (!s) {
+        addcr(r0, r0, r1);
+    }
+    else {
+        addr(r0, r0, r1);
+    }
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(c, dist);
+        return;
+    }
+    BRC(c, x16(dist));
+}
+
+/* Immediate form of baddr: i1 is loaded into a non-spillable scratch
+ * register which is then added to r0 by baddr. */
+static void
+_baddi(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    baddr(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Patchable variant of _baddr: performs the add (addr() when s is set,
+ * addcr() otherwise), then always emits the long BRCL form towards i0
+ * and returns the address of that branch. */
+static jit_word_t
+_baddr_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t where, dist;
+    if (!s) {
+        addcr(r0, r0, r1);
+    }
+    else {
+        addr(r0, r0, r1);
+    }
+    where = _jit->pc.w;
+    dist = (i0 - where) >> 1;
+    BRCL(c, dist);
+    return where;
+}
+
+/* Immediate form of baddr_p: i1 is loaded into a non-spillable scratch
+ * register and the address returned by baddr_p is passed through. */
+static jit_word_t
+_baddi_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    where = baddr_p(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return where;
+}
+
+/* Subtract r1 from r0 and branch to i0 on condition mask c.  The flag s
+ * selects subr() when non-zero and subcr() otherwise.  BRC is used when
+ * the 2-byte-step distance passes s16_p(), else BRCL. */
+static void
+_bsubr(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t dist;
+    if (!s) {
+        subcr(r0, r0, r1);
+    }
+    else {
+        subr(r0, r0, r1);
+    }
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(c, dist);
+        return;
+    }
+    BRC(c, x16(dist));
+}
+
+/* Immediate form of bsubr: i1 is loaded into a non-spillable scratch
+ * register which is then subtracted from r0 by bsubr. */
+static void
+_bsubi(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    bsubr(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Patchable variant of _bsubr: performs the subtract (subr() when s is
+ * set, subcr() otherwise), then always emits the long BRCL form towards
+ * i0 and returns the address of that branch. */
+static jit_word_t
+_bsubr_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t where, dist;
+    if (!s) {
+        subcr(r0, r0, r1);
+    }
+    else {
+        subr(r0, r0, r1);
+    }
+    where = _jit->pc.w;
+    dist = (i0 - where) >> 1;
+    BRCL(c, dist);
+    return where;
+}
+
+/* Immediate form of bsubr_p: i1 is loaded into a non-spillable scratch
+ * register and the address returned by bsubr_p is passed through. */
+static jit_word_t
+_bsubi_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
+        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    where = bsubr_p(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return where;
+}
+
+/* Branch to i0 on cc after testing r0 & r1: the AND is computed in a
+ * scratch register (inputs are left untouched), TEST_ is applied to
+ * the result, and BRC or BRCL is chosen by the distance to the target. */
+static void
+_bmxr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  dist;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    andr(rn(tmp), rn(tmp), r1);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(cc, dist);
+        return;
+    }
+    BRC(cc, x16(dist));
+}
+
+/* Patchable variant of _bmxr: tests r0 & r1 through a scratch register,
+ * then emits BRCL on cc with a zero placeholder and returns the address
+ * of that branch.  i0 is not used here. */
+static jit_word_t
+_bmxr_p(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    andr(rn(tmp), rn(tmp), r1);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    where = _jit->pc.w;
+    BRCL(cc, 0);
+    return where;
+}
+
+/* Branch to i0 on cc after testing r0 & i1: the immediate is loaded
+ * into a scratch register, ANDed with r0 there, TEST_ is applied, and
+ * BRC or BRCL is chosen by the distance to the target. */
+static void
+_bmxi(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  dist;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    andr(rn(tmp), rn(tmp), r0);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    dist = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(dist)) {
+        assert(s32_p(dist));
+        BRCL(cc, dist);
+        return;
+    }
+    BRC(cc, x16(dist));
+}
+
+/* Patchable variant of _bmxi: tests r0 & i1 through a scratch register,
+ * then emits BRCL on cc with a zero placeholder and returns the address
+ * of that branch.  i0 is not used here. */
+static jit_word_t
+_bmxi_p(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t  where;
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    andr(rn(tmp), rn(tmp), r0);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    where = _jit->pc.w;
+    BRCL(cc, 0);
+    return where;
+}
+
+/* Copy register r1 into r0; nothing is emitted when both are the same
+ * register.  LR is used on 32-bit builds, LGR otherwise. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+#if __WORDSIZE == 32
+    LR(r0, r1);
+#else
+    LGR(r0, r1);
+#endif
+}
+
+/* Load the constant i0 into r0 using the shortest sequence that fits:
+ *   1. a single LHI/LGHI when i0 passes s16_p();
+ *   2. a pc-relative LARL when i0 is even and (32-bit) positive or
+ *      (64-bit) within s32_p() halfword distance of the current pc;
+ *   3. otherwise 16-bit insert-immediate pieces. */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          rel;
+#if __WORDSIZE == 64
+    jit_int32_t         parts;
+#endif
+    rel = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(i0)) {
+#if __WORDSIZE == 32
+        LHI(r0, x16(i0));
+#else
+        LGHI(r0, x16(i0));
+#endif
+        return;
+    }
+    /* easy way of loading a large amount of 32 bit values and
+     * usually address of constants */
+    if (!(i0 & 1) &&
+#if __WORDSIZE == 32
+        i0 > 0
+#else
+        s32_p(rel)
+#endif
+        ) {
+        LARL(r0, rel);
+        return;
+    }
+#if __WORDSIZE == 32
+    LHI(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+#else
+    /* One flag bit per non-zero 16-bit slice of i0; the register is
+     * cleared first unless all four slices are going to be inserted. */
+    parts = 0;
+    if (i0 &             0xffffL) {
+        parts |= 1;
+    }
+    if (i0 &         0xffff0000L) {
+        parts |= 2;
+    }
+    if (i0 &     0xffff00000000L) {
+        parts |= 4;
+    }
+    if (i0 & 0xffff000000000000L) {
+        parts |= 8;
+    }
+    if (parts != 15) {
+        LGHI(r0, 0);
+    }
+    if (parts & 1) {
+        IILL(r0, x16(i0));
+    }
+    if (parts & 2) {
+        IILH(r0, x16((jit_uword_t)i0 >> 16));
+    }
+    if (parts & 4) {
+        IIHL(r0, x16((jit_uword_t)i0 >> 32));
+    }
+    if (parts & 8) {
+        IIHH(r0, x16((jit_uword_t)i0 >> 48));
+    }
+#endif
+}
+
+/* Patchable constant load: always emits the full fixed-length sequence
+ * of 16-bit immediate pieces for i0 and returns the address where that
+ * sequence starts. */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t where = _jit->pc.w;
+#if __WORDSIZE == 32
+    LHI(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+#else
+    IILL(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+    IIHL(r0, x16((jit_uword_t)i0 >> 32));
+    IIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
+    return where;
+}
+
+/* r0 = r1 + r2 with the two-operand ADD_.  When r0 already is r2 the
+ * other operand is added in place; otherwise r1 is copied to r0 first. */
+static void
+_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADD_(r0, r2);
+        return;
+    }
+    ADD_(r0, r1);
+}
+
+/* r0 = r1 + i0.  Uses ADDI_ in place when r0 == r1 and i0 passes
+ * s16_p(); on 64-bit builds uses LAY when i0 passes s20_p(); otherwise
+ * loads i0 into a scratch register and falls back to addr(). */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (r0 == r1 && s16_p(i0)) {
+        ADDI_(r0, x16(i0));
+        return;
+    }
+#if __WORDSIZE == 64
+    if (s20_p(i0)) {
+        LAY(r0, x20(i0), 0, r1);
+        return;
+    }
+#endif
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2 with the two-operand ADDC_.  When r0 already is r2 the
+ * other operand is added in place; otherwise r1 is copied to r0 first. */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADDC_(r0, r2);
+        return;
+    }
+    ADDC_(r0, r1);
+}
+
+/* Immediate form of addcr: i0 is loaded into a scratch register and
+ * passed as the second source operand. */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2 with the two-operand ADDX_.  When r0 already is r2 the
+ * other operand is added in place; otherwise r1 is copied to r0 first. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADDX_(r0, r2);
+        return;
+    }
+    ADDX_(r0, r1);
+}
+
+/* Immediate form of addxr: i0 is loaded into a scratch register and
+ * passed as the second source operand. */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 with the two-operand SUB_.  If r0 is also r2, the
+ * subtrahend is saved in a scratch register before r0 is overwritten
+ * with r1. */
+static void
+_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUB_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUB_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* r0 = r1 - i0.  Implemented as an add of -i0 when that fits: ADDI_ in
+ * place when r0 == r1 and -i0 passes s16_p(), LAY on 64-bit builds when
+ * -i0 passes s20_p(); otherwise i0 goes through a scratch register and
+ * subr(). */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (r0 == r1 && s16_p(-i0)) {
+        ADDI_(r0, x16(-i0));
+        return;
+    }
+#if __WORDSIZE == 64
+    if (s20_p(-i0)) {
+        LAY(r0, x20(-i0), 0, r1);
+        return;
+    }
+#endif
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 with the two-operand SUBC_.  If r0 is also r2, the
+ * subtrahend is saved in a scratch register before r0 is overwritten
+ * with r1. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUBC_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUBC_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Immediate form of subcr: i0 is loaded into a scratch register and
+ * passed as the subtrahend. */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 with the two-operand SUBX_.  If r0 is also r2, the
+ * subtrahend is saved in a scratch register before r0 is overwritten
+ * with r1. */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUBX_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUBX_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Immediate form of subxr: i0 is loaded into a scratch register and
+ * passed as the subtrahend. */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Reverse subtract, r0 = i0 - r1, computed as the negation of
+ * (r1 - i0). */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* First r0 = r1 - i0 ... */
+    subi(r0, r1, i0);
+    /* ... then flip the sign in place. */
+    negr(r0, r0);
+}
+
+/* r0 = r1 * r2 with the two-operand MUL_.  When r0 already is r2 the
+ * other operand is multiplied in place; otherwise r1 is copied first. */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        MUL_(r0, r2);
+        return;
+    }
+    MUL_(r0, r1);
+}
+
+/* r0 = r1 * i0.  Uses MULI_ directly when i0 passes s16_p(); otherwise
+ * loads i0 into a scratch register and defers to mulr(). */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp;
+    if (!s16_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        mulr(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    movr(r0, r1);
+    MULI_(r0, x16(i0));
+}
+
+/* Double-width multiply of r2 by r3 into r0 and r1, built on qmulr_u()
+ * followed by two correction terms added to r1.
+ * The only invalid condition is r0 == r1.
+ * Inputs that alias an output register are copied to scratch registers
+ * up front, because they are read again after qmulr_u() wrote r0/r1. */
+static void
+_qmulr(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t         adj;
+    jit_int32_t         t2, t3, s2, s3;
+    if (r2 != r0 && r2 != r1) {
+        t2 = r2;
+    }
+    else {
+        s2 = jit_get_reg(jit_class_gpr);
+        t2 = rn(s2);
+        movr(t2, r2);
+    }
+    if (r3 != r0 && r3 != r1) {
+        t3 = r3;
+    }
+    else {
+        s3 = jit_get_reg(jit_class_gpr);
+        t3 = rn(s3);
+        movr(t3, r3);
+    }
+    qmulr_u(r0, r1, r2, r3);
+    adj = jit_get_reg(jit_class_gpr);
+    /* r1 += (t2 >> 63) * t3 */
+    rshi(rn(adj), t2, 63);
+    mulr(rn(adj), rn(adj), t3);
+    addr(r1, r1, rn(adj));
+    /* r1 += (t3 >> 63) * t2 */
+    rshi(rn(adj), t3, 63);
+    mulr(rn(adj), rn(adj), t2);
+    addr(r1, r1, rn(adj));
+    jit_unget_reg(adj);
+    if (t2 != r2) {
+        jit_unget_reg(s2);
+    }
+    if (t3 != r3) {
+        jit_unget_reg(s3);
+    }
+}
+
+/* Immediate form of qmulr: i0 is loaded into a scratch register and
+ * passed as the second multiplicand. */
+static void
+_qmuli(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Double-width multiply through MULU_ on a register pair: r2 is placed
+ * in the second register of the pair, MULU_ by r3 is emitted, then r0
+ * receives the second register and r1 the first. */
+static void
+_qmulr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+    movr(rn(pair) + 1, r2);
+    MULU_(rn(pair), r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+    jit_unget_reg_pair(pair);
+}
+
+/* Immediate form of the MULU_ double-width multiply.  No extra scratch
+ * register is taken: i0 is loaded into the first register of the pair,
+ * which is then passed to MULU_ as both the pair and the multiplier. */
+static void
+_qmuli_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t pair = jit_get_reg_pair();
+    movr(rn(pair) + 1, r2);
+    movi(rn(pair), i0);
+    MULU_(rn(pair), rn(pair));
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+    jit_unget_reg_pair(pair);
+}
+
+/* r0 = r1 / r2: runs xdivr() and copies the second register of the
+ * returned pair into r0. */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t pair = xdivr(r1, r2);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / i0: runs xdivi() and copies the second register of the
+ * returned pair into r0. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi(r1, i0);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / r2 via xdivr_u(): copies the second register of the
+ * returned pair into r0. */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t pair = xdivr_u(r1, r2);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / i0 via xdivi_u(): copies the second register of the
+ * returned pair into r0. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi_u(r1, i0);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 % r2: runs xdivr() and copies the first register of the
+ * returned pair into r0. */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t pair = xdivr(r1, r2);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % i0: runs xdivi() and copies the first register of the
+ * returned pair into r0. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi(r1, i0);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % r2 via xdivr_u(): copies the first register of the
+ * returned pair into r0. */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t pair = xdivr_u(r1, r2);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % i0 via xdivi_u(): copies the first register of the
+ * returned pair into r0. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi_u(r1, i0);
+    movr(r0, rn(pair));
+}
+
+/* Combined divide of r2 by r3 via xdivr(): r0 receives the second
+ * register of the returned pair, then r1 receives the first. */
+static void
+_qdivr(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t pair = xdivr(r2, r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Combined divide of r2 by i0 via xdivi(): r0 receives the second
+ * register of the returned pair, then r1 receives the first. */
+static void
+_qdivi(jit_state_t *_jit,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi(r2, i0);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Combined divide of r2 by r3 via xdivr_u(): r0 receives the second
+ * register of the returned pair, then r1 receives the first. */
+static void
+_qdivr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t pair = xdivr_u(r2, r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Combined divide of r2 by i0 via xdivi_u(): r0 receives the second
+ * register of the returned pair, then r1 receives the first. */
+static void
+_qdivi_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t pair = xdivi_u(r2, i0);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+#  if __WORDSIZE == 32
+/* r0 = r1 << r2 using SLL with r2 as the shift-amount register.  If r0
+ * is also r2, the count is first saved in a scratch register obtained
+ * with jit_get_reg_but_zero(), since r0 is overwritten with r1. */
+static void
+_lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SLL(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SLL(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* Immediate form of lshr: the shift count i0 is loaded into a scratch
+ * register obtained with jit_get_reg_but_zero(). */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    lshr(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+#  if __WORDSIZE == 32
+/* r0 = r1 >> r2 using SRA with r2 as the shift-amount register.  If r0
+ * is also r2, the count is first saved in a scratch register obtained
+ * with jit_get_reg_but_zero(), since r0 is overwritten with r1. */
+static void
+_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SRA(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SRA(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* Immediate form of rshr: the shift count i0 is loaded into a scratch
+ * register obtained with jit_get_reg_but_zero(). */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    rshr(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+#  if __WORDSIZE == 32
+/* r0 = r1 >> r2 using SRL with r2 as the shift-amount register.  If r0
+ * is also r2, the count is first saved in a scratch register obtained
+ * with jit_get_reg_but_zero(), since r0 is overwritten with r1. */
+static void
+_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SRL(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SRL(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* Immediate form of rshr_u: the shift count i0 is loaded into a scratch
+ * register obtained with jit_get_reg_but_zero(). */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    rshr_u(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+/* r0 = ~r1, computed as r1 XOR -1 with the all-ones mask held in a
+ * scratch register. */
+static void
+_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t ones = jit_get_reg(jit_class_gpr);
+    movi(rn(ones), -1);
+    movr(r0, r1);
+    XOR_(r0, rn(ones));
+    jit_unget_reg(ones);
+}
+
+/* r0 = r1 & r2 with the two-operand AND_.  When r0 already is r2 the
+ * other operand is combined in place; otherwise r1 is copied first. */
+static void
+_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        AND_(r0, r2);
+        return;
+    }
+    AND_(r0, r1);
+}
+
+/* r0 = r1 & i0: r1 is copied to r0 and the mask is then applied one
+ * 16-bit slice at a time (NILL, NILH, and on 64-bit NIHL, NIHH). */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_uword_t mask = (jit_uword_t)i0;
+    movr(r0, r1);
+    NILL(r0, x16(i0));
+    NILH(r0, x16(mask >> 16));
+#if __WORDSIZE == 64
+    NIHL(r0, x16(mask >> 32));
+    NIHH(r0, x16(mask >> 48));
+#endif
+}
+
+/* r0 = r1 | r2 with the two-operand OR_.  When r0 already is r2 the
+ * other operand is combined in place; otherwise r1 is copied first. */
+static void
+_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        OR_(r0, r2);
+        return;
+    }
+    OR_(r0, r1);
+}
+
+/* r0 = r1 | i0: r1 is copied to r0 and the immediate is then OR-ed in
+ * one 16-bit slice at a time (OILL, OILH, and on 64-bit OIHL, OIHH). */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_uword_t bits = (jit_uword_t)i0;
+    movr(r0, r1);
+    OILL(r0, x16(i0));
+    OILH(r0, x16(bits >> 16));
+#if __WORDSIZE == 64
+    OIHL(r0, x16(bits >> 32));
+    OIHH(r0, x16(bits >> 48));
+#endif
+}
+
+/* r0 = r1 ^ r2 with the two-operand XOR_.  When r0 already is r2 the
+ * other operand is combined in place; otherwise r1 is copied first. */
+static void
+_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        XOR_(r0, r2);
+        return;
+    }
+    XOR_(r0, r1);
+}
+
+/* Immediate form of xorr: i0 is loaded into a scratch register and
+ * passed as the second source operand. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load through ldr_c from the absolute address i0: the address is
+ * first materialised in r0, which then doubles as the base register. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* r0 holds the address ... */
+    movi(r0, i0);
+    /* ... and is overwritten by the loaded value. */
+    ldr_c(r0, r0);
+}
+
+/* Load through ldr_c from address r1 + r2.  The sum is formed in r0
+ * itself, taking care not to clobber r2 when it is the same register
+ * as r0. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    else {
+        addr(r0, r0, r1);
+    }
+    ldr_c(r0, r0);
+}
+
+/* Load through the byte-load forms from address r1 + i0.  A single
+ * LB/LGB is used when i0 passes s20_p(); otherwise the address is
+ * computed in r0 (if distinct from r1) or in a scratch register from
+ * jit_get_reg_but_zero(), and ldr_c() performs the load. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (s20_p(i0)) {
+#if __WORDSIZE == 32
+        LB(r0, x20(i0), 0, r1);
+#else
+        LGB(r0, x20(i0), 0, r1);
+#endif
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_c(r0, r0);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r1);
+    ldr_c(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load through ldr_uc from the absolute address i0: the address is
+ * first materialised in r0, which then doubles as the base register. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* r0 holds the address ... */
+    movi(r0, i0);
+    /* ... and is overwritten by the loaded value. */
+    ldr_uc(r0, r0);
+}
+
+/* Load through ldr_uc from address r1 + r2.  The sum is formed in r0
+ * itself, taking care not to clobber r2 when it is the same register
+ * as r0. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    else {
+        addr(r0, r0, r1);
+    }
+    ldr_uc(r0, r0);
+}
+
+/* Load from address r1 + i0.  A single LLGC is used when i0 passes
+ * s20_p(); otherwise the address is computed in r0 (if distinct from
+ * r1) or in a scratch register from jit_get_reg_but_zero(), and
+ * ldr_uc() performs the load. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t base;
+    if (s20_p(i0)) {
+        LLGC(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_uc(r0, r0);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r1);
+    ldr_uc(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load (ldr_s) from the absolute address i0 into r0.  The address is
+ * first placed in r0 itself, which then serves as both base and
+ * destination, so no scratch register is requested. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_s(r0, r0);
+}
+
+/* Load (ldr_s) into r0 from the address r1 + r2.  The sum is built in
+ * r0, which is then used as the base of the load. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       addr(r0, r0, r2);
+    }
+    else {
+       /* r0 aliases r2: add r1 in place rather than overwriting r2. */
+       addr(r0, r0, r1);
+    }
+    ldr_s(r0, r0);
+}
+
+/* Load (halfword, via LH/LHY/LGH or ldr_s) into r0 from the address
+ * r1 + i0, picking the shortest form the displacement allows. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+#if __WORDSIZE == 32
+    /* 32-bit only: a 12-bit unsigned displacement allows the LH form. */
+    if (u12_p(i0)) {
+       LH(r0, i0, 0, r1);
+       return;
+    }
+#endif
+    if (s20_p(i0)) {
+       /* Displacement fits the 20-bit signed field. */
+#if __WORDSIZE == 32
+       LHY(r0, x20(i0), 0, r1);
+#else
+       LGH(r0, x20(i0), 0, r1);
+#endif
+       return;
+    }
+    if (r0 != r1) {
+       /* The destination can double as the address register. */
+       movi(r0, i0);
+       addr(r0, r0, r1);
+       ldr_s(r0, r0);
+       return;
+    }
+    /* r0 aliases the base: compute the address in a scratch register. */
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* Load (ldr_us) from the absolute address i0 into r0.  The address is
+ * first placed in r0 itself, which then serves as both base and
+ * destination, so no scratch register is requested. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_us(r0, r0);
+}
+
+/* Load (ldr_us) into r0 from the address r1 + r2.  The sum is built in
+ * r0, which is then used as the base of the load. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       addr(r0, r0, r2);
+    }
+    else {
+       /* r0 aliases r2: add r1 in place rather than overwriting r2. */
+       addr(r0, r0, r1);
+    }
+    ldr_us(r0, r0);
+}
+
+/* Load (halfword, via LLGH or ldr_us) into r0 from the address r1 + i0. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (s20_p(i0)) {
+       /* Displacement fits the 20-bit signed field: single instruction. */
+       LLGH(r0, x20(i0), 0, r1);
+       return;
+    }
+    if (r0 != r1) {
+       /* The destination can double as the address register. */
+       movi(r0, i0);
+       addr(r0, r0, r1);
+       ldr_us(r0, r0);
+       return;
+    }
+    /* r0 aliases the base: compute the address in a scratch register. */
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* Load (ldr_i) from the absolute address i0 into r0.  The address is
+ * first placed in r0 itself, which then serves as both base and
+ * destination, so no scratch register is requested. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_i(r0, r0);
+}
+
+/* Load (ldr_i) into r0 from the address r1 + r2.  The sum is built in
+ * r0, which is then used as the base of the load. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       addr(r0, r0, r2);
+    }
+    else {
+       /* r0 aliases r2: add r1 in place rather than overwriting r2. */
+       addr(r0, r0, r1);
+    }
+    ldr_i(r0, r0);
+}
+
+/* Load (word, via LGF or ldr_i) into r0 from the address r1 + i0. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (s20_p(i0)) {
+       /* Displacement fits the 20-bit signed field: single instruction. */
+       LGF(r0, x20(i0), 0, r1);
+       return;
+    }
+    if (r0 != r1) {
+       /* The destination can double as the address register. */
+       movi(r0, i0);
+       addr(r0, r0, r1);
+       ldr_i(r0, r0);
+       return;
+    }
+    /* r0 aliases the base: compute the address in a scratch register. */
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+#if __WORDSIZE == 64
+/* Load (ldr_ui) from the absolute address i0 into r0.  The address is
+ * first placed in r0 itself, which then serves as both base and
+ * destination, so no scratch register is requested.  Only compiled when
+ * __WORDSIZE == 64. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_ui(r0, r0);
+}
+
+/* Load (ldr_ui) into r0 from the address r1 + r2.  The sum is built in
+ * r0, which is then used as the base of the load. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       addr(r0, r0, r2);
+    }
+    else {
+       /* r0 aliases r2: add r1 in place rather than overwriting r2. */
+       addr(r0, r0, r1);
+    }
+    ldr_ui(r0, r0);
+}
+
+/* Load (word, via LLGF or ldr_ui) into r0 from the address r1 + i0. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (s20_p(i0)) {
+       /* Displacement fits the 20-bit signed field: single instruction. */
+       LLGF(r0, x20(i0), 0, r1);
+       return;
+    }
+    if (r0 != r1) {
+       /* The destination can double as the address register. */
+       movi(r0, i0);
+       addr(r0, r0, r1);
+       ldr_ui(r0, r0);
+       return;
+    }
+    /* r0 aliases the base: compute the address in a scratch register. */
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* Load (ldr_l) from the absolute address i0 into r0.  The address is
+ * first placed in r0 itself, which then serves as both base and
+ * destination, so no scratch register is requested.  Only compiled when
+ * __WORDSIZE == 64. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_l(r0, r0);
+}
+
+/* Load (ldr_l) into r0 from the address r1 + r2.  The sum is built in
+ * r0, which is then used as the base of the load. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       addr(r0, r0, r2);
+    }
+    else {
+       /* r0 aliases r2: add r1 in place rather than overwriting r2. */
+       addr(r0, r0, r1);
+    }
+    ldr_l(r0, r0);
+}
+
+/* Load (doubleword, via LG or ldr_l) into r0 from the address r1 + i0. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (s20_p(i0)) {
+       /* Displacement fits the 20-bit signed field: single instruction. */
+       LG(r0, x20(i0), 0, r1);
+       return;
+    }
+    if (r0 != r1) {
+       /* The destination can double as the address register. */
+       movi(r0, i0);
+       addr(r0, r0, r1);
+       ldr_l(r0, r0);
+       return;
+    }
+    /* r0 aliases the base: compute the address in a scratch register. */
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+#endif
+
+/* Store r0 (str_c) at the absolute address i0.  The address is held in a
+ * scratch register so that r0 itself is left untouched. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movi(rn(addr_reg), i0);
+    str_c(rn(addr_reg), r0);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r2 (str_c) at the address r0 + r1.  The sum is formed in a
+ * scratch register so none of the operands is modified. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movr(rn(addr_reg), r0);
+    addr(rn(addr_reg), rn(addr_reg), r1);
+    str_c(rn(addr_reg), r2);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r1 (byte) at the address r0 + i0, using the shortest encoding
+ * the displacement permits. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (u12_p(i0)) {
+       /* 12-bit unsigned displacement. */
+       STC(r1, i0, 0, r0);
+       return;
+    }
+    if (s20_p(i0)) {
+       /* 20-bit signed displacement. */
+       STCY(r1, x20(i0), 0, r0);
+       return;
+    }
+    /* Out of range for both forms: compute the address explicitly. */
+    addr_reg = jit_get_reg_but_zero(0);
+    addi(rn(addr_reg), r0, i0);
+    str_c(rn(addr_reg), r1);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r0 (str_s) at the absolute address i0.  The address is held in a
+ * scratch register so that r0 itself is left untouched. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movi(rn(addr_reg), i0);
+    str_s(rn(addr_reg), r0);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r2 (str_s) at the address r0 + r1.  The sum is formed in a
+ * scratch register so none of the operands is modified. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movr(rn(addr_reg), r0);
+    addr(rn(addr_reg), rn(addr_reg), r1);
+    str_s(rn(addr_reg), r2);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r1 (halfword) at the address r0 + i0, using the shortest
+ * encoding the displacement permits. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (u12_p(i0)) {
+       /* 12-bit unsigned displacement. */
+       STH(r1, i0, 0, r0);
+       return;
+    }
+    if (s20_p(i0)) {
+       /* 20-bit signed displacement. */
+       STHY(r1, x20(i0), 0, r0);
+       return;
+    }
+    /* Out of range for both forms: compute the address explicitly. */
+    addr_reg = jit_get_reg_but_zero(0);
+    addi(rn(addr_reg), r0, i0);
+    str_s(rn(addr_reg), r1);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r0 (str_i) at the absolute address i0.  The address is held in a
+ * scratch register so that r0 itself is left untouched. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movi(rn(addr_reg), i0);
+    str_i(rn(addr_reg), r0);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r2 (str_i) at the address r0 + r1.  The sum is formed in a
+ * scratch register so none of the operands is modified. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movr(rn(addr_reg), r0);
+    addr(rn(addr_reg), rn(addr_reg), r1);
+    str_i(rn(addr_reg), r2);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r1 (word) at the address r0 + i0, using the shortest encoding
+ * the displacement permits. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (u12_p(i0)) {
+       /* 12-bit unsigned displacement. */
+       ST(r1, i0, 0, r0);
+       return;
+    }
+    if (s20_p(i0)) {
+       /* 20-bit signed displacement. */
+       STY(r1, x20(i0), 0, r0);
+       return;
+    }
+    /* Out of range for both forms: compute the address explicitly. */
+    addr_reg = jit_get_reg_but_zero(0);
+    addi(rn(addr_reg), r0, i0);
+    str_i(rn(addr_reg), r1);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+#if __WORDSIZE == 64
+/* Store r0 (str_l) at the absolute address i0.  The address is held in a
+ * scratch register so that r0 itself is left untouched. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movi(rn(addr_reg), i0);
+    str_l(rn(addr_reg), r0);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r2 (str_l) at the address r0 + r1.  The sum is formed in a
+ * scratch register so none of the operands is modified. */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addr_reg = jit_get_reg_but_zero(0);
+
+    movr(rn(addr_reg), r0);
+    addr(rn(addr_reg), rn(addr_reg), r1);
+    str_l(rn(addr_reg), r2);
+    jit_unget_reg_but_zero(addr_reg);
+}
+
+/* Store r1 (doubleword) at the address r0 + i0. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                addr_reg;
+
+    if (s20_p(i0)) {
+       /* 20-bit signed displacement: single STG. */
+       STG(r1, x20(i0), 0, r0);
+       return;
+    }
+    /* Displacement out of range: compute the address explicitly. */
+    addr_reg = jit_get_reg_but_zero(0);
+    addi(rn(addr_reg), r0, i0);
+    str_l(rn(addr_reg), r1);
+    jit_unget_reg_but_zero(addr_reg);
+}
+#endif
+
+/* Emit an unconditional jump to the address i0, preferring a pc-relative
+ * form when the distance allows it. */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    /* Distance from the current emission point, in halfwords. */
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 1;
+
+    if (s16_p(disp)) {
+       J(x16(disp));
+       return;
+    }
+    if (s32_p(disp)) {
+       BRL(disp);
+       return;
+    }
+    /* Too far for a relative branch: jump through a register. */
+    tmp = jit_get_reg_but_zero(jit_class_nospill);
+    movi(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* Emit a jump whose target is loaded with movi_p and then taken through
+ * a register.  Returns the value produced by movi_p, which callers in
+ * this file hand to patch_at(). */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    jit_int32_t                tmp = jit_get_reg_but_zero(jit_class_nospill);
+
+    patch = movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+    return (patch);
+}
+
+/* Emit a call to the address i0: a pc-relative BRASL linking through
+ * %r14 when in range, otherwise an indirect call through a register. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    /* Distance from the current emission point, in halfwords. */
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 1;
+
+    if (s32_p(disp)) {
+       BRASL(_R14_REGNO, disp);
+       return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* Emit a call whose target is loaded with movi_p and then called through
+ * a register.  Returns the value produced by movi_p. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    jit_int32_t                tmp = jit_get_reg_but_zero(0);
+
+    patch = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+    return (patch);
+}
+
+/* General purpose registers %r2..%r13 in ascending order: indexes 0-3
+ * are %r2-%r5 and indexes 4-11 are %r6-%r13.  _prolog() and _epilog()
+ * index this table to pick the first register of their store/load
+ * multiple, scanning from index 4 in the non varargs case. */
+static jit_int32_t     gprs[] = {
+    _R2, _R3, _R4, _R5,
+    _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13
+};
+
+static void
+_prolog(jit_state_t *_jit, jit_node_t *i0)
+{
+    jit_int32_t                regno, offset;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             /* align stack at 8 bytes */
+                             _jitc->function->self.aoff) + 7) & -8;
+    /* *IFF* a non variadic function,
+     * Lightning does not reserve stack space for spilling arguments
+     * in registers.
+     * S390x, as per gcc, has 8 stack slots for spilling arguments,
+     * (%r6 is callee save) and uses an alloca like approach to save
+     * callee save fpr registers.
+     * Since argument registers are not saved in any lightning port,
+     * use the 8 slots to spill any modified fpr register, and still
+     * use the same stack frame logic as gcc.
+     * Save at least %r13 to %r15, as %r13 is used as frame pointer.
+     * *IFF* a variadic function, a "standard" stack frame, with
+     * fpr registers saved in an alloca'ed area, is used.
+     */
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       regno = _jitc->function->vagp;
+    else {
+       for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+           if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+               break;
+       }
+    }
+#if __WORDSIZE == 32
+#  define FP_OFFSET            64
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 4 + 8;
+    else
+       offset = (regno - 4) * 4 + 32;
+    STM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
+#  define FP_OFFSET            128
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 8 + 16;
+    else
+       offset = (regno - 4) * 8 + 48;
+    STMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
+
+#define SPILL(R, O)                                                    \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, R))             \
+           stxi_d(O, _R15_REGNO, rn(R));                               \
+    } while (0)
+    if (_jitc->function->self.call & jit_call_varargs) {
+       for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); ++regno)
+           stxi_d(FP_OFFSET + regno * 8, _R15_REGNO, rn(_F0 - regno));
+       SPILL(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+       SPILL(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+       SPILL(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+       SPILL(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+       SPILL(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+       SPILL(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+       SPILL(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
+       /* First 4 in low address */
+#if __WORDSIZE == 32
+       SPILL(_F10, 0);
+       SPILL(_F11, 8);
+       SPILL(_F12, 16);
+       SPILL(_F13, 24);
+       /* gpr registers here */
+       SPILL(_F14, 72);
+       SPILL(_F8, 80);
+       SPILL(_F9, 88);
+#else
+       SPILL(_F10, 16);
+       SPILL(_F11, 24);
+       SPILL(_F12, 32);
+       SPILL(_F13, 48);
+       /* Last 3 in high address */
+       SPILL(_F14, 136);
+       SPILL(_F8, 144);
+       SPILL(_F9, 152);
+#endif
+    }
+#undef SPILL
+    movr(_R13_REGNO, _R15_REGNO);
+    subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
+}
+
+static void
+_epilog(jit_state_t *_jit, jit_node_t *i0)
+{
+    jit_int32_t                regno, offset;
+    if (_jitc->function->assume_frame)
+       return;
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       regno = _jitc->function->vagp;
+    else {
+       for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+           if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+               break;
+       }
+    }
+#if __WORDSIZE == 32
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 4 + 8;
+    else
+       offset = (regno - 4) * 4 + 32;
+#else
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 8 + 16;
+    else
+       offset = (regno - 4) * 8 + 48;
+#endif
+    movr(_R15_REGNO, _R13_REGNO);
+
+#define LOAD(R, O)                                                     \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, R))             \
+           ldxi_d(rn(R), _R15_REGNO, O);                               \
+    } while (0)
+    if (_jitc->function->self.call & jit_call_varargs) {
+       LOAD(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+       LOAD(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+       LOAD(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+       LOAD(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+       LOAD(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+       LOAD(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+       LOAD(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
+#if __WORDSIZE == 32
+       LOAD(_F10, 0);
+       LOAD(_F11, 8);
+       LOAD(_F12, 16);
+       LOAD(_F13, 24);
+       LOAD(_F14, 72);
+       LOAD(_F8, 80);
+       LOAD(_F9, 88);
+#else
+       LOAD(_F10, 16);
+       LOAD(_F11, 24);
+       LOAD(_F12, 32);
+       LOAD(_F13, 48);
+       LOAD(_F14, 136);
+       LOAD(_F8, 144);
+       LOAD(_F9, 152);
+#endif
+    }
+#undef LOAD
+#if __WORDSIZE == 32
+    LM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
+    LMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
+    BR(_R14_REGNO);
+}
+
+/*
+ * Emit code for va_start: r0 receives the address of the function's
+ * jit_va_list_t (%r13 + vaoff) and its gpoff, fpoff, over and save
+ * fields are initialized.  Only valid in a function declared varargs
+ * (asserted).
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Return jit_va_list_t in the register argument */
+    addi(r0, _R13_REGNO, _jitc->function->vaoff);
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* Initialize gp offset in the save area. */
+    movi(rn(reg), _jitc->function->vagp);
+    stxi(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+
+    /* Initialize fp offset in the save area. */
+    movi(rn(reg), _jitc->function->vafp);
+    stxi(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+
+    /* Initialize overflow pointer to the first stack argument. */
+    addi(rn(reg), _R13_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+    /* Initialize register save area pointer. */
+    stxi(offsetof(jit_va_list_t, save), r0, _R13_REGNO);
+
+    jit_unget_reg(reg);
+}
+
+/*
+ * Emit code for va_arg of a word sized argument: r0 receives the next
+ * argument of the jit_va_list_t pointed to by r1.
+ *
+ * While gpoff is below 5 the value is read from the register save area,
+ * at save + gpoff * sizeof(jit_word_t) + 2 * sizeof(jit_word_t), and
+ * gpoff is incremented.  Otherwise it is read through the overflow
+ * pointer, which is then advanced by sizeof(jit_word_t).
+ * Only valid in a function declared varargs (asserted).
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_int32_t                rg2;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg_but_zero(0);
+    rg1 = jit_get_reg_but_zero(0);
+
+    /* Load the gp offset in save area in the first temporary. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei_p(_jit->pc.w, rn(rg0), 5);
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Scale offset */
+    rg2 = jit_get_reg_but_zero(0);
+    lshi(rn(rg2), rn(rg0),
+#if __WORDSIZE == 32
+        2
+#else
+        3
+#endif
+        );
+    /* Add offset to saved area. */
+    addi(rn(rg2), rn(rg2), 2 * sizeof(jit_word_t));
+
+    /* Load the vararg argument in the first argument. */
+    ldxr(r0, rn(rg1), rn(rg2));
+    jit_unget_reg_but_zero(rg2);
+
+    /* Update the gp offset. */
+    addi(rn(rg0), rn(rg0), 1);
+    stxi(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg_but_zero(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    ldr(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), sizeof(jit_word_t));
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg_but_zero(rg0);
+}
+
+/*
+ * Rewrite the already emitted instruction(s) at address instr so that
+ * they refer to label.  The form is selected from the opcode byte of the
+ * first halfword:
+ *   - movi_p sequence: the 16 bit immediates of the 2 (32 bit) or 4
+ *     (64 bit) instructions are replaced by successive 16 bit pieces of
+ *     label, lowest piece first;
+ *   - BRC (0xA7): the 16 bit field receives (label - instr) >> 1;
+ *   - BRCL (0xC0): the 32 bit field receives (label - instr) >> 1.
+ * Any other opcode calls abort().  Expected sub-opcodes and displacement
+ * ranges are only checked with assert().
+ *
+ * NOTE(review): decoding goes through bit-fields, so it relies on the
+ * compiler laying them out in the same order as the instruction bits
+ * (presumably true for the big-endian s390 targets) -- confirm before
+ * reusing elsewhere.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t                  d;
+    /* View of the code at instr as an array of halfwords. */
+    union {
+       jit_uint16_t            *s;
+       jit_word_t               w;
+    } u;
+    u.w = instr;
+    /* First halfword of an instruction: opcode byte plus two nibbles. */
+    union {
+       struct {
+           jit_uint16_t        op :  8;
+           jit_uint16_t        r1 :  4;
+           jit_uint16_t        r3 :  4;
+       } b;
+       jit_uint16_t            s;
+    } i0;
+    /* 16 bit immediate halfword. */
+    union {
+       struct {
+           jit_uint16_t        i2;
+       } b;
+       jit_uint16_t            s;
+    } i1;
+    /* 32 bit immediate split in high/low halfwords. */
+    union {
+       struct {
+           jit_uint32_t        ih : 16;
+           jit_uint32_t        il : 16;
+       } b;
+       jit_uint32_t            i;
+    } i12;
+    i0.s = u.s[0];
+    /* movi_p */
+    if (i0.b.op ==
+#if __WORDSIZE == 32
+       0xA7 && i0.b.r3 == 8
+#else
+       0xA5
+#endif
+       ) {
+#if __WORDSIZE == 64
+       assert(i0.b.r3 == 3);
+#endif
+       i1.b.i2 = (jit_uword_t)label;
+       u.s[1] = i1.s;
+       i0.s = u.s[2];
+       assert(i0.b.op == 0xA5 && i0.b.r3 == 2);
+       i1.b.i2 = (jit_uword_t)label >> 16;
+       u.s[3] = i1.s;
+#if __WORDSIZE == 64
+       i0.s = u.s[4];
+       assert(i0.b.op == 0xA5 && i0.b.r3 == 1);
+       i1.b.i2 = (jit_uword_t)label >> 32;
+       u.s[5] = i1.s;
+       i0.s = u.s[6];
+       assert(i0.b.op == 0xA5 && i0.b.r3 == 0);
+       i1.b.i2 = (jit_uword_t)label >> 48;
+       u.s[7] = i1.s;
+#endif
+    }
+    /* BRC */
+    else if (i0.b.op == 0xA7) {
+       assert(i0.b.r3 == 0x4);
+       /* Relative distance is encoded in halfwords. */
+       d = (label - instr) >> 1;
+       assert(s16_p(d));
+       i1.b.i2 = d;
+       u.s[1] = i1.s;
+    }
+    /* BRCL */
+    else if (i0.b.op == 0xC0) {
+       assert(i0.b.r3 == 0x4);
+       /* Relative distance is encoded in halfwords. */
+       d = (label - instr) >> 1;
+       assert(s32_p(d));
+       i12.i = d;
+       u.s[1] = i12.b.ih;
+       u.s[2] = i12.b.il;
+    }
+    else
+       abort();
+}
+#endif
diff --git a/deps/lightning/lib/jit_s390-fpu.c b/deps/lightning/lib/jit_s390-fpu.c
new file mode 100644 (file)
index 0000000..6d60513
--- /dev/null
@@ -0,0 +1,1316 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define RND_CUR                      0
+#  define RND_BIAS_NEAR                        1
+#  define RND_NEAR                     4
+#  define RND_ZERO                     5
+#  define RND_POS_INF                  6
+#  define RND_NEG_INF                  7
+/****************************************************************
+ * Floating Point Instructions                                 *
+ ****************************************************************/
+/* CONVERT BFP TO HFP */
+#  define THDER(R1,R2)                 RRE_(0xB358,R1,R2)
+#  define THDR(R1,R2)                  RRE_(0xB359,R1,R2)
+/* CONVERT HFP TO BFP */
+#  define TBEDR(R1,R2)                 RRE_(0xB350,R1,R2)
+#  define TBDR(R1,R2)                  RRE_(0xB351,R1,R2)
+/* LOAD */
+#  define LER(R1,R2)                   RR_(0x38,R1,R2)
+#  define LDR(R1,R2)                   RR_(0x28,R1,R2)
+#  define LXR(R1,R2)                   RRE_(0xB365,R1,R2)
+#  define LE(R1,D2,X2,B2)              RX_(0x78,R1,X2,B2,D2)
+#  define LD(R1,D2,X2,B2)              RX_(0x68,R1,X2,B2,D2)
+#  define LEY(R1,D2,X2,B2)             RXY_(0xED,R1,X2,B2,D2,0x64)
+#  define LDY(R1,D2,X2,B2)             RXY_(0xED,R1,X2,B2,D2,0x65)
+/* LOAD ZERO */
+#  define LZER(R1)                     RRE_(0xB374,R1,0)
+#  define LZDR(R1)                     RRE_(0xB375,R1,0)
+#  define LZXR(R1)                     RRE_(0xB376,R1,0)
+/* STORE */
+#  define STE(R1,D2,X2,B2)             RX_(0x70,R1,X2,B2,D2)
+#  define STD(R1,D2,X2,B2)             RX_(0x60,R1,X2,B2,D2)
+#  define STEY(R1,D2,X2,B2)            RXY_(0xED,R1,X2,B2,D2,0x66)
+#  define STDY(R1,D2,X2,B2)            RXY_(0xED,R1,X2,B2,D2,0x67)
+/****************************************************************
+ * Hexadecimal Floating Point Instructions                     *
+ ****************************************************************/
+/* ADD NORMALIZED */
+#  define AER(R1,R2)                   RR_(0x3A,R1,R2)
+#  define ADR(R1,R2)                   RR_(0x2A,R1,R2)
+#  define AXR(R1,R2)                   RR_(0x36,R1,R2)
+#  define AE(R1,D2,X2,B2)              RX_(0x7A,R1,X2,B2,D2)
+#  define AD(R1,D2,X2,B2)              RX_(0x6A,R1,X2,B2,D2)
+/* ADD UNNORMALIZED */
+#  define AUR(R1,R2)                   RR_(0x3E,R1,R2)
+#  define AWR(R1,R2)                   RR_(0x2E,R1,R2)
+#  define AU(R1,D2,X2,B2)              RX_(0x7E,R1,X2,B2,D2)
+#  define AW(R1,D2,X2,B2)              RX_(0x6E,R1,X2,B2,D2)
+/* COMPARE */
+#  define CER(R1,R2)                   RR_(0x39,R1,R2)
+#  define CDR(R1,R2)                   RR_(0x29,R1,R2)
+#  define CXR(R1,R2)                   RRE_(0xB369,R1,R2)
+#  define CE(R1,D2,X2,B2)              RX_(0x79,R1,X2,B2,D2)
+#  define CD(R1,D2,X2,B2)              RX_(0x69,R1,X2,B2,D2)
+/* CONVERT FROM FIXED */
+#  define CEFR(R1,R2)                  RRE_(0xB3B4,R1,R2)
+#  define CDFR(R1,R2)                  RRE_(0xB3B5,R1,R2)
+#  define CXFR(R1,R2)                  RRE_(0xB3B6,R1,R2)
+#  define CEGR(R1,R2)                  RRE_(0xB3C4,R1,R2)
+#  define CDGR(R1,R2)                  RRE_(0xB3C5,R1,R2)
+#  define CXGR(R1,R2)                  RRE_(0xB3C6,R1,R2)
+/* CONVERT TO FIXED */
+#  define CFER(R1,R2)                  RRE_(0xB3B8,R1,R2)
+#  define CFDR(R1,R2)                  RRE_(0xB3B9,R1,R2)
+#  define CFXR(R1,R2)                  RRE_(0xB3BA,R1,R2)
+#  define CGER(R1,R2)                  RRE_(0xB3C8,R1,R2)
+#  define CGDR(R1,R2)                  RRE_(0xB3C9,R1,R2)
+#  define CGXR(R1,R2)                  RRE_(0xB3CA,R1,R2)
+/* DIVIDE */
+#  define DER(R1,R2)                   RR_(0x3D,R1,R2)
+#  define DDR(R1,R2)                   RR_(0x2D,R1,R2)
+#  define DXR(R1,R2)                   RRE_(0xB22D,R1,R2)
+#  define DE(R1,D2,X2,B2)              RX_(0x7D,R1,X2,B2,D2)
+#  define DD(R1,D2,X2,B2)              RX_(0x6D,R1,X2,B2,D2)
+/* HALVE */
+#  define HER(R1,R2)                   RR_(0x34,R1,R2)
+#  define HDR(R1,R2)                   RR_(0x24,R1,R2)
+/* LOAD AND TEST */
+#  define LTER(R1,R2)                  RR_(0x32,R1,R2)
+#  define LTDR(R1,R2)                  RR_(0x22,R1,R2)
+#  define LTXR(R1,R2)                  RRE_(0xB362,R1,R2)
+/* LOAD COMPLEMENT */
+#  define LCER(R1,R2)                  RR_(0x33,R1,R2)
+#  define LCDR(R1,R2)                  RR_(0x23,R1,R2)
+#  define LCXR(R1,R2)                  RRE_(0xB363,R1,R2)
+/* LOAD FP INTEGER */
+#  define FIER(R1,R2)                  RRE_(0xB377,R1,R2)
+#  define FIDR(R1,R2)                  RRE_(0xB37F,R1,R2)
+#  define FIXR(R1,R2)                  RRE_(0xB367,R1,R2)
+/* LOAD LENGTHENED */
+#  define LDER(R1,R2)                  RRE_(0xB324,R1,R2)
+#  define LXDR(R1,R2)                  RRE_(0xB325,R1,R2)
+#  define LXER(R1,R2)                  RRE_(0xB326,R1,R2)
+#  define LDE(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x24)
+#  define LXD(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x25)
+#  define LXE(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x26)
+/* LOAD NEGATIVE */
+#  define LNER(R1,R2)                  RR_(0x31,R1,R2)
+#  define LNDR(R1,R2)                  RR_(0x21,R1,R2)
+#  define LNXR(R1,R2)                  RRE_(0xB361,R1,R2)
+/* LOAD POSITIVE */
+#  define LPER(R1,R2)                  RR_(0x30,R1,R2)
+#  define LPDR(R1,R2)                  RR_(0x20,R1,R2)
+#  define LPXR(R1,R2)                  RRE_(0xB360,R1,R2)
+/* LOAD ROUNDED */
+#  define LEDR(R1,R2)                  RR_(0x35,R1,R2)
+#  define LDXR(R1,R2)                  RR_(0x25,R1,R2)
+#  define LRER(R1,R2)                  LEDR(R1,R2)
+#  define LRDR(R1,R2)                  LDXR(R1,R2)
+#  define LRXR(R1,R2)                  RRE_(0xB366,R1,R2)
+/* MULTIPLY */
+#  define MEER(R1,R2)                  RRE_(0xB337,R1,R2)
+#  define MDR(R1,R2)                   RR_(0x2C,R1,R2)
+#  define MXR(R1,R2)                   RR_(0x26,R1,R2)
+#  define MDER(R1,R2)                  RR_(0x3C,R1,R2)
+#  define MXDR(R1,R2)                  RR_(0x27,R1,R2)
+#  define MER(R1,R2)                   MDER(R1,R2)
+#  define MEE(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x37)
+#  define MD(R1,D2,X2,B2)              RX_(0x6C,R1,X2,B2,D2)
+#  define MDE(R1,D2,X2,B2)             RX_(0x7C,R1,X2,B2,D2)
+#  define MXD(R1,D2,X2,B2)             RX_(0x67,R1,X2,B2,D2)
+#  define ME(R1,D2,X2,B2)              MDE(R1,D2,X2,B2)
+/* MULTIPLY AND ADD */
+#  define MAER(R1,R3,R2)               RRF_(0xB32E,R1,0,R3,R2)
+#  define MADR(R1,R3,R2)               RRF_(0xB33E,R1,0,R3,R2)
+#  define MAE(R1,R3,D2,X2,B2)          RXF_(0xED,R3,X2,B2,D2,R1,0x2E)
+#  define MAD(R1,R3,D2,X2,B2)          RXF_(0xED,R3,X2,B2,D2,R1,0x3E)
+/* MULTIPLY AND SUBTRACT */
+#  define MSER(R1,R3,R2)               RRF_(0xB32F,R1,0,R3,R2)
+#  define MSDR(R1,R3,R2)               RRF_(0xB33F,R1,0,R3,R2)
+#  define MSE(R1,R3,D2,X2,B2)          RXF_(0xED,R3,X2,B2,D2,R1,0x2F)
+#  define MSD(R1,R3,D2,X2,B2)          RXF_(0xED,R3,X2,B2,D2,R1,0x3F)
+/* SQUARE ROOT */
+#  define SQER(R1,R2)                  RRE_(0xB245,R1,R2)
+#  define SQDR(R1,R2)                  RRE_(0xB244,R1,R2)
+#  define SQXR(R1,R2)                  RRE_(0xB336,R1,R2)
+#  define SQE(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x34)
+#  define SQD(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x35)
+/* SUBTRACT NORMALIZED */
+#  define SER(R1,R2)                   RR_(0x3B,R1,R2)
+#  define SDR(R1,R2)                   RR_(0x2B,R1,R2)
+#  define SXR(R1,R2)                   RR_(0x37,R1,R2)
+#  define SE(R1,D2,X2,B2)              RX_(0x7B,R1,X2,B2,D2)
+#  define SD(R1,D2,X2,B2)              RX_(0x6B,R1,X2,B2,D2)
+/* SUBTRACT UNNORMALIZED */
+#  define SUR(R1,R2)                   RR_(0x3F,R1,R2)
+#  define SWR(R1,R2)                   RR_(0x2F,R1,R2)
+#  define SU(R1,D2,X2,B2)              RX_(0x7F,R1,X2,B2,D2)
+#  define SW(R1,D2,X2,B2)              RX_(0x6F,R1,X2,B2,D2)
+/****************************************************************
+ * Binary Floating Point Instructions                          *
+ ****************************************************************/
+/* ADD: the *BR macros are register-register (RRE_ encoding); the others take a D2(X2,B2) storage operand (RXE_ encoding, opcode byte 0xED) */
+#  define AEBR(R1,R2)                  RRE_(0xB30A,R1,R2)
+#  define ADBR(R1,R2)                  RRE_(0xB31A,R1,R2)
+#  define AXBR(R1,R2)                  RRE_(0xB34A,R1,R2)
+#  define AEB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x0A)
+#  define ADB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x1A)
+/* COMPARE: CEBR/CDBR are the compares emitted by the fr/dr and bfr/bdr helpers in this file */
+#  define CEBR(R1,R2)                  RRE_(0xB309,R1,R2)
+#  define CDBR(R1,R2)                  RRE_(0xB319,R1,R2)
+#  define CXBR(R1,R2)                  RRE_(0xB349,R1,R2)
+#  define CEB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x09)
+#  define CDB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x19)
+/* COMPARE AND SIGNAL */
+#  define KEBR(R1,R2)                  RRE_(0xB308,R1,R2)
+#  define KDBR(R1,R2)                  RRE_(0xB318,R1,R2)
+#  define KXBR(R1,R2)                  RRE_(0xB348,R1,R2)
+#  define KEB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x08)
+#  define KDB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x18)
+/* CONVERT FROM FIXED: used by the extr_f/extr_d macros, which pick the opcode by __WORDSIZE */
+#  define CEFBR(R1,R2)                 RRE_(0xB394,R1,R2)
+#  define CDFBR(R1,R2)                 RRE_(0xB395,R1,R2)
+#  define CXFBR(R1,R2)                 RRE_(0xB396,R1,R2)
+#  define CEGBR(R1,R2)                 RRE_(0xB3A4,R1,R2)
+#  define CDGBR(R1,R2)                 RRE_(0xB3A5,R1,R2)
+#  define CXGBR(R1,R2)                 RRE_(0xB3A6,R1,R2)
+/* CONVERT TO FIXED: M3 is passed in the first RRF_ operand slot; the truncr_* macros pass RND_ZERO there */
+#  define CFEBR(R1,M3,R2)              RRF_(0xB398,M3,0,R1,R2)
+#  define CFDBR(R1,M3,R2)              RRF_(0xB399,M3,0,R1,R2)
+#  define CFXBR(R1,M3,R2)              RRF_(0xB39A,M3,0,R1,R2)
+#  define CGEBR(R1,M3,R2)              RRF_(0xB3A8,M3,0,R1,R2)
+#  define CGDBR(R1,M3,R2)              RRF_(0xB3A9,M3,0,R1,R2)
+#  define CGXBR(R1,M3,R2)              RRF_(0xB3AA,M3,0,R1,R2)
+/* DIVIDE */
+#  define DEBR(R1,R2)                  RRE_(0xB30D,R1,R2)
+#  define DDBR(R1,R2)                  RRE_(0xB31D,R1,R2)
+#  define DXBR(R1,R2)                  RRE_(0xB34D,R1,R2)
+#  define DEB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x0D)
+#  define DDB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x1D)
+/* DIVIDE TO INTEGER: R3 and M4 occupy the first two RRF_ operand slots */
+#  define DIEBR(R1,R3,R2,M4)           RRF_(0xB353,R3,M4,R1,R2)
+#  define DIDBR(R1,R3,R2,M4)           RRF_(0xB35B,R3,M4,R1,R2)
+/* EXTRACT FPC: single operand, the second RRE_ register field is encoded as 0 */
+#  define EFPC(R1)                     RRE_(0xB38C,R1,0)
+/* LOAD AND TEST */
+#  define LTEBR(R1,R2)                 RRE_(0xB302,R1,R2)
+#  define LTDBR(R1,R2)                 RRE_(0xB312,R1,R2)
+#  define LTXBR(R1,R2)                 RRE_(0xB342,R1,R2)
+/* LOAD COMPLEMENT: backs the negr_f/negr_d macros */
+#  define LCEBR(R1,R2)                 RRE_(0xB303,R1,R2)
+#  define LCDBR(R1,R2)                 RRE_(0xB313,R1,R2)
+#  define LCXBR(R1,R2)                 RRE_(0xB343,R1,R2)
+/* LOAD FP INTEGER: M3 is passed in the first RRF_ operand slot */
+#  define FIEBR(R1,M3,R2)              RRF_(0xB357,M3,0,R1,R2)
+#  define FIDBR(R1,M3,R2)              RRF_(0xB35F,M3,0,R1,R2)
+#  define FIXBR(R1,M3,R2)              RRF_(0xB347,M3,0,R1,R2)
+/* LOAD FPC: S_ encoding with a D2(B2) storage operand */
+#  define LFPC(D2,B2)                  S_(0xB29D,B2,D2)
+/* LOAD LENGTHENED: LDEBR backs the extr_f_d macro */
+#  define LDEBR(R1,R2)                 RRE_(0xB304,R1,R2)
+#  define LXDBR(R1,R2)                 RRE_(0xB305,R1,R2)
+#  define LXEBR(R1,R2)                 RRE_(0xB306,R1,R2)
+#  define LDEB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x04)
+#  define LXDB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x05)
+#  define LXEB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x06)
+/* LOAD NEGATIVE */
+#  define LNEBR(R1,R2)                 RRE_(0xB301,R1,R2)
+#  define LNDBR(R1,R2)                 RRE_(0xB311,R1,R2)
+#  define LNXBR(R1,R2)                 RRE_(0xB341,R1,R2)
+/* LOAD POSITIVE: backs the absr_f/absr_d macros */
+#  define LPEBR(R1,R2)                 RRE_(0xB300,R1,R2)
+#  define LPDBR(R1,R2)                 RRE_(0xB310,R1,R2)
+#  define LPXBR(R1,R2)                 RRE_(0xB340,R1,R2)
+/* LOAD ROUNDED: LEDBR backs the extr_d_f macro */
+#  define LEDBR(R1,R2)                 RRE_(0xB344,R1,R2)
+#  define LDXBR(R1,R2)                 RRE_(0xB345,R1,R2)
+#  define LEXBR(R1,R2)                 RRE_(0xB346,R1,R2)
+/* MULTIPLY: MEEBR/MDBR are the forms emitted by _mulr_f/_mulr_d */
+#  define MEEBR(R1,R2)                 RRE_(0xB317,R1,R2)
+#  define MDBR(R1,R2)                  RRE_(0xB31C,R1,R2)
+#  define MXBR(R1,R2)                  RRE_(0xB34C,R1,R2)
+#  define MDEBR(R1,R2)                 RRE_(0xB30C,R1,R2)
+#  define MXDBR(R1,R2)                 RRE_(0xB307,R1,R2)
+#  define MEEB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x17)
+#  define MDB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x1C)
+#  define MDEB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x0C)
+#  define MXDB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x07)
+/* MULTIPLY AND ADD: note the operand placement, R1 goes in the first RRF_ slot and R3 in the third; storage forms use RXF_ */
+#  define MAEBR(R1,R3,R2)              RRF_(0xB30E,R1,0,R3,R2)
+#  define MADBR(R1,R3,R2)              RRF_(0xB31E,R1,0,R3,R2)
+#  define MAEB(R1,R3,D2,X2,B2)         RXF_(0xED,R3,X2,B2,D2,R1,0x0E)
+#  define MADB(R1,R3,D2,X2,B2)         RXF_(0xED,R3,X2,B2,D2,R1,0x1E)
+/* MULTIPLY AND SUBTRACT: same operand placement as MULTIPLY AND ADD */
+#  define MSEBR(R1,R3,R2)              RRF_(0xB30F,R1,0,R3,R2)
+#  define MSDBR(R1,R3,R2)              RRF_(0xB31F,R1,0,R3,R2)
+#  define MSEB(R1,R3,D2,X2,B2)         RXF_(0xED,R3,X2,B2,D2,R1,0x0F)
+#  define MSDB(R1,R3,D2,X2,B2)         RXF_(0xED,R3,X2,B2,D2,R1,0x1F)
+/* SET FPC: single operand, the second RRE_ register field is encoded as 0 */
+#  define SFPC(R1)                     RRE_(0xB384,R1,0)
+/* SET ROUNDING MODE */
+#  define SRNM(D2,B2)                  S_(0xB299,B2,D2)
+/* SQUARE ROOT: SQEBR/SQDBR back the sqrtr_f/sqrtr_d macros */
+#  define SQEBR(R1,R2)                 RRE_(0xB314,R1,R2)
+#  define SQDBR(R1,R2)                 RRE_(0xB315,R1,R2)
+#  define SQXBR(R1,R2)                 RRE_(0xB316,R1,R2)
+/* STORE FPC */
+#  define STFPC(D2,B2)                 S_(0xB29C,B2,D2)
+/* SUBTRACT */
+#  define SEBR(R1,R2)                  RRE_(0xB30B,R1,R2)
+#  define SDBR(R1,R2)                  RRE_(0xB31B,R1,R2)
+#  define SXBR(R1,R2)                  RRE_(0xB34B,R1,R2)
+#  define SEB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x0B)
+#  define SDB(R1,D2,X2,B2)             RXE_(0xED,R1,X2,B2,D2,0x1B)
+/* TEST DATA CLASS: only storage-operand (RXE_) forms are defined here */
+#  define TCEB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x10)
+#  define TCDB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x11)
+#  define TCXB(R1,D2,X2,B2)            RXE_(0xED,R1,X2,B2,D2,0x12)
+#  define fp(code,r0,r1,i0)            _fp(_jit,jit_code_##code##i_f,r0,r1,i0) /* float op with immediate operand; `code` is pasted into jit_code_<code>i_f */
+static void _fp(jit_state_t*,jit_code_t,
+               jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define dp(code,r0,r1,i0)            _dp(_jit,jit_code_##code##i_d,r0,r1,i0) /* double counterpart of fp(); pastes jit_code_<code>i_d */
+static void _dp(jit_state_t*,jit_code_t,
+               jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define fr(cc,r0,r1,r2)              _fr(_jit,cc,r0,r1,r2) /* r0 = 1 if comparing r1,r2 (CEBR) matches mask cc, else 0 */
+static void _fr(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#  define dr(cc,r0,r1,r2)              _dr(_jit,cc,r0,r1,r2) /* as fr(), comparing with CDBR */
+static void _dr(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_int32_t);
+#  define fi(cc,r0,r1,i0)              _fi(_jit,cc,r0,r1,i0) /* fr() with the right operand loaded from *i0 */
+static void _fi(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define di(cc,r0,r1,i0)              _di(_jit,cc,r0,r1,i0) /* dr() with the right operand loaded from *i0 */
+static void _di(jit_state_t*,jit_int32_t,
+               jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define bfr(cc,i0,r0,r1)             _bfr(_jit,cc,i0,r0,r1) /* CEBR r0,r1 then branch to i0 under mask cc */
+static void _bfr(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_int32_t);
+#  define bdr(cc,i0,r0,r1)             _bdr(_jit,cc,i0,r0,r1) /* as bfr(), comparing with CDBR */
+static void _bdr(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_int32_t);
+#  define bfr_p(cc,i0,r0,r1)           _bfr_p(_jit,cc,i0,r0,r1) /* patchable form: returns the address of the emitted BRCL */
+static jit_word_t _bfr_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_int32_t);
+#  define bdr_p(cc,i0,r0,r1)           _bdr_p(_jit,cc,i0,r0,r1) /* patchable form of bdr() */
+static jit_word_t _bdr_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_int32_t);
+#  define bfi(cc,i0,r0,i1)             _bfi(_jit,cc,i0,r0,i1) /* bfr() with the right operand loaded from *i1 */
+static void _bfi(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bdi(cc,i0,r0,i1)             _bdi(_jit,cc,i0,r0,i1) /* bdr() with the right operand loaded from *i1 */
+static void _bdi(jit_state_t*,jit_int32_t,
+                jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bfi_p(cc,i0,r0,i1)           _bfi_p(_jit,cc,i0,r0,i1) /* patchable form of bfi() */
+static jit_word_t _bfi_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bdi_p(cc,i0,r0,i1)           _bdi_p(_jit,cc,i0,r0,i1) /* patchable form of bdi() */
+static jit_word_t _bdi_p(jit_state_t*,jit_int32_t,
+                        jit_word_t,jit_int32_t,jit_float64_t*);
+#  define buneqr(db,i0,r0,r1)          _buneqr(_jit,db,i0,r0,r1) /* branch if unordered or equal; db selects CDBR (non-zero) or CEBR (zero) */
+static jit_word_t _buneqr(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi(db,i0,r0,i1)          _buneqi(_jit,db,i0,r0,(jit_word_t)i1) /* immediate pointer is passed as a jit_word_t and cast back by db */
+static jit_word_t _buneqi(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_word_t);
+#  define bltgtr(db,i0,r0,r1)          _bltgtr(_jit,db,i0,r0,r1) /* branch unless unordered or equal; db as in buneqr() */
+static jit_word_t _bltgtr(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti(db,i0,r0,i1)          _bltgti(_jit,db,i0,r0,(jit_word_t)i1) /* immediate pointer is passed as a jit_word_t and cast back by db */
+static jit_word_t _bltgti(jit_state_t*,jit_int32_t,
+                         jit_word_t,jit_int32_t,jit_word_t);
+#  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1) /* register copy, skipped when r0 == r1 */
+static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0) /* load the float constant *i0 into r0 */
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1) /* register copy, skipped when r0 == r1 */
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0) /* load the double constant *i0 into r0 */
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  define absr_f(r0,r1)                        LPEBR(r0,r1) /* absolute value via LOAD POSITIVE */
+#  define absr_d(r0,r1)                        LPDBR(r0,r1)
+#  define negr_f(r0,r1)                        LCEBR(r0,r1) /* negation via LOAD COMPLEMENT */
+#  define negr_d(r0,r1)                        LCDBR(r0,r1)
+#  define sqrtr_f(r0,r1)               SQEBR(r0,r1) /* SQUARE ROOT */
+#  define sqrtr_d(r0,r1)               SQDBR(r0,r1)
+#  define truncr_f_i(r0,r1)            CFEBR(r0,RND_ZERO,r1) /* CONVERT TO FIXED with RND_ZERO as the M3 modifier */
+#  define truncr_d_i(r0,r1)            CFDBR(r0,RND_ZERO,r1)
+#  if __WORDSIZE == 64 /* the _l truncations are only defined for 64-bit words */
+#    define truncr_f_l(r0,r1)          CGEBR(r0,RND_ZERO,r1)
+#    define truncr_d_l(r0,r1)          CGDBR(r0,RND_ZERO,r1)
+#  endif
+#  if __WORDSIZE == 32 /* extr_*: the CONVERT FROM FIXED opcode is chosen by word size */
+#    define extr_f(r0,r1)              CEFBR(r0,r1)
+#    define extr_d(r0,r1)              CDFBR(r0,r1)
+#  else
+#    define extr_f(r0,r1)              CEGBR(r0,r1)
+#    define extr_d(r0,r1)              CDGBR(r0,r1)
+#  endif
+#  define extr_d_f(r0,r1)              LEDBR(r0,r1) /* double to float via LOAD ROUNDED */
+#  define extr_f_d(r0,r1)              LDEBR(r0,r1) /* float to double via LOAD LENGTHENED */
+#  define addr_f(r0,r1,r2)             _addr_f(_jit,r0,r1,r2) /* three-operand arithmetic: r0 = r1 op r2, built on the two-operand opcodes */
+static void _addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi_f(r0,r1,i0)             fp(add,r0,r1,i0) /* immediate forms go through fp()/dp(), which load *i0 into a temporary register */
+#  define addr_d(r0,r1,r2)             _addr_d(_jit,r0,r1,r2)
+static void _addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addi_d(r0,r1,i0)             dp(add,r0,r1,i0)
+#  define subr_f(r0,r1,r2)             _subr_f(_jit,r0,r1,r2)
+static void _subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi_f(r0,r1,i0)             fp(sub,r0,r1,i0)
+#  define subr_d(r0,r1,r2)             _subr_d(_jit,r0,r1,r2)
+static void _subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi_d(r0,r1,i0)             dp(sub,r0,r1,i0)
+#  define rsbr_f(r0,r1,r2)             subr_f(r0,r2,r1) /* reverse subtract: subr with the source operands swapped */
+#  define rsbi_f(r0,r1,i0)             fp(rsb,r0,r1,i0)
+#  define rsbr_d(r0,r1,r2)             subr_d(r0,r2,r1)
+#  define rsbi_d(r0,r1,i0)             dp(rsb,r0,r1,i0)
+#  define mulr_f(r0,r1,r2)             _mulr_f(_jit,r0,r1,r2)
+static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli_f(r0,r1,i0)             fp(mul,r0,r1,i0)
+#  define mulr_d(r0,r1,r2)             _mulr_d(_jit,r0,r1,r2)
+static void _mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli_d(r0,r1,i0)             dp(mul,r0,r1,i0)
+#  define divr_f(r0,r1,r2)             _divr_f(_jit,r0,r1,r2)
+static void _divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_f(r0,r1,i0)             fp(div,r0,r1,i0)
+#  define divr_d(r0,r1,r2)             _divr_d(_jit,r0,r1,r2)
+static void _divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_d(r0,r1,i0)             dp(div,r0,r1,i0)
+#  define ldr_f(r0,r1)                 LE(r0,0,0,r1) /* NOTE(review): LE/LD are defined outside this section; presumably (R1,D2,X2,B2), i.e. load from the address in r1 - confirm */
+#  define ldr_d(r0,r1)                 LD(r0,0,0,r1)
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0) /* load from absolute address i0 (used by movi_f with a constant's address) */
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0) /* load from r1 + i0 (movi_f uses it with _FP_REGNO and -4) */
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 STE(r1,0,0,r0) /* note the argument swap: the value register r1 is passed first, the address register r0 last */
+#  define str_d(r0,r1)                 STD(r1,0,0,r0)
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define ltr_f(r0,r1,r2)              fr(CC_L,r0,r1,r2) /* comparisons: r0 = 1 when the mask matches after comparing r1 with r2 (or *i0), else 0; see _fr/_dr/_fi/_di */
+#  define ltr_d(r0,r1,r2)              dr(CC_L,r0,r1,r2)
+#  define lti_f(r0,r1,i0)              fi(CC_L,r0,r1,i0)
+#  define lti_d(r0,r1,i0)              di(CC_L,r0,r1,i0)
+#  define ler_f(r0,r1,r2)              fr(CC_LE,r0,r1,r2)
+#  define ler_d(r0,r1,r2)              dr(CC_LE,r0,r1,r2)
+#  define lei_f(r0,r1,i0)              fi(CC_LE,r0,r1,i0)
+#  define lei_d(r0,r1,i0)              di(CC_LE,r0,r1,i0)
+#  define eqr_f(r0,r1,r2)              fr(CC_E,r0,r1,r2)
+#  define eqr_d(r0,r1,r2)              dr(CC_E,r0,r1,r2)
+#  define eqi_f(r0,r1,i0)              fi(CC_E,r0,r1,i0)
+#  define eqi_d(r0,r1,i0)              di(CC_E,r0,r1,i0)
+#  define ger_f(r0,r1,r2)              fr(CC_HE,r0,r1,r2)
+#  define ger_d(r0,r1,r2)              dr(CC_HE,r0,r1,r2)
+#  define gei_f(r0,r1,i0)              fi(CC_HE,r0,r1,i0)
+#  define gei_d(r0,r1,i0)              di(CC_HE,r0,r1,i0)
+#  define gtr_f(r0,r1,r2)              fr(CC_H,r0,r1,r2)
+#  define gtr_d(r0,r1,r2)              dr(CC_H,r0,r1,r2)
+#  define gti_f(r0,r1,i0)              fi(CC_H,r0,r1,i0)
+#  define gti_d(r0,r1,i0)              di(CC_H,r0,r1,i0)
+#  define ner_f(r0,r1,r2)              fr(CC_NE,r0,r1,r2)
+#  define ner_d(r0,r1,r2)              dr(CC_NE,r0,r1,r2)
+#  define nei_f(r0,r1,i0)              fi(CC_NE,r0,r1,i0)
+#  define nei_d(r0,r1,i0)              di(CC_NE,r0,r1,i0)
+#  define unltr_f(r0,r1,r2)            fr(CC_NHE,r0,r1,r2) /* un* predicates reuse the negated masks (CC_NHE, CC_NH, CC_NL, CC_NLE) */
+#  define unltr_d(r0,r1,r2)            dr(CC_NHE,r0,r1,r2)
+#  define unlti_f(r0,r1,i0)            fi(CC_NHE,r0,r1,i0)
+#  define unlti_d(r0,r1,i0)            di(CC_NHE,r0,r1,i0)
+#  define unler_f(r0,r1,r2)            fr(CC_NH,r0,r1,r2)
+#  define unler_d(r0,r1,r2)            dr(CC_NH,r0,r1,r2)
+#  define unlei_f(r0,r1,i0)            fi(CC_NH,r0,r1,i0)
+#  define unlei_d(r0,r1,i0)            di(CC_NH,r0,r1,i0)
+#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2) /* implemented as a dedicated function rather than through fr() */
+static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            fp(uneq,r0,r1,i0) /* immediate form dispatched by _fp/_dp to uneqr_* */
+#  define uneqi_d(r0,r1,i0)            dp(uneq,r0,r1,i0)
+#  define unger_f(r0,r1,r2)            fr(CC_NL,r0,r1,r2)
+#  define unger_d(r0,r1,r2)            dr(CC_NL,r0,r1,r2)
+#  define ungei_f(r0,r1,i0)            fi(CC_NL,r0,r1,i0)
+#  define ungei_d(r0,r1,i0)            di(CC_NL,r0,r1,i0)
+#  define ungtr_f(r0,r1,r2)            fr(CC_NLE,r0,r1,r2)
+#  define ungtr_d(r0,r1,r2)            dr(CC_NLE,r0,r1,r2)
+#  define ungti_f(r0,r1,i0)            fi(CC_NLE,r0,r1,i0)
+#  define ungti_d(r0,r1,i0)            di(CC_NLE,r0,r1,i0)
+#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2) /* implemented as a dedicated function rather than through fr() */
+static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            fp(ltgt,r0,r1,i0) /* immediate form dispatched by _fp/_dp to ltgtr_* */
+#  define ltgti_d(r0,r1,i0)            dp(ltgt,r0,r1,i0)
+#  define ordr_f(r0,r1,r2)             fr(CC_NO,r0,r1,r2) /* ordered / unordered tests use CC_NO / CC_O */
+#  define ordr_d(r0,r1,r2)             dr(CC_NO,r0,r1,r2)
+#  define ordi_f(r0,r1,i0)             fi(CC_NO,r0,r1,i0)
+#  define ordi_d(r0,r1,i0)             di(CC_NO,r0,r1,i0)
+#  define unordr_f(r0,r1,r2)           fr(CC_O,r0,r1,r2)
+#  define unordr_d(r0,r1,r2)           dr(CC_O,r0,r1,r2)
+#  define unordi_f(r0,r1,i0)           fi(CC_O,r0,r1,i0)
+#  define unordi_d(r0,r1,i0)           di(CC_O,r0,r1,i0)
+#  define bltr_f(i0,r0,r1)             bfr(CC_L,i0,r0,r1) /* branches: compare r0 with r1 (or *i1) and branch to i0 under the mask; see _bfr/_bdr/_bfi/_bdi */
+#  define bltr_d(i0,r0,r1)             bdr(CC_L,i0,r0,r1)
+#  define blti_f(i0,r0,i1)             bfi(CC_L,i0,r0,i1)
+#  define blti_d(i0,r0,i1)             bdi(CC_L,i0,r0,i1)
+#  define bltr_f_p(i0,r0,r1)           bfr_p(CC_L,i0,r0,r1) /* _p variants return the address of the emitted branch so it can be patched later */
+#  define bltr_d_p(i0,r0,r1)           bdr_p(CC_L,i0,r0,r1)
+#  define blti_f_p(i0,r0,i1)           bfi_p(CC_L,i0,r0,i1)
+#  define blti_d_p(i0,r0,i1)           bdi_p(CC_L,i0,r0,i1)
+#  define bler_f(i0,r0,r1)             bfr(CC_LE,i0,r0,r1)
+#  define bler_d(i0,r0,r1)             bdr(CC_LE,i0,r0,r1)
+#  define blei_f(i0,r0,i1)             bfi(CC_LE,i0,r0,i1)
+#  define blei_d(i0,r0,i1)             bdi(CC_LE,i0,r0,i1)
+#  define bler_f_p(i0,r0,r1)           bfr_p(CC_LE,i0,r0,r1)
+#  define bler_d_p(i0,r0,r1)           bdr_p(CC_LE,i0,r0,r1)
+#  define blei_f_p(i0,r0,i1)           bfi_p(CC_LE,i0,r0,i1)
+#  define blei_d_p(i0,r0,i1)           bdi_p(CC_LE,i0,r0,i1)
+#  define beqr_f(i0,r0,r1)             bfr(CC_E,i0,r0,r1)
+#  define beqr_d(i0,r0,r1)             bdr(CC_E,i0,r0,r1)
+#  define beqi_f(i0,r0,i1)             bfi(CC_E,i0,r0,i1)
+#  define beqi_d(i0,r0,i1)             bdi(CC_E,i0,r0,i1)
+#  define beqr_f_p(i0,r0,r1)           bfr_p(CC_E,i0,r0,r1)
+#  define beqr_d_p(i0,r0,r1)           bdr_p(CC_E,i0,r0,r1)
+#  define beqi_f_p(i0,r0,i1)           bfi_p(CC_E,i0,r0,i1)
+#  define beqi_d_p(i0,r0,i1)           bdi_p(CC_E,i0,r0,i1)
+#  define bger_f(i0,r0,r1)             bfr(CC_HE,i0,r0,r1)
+#  define bger_d(i0,r0,r1)             bdr(CC_HE,i0,r0,r1)
+#  define bgei_f(i0,r0,i1)             bfi(CC_HE,i0,r0,i1)
+#  define bgei_d(i0,r0,i1)             bdi(CC_HE,i0,r0,i1)
+#  define bger_f_p(i0,r0,r1)           bfr_p(CC_HE,i0,r0,r1)
+#  define bger_d_p(i0,r0,r1)           bdr_p(CC_HE,i0,r0,r1)
+#  define bgei_f_p(i0,r0,i1)           bfi_p(CC_HE,i0,r0,i1)
+#  define bgei_d_p(i0,r0,i1)           bdi_p(CC_HE,i0,r0,i1)
+#  define bgtr_f(i0,r0,r1)             bfr(CC_H,i0,r0,r1)
+#  define bgtr_d(i0,r0,r1)             bdr(CC_H,i0,r0,r1)
+#  define bgti_f(i0,r0,i1)             bfi(CC_H,i0,r0,i1)
+#  define bgti_d(i0,r0,i1)             bdi(CC_H,i0,r0,i1)
+#  define bgtr_f_p(i0,r0,r1)           bfr_p(CC_H,i0,r0,r1)
+#  define bgtr_d_p(i0,r0,r1)           bdr_p(CC_H,i0,r0,r1)
+#  define bgti_f_p(i0,r0,i1)           bfi_p(CC_H,i0,r0,i1)
+#  define bgti_d_p(i0,r0,i1)           bdi_p(CC_H,i0,r0,i1)
+#  define bner_f(i0,r0,r1)             bfr(CC_NE,i0,r0,r1)
+#  define bner_d(i0,r0,r1)             bdr(CC_NE,i0,r0,r1)
+#  define bnei_f(i0,r0,i1)             bfi(CC_NE,i0,r0,i1)
+#  define bnei_d(i0,r0,i1)             bdi(CC_NE,i0,r0,i1)
+#  define bner_f_p(i0,r0,r1)           bfr_p(CC_NE,i0,r0,r1)
+#  define bner_d_p(i0,r0,r1)           bdr_p(CC_NE,i0,r0,r1)
+#  define bnei_f_p(i0,r0,i1)           bfi_p(CC_NE,i0,r0,i1)
+#  define bnei_d_p(i0,r0,i1)           bdi_p(CC_NE,i0,r0,i1)
+#  define bunltr_f(i0,r0,r1)           bfr(CC_NHE,i0,r0,r1)
+#  define bunltr_d(i0,r0,r1)           bdr(CC_NHE,i0,r0,r1)
+#  define bunlti_f(i0,r0,i1)           bfi(CC_NHE,i0,r0,i1)
+#  define bunlti_d(i0,r0,i1)           bdi(CC_NHE,i0,r0,i1)
+#  define bunltr_f_p(i0,r0,r1)         bfr_p(CC_NHE,i0,r0,r1)
+#  define bunltr_d_p(i0,r0,r1)         bdr_p(CC_NHE,i0,r0,r1)
+#  define bunlti_f_p(i0,r0,i1)         bfi_p(CC_NHE,i0,r0,i1)
+#  define bunlti_d_p(i0,r0,i1)         bdi_p(CC_NHE,i0,r0,i1)
+#  define bunler_f(i0,r0,r1)           bfr(CC_NH,i0,r0,r1)
+#  define bunler_d(i0,r0,r1)           bdr(CC_NH,i0,r0,r1)
+#  define bunlei_f(i0,r0,i1)           bfi(CC_NH,i0,r0,i1)
+#  define bunlei_d(i0,r0,i1)           bdi(CC_NH,i0,r0,i1)
+#  define bunler_f_p(i0,r0,r1)         bfr_p(CC_NH,i0,r0,r1)
+#  define bunler_d_p(i0,r0,r1)         bdr_p(CC_NH,i0,r0,r1)
+#  define bunlei_f_p(i0,r0,i1)         bfi_p(CC_NH,i0,r0,i1)
+#  define bunlei_d_p(i0,r0,i1)         bdi_p(CC_NH,i0,r0,i1)
+#  define buneqr_f(i0,r0,r1)           buneqr(0,i0,r0,r1) /* first argument selects precision: 0 = float (CEBR), 1 = double (CDBR) */
+#  define buneqr_d(i0,r0,r1)           buneqr(1,i0,r0,r1)
+#  define buneqi_f(i0,r0,i1)           buneqi(0,i0,r0,i1)
+#  define buneqi_d(i0,r0,i1)           buneqi(1,i0,r0,i1)
+#  define buneqr_f_p(i0,r0,r1)         buneqr(0,i0,r0,r1) /* same helper as the non-_p form: buneqr/buneqi always return the branch address */
+#  define buneqr_d_p(i0,r0,r1)         buneqr(1,i0,r0,r1)
+#  define buneqi_f_p(i0,r0,i1)         buneqi(0,i0,r0,i1)
+#  define buneqi_d_p(i0,r0,i1)         buneqi(1,i0,r0,i1)
+#  define bunger_f(i0,r0,r1)           bfr(CC_NL,i0,r0,r1)
+#  define bunger_d(i0,r0,r1)           bdr(CC_NL,i0,r0,r1)
+#  define bungei_f(i0,r0,i1)           bfi(CC_NL,i0,r0,i1)
+#  define bungei_d(i0,r0,i1)           bdi(CC_NL,i0,r0,i1)
+#  define bunger_f_p(i0,r0,r1)         bfr_p(CC_NL,i0,r0,r1)
+#  define bunger_d_p(i0,r0,r1)         bdr_p(CC_NL,i0,r0,r1)
+#  define bungei_f_p(i0,r0,i1)         bfi_p(CC_NL,i0,r0,i1)
+#  define bungei_d_p(i0,r0,i1)         bdi_p(CC_NL,i0,r0,i1)
+#  define bungtr_f(i0,r0,r1)           bfr(CC_NLE,i0,r0,r1)
+#  define bungtr_d(i0,r0,r1)           bdr(CC_NLE,i0,r0,r1)
+#  define bungti_f(i0,r0,i1)           bfi(CC_NLE,i0,r0,i1)
+#  define bungti_d(i0,r0,i1)           bdi(CC_NLE,i0,r0,i1)
+#  define bungtr_f_p(i0,r0,r1)         bfr_p(CC_NLE,i0,r0,r1)
+#  define bungtr_d_p(i0,r0,r1)         bdr_p(CC_NLE,i0,r0,r1)
+#  define bungti_f_p(i0,r0,i1)         bfi_p(CC_NLE,i0,r0,i1)
+#  define bungti_d_p(i0,r0,i1)         bdi_p(CC_NLE,i0,r0,i1)
+#  define bltgtr_f(i0,r0,r1)           bltgtr(0,i0,r0,r1) /* first argument selects precision, as for buneqr */
+#  define bltgtr_d(i0,r0,r1)           bltgtr(1,i0,r0,r1)
+#  define bltgti_f(i0,r0,i1)           bltgti(0,i0,r0,i1)
+#  define bltgti_d(i0,r0,i1)           bltgti(1,i0,r0,i1)
+#  define bltgtr_f_p(i0,r0,r1)         bltgtr(0,i0,r0,r1) /* same helper as the non-_p form: bltgtr/bltgti always return the branch address */
+#  define bltgtr_d_p(i0,r0,r1)         bltgtr(1,i0,r0,r1)
+#  define bltgti_f_p(i0,r0,i1)         bltgti(0,i0,r0,i1)
+#  define bltgti_d_p(i0,r0,i1)         bltgti(1,i0,r0,i1)
+#  define bordr_f(i0,r0,r1)            bfr(CC_NO,i0,r0,r1)
+#  define bordr_d(i0,r0,r1)            bdr(CC_NO,i0,r0,r1)
+#  define bordi_f(i0,r0,i1)            bfi(CC_NO,i0,r0,i1)
+#  define bordi_d(i0,r0,i1)            bdi(CC_NO,i0,r0,i1)
+#  define bordr_f_p(i0,r0,r1)          bfr_p(CC_NO,i0,r0,r1)
+#  define bordr_d_p(i0,r0,r1)          bdr_p(CC_NO,i0,r0,r1)
+#  define bordi_f_p(i0,r0,i1)          bfi_p(CC_NO,i0,r0,i1)
+#  define bordi_d_p(i0,r0,i1)          bdi_p(CC_NO,i0,r0,i1)
+#  define bunordr_f(i0,r0,r1)          bfr(CC_O,i0,r0,r1)
+#  define bunordr_d(i0,r0,r1)          bdr(CC_O,i0,r0,r1)
+#  define bunordi_f(i0,r0,i1)          bfi(CC_O,i0,r0,i1)
+#  define bunordi_d(i0,r0,i1)          bdi(CC_O,i0,r0,i1)
+#  define bunordr_f_p(i0,r0,r1)                bfr_p(CC_O,i0,r0,r1)
+#  define bunordr_d_p(i0,r0,r1)                bdr_p(CC_O,i0,r0,r1)
+#  define bunordi_f_p(i0,r0,i1)                bfi_p(CC_O,i0,r0,i1)
+#  define bunordi_d_p(i0,r0,i1)                bdi_p(CC_O,i0,r0,i1)
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1) /* NOTE(review): _vaarg_d is defined outside this section */
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * Emit a single-precision operation whose right-hand operand is the
+ * immediate pointed to by i0.  The constant is loaded into a temporary
+ * register of class jit_class_fpr and the register-register form selected
+ * by `code` is emitted with r0 as destination and r1 as left operand.
+ * Any code not listed below aborts.
+ */
+static void
+_fp(jit_state_t *_jit, jit_code_t code,
+    jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(tmp), i0);
+    switch (code) {
+        case jit_code_addi_f:
+            addr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_subi_f:
+            subr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_rsbi_f:
+            rsbr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_muli_f:
+            mulr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_divi_f:
+            divr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_uneqi_f:
+            uneqr_f(r0, r1, rn(tmp));
+            break;
+        case jit_code_ltgti_f:
+            ltgtr_f(r0, r1, rn(tmp));
+            break;
+        default:
+            abort();
+    }
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a double-precision operation whose right-hand operand is the
+ * immediate pointed to by i0.  The constant is loaded into a temporary
+ * register of class jit_class_fpr and the register-register form selected
+ * by `code` is emitted with r0 as destination and r1 as left operand.
+ * Any code not listed below aborts.
+ */
+static void
+_dp(jit_state_t *_jit, jit_code_t code,
+    jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(tmp), i0);
+    switch (code) {
+        case jit_code_addi_d:
+            addr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_subi_d:
+            subr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_rsbi_d:
+            rsbr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_muli_d:
+            mulr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_divi_d:
+            divr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_uneqi_d:
+            uneqr_d(r0, r1, rn(tmp));
+            break;
+        case jit_code_ltgti_d:
+            ltgtr_d(r0, r1, rn(tmp));
+            break;
+        default:
+            abort();
+    }
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Set r0 to 1 when the single-precision comparison of r1 with r2 matches
+ * condition mask cc, and to 0 otherwise.
+ */
+static void
+_fr(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          skip;
+
+    /* Start from "true"; the branch below jumps over the reset to 0. */
+    LGHI(r0, 1);
+    CEBR(r1, r2);
+    skip = _jit->pc.w;
+    BRC(cc, 0);                 /* displacement is patched just below */
+    LGHI(r0, 0);
+    patch_at(skip, _jit->pc.w);
+}
+
+/*
+ * Set r0 to 1 when the double-precision comparison of r1 with r2 matches
+ * condition mask cc, and to 0 otherwise.
+ */
+static void
+_dr(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t          skip;
+
+    /* Start from "true"; the branch below jumps over the reset to 0. */
+    LGHI(r0, 1);
+    CDBR(r1, r2);
+    skip = _jit->pc.w;
+    BRC(cc, 0);                 /* displacement is patched just below */
+    LGHI(r0, 0);
+    patch_at(skip, _jit->pc.w);
+}
+
+/*
+ * Immediate variant of _fr(): load the float constant *i0 into a temporary
+ * floating-point register and compare r1 against it under mask cc.
+ */
+static void
+_fi(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_f(rn(tmp), i0);
+    fr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate variant of _dr(): load the double constant *i0 into a temporary
+ * floating-point register and compare r1 against it under mask cc.
+ */
+static void
+_di(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_d(rn(tmp), i0);
+    dr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+
+/*
+ * Compare r0 with r1 (single precision) and branch to address i0 when the
+ * result matches condition mask cc.  The displacement is computed in
+ * halfwords; BRC is used when it passes s16_p(), BRCL otherwise (asserting
+ * that it passes s32_p()).
+ */
+static void
+_bfr(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          disp;
+
+    CEBR(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(disp)) {
+        assert(s32_p(disp));
+        BRCL(cc, disp);
+        return;
+    }
+    BRC(cc, x16(disp));
+}
+
+/*
+ * Compare r0 with r1 (double precision) and branch to address i0 when the
+ * result matches condition mask cc.  The displacement is computed in
+ * halfwords; BRC is used when it passes s16_p(), BRCL otherwise (asserting
+ * that it passes s32_p()).
+ */
+static void
+_bdr(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          disp;
+
+    CDBR(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (!s16_p(disp)) {
+        assert(s32_p(disp));
+        BRCL(cc, disp);
+        return;
+    }
+    BRC(cc, x16(disp));
+}
+
+/*
+ * Patchable single-precision compare-and-branch: emits CEBR followed by a
+ * BRCL with a zero displacement and returns the address of that BRCL.
+ * i0 is not used here.
+ */
+static jit_word_t
+_bfr_p(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          branch;
+
+    CEBR(r0, r1);
+    branch = _jit->pc.w;
+    BRCL(cc, 0);
+    return (branch);
+}
+
+/*
+ * Patchable double-precision compare-and-branch: emits CDBR followed by a
+ * BRCL with a zero displacement and returns the address of that BRCL.
+ * i0 is not used here.
+ */
+static jit_word_t
+_bdr_p(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          branch;
+
+    CDBR(r0, r1);
+    branch = _jit->pc.w;
+    BRCL(cc, 0);
+    return (branch);
+}
+
+/*
+ * Branch to i0 when the single-precision comparison of r0 against the
+ * immediate *i1 matches condition mask cc.
+ *
+ * The immediate is loaded with movi_f() and then used by bfr() as the
+ * second CEBR operand, so the temporary must be allocated from the
+ * floating-point class, as _fi() and _buneqi() do.  Requesting a
+ * general-purpose register would make the generated code write and compare
+ * a floating-point register that was never reserved.
+ */
+static void
+_bfi(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float32_t *i1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_f(rn(reg), i1);
+    bfr(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Branch to i0 when the double-precision comparison of r0 against the
+ * immediate *i1 matches condition mask cc.
+ *
+ * The immediate is loaded with movi_d() and then used by bdr() as the
+ * second CDBR operand, so the temporary must be allocated from the
+ * floating-point class, as _di() and _buneqi() do.  Requesting a
+ * general-purpose register would make the generated code write and compare
+ * a floating-point register that was never reserved.
+ */
+static void
+_bdi(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float64_t *i1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_d(rn(reg), i1);
+    bdr(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Patchable variant of the single-precision compare-immediate-and-branch:
+ * loads *i1 into a temporary, emits the compare and branch through bfr_p()
+ * and returns the branch address that bfr_p() reports.
+ *
+ * The temporary receives a float through movi_f() and is used as a CEBR
+ * operand, so it must be allocated from the floating-point class (as _fi()
+ * and _buneqi() do), not from the general-purpose class.
+ */
+static jit_word_t
+_bfi_p(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float32_t *i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_f(rn(reg), i1);
+    w = bfr_p(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/*
+ * Patchable variant of the double-precision compare-immediate-and-branch:
+ * loads *i1 into a temporary, emits the compare and branch through bdr_p()
+ * and returns the branch address that bdr_p() reports.
+ *
+ * The temporary receives a double through movi_d() and is used as a CDBR
+ * operand, so it must be allocated from the floating-point class (as _di()
+ * and _buneqi() do), not from the general-purpose class.
+ */
+static jit_word_t
+_bdi_p(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float64_t *i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_d(rn(reg), i1);
+    w = bdr_p(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/*
+ * Branch to i0 when r0 and r1 compare unordered or equal.  db selects the
+ * double (non-zero) or float (zero) compare.  Returns the address of the
+ * emitted BRCL.
+ *
+ * Emitted sequence: compare; a CC_O branch to the BRCL; a CC_NE branch
+ * past the BRCL; the unconditional BRCL to i0.
+ */
+static jit_word_t
+_buneqr(jit_state_t *_jit, jit_int32_t db,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          on_unord;
+    jit_word_t          on_ne;
+    jit_word_t          taken;
+
+    if (db) {
+        CDBR(r0, r1);
+    }
+    else {
+        CEBR(r0, r1);
+    }
+    /* Unordered satisfies the condition: go straight to the branch. */
+    on_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+    /* Not equal does not satisfy the condition: skip the branch. */
+    on_ne = _jit->pc.w;
+    BRC(CC_NE, 0);
+    patch_at(on_unord, _jit->pc.w);
+    taken = _jit->pc.w;
+    BRCL(CC_AL, (i0 - _jit->pc.w) >> 1);
+    patch_at(on_ne, _jit->pc.w);
+    return (taken);
+}
+
+/*
+ * Immediate variant of _buneqr(): i1 carries the address of the constant,
+ * interpreted as jit_float64_t* when db is non-zero and as jit_float32_t*
+ * otherwise.  Returns whatever buneqr() returns.
+ */
+static jit_word_t
+_buneqi(jit_state_t *_jit, jit_int32_t db,
+        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t          taken;
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    if (!db) {
+        movi_f(rn(tmp), (jit_float32_t *)i1);
+    }
+    else {
+        movi_d(rn(tmp), (jit_float64_t *)i1);
+    }
+    taken = buneqr(db, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (taken);
+}
+
+/*
+ * Branch to i0 when r0 and r1 compare neither unordered nor equal.  db
+ * selects the double (non-zero) or float (zero) compare.  Returns the
+ * address of the emitted BRCL.
+ *
+ * Emitted sequence: compare; a CC_O branch and a CC_E branch that both
+ * land past the BRCL; the unconditional BRCL to i0.
+ */
+static jit_word_t
+_bltgtr(jit_state_t *_jit, jit_int32_t db,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          on_unord;
+    jit_word_t          on_eq;
+    jit_word_t          taken;
+
+    if (db) {
+        CDBR(r0, r1);
+    }
+    else {
+        CEBR(r0, r1);
+    }
+    /* Unordered does not satisfy the condition: skip the branch. */
+    on_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+    /* Equal does not satisfy the condition either. */
+    on_eq = _jit->pc.w;
+    BRC(CC_E, 0);
+    taken = _jit->pc.w;
+    BRCL(CC_AL, (i0 - _jit->pc.w) >> 1);
+    patch_at(on_unord, _jit->pc.w);
+    patch_at(on_eq, _jit->pc.w);
+    return (taken);
+}
+
+/*
+ * Immediate variant of _bltgtr(): i1 carries the address of the constant,
+ * interpreted as jit_float64_t* when db is non-zero and as jit_float32_t*
+ * otherwise.  Returns whatever bltgtr() returns.
+ */
+static jit_word_t
+_bltgti(jit_state_t *_jit, jit_int32_t db,
+        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t          taken;
+    jit_int32_t         tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    if (!db) {
+        movi_f(rn(tmp), (jit_float32_t *)i1);
+    }
+    else {
+        movi_d(rn(tmp), (jit_float64_t *)i1);
+    }
+    taken = bltgtr(db, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (taken);
+}
+
+/* Copy float register r1 into r0; nothing is emitted when they coincide. */
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    LER(r0, r1);
+}
+
+/*
+ * Load the float constant *i0 into r0.
+ *
+ *  - an all-zero bit pattern is produced with LZER;
+ *  - when _jitc->no_data is set, the bits are built in an integer register,
+ *    stored at _FP_REGNO - 4 and reloaded from there;
+ *  - otherwise the value is loaded directly from the address i0.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+        jit_int32_t      i;
+        jit_float32_t    f;
+    } bits;
+    jit_int32_t          tmp;
+
+    /* Tested on the integer image, so only the all-zero pattern matches. */
+    if (*(jit_int32_t *)i0 == 0) {
+        LZER(r0);
+        return;
+    }
+    if (!_jitc->no_data) {
+        ldi_f(r0, (jit_word_t)i0);
+        return;
+    }
+    bits.f = *i0;
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), bits.i & 0xffffffff);
+    stxi_i(-4, _FP_REGNO, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+    ldxi_f(r0, _FP_REGNO, -4);
+}
+
+/* Copy double register r1 into r0; nothing is emitted when they coincide. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    LDR(r0, r1);
+}
+
+/*
+ * Load the double constant *i0 into r0.
+ *
+ *  - an all-zero bit pattern is produced with LZDR;
+ *  - when _jitc->no_data is set, the bits are built in an integer register,
+ *    stored at _FP_REGNO - 8 (as two 32-bit stores on 32-bit words, one
+ *    64-bit store otherwise) and reloaded from there;
+ *  - otherwise the value is loaded directly from the address i0.
+ */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+#if __WORDSIZE == 32
+        jit_int32_t      i[2];
+#else
+        jit_int64_t      l;
+#endif
+        jit_float64_t    d;
+    } bits;
+    jit_int32_t          tmp;
+
+    /* Tested on the integer image, so only the all-zero pattern matches. */
+    if (*(jit_int64_t *)i0 == 0) {
+        LZDR(r0);
+        return;
+    }
+    if (!_jitc->no_data) {
+        ldi_d(r0, (jit_word_t)i0);
+        return;
+    }
+    bits.d = *i0;
+    tmp = jit_get_reg_but_zero(0);
+#if __WORDSIZE == 32
+    movi(rn(tmp), bits.i[0]);
+    stxi_i(-8, _FP_REGNO, rn(tmp));
+    movi(rn(tmp), bits.i[1]);
+    stxi_i(-4, _FP_REGNO, rn(tmp));
+#else
+    movi(rn(tmp), bits.l);
+    stxi_l(-8, _FP_REGNO, rn(tmp));
+#endif
+    jit_unget_reg_but_zero(tmp);
+    ldxi_d(r0, _FP_REGNO, -8);
+}
+
+/*
+ * r0 = r1 + r2 (float) using the two-operand AEBR.  When r0 aliases r2 the
+ * operands are simply swapped, since r0 already holds one addend.
+ */
+static void
+_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr_f(r0, r1);
+        AEBR(r0, r2);
+        return;
+    }
+    AEBR(r0, r1);
+}
+
+/*
+ * r0 = r1 + r2 (double) using the two-operand ADBR.  When r0 aliases r2 the
+ * operands are simply swapped, since r0 already holds one addend.
+ */
+static void
+_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+        movr_d(r0, r1);
+        ADBR(r0, r2);
+        return;
+    }
+    ADBR(r0, r1);
+}
+
+/* Emit r0 = r1 - r2 (float).  When r0 aliases r2, the subtrahend is
+ * saved in a temporary FPR before r0 is overwritten with r1. */
+static void
+_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                saved;
+
+    if (r0 != r2) {
+       movr_f(r0, r1);
+       SEBR(r0, r2);
+       return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_f(rn(saved), r2);
+    movr_f(r0, r1);
+    SEBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Emit r0 = r1 - r2 (double).  When r0 aliases r2, the subtrahend is
+ * saved in a temporary FPR before r0 is overwritten with r1. */
+static void
+_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                saved;
+
+    if (r0 != r2) {
+       movr_d(r0, r1);
+       SDBR(r0, r2);
+       return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_d(rn(saved), r2);
+    movr_d(r0, r1);
+    SDBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Emit r0 = r1 * r2 (float).  MEEBR multiplies its first operand by
+ * its second in place, so when r0 already aliases r2 it is multiplied
+ * by r1; otherwise r1 is copied into r0 and multiplied by r2. */
+static void
+_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                rhs;
+
+    if (r0 == r2)
+       rhs = r1;
+    else {
+       movr_f(r0, r1);
+       rhs = r2;
+    }
+    MEEBR(r0, rhs);
+}
+
+/* Emit r0 = r1 * r2 (double).  MDBR multiplies its first operand by
+ * its second in place, so when r0 already aliases r2 it is multiplied
+ * by r1; otherwise r1 is copied into r0 and multiplied by r2. */
+static void
+_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                rhs;
+
+    if (r0 == r2)
+       rhs = r1;
+    else {
+       movr_d(r0, r1);
+       rhs = r2;
+    }
+    MDBR(r0, rhs);
+}
+
+/* Emit r0 = r1 / r2 (float).  When r0 aliases r2, the divisor is
+ * saved in a temporary FPR before r0 is overwritten with r1. */
+static void
+_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                saved;
+
+    if (r0 != r2) {
+       movr_f(r0, r1);
+       DEBR(r0, r2);
+       return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_f(rn(saved), r2);
+    movr_f(r0, r1);
+    DEBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Emit r0 = r1 / r2 (double).  When r0 aliases r2, the divisor is
+ * saved in a temporary FPR before r0 is overwritten with r1. */
+static void
+_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                saved;
+
+    if (r0 != r2) {
+       movr_d(r0, r1);
+       DDBR(r0, r2);
+       return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_d(rn(saved), r2);
+    movr_d(r0, r1);
+    DDBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Emit a float load into r0 from the absolute address i0.  The address
+ * is first materialized in a temporary register obtained with
+ * jit_get_reg_but_zero(). */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movi(rn(scratch), i0);
+    ldr_f(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double load into r0 from the absolute address i0.  The address
+ * is first materialized in a temporary register obtained with
+ * jit_get_reg_but_zero(). */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movi(rn(scratch), i0);
+    ldr_d(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a float load into r0 from the address r1 + r2.  The sum is
+ * computed in a temporary register so neither input is modified. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movr(rn(scratch), r1);
+    addr(rn(scratch), rn(scratch), r2);
+    ldr_f(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double load into r0 from the address r1 + r2.  The sum is
+ * computed in a temporary register so neither input is modified. */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movr(rn(scratch), r1);
+    addr(rn(scratch), rn(scratch), r2);
+    ldr_d(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a float load into r0 from the address r1 + i0.  LE is used when
+ * u12_p(i0) holds and LEY when s20_p(i0) holds; any other displacement
+ * is added to r1 in a temporary register and loaded through ldr_f. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+
+    if (u12_p(i0)) {
+       LE(r0, i0, 0, r1);
+       return;
+    }
+    if (s20_p(i0)) {
+       LEY(r0, x20(i0), 0, r1);
+       return;
+    }
+    scratch = jit_get_reg_but_zero(0);
+    movi(rn(scratch), i0);
+    addr(rn(scratch), rn(scratch), r1);
+    ldr_f(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double load into r0 from the address r1 + i0.  LD is used when
+ * u12_p(i0) holds and LDY when s20_p(i0) holds; any other displacement
+ * is added to r1 in a temporary register and loaded through ldr_d. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+
+    if (u12_p(i0)) {
+       LD(r0, i0, 0, r1);
+       return;
+    }
+    if (s20_p(i0)) {
+       LDY(r0, x20(i0), 0, r1);
+       return;
+    }
+    scratch = jit_get_reg_but_zero(0);
+    movi(rn(scratch), i0);
+    addr(rn(scratch), rn(scratch), r1);
+    ldr_d(r0, rn(scratch));
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a float store of r0 to the absolute address i0.  The address is
+ * first materialized in a temporary register obtained with
+ * jit_get_reg_but_zero(). */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movi(rn(scratch), i0);
+    str_f(rn(scratch), r0);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double store of r0 to the absolute address i0.  The address is
+ * first materialized in a temporary register obtained with
+ * jit_get_reg_but_zero(). */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movi(rn(scratch), i0);
+    str_d(rn(scratch), r0);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a float store of r2 to the address r0 + r1.  The sum is computed
+ * in a temporary register so neither address input is modified. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movr(rn(scratch), r0);
+    addr(rn(scratch), rn(scratch), r1);
+    str_f(rn(scratch), r2);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double store of r2 to the address r0 + r1.  The sum is computed
+ * in a temporary register so neither address input is modified. */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch = jit_get_reg_but_zero(0);
+
+    movr(rn(scratch), r0);
+    addr(rn(scratch), rn(scratch), r1);
+    str_d(rn(scratch), r2);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a float store of r1 to the address r0 + i0.  STE is used when
+ * u12_p(i0) holds and STEY when s20_p(i0) holds; any other displacement
+ * is added to r0 in a temporary register and stored through str_f. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                scratch;
+
+    if (u12_p(i0)) {
+       STE(r1, i0, 0, r0);
+       return;
+    }
+    if (s20_p(i0)) {
+       STEY(r1, x20(i0), 0, r0);
+       return;
+    }
+    scratch = jit_get_reg_but_zero(0);
+    movi(rn(scratch), i0);
+    addr(rn(scratch), rn(scratch), r0);
+    str_f(rn(scratch), r1);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit a double store of r1 to the address r0 + i0.  STD is used when
+ * u12_p(i0) holds and STDY when s20_p(i0) holds; any other displacement
+ * is added to r0 in a temporary register and stored through str_d. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                scratch;
+
+    if (u12_p(i0)) {
+       STD(r1, i0, 0, r0);
+       return;
+    }
+    if (s20_p(i0)) {
+       STDY(r1, x20(i0), 0, r0);
+       return;
+    }
+    scratch = jit_get_reg_but_zero(0);
+    movi(rn(scratch), i0);
+    addr(rn(scratch), rn(scratch), r0);
+    str_d(rn(scratch), r1);
+    jit_unget_reg_but_zero(scratch);
+}
+
+/* Emit r0 = (r1 and r2 are unordered, or r1 == r2) for floats.
+ * r0 is preset to 1; two forward branches taken on the unordered and
+ * equal condition codes skip the instruction that clears it. */
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip_unord;
+    jit_word_t         skip_eq;
+
+    /* Assume the result is true. */
+    movi(r0, 1);
+    CEBR(r1, r2);
+
+    /* Unordered: result stays 1. */
+    skip_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+
+    /* Equal: result stays 1. */
+    skip_eq = _jit->pc.w;
+    BRC(CC_E, 0);
+
+    /* Neither condition held: result is 0. */
+    movi(r0, 0);
+
+    /* Both branches land here. */
+    patch_at(skip_unord, _jit->pc.w);
+    patch_at(skip_eq, _jit->pc.w);
+}
+
+/* Emit r0 = (r1 and r2 are unordered, or r1 == r2) for doubles.
+ * r0 is preset to 1; two forward branches taken on the unordered and
+ * equal condition codes skip the instruction that clears it. */
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip_unord;
+    jit_word_t         skip_eq;
+
+    /* Assume the result is true. */
+    movi(r0, 1);
+    CDBR(r1, r2);
+
+    /* Unordered: result stays 1. */
+    skip_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+
+    /* Equal: result stays 1. */
+    skip_eq = _jit->pc.w;
+    BRC(CC_E, 0);
+
+    /* Neither condition held: result is 0. */
+    movi(r0, 0);
+
+    /* Both branches land here. */
+    patch_at(skip_unord, _jit->pc.w);
+    patch_at(skip_eq, _jit->pc.w);
+}
+
+/* Emit r0 = (r1 < r2 or r1 > r2) for floats, i.e. ordered and not
+ * equal.  r0 is preset to 0; two forward branches taken on the
+ * unordered and equal condition codes skip the instruction that sets
+ * it to 1. */
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip_unord;
+    jit_word_t         skip_eq;
+
+    /* Assume the result is false. */
+    movi(r0, 0);
+    CEBR(r1, r2);
+
+    /* Unordered: result stays 0. */
+    skip_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+
+    /* Equal: result stays 0. */
+    skip_eq = _jit->pc.w;
+    BRC(CC_E, 0);
+
+    /* Neither condition held: result is 1. */
+    movi(r0, 1);
+
+    /* Both branches land here. */
+    patch_at(skip_unord, _jit->pc.w);
+    patch_at(skip_eq, _jit->pc.w);
+}
+
+/* Emit r0 = (r1 < r2 or r1 > r2) for doubles, i.e. ordered and not
+ * equal.  r0 is preset to 0; two forward branches taken on the
+ * unordered and equal condition codes skip the instruction that sets
+ * it to 1. */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         skip_unord;
+    jit_word_t         skip_eq;
+
+    /* Assume the result is false. */
+    movi(r0, 0);
+    CDBR(r1, r2);
+
+    /* Unordered: result stays 0. */
+    skip_unord = _jit->pc.w;
+    BRC(CC_O, 0);
+
+    /* Equal: result stays 0. */
+    skip_eq = _jit->pc.w;
+    BRC(CC_E, 0);
+
+    /* Neither condition held: result is 1. */
+    movi(r0, 1);
+
+    /* Both branches land here. */
+    patch_at(skip_unord, _jit->pc.w);
+    patch_at(skip_eq, _jit->pc.w);
+}
+
+/* Emit code fetching the next double variadic argument.
+ *
+ *   r0  destination floating-point register
+ *   r1  register holding the address of a jit_va_list_t
+ *
+ * The generated code reads the fpoff field.  While it is below
+ * NUM_FLOAT_REG_ARGS the value is loaded from the save area at byte
+ * offset fpoff * 8 + 16 * sizeof(jit_word_t) and fpoff is incremented;
+ * otherwise it is loaded from the overflow pointer, which is then
+ * advanced by sizeof(jit_float64_t).
+ *
+ * Must only be used while the current function is flagged
+ * jit_call_varargs (asserted). */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_int32_t                rg2;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg_but_zero(jit_class_gpr);
+    rg1 = jit_get_reg_but_zero(jit_class_gpr);
+
+    /* Load the fp offset in save area in the first temporary. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei_p(_jit->pc.w, rn(rg0), NUM_FLOAT_REG_ARGS);
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Scale offset. */
+    rg2 = jit_get_reg_but_zero(0);
+    lshi(rn(rg2), rn(rg0), 3);
+    /* Add offset to saved area */
+    addi(rn(rg2), rn(rg2), 16 * sizeof(jit_word_t));
+
+    /* Load the vararg argument in the first argument. */
+    ldxr_d(r0, rn(rg1), rn(rg2));
+    jit_unget_reg_but_zero(rg2);
+
+    /* Update the fp offset. */
+    addi(rn(rg0), rn(rg0), 1);
+    stxi(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg_but_zero(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    ldr_d(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), sizeof(jit_float64_t));
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    /* rg0 is shared by both paths, so it is released only here. */
+    jit_unget_reg_but_zero(rg0);
+}
+#endif
diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c
new file mode 100644 (file)
index 0000000..bb8b2dc
--- /dev/null
@@ -0,0 +1,804 @@
+
+#if __WORDSIZE == 32
+/* Equals the largest entry of the 32-bit table that follows (104, the
+ * va_arg row).
+ * NOTE(review): entries look like per-instruction maximum code sizes in
+ * bytes -- confirm against the file that includes this table. */
+#define JIT_INSTR_MAX 104
+    0, /* data */
+    0, /* live */
+    6, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    2, /* label */
+    42,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    44,        /* va_start */
+    104,       /* va_arg */
+    100,       /* va_arg_d */
+    0, /* va_end */
+    8, /* addr */
+    24,        /* addi */
+    8, /* addcr */
+    20,        /* addci */
+    8, /* addxr */
+    12,        /* addxi */
+    12,        /* subr */
+    24,        /* subi */
+    12,        /* subcr */
+    20,        /* subci */
+    12,        /* subxr */
+    12,        /* subxi */
+    28,        /* rsbi */
+    8, /* mulr */
+    24,        /* muli */
+    60,        /* qmulr */
+    68,        /* qmuli */
+    16,        /* qmulr_u */
+    32,        /* qmuli_u */
+    12,        /* divr */
+    28,        /* divi */
+    16,        /* divr_u */
+    32,        /* divi_u */
+    16,        /* qdivr */
+    20,        /* qdivi */
+    20,        /* qdivr_u */
+    24,        /* qdivi_u */
+    12,        /* remr */
+    28,        /* remi */
+    16,        /* remr_u */
+    32,        /* remi_u */
+    8, /* andr */
+    20,        /* andi */
+    8, /* orr */
+    20,        /* ori */
+    8, /* xorr */
+    24,        /* xori */
+    6, /* lshr */
+    10,        /* lshi */
+    6, /* rshr */
+    10,        /* rshi */
+    6, /* rshr_u */
+    10,        /* rshi_u */
+    4, /* negr */
+    12,        /* comr */
+    20,        /* ltr */
+    24,        /* lti */
+    20,        /* ltr_u */
+    24,        /* lti_u */
+    20,        /* ler */
+    24,        /* lei */
+    20,        /* ler_u */
+    24,        /* lei_u */
+    20,        /* eqr */
+    24,        /* eqi */
+    20,        /* ger */
+    24,        /* gei */
+    20,        /* ger_u */
+    24,        /* gei_u */
+    20,        /* gtr */
+    24,        /* gti */
+    20,        /* gtr_u */
+    24,        /* gti_u */
+    20,        /* ner */
+    24,        /* nei */
+    4, /* movr */
+    16,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    4, /* htonr_ul */
+    6, /* ldr_c */
+    18,        /* ldi_c */
+    6, /* ldr_uc */
+    18,        /* ldi_uc */
+    6, /* ldr_s */
+    18,        /* ldi_s */
+    6, /* ldr_us */
+    18,        /* ldi_us */
+    6, /* ldr_i */
+    18,        /* ldi_i */
+    6, /* ldr_ui */
+    18,        /* ldi_ui */
+    6, /* ldr_l */
+    18,        /* ldi_l */
+    14,        /* ldxr_c */
+    26,        /* ldxi_c */
+    14,        /* ldxr_uc */
+    26,        /* ldxi_uc */
+    14,        /* ldxr_s */
+    26,        /* ldxi_s */
+    14,        /* ldxr_us */
+    26,        /* ldxi_us */
+    14,        /* ldxr_i */
+    26,        /* ldxi_i */
+    14,        /* ldxr_ui */
+    26,        /* ldxi_ui */
+    14,        /* ldxr_l */
+    26,        /* ldxi_l */
+    4, /* str_c */
+    16,        /* sti_c */
+    4, /* str_s */
+    16,        /* sti_s */
+    4, /* str_i */
+    16,        /* sti_i */
+    6, /* str_l */
+    18,        /* sti_l */
+    12,        /* stxr_c */
+    28,        /* stxi_c */
+    12,        /* stxr_s */
+    28,        /* stxi_s */
+    12,        /* stxr_i */
+    28,        /* stxi_i */
+    14,        /* stxr_l */
+    30,        /* stxi_l */
+    10,        /* bltr */
+    14,        /* blti */
+    10,        /* bltr_u */
+    14,        /* blti_u */
+    10,        /* bler */
+    14,        /* blei */
+    10,        /* bler_u */
+    14,        /* blei_u */
+    10,        /* beqr */
+    26,        /* beqi */
+    10,        /* bger */
+    14,        /* bgei */
+    10,        /* bger_u */
+    14,        /* bgei_u */
+    10,        /* bgtr */
+    14,        /* bgti */
+    10,        /* bgtr_u */
+    14,        /* bgti_u */
+    10,        /* bner */
+    26,        /* bnei */
+    18,        /* bmsr */
+    18,        /* bmsi */
+    18,        /* bmcr */
+    18,        /* bmci */
+    10,        /* boaddr */
+    14,        /* boaddi */
+    10,        /* boaddr_u */
+    14,        /* boaddi_u */
+    10,        /* bxaddr */
+    14,        /* bxaddi */
+    10,        /* bxaddr_u */
+    14,        /* bxaddi_u */
+    10,        /* bosubr */
+    14,        /* bosubi */
+    10,        /* bosubr_u */
+    14,        /* bosubi_u */
+    10,        /* bxsubr */
+    14,        /* bxsubi */
+    10,        /* bxsubr_u */
+    14,        /* bxsubi_u */
+    2, /* jmpr */
+    18,        /* jmpi */
+    2, /* callr */
+    18,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    40,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    6, /* addr_f */
+    26,        /* addi_f */
+    8, /* subr_f */
+    26,        /* subi_f */
+    28,        /* rsbi_f */
+    6, /* mulr_f */
+    26,        /* muli_f */
+    8, /* divr_f */
+    26,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    36,        /* lti_f */
+    16,        /* ler_f */
+    36,        /* lei_f */
+    16,        /* eqr_f */
+    36,        /* eqi_f */
+    16,        /* ger_f */
+    36,        /* gei_f */
+    16,        /* gtr_f */
+    36,        /* gti_f */
+    16,        /* ner_f */
+    36,        /* nei_f */
+    16,        /* unltr_f */
+    36,        /* unlti_f */
+    16,        /* unler_f */
+    36,        /* unlei_f */
+    20,        /* uneqr_f */
+    40,        /* uneqi_f */
+    16,        /* unger_f */
+    36,        /* ungei_f */
+    16,        /* ungtr_f */
+    36,        /* ungti_f */
+    20,        /* ltgtr_f */
+    40,        /* ltgti_f */
+    16,        /* ordr_f */
+    36,        /* ordi_f */
+    16,        /* unordr_f */
+    36,        /* unordi_f */
+    4, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    2, /* movr_f */
+    20,        /* movi_f */
+    4, /* ldr_f */
+    16,        /* ldi_f */
+    12,        /* ldxr_f */
+    24,        /* ldxi_f */
+    4, /* str_f */
+    16,        /* sti_f */
+    12,        /* stxr_f */
+    24,        /* stxi_f */
+    10,        /* bltr_f */
+    30,        /* blti_f */
+    10,        /* bler_f */
+    30,        /* blei_f */
+    10,        /* beqr_f */
+    30,        /* beqi_f */
+    10,        /* bger_f */
+    30,        /* bgei_f */
+    10,        /* bgtr_f */
+    30,        /* bgti_f */
+    10,        /* bner_f */
+    30,        /* bnei_f */
+    10,        /* bunltr_f */
+    30,        /* bunlti_f */
+    10,        /* bunler_f */
+    30,        /* bunlei_f */
+    18,        /* buneqr_f */
+    38,        /* buneqi_f */
+    10,        /* bunger_f */
+    30,        /* bungei_f */
+    10,        /* bungtr_f */
+    30,        /* bungti_f */
+    18,        /* bltgtr_f */
+    38,        /* bltgti_f */
+    10,        /* bordr_f */
+    30,        /* bordi_f */
+    10,        /* bunordr_f */
+    30,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    6, /* addr_d */
+    26,        /* addi_d */
+    8, /* subr_d */
+    26,        /* subi_d */
+    28,        /* rsbi_d */
+    6, /* mulr_d */
+    26,        /* muli_d */
+    8, /* divr_d */
+    26,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    36,        /* lti_d */
+    16,        /* ler_d */
+    36,        /* lei_d */
+    16,        /* eqr_d */
+    36,        /* eqi_d */
+    16,        /* ger_d */
+    36,        /* gei_d */
+    16,        /* gtr_d */
+    36,        /* gti_d */
+    16,        /* ner_d */
+    36,        /* nei_d */
+    16,        /* unltr_d */
+    36,        /* unlti_d */
+    16,        /* unler_d */
+    36,        /* unlei_d */
+    20,        /* uneqr_d */
+    40,        /* uneqi_d */
+    16,        /* unger_d */
+    36,        /* ungei_d */
+    16,        /* ungtr_d */
+    36,        /* ungti_d */
+    20,        /* ltgtr_d */
+    40,        /* ltgti_d */
+    16,        /* ordr_d */
+    36,        /* ordi_d */
+    16,        /* unordr_d */
+    36,        /* unordi_d */
+    4, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    2, /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    16,        /* ldi_d */
+    12,        /* ldxr_d */
+    24,        /* ldxi_d */
+    4, /* str_d */
+    16,        /* sti_d */
+    12,        /* stxr_d */
+    24,        /* stxi_d */
+    10,        /* bltr_d */
+    30,        /* blti_d */
+    10,        /* bler_d */
+    30,        /* blei_d */
+    10,        /* beqr_d */
+    34,        /* beqi_d */
+    10,        /* bger_d */
+    30,        /* bgei_d */
+    10,        /* bgtr_d */
+    30,        /* bgti_d */
+    10,        /* bner_d */
+    30,        /* bnei_d */
+    10,        /* bunltr_d */
+    30,        /* bunlti_d */
+    10,        /* bunler_d */
+    30,        /* bunlei_d */
+    18,        /* buneqr_d */
+    38,        /* buneqi_d */
+    10,        /* bunger_d */
+    30,        /* bungei_d */
+    10,        /* bungtr_d */
+    30,        /* bungti_d */
+    18,        /* bltgtr_d */
+    38,        /* bltgti_d */
+    10,        /* bordr_d */
+    30,        /* bordi_d */
+    10,        /* bunordr_d */
+    30,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+/* Equals the largest entry of the 64-bit table that follows (104, the
+ * va_arg row).
+ * NOTE(review): entries look like per-instruction maximum code sizes in
+ * bytes -- confirm against the file that includes this table. */
+#define JIT_INSTR_MAX 104
+    0, /* data */
+    0, /* live */
+    6, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    2, /* label */
+    42,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    44,        /* va_start */
+    104,       /* va_arg */
+    100,       /* va_arg_d */
+    0, /* va_end */
+    8, /* addr */
+    24,        /* addi */
+    8, /* addcr */
+    20,        /* addci */
+    8, /* addxr */
+    12,        /* addxi */
+    12,        /* subr */
+    24,        /* subi */
+    12,        /* subcr */
+    20,        /* subci */
+    12,        /* subxr */
+    12,        /* subxi */
+    28,        /* rsbi */
+    8, /* mulr */
+    24,        /* muli */
+    60,        /* qmulr */
+    68,        /* qmuli */
+    16,        /* qmulr_u */
+    32,        /* qmuli_u */
+    12,        /* divr */
+    28,        /* divi */
+    16,        /* divr_u */
+    32,        /* divi_u */
+    16,        /* qdivr */
+    20,        /* qdivi */
+    20,        /* qdivr_u */
+    24,        /* qdivi_u */
+    12,        /* remr */
+    28,        /* remi */
+    16,        /* remr_u */
+    32,        /* remi_u */
+    8, /* andr */
+    20,        /* andi */
+    8, /* orr */
+    20,        /* ori */
+    8, /* xorr */
+    24,        /* xori */
+    6, /* lshr */
+    10,        /* lshi */
+    6, /* rshr */
+    10,        /* rshi */
+    6, /* rshr_u */
+    10,        /* rshi_u */
+    4, /* negr */
+    12,        /* comr */
+    20,        /* ltr */
+    24,        /* lti */
+    20,        /* ltr_u */
+    24,        /* lti_u */
+    20,        /* ler */
+    24,        /* lei */
+    20,        /* ler_u */
+    24,        /* lei_u */
+    20,        /* eqr */
+    24,        /* eqi */
+    20,        /* ger */
+    24,        /* gei */
+    20,        /* ger_u */
+    24,        /* gei_u */
+    20,        /* gtr */
+    24,        /* gti */
+    20,        /* gtr_u */
+    24,        /* gti_u */
+    20,        /* ner */
+    24,        /* nei */
+    4, /* movr */
+    16,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
+    4, /* htonr_us */
+    4, /* htonr_ui */
+    4, /* htonr_ul */
+    6, /* ldr_c */
+    18,        /* ldi_c */
+    6, /* ldr_uc */
+    18,        /* ldi_uc */
+    6, /* ldr_s */
+    18,        /* ldi_s */
+    6, /* ldr_us */
+    18,        /* ldi_us */
+    6, /* ldr_i */
+    18,        /* ldi_i */
+    6, /* ldr_ui */
+    18,        /* ldi_ui */
+    6, /* ldr_l */
+    18,        /* ldi_l */
+    14,        /* ldxr_c */
+    26,        /* ldxi_c */
+    14,        /* ldxr_uc */
+    26,        /* ldxi_uc */
+    14,        /* ldxr_s */
+    26,        /* ldxi_s */
+    14,        /* ldxr_us */
+    26,        /* ldxi_us */
+    14,        /* ldxr_i */
+    26,        /* ldxi_i */
+    14,        /* ldxr_ui */
+    26,        /* ldxi_ui */
+    14,        /* ldxr_l */
+    26,        /* ldxi_l */
+    4, /* str_c */
+    16,        /* sti_c */
+    4, /* str_s */
+    16,        /* sti_s */
+    4, /* str_i */
+    16,        /* sti_i */
+    6, /* str_l */
+    18,        /* sti_l */
+    12,        /* stxr_c */
+    28,        /* stxi_c */
+    12,        /* stxr_s */
+    28,        /* stxi_s */
+    12,        /* stxr_i */
+    28,        /* stxi_i */
+    14,        /* stxr_l */
+    30,        /* stxi_l */
+    10,        /* bltr */
+    14,        /* blti */
+    10,        /* bltr_u */
+    14,        /* blti_u */
+    10,        /* bler */
+    14,        /* blei */
+    10,        /* bler_u */
+    14,        /* blei_u */
+    10,        /* beqr */
+    26,        /* beqi */
+    10,        /* bger */
+    14,        /* bgei */
+    10,        /* bger_u */
+    14,        /* bgei_u */
+    10,        /* bgtr */
+    14,        /* bgti */
+    10,        /* bgtr_u */
+    14,        /* bgti_u */
+    10,        /* bner */
+    26,        /* bnei */
+    18,        /* bmsr */
+    18,        /* bmsi */
+    18,        /* bmcr */
+    18,        /* bmci */
+    10,        /* boaddr */
+    14,        /* boaddi */
+    10,        /* boaddr_u */
+    14,        /* boaddi_u */
+    10,        /* bxaddr */
+    14,        /* bxaddi */
+    10,        /* bxaddr_u */
+    14,        /* bxaddi_u */
+    10,        /* bosubr */
+    14,        /* bosubi */
+    10,        /* bosubr_u */
+    14,        /* bosubi_u */
+    10,        /* bxsubr */
+    14,        /* bxsubi */
+    10,        /* bxsubr_u */
+    14,        /* bxsubi_u */
+    2, /* jmpr */
+    18,        /* jmpi */
+    2, /* callr */
+    18,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    40,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    6, /* addr_f */
+    26,        /* addi_f */
+    8, /* subr_f */
+    26,        /* subi_f */
+    28,        /* rsbi_f */
+    6, /* mulr_f */
+    26,        /* muli_f */
+    8, /* divr_f */
+    26,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    36,        /* lti_f */
+    16,        /* ler_f */
+    36,        /* lei_f */
+    16,        /* eqr_f */
+    36,        /* eqi_f */
+    16,        /* ger_f */
+    36,        /* gei_f */
+    16,        /* gtr_f */
+    36,        /* gti_f */
+    16,        /* ner_f */
+    36,        /* nei_f */
+    16,        /* unltr_f */
+    36,        /* unlti_f */
+    16,        /* unler_f */
+    36,        /* unlei_f */
+    20,        /* uneqr_f */
+    40,        /* uneqi_f */
+    16,        /* unger_f */
+    36,        /* ungei_f */
+    16,        /* ungtr_f */
+    36,        /* ungti_f */
+    20,        /* ltgtr_f */
+    40,        /* ltgti_f */
+    16,        /* ordr_f */
+    36,        /* ordi_f */
+    16,        /* unordr_f */
+    36,        /* unordi_f */
+    4, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    2, /* movr_f */
+    20,        /* movi_f */
+    4, /* ldr_f */
+    16,        /* ldi_f */
+    12,        /* ldxr_f */
+    24,        /* ldxi_f */
+    4, /* str_f */
+    16,        /* sti_f */
+    12,        /* stxr_f */
+    24,        /* stxi_f */
+    10,        /* bltr_f */
+    30,        /* blti_f */
+    10,        /* bler_f */
+    30,        /* blei_f */
+    10,        /* beqr_f */
+    30,        /* beqi_f */
+    10,        /* bger_f */
+    30,        /* bgei_f */
+    10,        /* bgtr_f */
+    30,        /* bgti_f */
+    10,        /* bner_f */
+    30,        /* bnei_f */
+    10,        /* bunltr_f */
+    30,        /* bunlti_f */
+    10,        /* bunler_f */
+    30,        /* bunlei_f */
+    18,        /* buneqr_f */
+    38,        /* buneqi_f */
+    10,        /* bunger_f */
+    30,        /* bungei_f */
+    10,        /* bungtr_f */
+    30,        /* bungti_f */
+    18,        /* bltgtr_f */
+    38,        /* bltgti_f */
+    10,        /* bordr_f */
+    30,        /* bordi_f */
+    10,        /* bunordr_f */
+    30,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    6, /* addr_d */
+    26,        /* addi_d */
+    8, /* subr_d */
+    26,        /* subi_d */
+    28,        /* rsbi_d */
+    6, /* mulr_d */
+    26,        /* muli_d */
+    8, /* divr_d */
+    26,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    36,        /* lti_d */
+    16,        /* ler_d */
+    36,        /* lei_d */
+    16,        /* eqr_d */
+    36,        /* eqi_d */
+    16,        /* ger_d */
+    36,        /* gei_d */
+    16,        /* gtr_d */
+    36,        /* gti_d */
+    16,        /* ner_d */
+    36,        /* nei_d */
+    16,        /* unltr_d */
+    36,        /* unlti_d */
+    16,        /* unler_d */
+    36,        /* unlei_d */
+    20,        /* uneqr_d */
+    40,        /* uneqi_d */
+    16,        /* unger_d */
+    36,        /* ungei_d */
+    16,        /* ungtr_d */
+    36,        /* ungti_d */
+    20,        /* ltgtr_d */
+    40,        /* ltgti_d */
+    16,        /* ordr_d */
+    36,        /* ordi_d */
+    16,        /* unordr_d */
+    36,        /* unordi_d */
+    4, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    2, /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    16,        /* ldi_d */
+    12,        /* ldxr_d */
+    24,        /* ldxi_d */
+    4, /* str_d */
+    16,        /* sti_d */
+    12,        /* stxr_d */
+    24,        /* stxi_d */
+    10,        /* bltr_d */
+    30,        /* blti_d */
+    10,        /* bler_d */
+    30,        /* blei_d */
+    10,        /* beqr_d */
+    34,        /* beqi_d */
+    10,        /* bger_d */
+    30,        /* bgei_d */
+    10,        /* bgtr_d */
+    30,        /* bgti_d */
+    10,        /* bner_d */
+    30,        /* bnei_d */
+    10,        /* bunltr_d */
+    30,        /* bunlti_d */
+    10,        /* bunler_d */
+    30,        /* bunlei_d */
+    18,        /* buneqr_d */
+    38,        /* buneqi_d */
+    10,        /* bunger_d */
+    30,        /* bungei_d */
+    10,        /* bungtr_d */
+    30,        /* bungti_d */
+    18,        /* bltgtr_d */
+    38,        /* bltgti_d */
+    10,        /* bordr_d */
+    30,        /* bordi_d */
+    10,        /* bunordr_d */
+    30,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c
new file mode 100644 (file)
index 0000000..7cd1d7f
--- /dev/null
@@ -0,0 +1,1690 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+#if __WORDSIZE == 32
+#  define NUM_FLOAT_REG_ARGS           2       /* fpr argument registers on 32 bit */
+#else
+#  define NUM_FLOAT_REG_ARGS           4       /* fpr argument registers on 64 bit */
+#endif
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 5)   /* is i one of the 5 gpr argument slots? */
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < NUM_FLOAT_REG_ARGS)  /* is i an fpr argument slot? */
+
+/*
+ * Types
+ */
+typedef struct jit_va_list {   /* va_list-like object; _jit_ellipsis() reserves one with jit_allocai() */
+    /* The offsets are "1" based, as addresses are fixed in the
+     * standard stack frame format. */
+    jit_word_t         gpoff;
+    jit_word_t         fpoff;
+
+    /* Easier when there is an explicitly defined type...
+(gdb) ptype ap
+type = struct __va_list_tag {
+    long __gpr;
+    long __fpr;
+    void *__overflow_arg_area;
+    void *__reg_save_area;
+
+    Note that gpoff (__gpr) and fpoff (__fpr) are jit_word_t equivalent
+    and, again, "1" (unit) based, so must be adjusted at va_arg time.
+ */
+    jit_pointer_t      over;   /* by position, the counterpart of __overflow_arg_area above */
+    jit_pointer_t      save;   /* by position, the counterpart of __reg_save_area above */
+
+    /* For variadic functions, always allocate space to save the
+     * callee save fpr registers.
+     * Note that s390 has a standard stack frame format that lightning
+     * does not fully comply with; variadic functions, however, must
+     * comply. So, for variadic functions, do not use the "empty" spaces
+     * of the frame for any callee save fpr register, but save them after
+     * the va_list space, and use the standard stack frame format, as
+     * required by variadic functions (to have a compatible va_list pointer). */
+    jit_float64_t      f8;
+    jit_float64_t      f9;
+    jit_float64_t      f10;
+    jit_float64_t      f11;
+    jit_float64_t      f12;
+    jit_float64_t      f13;
+    jit_float64_t      f14;
+    jit_float64_t      f15;
+} jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+#define jit_get_reg_pair()             _jit_get_reg_pair(_jit) /* each wrapper macro passes the in-scope _jit state */
+static jit_int32_t _jit_get_reg_pair(jit_state_t*);
+#define jit_unget_reg_pair(regno)      _jit_unget_reg_pair(_jit,regno)
+static void _jit_unget_reg_pair(jit_state_t*,jit_int32_t);
+#define jit_get_reg_but_zero(flags)    _jit_get_reg_but_zero(_jit,flags)
+static jit_int32_t _jit_get_reg_but_zero(jit_state_t*,jit_int32_t);
+#define jit_unget_reg_but_zero(reg)    jit_unget_reg(reg)      /* released through the plain jit_unget_reg() */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#define PROTO                          1
+#  include "jit_s390-cpu.c"
+#  include "jit_s390-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+jit_register_t         _rvs[] = {      /* register table: { class flags | hardware register number, printable name } */
+    { rc(gpr) | 0x0,                   "%r0" },
+    { rc(gpr) | 0x1,                   "%r1" },
+    { rc(gpr) | rc(sav) | 0xc,         "%r12" },
+    { rc(gpr) | rc(sav) | 0xb,         "%r11" },
+    { rc(gpr) | rc(sav) | 0xa,         "%r10" },
+    { rc(gpr) | rc(sav) | 0x9,         "%r9" },
+    { rc(gpr) | rc(sav) | 0x8,         "%r8" },
+    { rc(gpr) | rc(sav) | 0x7,         "%r7" },
+    { rc(gpr) | rc(arg) | rc(sav) | 0x6,"%r6" },
+    { rc(gpr) | rc(arg) | 0x5,         "%r5" },
+    { rc(gpr) | rc(arg) | 0x4,         "%r4" },
+    { rc(gpr) | rc(arg) | 0x3,         "%r3" },
+    { rc(gpr) | rc(arg) | 0x2,         "%r2" },        /* gpr argument i is addressed as _R2 - i in this file */
+    { rc(sav) | 0xd,                   "%r13" },       /* used as JIT_FP */
+    { 0xe,                             "%r14" },
+    { rc(sav) | 0xf,                   "%r15" },
+    { rc(fpr) | 0x1,                   "%f1" },
+    { rc(fpr) | 0x3,                   "%f3" },
+    { rc(fpr) | 0x5,                   "%f5" },
+    { rc(fpr) | 0x7,                   "%f7" },
+    { rc(fpr) | rc(sav) | 0xe,         "%f14" },
+    /* Do not use as temporary to simplify stack layout */
+    { 0xf,                             "%f15" },
+    { rc(fpr) | rc(sav) | 0x8,         "%f8" },
+    { rc(fpr) | rc(sav) | 0x9,         "%f9" },
+    { rc(fpr) | rc(sav) | 0xa,         "%f10" },
+    { rc(fpr) | rc(sav) | 0xb,         "%f11" },
+    { rc(fpr) | rc(sav) | 0xc,         "%f12" },
+    { rc(fpr) | rc(sav) | 0xd,         "%f13" },
+    { rc(fpr) | rc(arg) | 0x6,         "%f6" },
+    { rc(fpr) | rc(arg) | 0x4,         "%f4" },
+    { rc(fpr) | rc(arg) | 0x2,         "%f2" },
+    { rc(fpr) | rc(arg) | 0x0,         "%f0" },        /* fpr argument i is addressed as _F0 - i in this file */
+    { _NOREG,                          "<none>" },     /* terminator; _jit_init() leaves it out of reglen */
+};
+
+/*
+ * Implementation
+ */
+void   /* CPU setup entry point; this backend does nothing here. */
+jit_get_cpu(void)
+{
+}
+
+void   /* Records in the state how many registers _rvs describes. */
+_jit_init(jit_state_t *_jit)
+{
+    _jitc->reglen = jit_size(_rvs) - 1;        /* minus one: skip the trailing _NOREG entry */
+}
+
+void   /* Opens a new function: takes a jit_function_t slot and creates its prolog and epilog nodes. */
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                offset;
+
+    if (_jitc->function)       /* a function is still open: close it first */
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {   /* functions vector is full: grow it by 16 entries */
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = stack_framesize;      /* stack arguments get offsets from here on (see jit_arg) */
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+    /* preallocate 8 bytes if not using a constant data buffer */
+    if (_jitc->no_data)
+       _jitc->function->self.aoff = -8;
+    _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));     /* one jit_int32_t per register in _rvs */
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);   /* linked later, by jit_epilog() */
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+jit_int32_t    /* Reserves length bytes of local storage and returns the new (lowered) self.aoff offset. */
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+    switch (length) {  /* first round aoff down to the alignment matching the request size (8 at most) */
+       case 0: case 1:                                         break;
+       case 2:         _jitc->function->self.aoff &= -2;       break;
+       case 3: case 4: _jitc->function->self.aoff &= -4;       break;
+       default:        _jitc->function->self.aoff &= -8;       break;
+    }
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {     /* the allocai note is only recorded while not realizing */
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (_jitc->function->self.aoff);
+}
+
+void   /* Runtime-sized allocation: lowers JIT_SP by v rounded up to 8 bytes; u gets the updated value of the aoffoff slot. */
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {   /* first allocar of this function: reserve the int slot tracking the running value */
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);    /* reg = -(v rounded up to a multiple of 8) */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);   /* write the updated value back for the next allocar */
+    jit_unget_reg(reg);
+    jit_dec_synth();
+}
+
+void   /* Returns from the current function by jumping to its epilog node. */
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *instr;
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    /* jump to epilog */
+    instr = jit_jmpi();
+    jit_patch_at(instr, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+void   /* Returns the word held in register u: copies it to JIT_RET, then emits ret. */
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    jit_movr(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Returns the immediate word u: loads it into JIT_RET, then emits ret. */
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Returns the float held in register u: copies it to JIT_FRET, then emits ret. */
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Returns the immediate float u: loads it into JIT_FRET, then emits ret. */
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Returns the double held in register u: copies it to JIT_FRET, then emits ret. */
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Returns the immediate double u: loads it into JIT_FRET, then emits ret. */
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void   /* Closes the current function: links the epilog node created by jit_prolog() and clears the current function. */
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);     /* presumably guards against linking the epilog twice */
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+jit_bool_t     /* Tells whether argument node u was given a register: u->u.w is then a register slot index. */
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code == jit_code_arg)       /* integer argument: test against the gpr slots */
+       return (jit_arg_reg_p(u->u.w));
+    assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+    return (jit_arg_f_reg_p(u->u.w));
+}
+
+void   /* Marks the start of variable arguments, for the call being prepared or for the function being defined. */
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare) {      /* preparing a call: flag the outgoing call as varargs */
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    else {     /* otherwise: flag the function itself and set up its va_list state */
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+
+       /* Allocate va_list like object in the stack. */
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+
+       /* Initialize gp offset in save area. */
+       if (jit_arg_reg_p(_jitc->function->self.argi))
+           _jitc->function->vagp = _jitc->function->self.argi;
+       else
+           _jitc->function->vagp = 5; /* named arguments already took all 5 gpr slots */
+
+       /* Initialize fp offset in save area. */
+       if (jit_arg_f_reg_p(_jitc->function->self.argf))
+           _jitc->function->vafp = _jitc->function->self.argf;
+       else
+           _jitc->function->vafp = NUM_FLOAT_REG_ARGS;        /* all fpr argument slots already taken */
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the value in register u to the call being prepared as an ordinary word argument. */
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+jit_node_t *   /* Declares the next integer argument; the node's u.w is a gpr slot index or, once slots run out, a frame offset. */
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    if (jit_arg_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else {     /* no gpr slot left: take the next word of the incoming stack area */
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    node = jit_new_node_ww(jit_code_arg, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *   /* Declares the next float argument; the node's u.w is an fpr slot index or, once slots run out, a frame offset. */
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    if (jit_arg_f_reg_p(_jitc->function->self.argf))
+       offset = _jitc->function->self.argf++;
+    else {     /* no fpr slot left: a stack float still consumes a whole word */
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    node = jit_new_node_ww(jit_code_arg_f, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+jit_node_t *   /* Declares the next double argument; the node's u.w is an fpr slot index or, once slots run out, a frame offset. */
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+    assert(_jitc->function);
+    if (jit_arg_f_reg_p(_jitc->function->self.argf))
+       offset = _jitc->function->self.argf++;
+    else {     /* no fpr slot left: a stack double consumes sizeof(jit_float64_t) bytes */
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_float64_t);
+    }
+    node = jit_new_node_ww(jit_code_arg_d, offset,
+                          ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+void   /* Fetches integer argument v into register u as a jit_int8_t sized value (extr_c / ldxi_c). */
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_c(u, _R2 - v->u.w);
+    else       /* stack argument: read the last byte of its word-sized slot */
+       jit_ldxi_c(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int8_t));
+    jit_dec_synth();
+}
+
+void   /* Fetches integer argument v into register u as a jit_uint8_t sized value (extr_uc / ldxi_uc). */
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_uc(u, _R2 - v->u.w);
+    else       /* stack argument: read the last byte of its word-sized slot */
+       jit_ldxi_uc(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint8_t));
+    jit_dec_synth();
+}
+
+void   /* Fetches integer argument v into register u as a jit_int16_t sized value (extr_s / ldxi_s). */
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_s(u, _R2 - v->u.w);
+    else       /* stack argument: read the last 2 bytes of its word-sized slot */
+       jit_ldxi_s(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int16_t));
+    jit_dec_synth();
+}
+
+void   /* Fetches integer argument v into register u as a jit_uint16_t sized value (extr_us / ldxi_us). */
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_us(u, _R2 - v->u.w);
+    else       /* stack argument: read the last 2 bytes of its word-sized slot */
+       jit_ldxi_us(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint16_t));
+    jit_dec_synth();
+}
+
+void   /* Fetches integer argument v into register u as a jit_int32_t sized value. */
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+#if __WORDSIZE == 32   /* the register is already 32 bit wide: a plain move is enough */
+       jit_movr(u, _R2 - v->u.w);
+#else
+       jit_extr_i(u, _R2 - v->u.w);
+#endif
+    }
+    else       /* stack argument: read the last 4 bytes of its word-sized slot */
+       jit_ldxi_i(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+void   /* 64 bit only: fetches integer argument v into register u as a jit_uint32_t sized value (extr_ui / ldxi_ui). */
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_ui(u, _R2 - v->u.w);
+    else       /* stack argument: read the last 4 bytes of its word-sized slot */
+       jit_ldxi_ui(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint32_t));
+    jit_dec_synth();
+}
+
+void   /* 64 bit only: fetches integer argument v into register u at full word width. */
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr(u, _R2 - v->u.w);
+    else       /* stack argument: the value fills its whole slot, no offset adjustment */
+       jit_ldxi_l(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+#endif
+
+void   /* Overwrites integer argument v (its register or its stack slot) with the word in register u. */
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr(_R2 - v->u.w, u);
+    else
+       jit_stxi(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+void   /* Overwrites integer argument v (its register or its stack slot) with the immediate word u. */
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movi(_R2 - v->u.w, u);
+    else {     /* stack argument: go through a temporary gpr */
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void   /* Fetches float argument v into fpr u, from its argument register or from its stack slot. */
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(u, _F0 - v->u.w);
+    else
+       jit_ldxi_f(u, JIT_FP,
+                  v->u.w
+#if __WORDSIZE == 64   /* the float occupies the last 4 bytes of its 8 byte slot */
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  );
+    jit_dec_synth();
+}
+
+void   /* Overwrites float argument v (its register or its stack slot) with the float in fpr u. */
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(_F0 - v->u.w, u);
+    else
+       jit_stxi_f(v->u.w
+#if __WORDSIZE == 64   /* the float occupies the last 4 bytes of its 8 byte slot */
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_FP, u);
+    jit_dec_synth();
+}
+
+void   /* Overwrites float argument v (its register or its stack slot) with the immediate float u. */
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_f(_F0 - v->u.w, u);
+    else {     /* stack argument: go through a temporary fpr */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w
+#if __WORDSIZE == 64   /* the float occupies the last 4 bytes of its 8 byte slot */
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void   /* Fetches double argument v into fpr u, from its argument register or from its stack slot. */
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(u, _F0 - v->u.w);
+    else
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+
+void   /* Overwrites double argument v (its register or its stack slot) with the double in fpr u. */
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(_F0 - v->u.w, u);
+    else
+       jit_stxi_d(v->u.w, JIT_FP, u);
+    jit_dec_synth();
+}
+
+void   /* Overwrites double argument v (its register or its stack slot) with the immediate double u. */
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_d(_F0 - v->u.w, u);
+    else {     /* stack argument: go through a temporary fpr */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the word in register u as the next integer argument of the call being prepared. */
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr(_R2 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {     /* no gpr slot left: store it stack_framesize + call.size bytes above JIT_SP */
+       jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the immediate word u as the next integer argument of the call being prepared. */
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 regno;
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(_R2 - _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {     /* no gpr slot left: store it on the stack through a temporary gpr */
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the float in fpr u as the next float argument of the call being prepared. */
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movr_f(_F0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+    else {     /* no fpr slot left: store it in the outgoing stack area, one word per float */
+       jit_stxi_f(_jitc->function->call.size + stack_framesize
+#if __WORDSIZE == 64   /* the float goes in the last 4 bytes of its 8 byte slot */
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the immediate float u as the next float argument of the call being prepared. */
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                regno;
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movi_f(_F0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+    else {     /* no fpr slot left: store it on the stack through a temporary fpr */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize
+#if __WORDSIZE == 64   /* the float goes in the last 4 bytes of its 8 byte slot */
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the double in fpr u as the next float argument of the call being prepared. */
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movr_d(_F0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+    else {     /* no fpr slot left: store it in the outgoing stack area, sizeof(jit_float64_t) bytes per double */
+       jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+void   /* Passes the immediate double u as the next float argument of the call being prepared. */
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                regno;
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movi_d(_F0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+    else {     /* no fpr slot left: store it on the stack through a temporary fpr */
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+jit_bool_t     /* Tells whether register regno (an _rvs index) carries an argument of call node. */
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    jit_int32_t                index;  /* argument slot; regno itself must stay intact */
+    spec = jit_class(_rvs[regno].spec);
+    if (!(spec & jit_class_arg))
+       return (0);
+    /* node->v.w / node->w.w hold the gpr / fpr argument register counts
+     * recorded on the call node by jit_finishr() and jit_finishi(). */
+    if (spec & jit_class_fpr) {
+       index = _F0 - regno;
+       return (index >= 0 && index < node->w.w);
+    }
+    index = _R2 - regno;
+    return (index >= 0 && index < node->v.w);
+}
+
+void   /* Ends a prepared call sequence with an indirect call through register r0. */
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)       /* keep the largest outgoing stack area seen so far */
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_callr(r0);
+    call->v.w = _jitc->function->call.argi;    /* gpr argument registers used; read back by _jit_regarg_p() */
+    call->w.w = _jitc->function->call.argf;    /* fpr argument registers used; read back by _jit_regarg_p() */
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+jit_node_t *   /* Ends a prepared call sequence with a call to address i0; returns the call node. */
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)       /* keep the largest outgoing stack area seen so far */
+       _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_calli(i0);
+    node->v.w = _jitc->function->call.argi;    /* gpr argument registers used; read back by _jit_regarg_p() */
+    node->w.w = _jitc->function->call.argf;    /* fpr argument registers used; read back by _jit_regarg_p() */
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+void   /* Copies the call result from JIT_RET into r0 through extr_c. */
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void   /* Copies the call result from JIT_RET into r0 through extr_uc. */
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void   /* Copies the call result from JIT_RET into r0 through extr_s. */
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void   /* Copies the call result from JIT_RET into r0 through extr_us. */
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void   /* Copies the call result from JIT_RET into r0: extr_i on 64 bit, plain move on 32 bit. */
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+#if __WORDSIZE == 64
+    jit_extr_i(r0, JIT_RET);
+#else
+    jit_movr(r0, JIT_RET);
+#endif
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+void   /* 64 bit only: copies the call result from JIT_RET into r0 through extr_ui. */
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void   /* 64 bit only: copies the full-word call result from JIT_RET into r0. */
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    jit_movr(r0, JIT_RET);
+    jit_dec_synth();
+}
+#endif
+
+void   /* Copies the float call result from JIT_FRET into fpr r0. */
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    jit_movr_f(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+void   /* Copies the double call result from JIT_FRET into fpr r0. */
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    jit_movr_d(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#define assert_data(node)              /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               name##i_f(rn(node->u.w), rn(node->v.w),                 \
+                         (jit_float32_t *)node->w.n->u.w);             \
+               break
+#define case_rrd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               name##i_d(rn(node->u.w), rn(node->v.w),                 \
+                         (jit_float64_t *)node->w.n->u.w);             \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type##_p(_jit->pc.w,                \
+                                            rn(node->v.w),             \
+                                            rn(node->w.w));            \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type##_p(_jit->pc.w,                \
+                                            rn(node->v.w), node->w.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_brf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_f(temp->u.w, rn(node->v.w),                 \
+                             (jit_float32_t *)node->w.n->u.w);         \
+               else {                                                  \
+                   word = name##i_f_p(_jit->pc.w, rn(node->v.w),       \
+                                      (jit_float32_t *)node->w.n->u.w);\
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+/*
+ * case_brd(name): expands to the `case jit_code_<name>i_d:' arm for a
+ * branch whose third operand is a double precision constant reached
+ * through node->w.n->u.w.  The branch target is node->u.n and must be a
+ * label or an epilog node.  If the target already carries jit_flag_patch,
+ * temp->u.w is passed to <name>i_d directly; otherwise the <name>i_d_p
+ * variant is called with the current pc and its result is queued with
+ * patch() for the fixup loop after the main switch.
+ * The expansion ends in `break' without a semicolon; the use site
+ * supplies it.
+ */
+#define case_brd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_d(temp->u.w, rn(node->v.w),                 \
+                             (jit_float64_t *)node->w.n->u.w);         \
+               else {                                                  \
+                   word = name##i_d_p(_jit->pc.w, rn(node->v.w),       \
+                                      (jit_float64_t *)node->w.n->u.w);\
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               if ((node->link || (node->flag & jit_flag_use)) &&
+                   (word = _jit->pc.w & 3))
+                   nop(4 - word);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#endif
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+#endif
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+#if __WORDSIZE == 64
+               case_rr(hton, _ul);
+#endif
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+#if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#endif
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add);
+               case_rrr(sub, _f);
+               case_rrf(sub);
+               case_rrf(rsb);
+               case_rrr(mul, _f);
+               case_rrf(mul);
+               case_rrr(div, _f);
+               case_rrf(div);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt);
+               case_rrr(le, _f);
+               case_rrf(le);
+               case_rrr(eq, _f);
+               case_rrf(eq);
+               case_rrr(ge, _f);
+               case_rrf(ge);
+               case_rrr(gt, _f);
+               case_rrf(gt);
+               case_rrr(ne, _f);
+               case_rrf(ne);
+               case_rrr(unlt, _f);
+               case_rrf(unlt);
+               case_rrr(unle, _f);
+               case_rrf(unle);
+               case_rrr(uneq, _f);
+               case_rrf(uneq);
+               case_rrr(unge, _f);
+               case_rrf(unge);
+               case_rrr(ungt, _f);
+               case_rrf(ungt);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt);
+               case_rrr(ord, _f);
+               case_rrf(ord);
+               case_rrr(unord, _f);
+               case_rrf(unord);
+               case_brr(blt, _f);
+               case_brf(blt);
+               case_brr(ble, _f);
+               case_brf(ble);
+               case_brr(beq, _f);
+               case_brf(beq);
+               case_brr(bge, _f);
+               case_brf(bge);
+               case_brr(bgt, _f);
+               case_brf(bgt);
+               case_brr(bne, _f);
+               case_brf(bne);
+               case_brr(bunlt, _f);
+               case_brf(bunlt);
+               case_brr(bunle, _f);
+               case_brf(bunle);
+               case_brr(buneq, _f);
+               case_brf(buneq);
+               case_brr(bunge, _f);
+               case_brf(bunge);
+               case_brr(bungt, _f);
+               case_brf(bungt);
+               case_brr(bltgt, _f);
+               case_brf(bltgt);
+               case_brr(bord, _f);
+               case_brf(bord);
+               case_brr(bunord, _f);
+               case_brf(bunord);
+               case_rrr(add, _d);
+               case_rrd(add);
+               case_rrr(sub, _d);
+               case_rrd(sub);
+               case_rrd(rsb);
+               case_rrr(mul, _d);
+               case_rrd(mul);
+               case_rrr(div, _d);
+               case_rrd(div);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrd(lt);
+               case_rrr(le, _d);
+               case_rrd(le);
+               case_rrr(eq, _d);
+               case_rrd(eq);
+               case_rrr(ge, _d);
+               case_rrd(ge);
+               case_rrr(gt, _d);
+               case_rrd(gt);
+               case_rrr(ne, _d);
+               case_rrd(ne);
+               case_rrr(unlt, _d);
+               case_rrd(unlt);
+               case_rrr(unle, _d);
+               case_rrd(unle);
+               case_rrr(uneq, _d);
+               case_rrd(uneq);
+               case_rrr(unge, _d);
+               case_rrd(unge);
+               case_rrr(ungt, _d);
+               case_rrd(ungt);
+               case_rrr(ltgt, _d);
+               case_rrd(ltgt);
+               case_rrr(ord, _d);
+               case_rrd(ord);
+               case_rrr(unord, _d);
+               case_rrd(unord);
+               case_brr(blt, _d);
+               case_brd(blt);
+               case_brr(ble, _d);
+               case_brd(ble);
+               case_brr(beq, _d);
+               case_brd(beq);
+               case_brr(bge, _d);
+               case_brd(bge);
+               case_brr(bgt, _d);
+               case_brd(bgt);
+               case_brr(bne, _d);
+               case_brd(bne);
+               case_brr(bunlt, _d);
+               case_brd(bunlt);
+               case_brr(bunle, _d);
+               case_brd(bunle);
+               case_brr(buneq, _d);
+               case_brd(buneq);
+               case_brr(bunge, _d);
+               case_brd(bunge);
+               case_brr(bungt, _d);
+               case_brd(bungt);
+               case_brr(bltgt, _d);
+               case_brd(bltgt);
+               case_brr(bord, _d);
+               case_brd(bord);
+               case_brr(bunord, _d);
+               case_brd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               if (node->link && (word = _jit->pc.w & 3))
+                   nop(4 - word);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+#if __WORDSIZE == 64
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+#if __WORDSIZE == 64
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_s390-cpu.c"
+#  include "jit_s390-fpu.c"
+#undef CODE
+
+/*
+ * Hand the memory range holding generated code to the compiler's
+ * __clear_cache() builtin.  The start address is rounded down and the
+ * end address rounded up to a multiple of the page size reported by
+ * sysconf(_SC_PAGE_SIZE).  Compiles to an empty function unless
+ * __GNUC__ is defined.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t         page_size = sysconf(_SC_PAGE_SIZE);
+    jit_word_t         page_mask = -page_size;
+    jit_word_t         first = (jit_word_t)fptr & page_mask;
+    jit_word_t         limit = ((jit_word_t)tptr + page_size - 1) & page_mask;
+
+    __clear_cache((void *)first, (void *)limit);
+#endif
+}
+
+/*
+ * Non-static entry point that forwards to ldxi() with r0 and r1
+ * translated through rn() and the offset i0 passed unchanged.
+ * NOTE(review): presumably exported so architecture independent code
+ * can emit a register reload -- confirm against the callers.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Non-static entry point that forwards to stxi() with the offset i0
+ * passed unchanged and r0 and r1 translated through rn().
+ * NOTE(review): presumably exported so architecture independent code
+ * can emit a register spill -- confirm against the callers.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Non-static entry point that forwards to ldxi_d() with the float
+ * register r0 and the general purpose register r1 translated through
+ * rn() and the offset i0 passed unchanged.
+ * NOTE(review): presumably the double precision counterpart of
+ * _emit_ldxi() -- confirm against the callers.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Non-static entry point that forwards to stxi_d() with the offset i0
+ * passed unchanged, and the general purpose register r0 and the float
+ * register r1 translated through rn().
+ * NOTE(review): presumably the double precision counterpart of
+ * _emit_stxi() -- confirm against the callers.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Reserve a register pair (an even named register together with the
+ * following odd one, e.g. _R0/_R1) for operations that need one, and
+ * return the first register of the pair.
+ *
+ * The candidates are searched twice in the same order of preference:
+ * first for a pair where both registers are free, then for a pair where
+ * both would be free after a spill.  If neither search succeeds the
+ * process is aborted, because a pair *must* be found.
+ * Release the pair with _jit_unget_reg_pair().
+ */
+static jit_int32_t
+_jit_get_reg_pair(jit_state_t *_jit)
+{
+    /* Non callee save pairs first, then the callee save ones */
+    static const jit_int32_t   pairs[][2] = {
+       { _R0,  _R1  }, { _R2,  _R3  }, { _R4,  _R5  },
+       { _R10, _R11 }, { _R8,  _R9  }, { _R6,  _R7  },
+    };
+    const jit_int32_t          count = sizeof(pairs) / sizeof(pairs[0]);
+    jit_int32_t                idx, r1, r2;
+
+    /* Best match: nothing needs to be spilled */
+    for (idx = 0; idx < count; idx++) {
+       r1 = pairs[idx][0];
+       r2 = pairs[idx][1];
+       if (jit_reg_free_p(r1) && jit_reg_free_p(r2))
+           goto found;
+    }
+
+    /* Otherwise accept a pair that only becomes free after spilling */
+    for (idx = 0; idx < count; idx++) {
+       r1 = pairs[idx][0];
+       r2 = pairs[idx][1];
+       if (jit_reg_free_if_spill_p(r1) && jit_reg_free_if_spill_p(r2))
+           goto found;
+    }
+
+    /* Do not jit_get_reg() all registers to avoid it */
+    abort();
+
+found:
+    (void)jit_get_reg(jit_class_gpr|jit_class_named|r1);
+    (void)jit_get_reg(jit_class_gpr|jit_class_named|r2);
+
+    return (r1);
+}
+
+/*
+ * Release a register pair obtained from _jit_get_reg_pair().  `reg' must
+ * be the first register of one of the known pairs (_R0, _R2, _R4, _R6,
+ * _R8 or _R10); any other value aborts the process.
+ */
+static void
+_jit_unget_reg_pair(jit_state_t *_jit, jit_int32_t reg)
+{
+    jit_int32_t                partner;
+
+    if (reg == _R0)
+       partner = _R1;
+    else if (reg == _R2)
+       partner = _R3;
+    else if (reg == _R4)
+       partner = _R5;
+    else if (reg == _R6)
+       partner = _R7;
+    else if (reg == _R8)
+       partner = _R9;
+    else if (reg == _R10)
+       partner = _R11;
+    else
+       abort();
+
+    jit_unget_reg(reg);
+    jit_unget_reg(partner);
+}
+
+/*
+ * Get a general purpose register, retrying once if the first request
+ * returns _R0.
+ *
+ * NOTE(review): `flags' is only passed to the second request; the first
+ * one always uses plain jit_class_gpr.  Confirm this is intended for
+ * callers that pass extra class flags.
+ */
+static jit_int32_t
+_jit_get_reg_but_zero(jit_state_t *_jit, jit_int32_t flags)
+{
+    jit_int32_t                first, other;
+
+    first = jit_get_reg(jit_class_gpr);
+    if (first != _R0)
+       return (first);
+
+    /* Ask again while _R0 is still held (presumably so that the second
+     * request yields a different register), then give _R0 back. */
+    other = jit_get_reg(jit_class_gpr|flags);
+    jit_unget_reg(_R0);
+    return (other);
+}
+
+/*
+ * Queue a fixup: remember that the instruction at `instr' refers to a
+ * node whose address is not known yet.  The entries are resolved by the
+ * patch_at() loop that runs after all nodes have been emitted.
+ *
+ * `node' must have jit_flag_node set and its target must not have
+ * jit_flag_patch set yet (both checked with assert).
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_patch_t        *slot;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps its target node in v, every other code in u */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+
+    /* Grow the patch vector by 1024 entries whenever it is full */
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    slot = _jitc->patches.ptr + _jitc->patches.offset;
+    slot->inst = instr;
+    slot->node = node;
+    _jitc->patches.offset++;
+}
diff --git a/deps/lightning/lib/jit_size.c b/deps/lightning/lib/jit_size.c
new file mode 100644 (file)
index 0000000..61f1aa4
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+#if GET_JIT_SIZE
+#  include <stdio.h>
+#endif
+
+/*
+ * Initialization
+ */
+/*
+ * Per instruction code size table, indexed by node->code and holding
+ * jit_code_last_code entries.  _jit_get_size() sums these entries to
+ * size the code buffer and _jit_size_collect() raises them to the
+ * largest length observed.
+ *
+ * With GET_JIT_SIZE the initializer is left empty and JIT_INSTR_MAX is
+ * fixed at 256; otherwise the initializer comes from the "-sz.c" file
+ * of the target architecture (presumably generated from a previous
+ * GET_JIT_SIZE run, and presumably also the place where JIT_INSTR_MAX
+ * is defined -- those files are not visible here).
+ */
+static jit_int16_t     _szs[jit_code_last_code] = {
+#if GET_JIT_SIZE
+#  define JIT_INSTR_MAX                256
+#else
+#  if defined(__i386__) || defined(__x86_64__)
+#    include "jit_x86-sz.c"
+#  elif defined(__mips__)
+#    include "jit_mips-sz.c"
+#  elif defined(__arm__)
+#    include "jit_arm-sz.c"
+#  elif defined(__powerpc__)
+#    include "jit_ppc-sz.c"
+#  elif defined(__sparc__)
+#    include "jit_sparc-sz.c"
+#  elif defined(__ia64__)
+#    include "jit_ia64-sz.c"
+#  elif defined(__hppa__)
+#    include "jit_hppa-sz.c"
+#  elif defined(__aarch64__)
+#    include "jit_aarch64-sz.c"
+#  elif defined(__s390__) || defined(__s390x__)
+#    include "jit_s390-sz.c"
+#  elif defined(__alpha__)
+#    include "jit_alpha-sz.c"
+#  elif defined(__riscv)
+#    include "jit_riscv-sz.c"
+#  endif
+#endif
+};
+
+/*
+ * Implementation
+ */
+/*
+ * Sanity check run at initialization.  In DEBUG builds that use a
+ * precomputed size table (GET_JIT_SIZE not set), abort if every entry
+ * of _szs is zero.  Does nothing in any other configuration.
+ */
+void
+jit_init_size(void)
+{
+#if DEBUG
+#  if !GET_JIT_SIZE
+    jit_word_t         index = 0;
+
+    /* Skip leading zero entries; stop at the first non zero one */
+    while (index < jit_size(_szs) && _szs[index] == 0)
+       ++index;
+    /* Ensure data was collected */
+    if (!(index < jit_size(_szs)))
+       abort();
+#  endif
+#endif
+}
+
+#if GET_JIT_SIZE
+/*
+ * Snapshot the code buffer pointer and the current pc into _jitc->cptr
+ * and _jitc->size.  _jit_size_collect() compares against this snapshot
+ * to compute how many bytes were emitted since.
+ */
+void
+_jit_size_prepare(jit_state_t *_jit)
+{
+    _jitc->cptr = _jit->code.ptr;
+    _jitc->size = _jit->pc.w;
+}
+
+/*
+ * Record how many bytes were emitted since _jit_size_prepare() and keep
+ * the largest value seen so far for node->code in _szs.
+ */
+void
+_jit_size_collect(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_word_t         emitted;
+
+    /* Ignore the measurement if the code buffer is no longer the one
+     * seen by _jit_size_prepare() (presumably it was reallocated). */
+    if (_jitc->cptr != _jit->code.ptr)
+       return;
+
+    emitted = _jit->pc.w - _jitc->size;
+    if (emitted > _szs[node->code])
+       _szs[node->code] = emitted;
+}
+
+#else
+/*
+ * Estimate the code buffer size: JIT_INSTR_MAX plus the table size of
+ * every node in the list starting at _jitc->head, rounded up to a
+ * multiple of 4096.
+ */
+jit_word_t
+_jit_get_size(jit_state_t *_jit)
+{
+    jit_word_t          total = JIT_INSTR_MAX;
+    jit_node_t         *cursor = _jitc->head;
+
+    while (cursor) {
+       total += _szs[cursor->code];
+       cursor = cursor->next;
+    }
+
+    /* round up to the next multiple of 4096 */
+    return ((total + 4095) & -4096);
+}
+#endif
+
+/*
+ * Return JIT_INSTR_MAX, but never less than 144.
+ */
+jit_word_t
+jit_get_max_instr(void)
+{
+    if (JIT_INSTR_MAX < 144)
+       return (144);
+    return (JIT_INSTR_MAX);
+}
+
+/*
+ * With GET_JIT_SIZE, append the collected size table to JIT_SIZE_PATH,
+ * one "<code> <size>" line per entry of _szs.  Does nothing in builds
+ * without GET_JIT_SIZE.
+ */
+void
+jit_finish_size(void)
+{
+#if GET_JIT_SIZE
+    FILE               *fp;
+    jit_word_t          offset;
+
+    /* Define a single path */
+    fp = fopen(JIT_SIZE_PATH, "a");
+    assert(fp);
+    /* assert() vanishes with NDEBUG; never hand a null stream to fprintf */
+    if (fp == NULL)
+       return;
+    /* offset is a jit_word_t, which need not be an int: convert it
+     * explicitly so the arguments match the conversion specifiers */
+    for (offset = 0; offset < jit_size(_szs); offset++)
+       fprintf(fp, "%ld %d\n", (long)offset, (int)_szs[offset]);
+    fclose(fp);
+#endif
+}
diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c
new file mode 100644 (file)
index 0000000..051647a
--- /dev/null
@@ -0,0 +1,2568 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* Numeric encodings of the SPARC registers this file refers to by name. */
+#  define _G2_REGNO                            0x02
+#  define _G3_REGNO                            0x03
+#  define _G4_REGNO                            0x04
+#  define _O0_REGNO                            0x08
+#  define _O1_REGNO                            0x09
+#  define _SP_REGNO                            0x0e
+#  define _FP_REGNO                            0x1e
+#  define _O7_REGNO                            0x0f
+#  define _L0_REGNO                            0x10
+#  define _L1_REGNO                            0x11
+#  define _L2_REGNO                            0x12
+#  define _L3_REGNO                            0x13
+#  define _L4_REGNO                            0x14
+#  define _L5_REGNO                            0x15
+#  define _L6_REGNO                            0x16
+#  define _L7_REGNO                            0x17
+#  define _I7_REGNO                            0x1f
+/*
+ *                                     - previous stack frame
+ * fp  ----
+ * fp- local variables (in lightning, 8 bytes space for float conversion)
+ * fp- alloca
+ * sp+ stack arguments
+ * sp+ 6 words to save register arguments
+ * sp+ 1 word for hidden address of aggregate return value (32 bits only)
+ * sp+ 16 words for in and local registers
+ * sp  ----
+ *     decreasing memory address       - next stack frame (not yet allocated)
+ */
+/* Size in bytes of the three fixed sp+ areas itemised above:
+ * 16 words, plus 1 word on 32-bit builds only, plus 6 words. */
+#  define stack_framesize                      ((16 + (__WORDSIZE == 32) + 6) * sizeof(jit_word_t))
+/*
+ * One 32-bit instruction word, viewed either whole (v) or through a
+ * single field.  Each struct member skips `_' leading bits and then
+ * exposes a field of the stated width as `b'.
+ * NOTE(review): bit-field allocation order is implementation-defined;
+ * this layout presumably relies on the first-declared bit-field holding
+ * the most significant bits -- confirm against the target ABI.
+ */
+typedef union {
+    struct {                           jit_uint32_t b: 2;      } op;
+    struct {   jit_uint32_t _: 2;      jit_uint32_t b: 1;      } a;
+    struct {   jit_uint32_t _: 2;      jit_uint32_t b: 5;      } rd;
+    struct {   jit_uint32_t _: 2;      jit_uint32_t b: 30;     } disp30;
+    struct {   jit_uint32_t _: 3;      jit_uint32_t b: 4;      } cond;
+    struct {   jit_uint32_t _: 7;      jit_uint32_t b: 3;      } op2;
+    struct {   jit_uint32_t _: 7;      jit_uint32_t b: 6;      } op3;
+    struct {   jit_uint32_t _: 10;     jit_uint32_t b: 1;      } cc1;
+    struct {   jit_uint32_t _: 10;     jit_uint32_t b: 22;     } imm22;
+    struct {   jit_uint32_t _: 10;     jit_uint32_t b: 22;     } disp22;
+    struct {   jit_uint32_t _: 11;     jit_uint32_t b: 1;      } cc0;
+    struct {   jit_uint32_t _: 12;     jit_uint32_t b: 1;      } p;
+    struct {   jit_uint32_t _: 13;     jit_uint32_t b: 19;     } disp19;
+    struct {   jit_uint32_t _: 13;     jit_uint32_t b: 5;      } rs1;
+    struct {   jit_uint32_t _: 18;     jit_uint32_t b: 1;      } i;
+    struct {   jit_uint32_t _: 18;     jit_uint32_t b: 9;      } opf;
+    struct {   jit_uint32_t _: 19;     jit_uint32_t b: 1;      } x;
+    struct {   jit_uint32_t _: 19;     jit_uint32_t b: 8;      } asi;
+    struct {   jit_uint32_t _: 19;     jit_uint32_t b: 6;      } res;
+    struct {   jit_uint32_t _: 19;     jit_uint32_t b: 13;     } simm13;
+    struct {   jit_uint32_t _: 20;     jit_uint32_t b: 7;      } asix;
+    struct {   jit_uint32_t _: 20;     jit_uint32_t b: 6;      } asis;
+    struct {   jit_uint32_t _: 26;     jit_uint32_t b: 6;      } shim;
+    struct {   jit_uint32_t _: 25;     jit_uint32_t b: 7;      } imm7;
+    struct {   jit_uint32_t _: 27;     jit_uint32_t b: 5;      } rs2;
+    /* the complete instruction word */
+    jit_int32_t                                                          v;
+} jit_instr_t;
+/* Store one instruction word at the current pc and advance it. */
+#  define ii(i)                                *_jit->pc.ui++ = i
+/*
+ * sN_p(imm) is true when imm fits a signed N-bit field, i.e. lies in
+ * [-2^(N-1), 2^(N-1) - 1].
+ */
+#  define s7_p(imm)                    ((imm) <= 63 && (imm) >= -64)
+#  define s13_p(imm)                   ((imm) <= 4095 && (imm) >= -4096)
+#  define s19_p(imm)                   ((imm) <= 262143 && (imm) >= -262144)
+/* lower bound is -2^21; it used to read -20971512 (a stray digit) */
+#  define s22_p(imm)                   ((imm) <= 2097151 && (imm) >= -2097152)
+#  define s30_p(imm)                   ((imm) <= 536870911 && (imm) >= -536870912)
+/*
+ * Raw instruction-format emitters.  Each fN macro forwards its
+ * arguments to the matching _fN function together with the current
+ * _jit state.  The f2bp and f3rx forms exist on 64-bit builds only.
+ */
+#  define f1(op, disp30)               _f1(_jit, op, disp30)
+static void _f1(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define f2r(op, rd, op2, imm22)      _f2r(_jit, op, rd, op2, imm22)
+static void _f2r(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define f2b(op, a, cond, op2, disp22)        _f2b(_jit, op, a, cond, op2, disp22)
+static void
+_f2b(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define f2bp(op,a,cond,op2,cc1,cc0,p,disp19)                               \
+       _f2bp(_jit,op,a,cond,op2,cc1,cc0,p,disp19)
+static void
+_f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+      jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
+#  define f3r(op, rd, op3, rs1, rs2)   _f3r(_jit, op, rd, op3, rs1, rs2)
+static void _f3r(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#  define f3rx(op, rd, op3, rs1, rs2)  _f3rx(_jit, op, rd, op3, rs1, rs2)
+static void _f3rx(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
+#  define f3i(op, rd, op3, rs1, simm13)        _f3i(_jit, op, rd, op3, rs1, simm13)
+static void _f3i(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define f3s(op, rd, op3, rs1, simm13)        _f3s(_jit, op, rd, op3, rs1, simm13)
+static void _f3s(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define f3t(cond, rs1, i, ri)                _f3t(_jit, cond, rs1, i, ri)
+static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
+/* asi must be a macro parameter: _f3a takes six values after _jit and
+ * the alternate-space load macros invoke f3a with six arguments */
+#  define f3a(op, rd, op3, rs1, asi, rs2)      _f3a(_jit, op, rd, op3, rs1, asi, rs2)
+static void _f3a(jit_state_t*,jit_int32_t,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
+/*
+ * Load instructions (first emitter argument, op, is 3).
+ * Register+register forms go through f3r, the I-suffixed forms take an
+ * immediate and go through f3i, and the A-suffixed forms additionally
+ * take an asi argument and go through f3a.  Which word/doubleword
+ * mnemonics exist depends on __WORDSIZE.
+ */
+#  define LDSB(rs1, rs2, rd)           f3r(3, rd, 9, rs1, rs2)
+#  define LDSBI(rs1, imm, rd)          f3i(3, rd, 9, rs1, imm)
+#  define LDSH(rs1, rs2, rd)           f3r(3, rd, 10, rs1, rs2)
+#  define LDSHI(rs1, imm, rd)          f3i(3, rd, 10, rs1, imm)
+#  define LDUB(rs1, rs2, rd)           f3r(3, rd, 1, rs1, rs2)
+#  define LDUBI(rs1, imm, rd)          f3i(3, rd, 1, rs1, imm)
+#  define LDUH(rs1, rs2, rd)           f3r(3, rd, 2, rs1, rs2)
+#  define LDUHI(rs1, imm, rd)          f3i(3, rd, 2, rs1, imm)
+#  if __WORDSIZE == 32
+#    define LD(rs1, rs2, rd)           f3r(3, rd, 0, rs1, rs2)
+#    define LDI(rs1, imm, rd)          f3i(3, rd, 0, rs1, imm)
+#    define LDD(rs1, rs2, rd)          f3r(3, rd, 3, rs1, rs2)
+#    define LDDI(rs1, imm, rd)         f3i(3, rd, 3, rs1, imm)
+#  else
+#    define LDSW(rs1, rs2, rd)         f3r(3, rd, 8, rs1, rs2)
+#    define LDSWI(rs1, imm, rd)                f3i(3, rd, 8, rs1, imm)
+#    define LDUW(rs1, rs2, rd)         f3r(3, rd, 0, rs1, rs2)
+#    define LDUWI(rs1, imm, rd)                f3i(3, rd, 0, rs1, imm)
+#    define LDX(rs1, rs2, rd)          f3r(3, rd, 11, rs1, rs2)
+#    define LDXI(rs1, imm, rd)         f3i(3, rd, 11, rs1, imm)
+#  endif
+/* alternate-space loads: op3 is the plain load's op3 plus 16 */
+#  define LDSBA(rs1, rs2, asi, rd)     f3a(3, rd, 25, rs1, asi, rs2)
+#  define LDSHA(rs1, rs2, asi, rd)     f3a(3, rd, 26, rs1, asi, rs2)
+#  define LDUBA(rs1, rs2, asi, rd)     f3a(3, rd, 17, rs1, asi, rs2)
+#  define LDUHA(rs1, rs2, asi, rd)     f3a(3, rd, 18, rs1, asi, rs2)
+#  if __WORDSIZE == 32
+#    define LDA(rs1, rs2, asi, rd)     f3a(3, rd, 16, rs1, asi, rs2)
+#    define LDDA(rs1, rs2, asi, rd)    f3a(3, rd, 19, rs1, asi, rs2)
+#  else
+#    define LDSWA(rs1, rs2, asi, rd)   f3a(3, rd, 24, rs1, asi, rs2)
+#    define LDUWA(rs1, rs2, asi, rd)   f3a(3, rd, 16, rs1, asi, rs2)
+#    define LDXA(rs1, rs2, asi, rd)    f3a(3, rd, 27, rs1, asi, rs2)
+#  endif
+/* coprocessor loads -- NOTE(review): presumably SPARC V8 only; confirm */
+#  define LDC(rs1, rs2, rd)            f3r(3, rd, 48, rs1, rs2)
+#  define LDCI(rs1, imm, rd)           f3i(3, rd, 48, rs1, imm)
+#  define LDDC(rs1, rs2, rd)           f3r(3, rd, 51, rs1, rs2)
+#  define LDDCI(rs1, imm, rd)          f3i(3, rd, 51, rs1, imm)
+#  define LDCSR(rs1, rs2, rd)          f3r(3, rd, 49, rs1, rs2)
+#  define LDCSRI(rs1, imm, rd)         f3i(3, rd, 49, rs1, imm)
+/*
+ * Store instructions (first emitter argument, op, is 3).
+ * Register+register forms go through f3r, I-suffixed forms take an
+ * immediate and go through f3i, A-suffixed forms go through f3a.
+ */
+#  define STB(rd, rs1, rs2)            f3r(3, rd, 5, rs1, rs2)
+#  define STBI(rd, rs1, imm)           f3i(3, rd, 5, rs1, imm)
+#  define STH(rd, rs1, rs2)            f3r(3, rd, 6, rs1, rs2)
+#  define STHI(rd, rs1, imm)           f3i(3, rd, 6, rs1, imm)
+#  if __WORDSIZE == 32
+#    define ST(rd, rs1, rs2)           f3r(3, rd, 4, rs1, rs2)
+#    define STI(rd, rs1, imm)          f3i(3, rd, 4, rs1, imm)
+/* parameters were misspelled (rrd, s1), leaving rd and rs1 unbound */
+#    define STD(rd, rs1, rs2)          f3r(3, rd, 7, rs1, rs2)
+#    define STDI(rd, rs1, imm)         f3i(3, rd, 7, rs1, imm)
+#  else
+#    define STW(rd, rs1, rs2)          f3r(3, rd, 4, rs1, rs2)
+#    define STWI(rd, rs1, imm)         f3i(3, rd, 4, rs1, imm)
+#    define STX(rd, rs1, rs2)          f3r(3, rd, 14, rs1, rs2)
+#    define STXI(rd, rs1, imm)         f3i(3, rd, 14, rs1, imm)
+#  endif
+/*
+ * NOTE(review): asi is not a parameter of the alternate-space store
+ * macros below; it is taken from whatever `asi' is visible where the
+ * macro is expanded.
+ */
+#  define STBA(rd, rs1, rs2)           f3a(3, rd, 21, rs1, asi, rs2)
+#  define STHA(rd, rs1, rs2)           f3a(3, rd, 22, rs1, asi, rs2)
+#  if __WORDSIZE == 32
+#    define STA(rd, rs1, rs2)          f3a(3, rd, 20, rs1, asi, rs2)
+#    define STDA(rd, rs1, rs2)         f3a(3, rd, 23, rs1, asi, rs2)
+#  else
+#    define STWA(rd, rs1, rs2)         f3a(3, rd, 20, rs1, asi, rs2)
+#    define STXA(rd, rs1, rs2)         f3a(3, rd, 30, rs1, asi, rs2)
+#  endif
+#  define STC(rd, rs1, rs2)            f3r(3, rd, 52, rs1, rs2)
+#  define STCI(rd, rs1, imm)           f3i(3, rd, 52, rs1, imm)
+#  define STDC(rd, rs1, rs2)           f3r(3, rd, 55, rs1, rs2)
+#  define STDCI(rd, rs1, imm)          f3i(3, rd, 55, rs1, imm)
+#  define STCSR(rd, rs1, rs2)          f3r(3, rd, 53, rs1, rs2)
+#  define STCSRI(rd, rs1, imm)         f3i(3, rd, 53, rs1, imm)
+#  define STDCQ(rd, rs1, rs2)          f3r(3, rd, 54, rs1, rs2)
+#  define STDCQI(rd, rs1, imm)         f3i(3, rd, 54, rs1, imm)
+/*
+ * Atomic load-store and swap, plus the NOP and SETHI helpers.
+ * The I forms carry an immediate and therefore go through f3i (they
+ * used f3r).  The A forms use the base op3 plus 16, matching the other
+ * alternate-space macros in this file (LDSB 9 -> LDSBA 25, STB 5 ->
+ * STBA 21); the former values 21 and 23 collided with STBA and STDA.
+ */
+#  define LDSTUB(rs1, rs2, rd)         f3r(3, rd, 13, rs1, rs2)
+#  define LDSTUBI(rs1, imm, rd)                f3i(3, rd, 13, rs1, imm)
+#  define LDSTUBA(rs1, rs2, asi, rd)   f3a(3, rd, 29, rs1, asi, rs2)
+#  define SWAP(rs1, rs2, rd)           f3r(3, rd, 15, rs1, rs2)
+#  define SWAPI(rs1, imm, rd)          f3i(3, rd, 15, rs1, imm)
+#  define SWAPA(rs1, rs2, asi, rd)     f3a(3, rd, 31, rs1, asi, rs2)
+/* NOP is emitted as SETHI of 0 into register 0 */
+#  define NOP()                                SETHI(0, 0)
+/* HI/LO split a value at bit 10: the bits above it, and the low 10 bits */
+#  define HI(im)                       ((im) >> 10)
+#  define LO(im)                       ((im) & 0x3ff)
+#  define SETHI(im, rd)                        f2r(0, rd, 4, im)
+/*
+ * Logical and shift instructions (first emitter argument, op, is 2).
+ * Each cc-suffixed macro uses an op3 value 16 above its plain
+ * counterpart.  BTST/BTSTI are ANDcc/ANDIcc with destination register 0.
+ */
+#  define AND(rs1, rs2, rd)            f3r(2, rd, 1, rs1, rs2)
+#  define ANDI(rs1, imm, rd)           f3i(2, rd, 1, rs1, imm)
+#  define ANDcc(rs1, rs2, rd)          f3r(2, rd, 17, rs1, rs2)
+#  define ANDIcc(rs1, imm, rd)         f3i(2, rd, 17, rs1, imm)
+#  define BTST(rs1, rs2)               ANDcc(rs1, rs2, 0)
+#  define BTSTI(rs1, imm)              ANDIcc(rs1, imm, 0)
+#  define ANDN(rs1, rs2, rd)           f3r(2, rd, 5, rs1, rs2)
+#  define ANDNI(rs1, imm, rd)          f3i(2, rd, 5, rs1, imm)
+#  define ANDNcc(rs1, rs2, rd)         f3r(2, rd, 21, rs1, rs2)
+#  define ANDNIcc(rs1, imm, rd)                f3i(2, rd, 21, rs1, imm)
+#  define OR(rs1, rs2, rd)             f3r(2, rd, 2, rs1, rs2)
+#  define ORI(rs1, imm, rd)            f3i(2, rd, 2, rs1, imm)
+#  define ORcc(rs1, rs2, rd)           f3r(2, rd, 18, rs1, rs2)
+#  define ORIcc(rs1, imm, rd)          f3i(2, rd, 18, rs1, imm)
+#  define ORN(rs1, rs2, rd)            f3r(2, rd, 6, rs1, rs2)
+#  define ORNI(rs1, imm, rd)           f3i(2, rd, 6, rs1, imm)
+#  define ORNcc(rs1, rs2, rd)          f3r(2, rd, 22, rs1, rs2)
+#  define ORNIcc(rs1, imm, rd)         f3i(2, rd, 22, rs1, imm)
+#  define XOR(rs1, rs2, rd)            f3r(2, rd, 3, rs1, rs2)
+#  define XORI(rs1, imm, rd)           f3i(2, rd, 3, rs1, imm)
+#  define XORcc(rs1, rs2, rd)          f3r(2, rd, 19, rs1, rs2)
+#  define XORIcc(rs1, imm, rd)         f3i(2, rd, 19, rs1, imm)
+#  define XNOR(rs1, rs2, rd)           f3r(2, rd, 7, rs1, rs2)
+#  define XNORI(rs1, imm, rd)          f3i(2, rd, 7, rs1, imm)
+#  define XNORcc(rs1, rs2, rd)         f3r(2, rd, 23, rs1, rs2)
+#  define XNORIcc(rs1, imm, rd)                f3i(2, rd, 23, rs1, imm)
+#  define SLL(rs1, rs2, rd)            f3r(2, rd, 37, rs1, rs2)
+#  define SLLI(rs1, imm, rd)           f3i(2, rd, 37, rs1, imm)
+#  define SRL(rs1, rs2, rd)            f3r(2, rd, 38, rs1, rs2)
+#  define SRLI(rs1, imm, rd)           f3i(2, rd, 38, rs1, imm)
+#  define SRA(rs1, rs2, rd)            f3r(2, rd, 39, rs1, rs2)
+#  define SRAI(rs1, imm, rd)           f3i(2, rd, 39, rs1, imm)
+/* 64-bit builds add the X shifts: same op3 values as above, but emitted
+ * through f3rx / f3s instead of f3r / f3i */
+#  if __WORDSIZE == 64
+#    define SLLX(rs1, rs2, rd)         f3rx(2, rd, 37, rs1, rs2)
+#    define SLLXI(rs1, imm, rd)                f3s(2, rd, 37, rs1, imm)
+#    define SRLX(rs1, rs2, rd)         f3rx(2, rd, 38, rs1, rs2)
+#    define SRLXI(rs1, imm, rd)                f3s(2, rd, 38, rs1, imm)
+#    define SRAX(rs1, rs2, rd)         f3rx(2, rd, 39, rs1, rs2)
+#    define SRAXI(rs1, imm, rd)                f3s(2, rd, 39, rs1, imm)
+#  endif
+/*
+ * Add and subtract families (first emitter argument, op, is 2).
+ * NEG is SUB from register 0; CMP/CMPI are SUBcc/SUBIcc with
+ * destination register 0.
+ */
+#  define ADD(rs1, rs2, rd)            f3r(2, rd, 0, rs1, rs2)
+#  define ADDI(rs1, imm, rd)           f3i(2, rd, 0, rs1, imm)
+#  define ADDcc(rs1, rs2, rd)          f3r(2, rd, 16, rs1, rs2)
+#  define ADDIcc(rs1, imm, rd)         f3i(2, rd, 16, rs1, imm)
+#  define ADDX(rs1, rs2, rd)           f3r(2, rd, 8, rs1, rs2)
+#  define ADDXI(rs1, imm, rd)          f3i(2, rd, 8, rs1, imm)
+#  define ADDXcc(rs1, rs2, rd)         f3r(2, rd, 24, rs1, rs2)
+#  define ADDXIcc(rs1, imm, rd)                f3i(2, rd, 24, rs1, imm)
+#  define TADDcc(rs1, rs2, rd)         f3r(2, rd, 32, rs1, rs2)
+#  define TADDIcc(rs1, imm, rd)                f3i(2, rd, 32, rs1, imm)
+#  define TADDccTV(rs1, rs2, rd)       f3r(2, rd, 34, rs1, rs2)
+#  define TADDIccTV(rs1, imm, rd)      f3i(2, rd, 34, rs1, imm)
+#  define SUB(rs1, rs2, rd)            f3r(2, rd, 4, rs1, rs2)
+#  define NEG(rs1, rd)                 SUB(0, rs1, rd)
+#  define SUBI(rs1, imm, rd)           f3i(2, rd, 4, rs1, imm)
+#  define SUBcc(rs1, rs2, rd)          f3r(2, rd, 20, rs1, rs2)
+#  define SUBIcc(rs1, imm, rd)         f3i(2, rd, 20, rs1, imm)
+#  define CMP(rs1, rs2)                        SUBcc(rs1, rs2, 0)
+#  define CMPI(rs1, imm)               SUBIcc(rs1, imm, 0)
+#  define SUBX(rs1, rs2, rd)           f3r(2, rd, 12, rs1, rs2)
+#  define SUBXI(rs1, imm, rd)          f3i(2, rd, 12, rs1, imm)
+#  define SUBXcc(rs1, rs2, rd)         f3r(2, rd, 28, rs1, rs2)
+#  define SUBXIcc(rs1, imm, rd)                f3i(2, rd, 28, rs1, imm)
+#  define TSUBcc(rs1, rs2, rd)         f3r(2, rd, 33, rs1, rs2)
+/* immediate form of TSUBcc (op3 33); it was only available under the
+ * misspelled name TDADDIcc, which is kept as an alias */
+#  define TSUBIcc(rs1, imm, rd)                f3i(2, rd, 33, rs1, imm)
+#  define TDADDIcc(rs1, imm, rd)       TSUBIcc(rs1, imm, rd)
+#  define TSUBccTV(rs1, rs2, rd)       f3r(2, rd, 35, rs1, rs2)
+#  define TSUBIccTV(rs1, imm, rd)      f3i(2, rd, 35, rs1, imm)
+/*
+ * Multiply, divide, and SAVE/RESTORE (first emitter argument, op, is 2).
+ * The MULX/SDIVX/UDIVX forms are defined on 64-bit builds only.
+ */
+#  define MULScc(rs1, rs2, rd)         f3r(2, rd, 36, rs1, rs2)
+#  define MULSIcc(rs1, imm, rd)                f3i(2, rd, 36, rs1, imm)
+#  define UMUL(rs1, rs2, rd)           f3r(2, rd, 10, rs1, rs2)
+#  define UMULI(rs1, imm, rd)          f3i(2, rd, 10, rs1, imm)
+#  define SMUL(rs1, rs2, rd)           f3r(2, rd, 11, rs1, rs2)
+#  define SMULI(rs1, imm, rd)          f3i(2, rd, 11, rs1, imm)
+#  define UMULcc(rs1, rs2, rd)         f3r(2, rd, 26, rs1, rs2)
+#  define UMULIcc(rs1, imm, rd)                f3i(2, rd, 26, rs1, imm)
+#  define SMULcc(rs1, rs2, rd)         f3r(2, rd, 27, rs1, rs2)
+#  define SMULIcc(rs1, imm, rd)                f3i(2, rd, 27, rs1, imm)
+#  define UDIV(rs1, rs2, rd)           f3r(2, rd, 14, rs1, rs2)
+#  define UDIVI(rs1, imm, rd)          f3i(2, rd, 14, rs1, imm)
+#  define SDIV(rs1, rs2, rd)           f3r(2, rd, 15, rs1, rs2)
+#  define SDIVI(rs1, imm, rd)          f3i(2, rd, 15, rs1, imm)
+#  define UDIVcc(rs1, rs2, rd)         f3r(2, rd, 30, rs1, rs2)
+#  define UDIVIcc(rs1, imm, rd)                f3i(2, rd, 30, rs1, imm)
+#  define SDIVcc(rs1, rs2, rd)         f3r(2, rd, 31, rs1, rs2)
+#  define SDIVIcc(rs1, imm, rd)                f3i(2, rd, 31, rs1, imm)
+#  if __WORDSIZE == 64
+#    define MULX(rs1, rs2, rd)         f3r(2, rd, 9, rs1, rs2)
+#    define MULXI(rs1, imm, rd)                f3i(2, rd, 9, rs1, imm)
+#    define SDIVX(rs1, rs2, rd)                f3r(2, rd, 45, rs1, rs2)
+#    define SDIVXI(rs1, imm, rd)       f3i(2, rd, 45, rs1, imm)
+#    define UDIVX(rs1, rs2, rd)                f3r(2, rd, 13, rs1, rs2)
+#    define UDIVXI(rs1, imm, rd)       f3i(2, rd, 13, rs1, imm)
+#  endif
+#  define SAVE(rs1, rs2, rd)           f3r(2, rd, 60, rs1, rs2)
+#  define SAVEI(rs1, imm, rd)          f3i(2, rd, 60, rs1, imm)
+#  define RESTORE(rs1, rs2, rd)                f3r(2, rd, 61, rs1, rs2)
+#  define RESTOREI(rs1, imm, rd)       f3i(2, rd, 61, rs1, imm)
+/* Condition values passed as the cc argument of the B()/Ba() macros */
+#  define SPARC_BA                     8       /* always */
+#  define SPARC_BN                     0       /* never */
+#  define SPARC_BNE                    9       /* not equal - not Z */
+#  define SPARC_BNZ                    SPARC_BNE
+#  define SPARC_BE                     1       /* equal - Z */
+#  define SPARC_BZ                     SPARC_BE
+#  define SPARC_BG                     10      /* greater - not (Z or (N xor V)) */
+#  define SPARC_BLE                    2       /* less or equal - Z or (N xor V) */
+#  define SPARC_BGE                    11      /* greater or equal - not (N xor V) */
+#  define SPARC_BL                     3       /* less - N xor V */
+#  define SPARC_BGU                    12      /* greater unsigned - not (C or Z) */
+#  define SPARC_BLEU                   4       /* less or equal unsigned - C or Z */
+#  define SPARC_BCC                    13      /* carry clear - not C */
+#  define SPARC_BGEU                   SPARC_BCC
+#  define SPARC_BCS                    5       /* carry set - C */
+#  define SPARC_BLU                    SPARC_BCS
+#  define SPARC_BPOS                   14      /* positive - not N */
+#  define SPARC_BNEG                   6       /* negative - N */
+#  define SPARC_BVC                    15      /* overflow clear - not V */
+#  define SPARC_BVS                    7       /* overflow set - V */
+/* Preferred BPcc integer branch opcodes */
+#  if __WORDSIZE == 64
+#    define SPARC_BPA                  8       /* always - 1 */
+#    define SPARC_BPN                  0       /* never - 0 */
+#    define SPARC_BPNE                 9       /* not equal - not Z */
+#    define SPARC_BPE                  1       /* equal - Z */
+#    define SPARC_BPG                  10      /* greater - not (Z or (N xor V)) */
+#    define SPARC_BPLE                 2       /* less or equal - Z or (N xor V) */
+#    define SPARC_BPGE                 11      /* greater or equal - not (N xor V) */
+#    define SPARC_BPL                  3       /* less - N xor V */
+#    define SPARC_BPGU                 12      /* greater unsigned - not (C or Z) */
+#    define SPARC_BPLEU                        4       /* less or equal unsigned  - C or Z */
+#    define SPARC_BPCC                 13      /* carry clear (greater than or equal, unsigned) - not C */
+#    define SPARC_BPCS                 5       /* carry set (less than, unsigned) - C */
+#    define SPARC_BPPOS                        14      /* positive - not N */
+#    define SPARC_BPNEG                        6       /* negative - N */
+#    define SPARC_BPVC                 15      /* overflow clear - not V */
+#    define SPARC_BPVS                 7       /* overflow set - V */
+#  endif
+/*
+ * Integer condition branches.  B() passes 0 and Ba() passes 1 as the
+ * `a' argument of f2b; both use op2 value 2.  Every named macro below
+ * is B or Ba with a fixed SPARC_B* condition.
+ */
+#  define B(cc, imm)                   f2b(0, 0, cc, 2, imm)
+#  define Ba(cc, imm)                  f2b(0, 1, cc, 2, imm)
+#  define BA(imm)                      B(SPARC_BA, imm)
+#  define BAa(imm)                     Ba(SPARC_BA, imm)
+#  define BN(imm)                      B(SPARC_BN, imm)
+#  define BNa(imm)                     Ba(SPARC_BN, imm)
+#  define BNE(imm)                     B(SPARC_BNE, imm)
+#  define BNEa(imm)                    Ba(SPARC_BNE, imm)
+#  define BNZ(imm)                     BNE(imm)
+#  define BNZa(imm)                    BNEa(imm)
+#  define BE(imm)                      B(SPARC_BE, imm)
+#  define BEa(imm)                     Ba(SPARC_BE, imm)
+#  define BZ(imm)                      BE(imm)
+#  define BZa(imm)                     BEa(imm)
+#  define BG(imm)                      B(SPARC_BG, imm)
+#  define BGa(imm)                     Ba(SPARC_BG, imm)
+#  define BLE(imm)                     B(SPARC_BLE, imm)
+#  define BLEa(imm)                    Ba(SPARC_BLE, imm)
+#  define BGE(imm)                     B(SPARC_BGE, imm)
+#  define BGEa(imm)                    Ba(SPARC_BGE, imm)
+#  define BL(imm)                      B(SPARC_BL, imm)
+#  define BLa(imm)                     Ba(SPARC_BL, imm)
+#  define BGU(imm)                     B(SPARC_BGU, imm)
+#  define BGUa(imm)                    Ba(SPARC_BGU, imm)
+#  define BLEU(imm)                    B(SPARC_BLEU, imm)
+#  define BLEUa(imm)                   Ba(SPARC_BLEU, imm)
+#  define BCC(imm)                     B(SPARC_BCC, imm)
+#  define BCCa(imm)                    Ba(SPARC_BCC, imm)
+#  define BGEU(imm)                    BCC(imm)
+#  define BGEUa(imm)                   BCCa(imm)
+#  define BCS(imm)                     B(SPARC_BCS, imm)
+#  define BCSa(imm)                    Ba(SPARC_BCS, imm)
+#  define BLU(imm)                     BCS(imm)
+#  define BLUa(imm)                    BCSa(imm)
+#  define BPOS(imm)                    B(SPARC_BPOS, imm)
+#  define BPOSa(imm)                   Ba(SPARC_BPOS, imm)
+#  define BNEG(imm)                    B(SPARC_BNEG, imm)
+#  define BNEGa(imm)                   Ba(SPARC_BNEG, imm)
+#  define BVC(imm)                     B(SPARC_BVC, imm)
+#  define BVCa(imm)                    Ba(SPARC_BVC, imm)
+#  define BVS(imm)                     B(SPARC_BVS, imm)
+#  define BVSa(imm)                    Ba(SPARC_BVS, imm)
+/*
+ * BPcc branches, 64-bit builds only.  BP() and BPa() fix cc1 = 1,
+ * cc0 = 0 and p = 1 and differ only in the `a' argument of f2bp
+ * (0 and 1 respectively).
+ */
+#  if __WORDSIZE == 64
+/* the fourth parameter was declared as cc2 while the body used cc0 */
+#    define BPccap(cc,a,cc1, cc0,p,imm)        f2bp(0, a, cc, 1, cc1, cc0, p, imm)
+/* NOTE(review): p is not a parameter here; it is taken from whatever
+ * `p' is visible where BPap is expanded */
+#    define BPap(cc, imm)              f2bp(0, 1, cc, 1, 1, 0, p, imm)
+#    define BPa(cc, imm)               f2bp(0, 1, cc, 1, 1, 0, 1, imm)
+#    define BP(cc, imm)                        f2bp(0, 0, cc, 1, 1, 0, 1, imm)
+#    define BPA(imm)                   BP(SPARC_BPA, imm)
+#    define BPN(imm)                   BP(SPARC_BPN, imm)
+/* BPNE follows the BP<cond> naming of its siblings; the misspelled
+ * BNPE is kept as an alias */
+#    define BPNE(imm)                  BP(SPARC_BPNE, imm)
+#    define BNPE(imm)                  BPNE(imm)
+#    define BPE(imm)                   BP(SPARC_BPE, imm)
+#    define BPG(imm)                   BP(SPARC_BPG, imm)
+#    define BPLE(imm)                  BP(SPARC_BPLE, imm)
+#    define BPGE(imm)                  BP(SPARC_BPGE, imm)
+#    define BPL(imm)                   BP(SPARC_BPL, imm)
+#    define BPGU(imm)                  BP(SPARC_BPGU, imm)
+#    define BPLEU(imm)                 BP(SPARC_BPLEU, imm)
+#    define BPCC(imm)                  BP(SPARC_BPCC, imm)
+#    define BPCS(imm)                  BP(SPARC_BPCS, imm)
+#    define BPPOS(imm)                 BP(SPARC_BPPOS, imm)
+#    define BPNEG(imm)                 BP(SPARC_BPNEG, imm)
+#    define BPVC(imm)                  BP(SPARC_BPVC, imm)
+#    define BPVS(imm)                  BP(SPARC_BPVS, imm)
+#  endif
+/*
+ * Coprocessor condition branches.  The SPARC_CB* values are the cc
+ * argument of CB()/CBa(), which pass 0 and 1 respectively as the `a'
+ * argument of f2b and use op2 value 7.
+ */
+#  define SPARC_CBA                    8       /* always */
+#  define SPARC_CBN                    0       /* never */
+#  define SPARC_CB3                    7       /* 3 */
+#  define SPARC_CB2                    6       /* 2 */
+#  define SPARC_CB23                   5       /* 2 or 3 */
+#  define SPARC_CB1                    4       /* 1 */
+#  define SPARC_CB13                   3       /* 1 or 3 */
+#  define SPARC_CB12                   2       /* 1 or 2 */
+#  define SPARC_CB123                  1       /* 1 or 2 or 3 */
+#  define SPARC_CB0                    9       /* 0 */
+#  define SPARC_CB03                   10      /* 0 or 3 */
+#  define SPARC_CB02                   11      /* 0 or 2 */
+#  define SPARC_CB023                  12      /* 0 or 2 or 3 */
+#  define SPARC_CB01                   13      /* 0 or 1 */
+#  define SPARC_CB013                  14      /* 0 or 1 or 3 */
+#  define SPARC_CB012                  15      /* 0 or 1 or 2 */
+#  define CB(cc, imm)                  f2b(0, 0, cc, 7, imm)
+#  define CBa(cc, imm)                 f2b(0, 1, cc, 7, imm)
+#  define CBA(imm)                     CB(SPARC_CBA, imm)
+#  define CBAa(imm)                    CBa(SPARC_CBA, imm)
+#  define CBN(imm)                     CB(SPARC_CBN, imm)
+#  define CBNa(imm)                    CBa(SPARC_CBN, imm)
+#  define CB3(imm)                     CB(SPARC_CB3, imm)
+#  define CB3a(imm)                    CBa(SPARC_CB3, imm)
+#  define CB2(imm)                     CB(SPARC_CB2, imm)
+#  define CB2a(imm)                    CBa(SPARC_CB2, imm)
+#  define CB23(imm)                    CB(SPARC_CB23, imm)
+#  define CB23a(imm)                   CBa(SPARC_CB23, imm)
+#  define CB1(imm)                     CB(SPARC_CB1, imm)
+#  define CB1a(imm)                    CBa(SPARC_CB1, imm)
+#  define CB13(imm)                    CB(SPARC_CB13, imm)
+#  define CB13a(imm)                   CBa(SPARC_CB13, imm)
+#  define CB12(imm)                    CB(SPARC_CB12, imm)
+#  define CB12a(imm)                   CBa(SPARC_CB12, imm)
+#  define CB123(imm)                   CB(SPARC_CB123, imm)
+#  define CB123a(imm)                  CBa(SPARC_CB123, imm)
+#  define CB0(imm)                     CB(SPARC_CB0, imm)
+#  define CB0a(imm)                    CBa(SPARC_CB0, imm)
+#  define CB03(imm)                    CB(SPARC_CB03, imm)
+#  define CB03a(imm)                   CBa(SPARC_CB03, imm)
+#  define CB02(imm)                    CB(SPARC_CB02, imm)
+#  define CB02a(imm)                   CBa(SPARC_CB02, imm)
+/* was SPARC_CB103, a name that is not defined anywhere in this table */
+#  define CB023(imm)                   CB(SPARC_CB023, imm)
+#  define CB023a(imm)                  CBa(SPARC_CB023, imm)
+#  define CB01(imm)                    CB(SPARC_CB01, imm)
+#  define CB01a(imm)                   CBa(SPARC_CB01, imm)
+#  define CB013(imm)                   CB(SPARC_CB013, imm)
+#  define CB013a(imm)                  CBa(SPARC_CB013, imm)
+#  define CB012(imm)                   CB(SPARC_CB012, imm)
+#  define CB012a(imm)                  CBa(SPARC_CB012, imm)
+/*
+ * Calls, returns and indirect jumps.
+ * CALL(r0) is a JMPL with _O7_REGNO as rd, r0 as rs1 and register 0 as
+ * rs2.  RETL()/RET() are JMPLI with rd 0 and an immediate of 8 off
+ * _O7_REGNO and _I7_REGNO respectively.
+ */
+#  define CALLI(imm)                   f1(1, imm)
+#  define CALL(r0)                     JMPL(_O7_REGNO, r0, 0)
+#  define RETL()                       JMPLI(0, _O7_REGNO, 8)
+#  define RET()                                JMPLI(0, _I7_REGNO, 8)
+#  define JMPL(rd, rs1, rs2)           f3r(2, rd, 56, rs1, rs2)
+#  define JMPLI(rd, rs1, imm)          f3i(2, rd, 56, rs1, imm)
+#  define RETT(rs1, rs2)               f3r(2, 0, 57, rs1, rs2)
+#  define RETTI(rs1, imm)              f3i(2, 0, 57, rs1, imm)
+/*
+ * Trap-on-condition instructions.  The SPARC_T* values are the cc
+ * argument of T()/TI(); T passes 0 and TI passes 1 as the `i' argument
+ * of f3t.  The alias conditions (TNZ, TZ, TGEU, TLU) refer to the
+ * SPARC_T* names of this table; the values are unchanged.
+ */
+#  define SPARC_TA                     8       /* always */
+#  define SPARC_TN                     0       /* never */
+#  define SPARC_TNE                    9       /* not equal - not Z */
+#  define SPARC_TNZ                    SPARC_TNE
+#  define SPARC_TE                     1       /* equal - Z */
+#  define SPARC_TZ                     SPARC_TE
+#  define SPARC_TG                     10      /* greater - not (Z or (N xor V)) */
+#  define SPARC_TLE                    2       /* less or equal - Z or (N xor V) */
+#  define SPARC_TGE                    11      /* greater or equal - not (N xor V) */
+#  define SPARC_TL                     3       /* less - N xor V */
+#  define SPARC_TGU                    12      /* greater unsigned - not (C or Z) */
+#  define SPARC_TLEU                   4       /* less or equal unsigned - C or Z */
+#  define SPARC_TCC                    13      /* carry clear - not C */
+#  define SPARC_TGEU                   SPARC_TCC
+#  define SPARC_TCS                    5       /* carry set - C */
+#  define SPARC_TLU                    SPARC_TCS
+#  define SPARC_TPOS                   14      /* positive - not N */
+#  define SPARC_TNEG                   6       /* negative - N */
+#  define SPARC_TVC                    15      /* overflow clear - not V */
+#  define SPARC_TVS                    7       /* overflow set - V */
+#  define T(cc, rs1, rs2)              f3t(cc, rs1, 0, rs2)
+#  define TI(cc, rs1, imm)             f3t(cc, rs1, 1, imm)
+#  define TA(rs1, rs2)                 T(SPARC_TA, rs1, rs2)
+#  define TAI(rs1, imm)                        TI(SPARC_TA, rs1, imm)
+#  define TN(rs1, rs2)                 T(SPARC_TN, rs1, rs2)
+#  define TNI(rs1, imm)                        TI(SPARC_TN, rs1, imm)
+#  define TNE(rs1, rs2)                        T(SPARC_TNE, rs1, rs2)
+#  define TNEI(rs1, imm)               TI(SPARC_TNE, rs1, imm)
+#  define TNZ(rs1, rs2)                        TNE(rs1, rs2)
+#  define TNZI(rs1, imm)               TNEI(rs1, imm)
+#  define TE(rs1, rs2)                 T(SPARC_TE, rs1, rs2)
+#  define TEI(rs1, imm)                        TI(SPARC_TE, rs1, imm)
+#  define TZ(rs1, rs2)                 TE(rs1, rs2)
+#  define TZI(rs1, imm)                        TEI(rs1, imm)
+#  define TG(rs1, rs2)                 T(SPARC_TG, rs1, rs2)
+#  define TGI(rs1, imm)                        TI(SPARC_TG, rs1, imm)
+#  define TLE(rs1, rs2)                        T(SPARC_TLE, rs1, rs2)
+#  define TLEI(rs1, imm)               TI(SPARC_TLE, rs1, imm)
+#  define TGE(rs1, rs2)                        T(SPARC_TGE, rs1, rs2)
+#  define TGEI(rs1, imm)               TI(SPARC_TGE, rs1, imm)
+#  define TL(rs1, rs2)                 T(SPARC_TL, rs1, rs2)
+#  define TLI(rs1, imm)                        TI(SPARC_TL, rs1, imm)
+#  define TGU(rs1, rs2)                        T(SPARC_TGU, rs1, rs2)
+#  define TGUI(rs1, imm)               TI(SPARC_TGU, rs1, imm)
+#  define TLEU(rs1, rs2)               T(SPARC_TLEU, rs1, rs2)
+#  define TLEUI(rs1, imm)              TI(SPARC_TLEU, rs1, imm)
+#  define TCC(rs1, rs2)                        T(SPARC_TCC, rs1, rs2)
+#  define TCCI(rs1, imm)               TI(SPARC_TCC, rs1, imm)
+#  define TGEU(rs1, rs2)               TCC(rs1, rs2)
+#  define TGEUI(rs1, imm)              TCCI(rs1, imm)
+/* carry-set traps must use SPARC_TCS (5); they used SPARC_TCC (13),
+ * which made TCS/TLU behave as their carry-clear opposites */
+#  define TCS(rs1, rs2)                        T(SPARC_TCS, rs1, rs2)
+#  define TCSI(rs1, imm)               TI(SPARC_TCS, rs1, imm)
+#  define TLU(rs1, rs2)                        TCS(rs1, rs2)
+#  define TLUI(rs1, imm)               TCSI(rs1, imm)
+#  define TPOS(rs1, rs2)               T(SPARC_TPOS, rs1, rs2)
+#  define TPOSI(rs1, imm)              TI(SPARC_TPOS, rs1, imm)
+#  define TNEG(rs1, rs2)               T(SPARC_TNEG, rs1, rs2)
+#  define TNEGI(rs1, imm)              TI(SPARC_TNEG, rs1, imm)
+#  define TVC(rs1, rs2)                        T(SPARC_TVC, rs1, rs2)
+#  define TVCI(rs1, imm)               TI(SPARC_TVC, rs1, imm)
+#  define TVS(rs1, rs2)                        T(SPARC_TVS, rs1, rs2)
+#  define TVSI(rs1, imm)               TI(SPARC_TVS, rs1, imm)
+/*
+ * State-register reads and writes, STBAR, UNIMP and FLUSH.
+ * WRY/WRYI are the op3 48 write with rd fixed to 0; WRASR/WRASRI are
+ * the same encoding with a caller-supplied rd.
+ */
+#  define RDY(rd)                      f3r(2, rd, 40, 0, 0)
+#  define RDASR(rs1, rd)               f3r(2, rd, 40, rs1, 0)
+#  define RDPSR(rd)                    f3r(2, rd, 41, 0, 0)
+#  define RDWIM(rd)                    f3r(2, rd, 42, 0, 0)
+#  define RDTBR(rd)                    f3r(2, rd, 43, 0, 0)
+#  define WRY(rs1, rs2)                        f3r(2, 0, 48, rs1, rs2)
+#  define WRYI(rs1, imm)               f3i(2, 0, 48, rs1, imm)
+#  define WRASR(rs1, rs2, rd)          f3r(2, rd, 48, rs1, rs2)
+#  define WRASRI(rs1, imm, rd)         f3i(2, rd, 48, rs1, imm)
+#  define WRPSR(rs1, rs2, rd)          f3r(2, rd, 49, rs1, rs2)
+#  define WRPSRI(rs1, imm, rd)         f3i(2, rd, 49, rs1, imm)
+#  define WRWIM(rs1, rs2, rd)          f3r(2, rd, 50, rs1, rs2)
+#  define WRWIMI(rs1, imm, rd)         f3i(2, rd, 50, rs1, imm)
+#  define WRTBR(rs1, rs2, rd)          f3r(2, rd, 51, rs1, rs2)
+#  define WRTBRI(rs1, imm, rd)         f3i(2, rd, 51, rs1, imm)
+#  define STBAR()                      f3i(2, 0, 40, 15, 0)
+#  define UNIMP(imm)                   f2r(0, 0, 0, imm)
+#  define FLUSH(rs1, rs2)              f3r(2, 0, 59, rs1, rs2)
+/* the parameter was spelled `im' while the body used `imm' */
+#  define FLUSHI(rs1, imm)             f3i(2, 0, 59, rs1, imm)
+/* Padding and move primitives.  Each lower-case macro forwards to the
+ * _name() function declared right after it, passing the implicit _jit. */
+#  define nop(i0)               _nop(_jit, i0)
+static void _nop(jit_state_t*, jit_int32_t);
+
+#  define movr(r0, r1)          _movr(_jit, r0, r1)
+static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
+
+#  define movi(r0, i0)          _movi(_jit, r0, i0)
+static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+
+/* movi_p returns a jit_word_t (the other move helpers return nothing). */
+#  define movi_p(r0, i0)        _movi_p(_jit, r0, i0)
+static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
+/* Complement, negate, add and subtract.  Register forms that need a single
+ * instruction are plain macro aliases; immediate forms call a helper. */
+#  define comr(r0, r1)          XNOR(r1, 0, r0)
+#  define negr(r0, r1)          NEG(r1, r0)
+#  define addr(r0, r1, r2)      ADD(r1, r2, r0)
+#  define addi(r0, r1, i0)      _addi(_jit, r0, r1, i0)
+static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+/* Add with carry out (addc*) and with carry in/out (addx*): 32-bit builds
+ * alias the register forms to ADDcc/ADDXcc, 64-bit builds use helpers. */
+#  if __WORDSIZE == 32
+#    define addcr(r0, r1, r2)   ADDcc(r1, r2, r0)
+#  else
+#    define addcr(r0, r1, r2)   _addcr(_jit, r0, r1, r2)
+static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
+#  define addci(r0, r1, i0)     _addci(_jit, r0, r1, i0)
+static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 32
+#    define addxr(r0, r1, r2)   ADDXcc(r1, r2, r0)
+#  else
+#    define addxr(r0, r1, r2)   _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
+#  define addxi(r0, r1, i0)     _addxi(_jit, r0, r1, i0)
+static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+/* Subtraction mirrors the addition family above. */
+#  define subr(r0, r1, r2)      SUB(r1, r2, r0)
+#  define subi(r0, r1, i0)      _subi(_jit, r0, r1, i0)
+static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 32
+#    define subcr(r0, r1, r2)   SUBcc(r1, r2, r0)
+#  else
+#    define subcr(r0, r1, r2)   _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
+#  define subci(r0, r1, i0)     _subci(_jit, r0, r1, i0)
+static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 32
+#    define subxr(r0, r1, r2)   SUBXcc(r1, r2, r0)
+#  else
+#    define subxr(r0, r1, r2)   _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
+#  define subxi(r0, r1, i0)     _subxi(_jit, r0, r1, i0)
+static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+#  define rsbi(r0, r1, i0)      _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+/* Multiplication: mulr is UMUL on 32-bit builds and MULX on 64-bit ones. */
+#  if __WORDSIZE == 32
+#    define mulr(r0, r1, r2)            UMUL(r1, r2, r0)
+#  else
+#    define mulr(r0, r1, r2)            MULX(r1, r2, r0)
+#  endif
+#  define muli(r0, r1, i0)              _muli(_jit, r0, r1, i0)
+static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+/* Four-operand multiplies.  On 32-bit builds the signed and unsigned forms
+ * share one helper selected by a trailing flag (1 for qmulr/qmuli, 0 for
+ * the _u forms); 64-bit builds declare a separate helper for each. */
+#  if __WORDSIZE == 32
+#    define qmulr(r0, r1, r2, r3)       iqmulr(r0, r1, r2, r3, 1)
+#    define qmulr_u(r0, r1, r2, r3)     iqmulr(r0, r1, r2, r3, 0)
+#    define iqmulr(r0, r1, r2, r3, cc)  _iqmulr(_jit, r0, r1, r2, r3, cc)
+static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
+                    jit_int32_t, jit_int32_t, jit_bool_t);
+#    define qmuli(r0, r1, r2, i0)       iqmuli(r0, r1, r2, i0, 1)
+#    define qmuli_u(r0, r1, r2, i0)     iqmuli(r0, r1, r2, i0, 0)
+#    define iqmuli(r0, r1, r2, i0, cc)  _iqmuli(_jit, r0, r1, r2, i0, cc)
+static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
+                    jit_int32_t, jit_word_t, jit_bool_t);
+#  else
+#    define qmulr(r0, r1, r2, r3)       _qmulr(_jit, r0, r1, r2, r3)
+static void _qmulr(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t, jit_int32_t);
+#    define qmuli(r0, r1, r2, i0)       _qmuli(_jit, r0, r1, r2, i0)
+static void _qmuli(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t, jit_word_t);
+#    define qmulr_u(r0, r1, r2, r3)     _qmulr_u(_jit, r0, r1, r2, r3)
+static void _qmulr_u(jit_state_t*, jit_int32_t, jit_int32_t,
+                     jit_int32_t, jit_int32_t);
+#    define qmuli_u(r0, r1, r2, i0)     _qmuli_u(_jit, r0, r1, r2, i0)
+static void _qmuli_u(jit_state_t*, jit_int32_t, jit_int32_t,
+                     jit_int32_t, jit_word_t);
+#  endif
+/* Division and remainder.  Every form is a helper call; the _u suffix
+ * selects the unsigned variant. */
+#  define divr(r0, r1, r2)              _divr(_jit, r0, r1, r2)
+static void _divr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define divi(r0, r1, i0)              _divi(_jit, r0, r1, i0)
+static void _divi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define divr_u(r0, r1, r2)            _divr_u(_jit, r0, r1, r2)
+static void _divr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define divi_u(r0, r1, i0)            _divi_u(_jit, r0, r1, i0)
+static void _divi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+/* Four-operand divides share one helper per operand kind; the last
+ * argument is 1 for the plain forms and 0 for the _u forms. */
+#  define qdivr(r0, r1, r2, r3)         iqdivr(r0, r1, r2, r3, 1)
+#  define qdivr_u(r0, r1, r2, r3)       iqdivr(r0, r1, r2, r3, 0)
+#  define iqdivr(r0, r1, r2, r3, cc)    _iqdivr(_jit, r0, r1, r2, r3, cc)
+static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
+                    jit_int32_t, jit_int32_t, jit_bool_t);
+#  define qdivi(r0, r1, r2, i0)         iqdivi(r0, r1, r2, i0, 1)
+#  define qdivi_u(r0, r1, r2, i0)       iqdivi(r0, r1, r2, i0, 0)
+#  define iqdivi(r0, r1, r2, i0, cc)    _iqdivi(_jit, r0, r1, r2, i0, cc)
+static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
+                    jit_int32_t, jit_word_t, jit_bool_t);
+
+#  define remr(r0, r1, r2)              _remr(_jit, r0, r1, r2)
+static void _remr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define remi(r0, r1, i0)              _remi(_jit, r0, r1, i0)
+static void _remi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define remr_u(r0, r1, r2)            _remr_u(_jit, r0, r1, r2)
+static void _remr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define remi_u(r0, r1, i0)            _remi_u(_jit, r0, r1, i0)
+static void _remi_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+/* Bitwise logic: register forms alias AND/OR/XOR, immediate forms call
+ * a helper. */
+#  define andr(r0, r1, r2)              AND(r1, r2, r0)
+#  define andi(r0, r1, i0)              _andi(_jit, r0, r1, i0)
+static void _andi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define orr(r0, r1, r2)               OR(r1, r2, r0)
+#  define ori(r0, r1, i0)               _ori(_jit, r0, r1, i0)
+static void _ori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define xorr(r0, r1, r2)              XOR(r1, r2, r0)
+#  define xori(r0, r1, i0)              _xori(_jit, r0, r1, i0)
+static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+
+/* Shifts: 32-bit builds use SLL/SRA/SRL, 64-bit builds the X-suffixed
+ * instructions. */
+#  if __WORDSIZE == 32
+#    define lshr(r0, r1, r2)            SLL(r1, r2, r0)
+#    define lshi(r0, r1, i0)            SLLI(r1, i0, r0)
+#    define rshr(r0, r1, r2)            SRA(r1, r2, r0)
+#    define rshi(r0, r1, i0)            SRAI(r1, i0, r0)
+#    define rshr_u(r0, r1, r2)          SRL(r1, r2, r0)
+#    define rshi_u(r0, r1, i0)          SRLI(r1, i0, r0)
+#  else
+#    define lshr(r0, r1, r2)            SLLX(r1, r2, r0)
+#    define lshi(r0, r1, i0)            SLLXI(r1, i0, r0)
+#    define rshr(r0, r1, r2)            SRAX(r1, r2, r0)
+#    define rshi(r0, r1, i0)            SRAXI(r1, i0, r0)
+#    define rshr_u(r0, r1, r2)          SRLX(r1, r2, r0)
+#    define rshi_u(r0, r1, i0)          SRLXI(r1, i0, r0)
+#  endif
+/* Width extension and host-to-network conversion.  The htonr_* macros
+ * reduce to an extension or a plain move, i.e. no byte swap is emitted. */
+#  define htonr_us(r0, r1)              extr_us(r0, r1)
+#  define extr_c(r0, r1)                _extr_c(_jit, r0, r1)
+static void _extr_c(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define extr_uc(r0, r1)               andi(r0, r1, 0xff)
+#  define extr_s(r0, r1)                _extr_s(_jit, r0, r1)
+static void _extr_s(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define extr_us(r0, r1)               _extr_us(_jit, r0, r1)
+static void _extr_us(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0, r1)            movr(r0, r1)
+#  else
+/* The 32-bit extensions and htonr_ul are declared only on 64-bit builds. */
+#    define htonr_ui(r0, r1)            extr_ui(r0, r1)
+#    define htonr_ul(r0, r1)            movr(r0, r1)
+#    define extr_i(r0, r1)              _extr_i(_jit, r0, r1)
+static void _extr_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define extr_ui(r0, r1)             _extr_ui(_jit, r0, r1)
+static void _extr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+/* Comparisons.  cr() compares two registers and cw() a register with a
+ * word immediate; the first argument is the condition code to test. */
+#  define cr(cc, r0, r1, r2)            _cr(_jit, cc, r0, r1, r2)
+static void _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
+                jit_int32_t);
+#  define cw(cc, r0, r1, i0)            _cw(_jit, cc, r0, r1, i0)
+static void _cw(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
+                jit_word_t);
+
+/* 32-bit builds pass the SPARC_B* condition codes, 64-bit builds the
+ * SPARC_BP* ones. */
+#  if __WORDSIZE == 32
+#    define ltr(r0, r1, r2)             cr(SPARC_BL, r0, r1, r2)
+#    define lti(r0, r1, i0)             cw(SPARC_BL, r0, r1, i0)
+#    define ltr_u(r0, r1, r2)           cr(SPARC_BLU, r0, r1, r2)
+#    define lti_u(r0, r1, i0)           cw(SPARC_BLU, r0, r1, i0)
+#    define ler(r0, r1, r2)             cr(SPARC_BLE, r0, r1, r2)
+#    define lei(r0, r1, i0)             cw(SPARC_BLE, r0, r1, i0)
+#    define ler_u(r0, r1, r2)           cr(SPARC_BLEU, r0, r1, r2)
+#    define lei_u(r0, r1, i0)           cw(SPARC_BLEU, r0, r1, i0)
+#    define eqr(r0, r1, r2)             cr(SPARC_BE, r0, r1, r2)
+#    define eqi(r0, r1, i0)             cw(SPARC_BE, r0, r1, i0)
+#    define ger(r0, r1, r2)             cr(SPARC_BGE, r0, r1, r2)
+#    define gei(r0, r1, i0)             cw(SPARC_BGE, r0, r1, i0)
+#    define ger_u(r0, r1, r2)           cr(SPARC_BGEU, r0, r1, r2)
+#    define gei_u(r0, r1, i0)           cw(SPARC_BGEU, r0, r1, i0)
+#    define gtr(r0, r1, r2)             cr(SPARC_BG, r0, r1, r2)
+#    define gti(r0, r1, i0)             cw(SPARC_BG, r0, r1, i0)
+#    define gtr_u(r0, r1, r2)           cr(SPARC_BGU, r0, r1, r2)
+#    define gti_u(r0, r1, i0)           cw(SPARC_BGU, r0, r1, i0)
+#    define ner(r0, r1, r2)             cr(SPARC_BNE, r0, r1, r2)
+#    define nei(r0, r1, i0)             cw(SPARC_BNE, r0, r1, i0)
+#  else
+#    define ltr(r0, r1, r2)             cr(SPARC_BPL, r0, r1, r2)
+#    define lti(r0, r1, i0)             cw(SPARC_BPL, r0, r1, i0)
+#    define ltr_u(r0, r1, r2)           cr(SPARC_BPCS, r0, r1, r2)
+#    define lti_u(r0, r1, i0)           cw(SPARC_BPCS, r0, r1, i0)
+#    define ler(r0, r1, r2)             cr(SPARC_BPLE, r0, r1, r2)
+#    define lei(r0, r1, i0)             cw(SPARC_BPLE, r0, r1, i0)
+#    define ler_u(r0, r1, r2)           cr(SPARC_BPLEU, r0, r1, r2)
+#    define lei_u(r0, r1, i0)           cw(SPARC_BPLEU, r0, r1, i0)
+#    define eqr(r0, r1, r2)             cr(SPARC_BPE, r0, r1, r2)
+#    define eqi(r0, r1, i0)             cw(SPARC_BPE, r0, r1, i0)
+#    define ger(r0, r1, r2)             cr(SPARC_BPGE, r0, r1, r2)
+#    define gei(r0, r1, i0)             cw(SPARC_BPGE, r0, r1, i0)
+#    define ger_u(r0, r1, r2)           cr(SPARC_BPCC, r0, r1, r2)
+#    define gei_u(r0, r1, i0)           cw(SPARC_BPCC, r0, r1, i0)
+#    define gtr(r0, r1, r2)             cr(SPARC_BPG, r0, r1, r2)
+#    define gti(r0, r1, i0)             cw(SPARC_BPG, r0, r1, i0)
+#    define gtr_u(r0, r1, r2)           cr(SPARC_BPGU, r0, r1, r2)
+#    define gti_u(r0, r1, i0)           cw(SPARC_BPGU, r0, r1, i0)
+#    define ner(r0, r1, r2)             cr(SPARC_BPNE, r0, r1, r2)
+#    define nei(r0, r1, i0)             cw(SPARC_BPNE, r0, r1, i0)
+#  endif
+/* Loads from [r1] (ldr_*) and from an absolute address (ldi_*).  The
+ * register forms alias one load instruction with a zero second operand. */
+#  define ldr_c(r0, r1)                 LDSB(r1, 0, r0)
+#  define ldi_c(r0, i0)                 _ldi_c(_jit, r0, i0)
+static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_uc(r0, r1)                LDUB(r1, 0, r0)
+#  define ldi_uc(r0, i0)                _ldi_uc(_jit, r0, i0)
+static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_s(r0, r1)                 LDSH(r1, 0, r0)
+#  define ldi_s(r0, i0)                 _ldi_s(_jit, r0, i0)
+static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_us(r0, r1)                LDUH(r1, 0, r0)
+#  define ldi_us(r0, i0)                _ldi_us(_jit, r0, i0)
+static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
+/* The untyped ldr/ldi pick the word-sized load: _i on 32-bit builds,
+ * _l on 64-bit builds. */
+#  if __WORDSIZE == 32
+#    define ldr_i(r0, r1)               LD(r1, 0, r0)
+#    define ldr(u, v)                   ldr_i(u, v)
+#    define ldi(u, v)                   ldi_i(u, v)
+#  else
+#    define ldr_i(r0, r1)               LDSW(r1, 0, r0)
+#    define ldr_ui(r0, r1)              LDUW(r1, 0, r0)
+#    define ldr_l(r0, r1)               LDX(r1, 0, r0)
+#    define ldr(u, v)                   ldr_l(u, v)
+#    define ldi(u, v)                   ldi_l(u, v)
+#  endif
+#  define ldi_i(r0, i0)                 _ldi_i(_jit, r0, i0)
+static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldi_ui(r0, i0)              _ldi_ui(_jit, r0, i0)
+static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
+#    define ldi_l(r0, i0)               _ldi_l(_jit, r0, i0)
+static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
+#  endif
+
+/* Indexed loads: base register plus register (ldxr_*) or immediate
+ * (ldxi_*). */
+#  define ldxr_c(r0, r1, r2)            LDSB(r1, r2, r0)
+#  define ldxi_c(r0, r1, i0)            _ldxi_c(_jit, r0, r1, i0)
+static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_uc(r0, r1, r2)           LDUB(r1, r2, r0)
+#  define ldxi_uc(r0, r1, i0)           _ldxi_uc(_jit, r0, r1, i0)
+static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_s(r0, r1, r2)            LDSH(r1, r2, r0)
+#  define ldxi_s(r0, r1, i0)            _ldxi_s(_jit, r0, r1, i0)
+static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_us(r0, r1, r2)           LDUH(r1, r2, r0)
+#  define ldxi_us(r0, r1, i0)           _ldxi_us(_jit, r0, r1, i0)
+static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 32
+#    define ldxr(u, v, w)               ldxr_i(u, v, w)
+#    define ldxr_i(r0, r1, r2)          LD(r1, r2, r0)
+#    define ldxi(u, v, w)               ldxi_i(u, v, w)
+#  else
+#    define ldxr(u, v, w)               ldxr_l(u, v, w)
+#    define ldxr_i(r0, r1, r2)          LDSW(r1, r2, r0)
+#    define ldxr_ui(r0, r1, r2)         LDUW(r1, r2, r0)
+#    define ldxr_l(r0, r1, r2)          LDX(r1, r2, r0)
+#    define ldxi(u, v, w)               ldxi_l(u, v, w)
+#  endif
+#  define ldxi_i(r0, r1, i0)            _ldxi_i(_jit, r0, r1, i0)
+static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldxi_ui(r0, r1, i0)         _ldxi_ui(_jit, r0, r1, i0)
+static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#    define ldxi_l(r0, r1, i0)          _ldxi_l(_jit, r0, r1, i0)
+static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  endif
+/* Stores.  Note the argument order: the address operand comes first and
+ * the value register last, which the aliases reorder for the ST* macros. */
+#  define str_c(r0, r1)                 STB(r1, r0, 0)
+#  define sti_c(i0, r0)                 _sti_c(_jit, i0, r0)
+static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
+#  define str_s(r0, r1)                 STH(r1, r0, 0)
+#  define sti_s(i0, r0)                 _sti_s(_jit, i0, r0)
+static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
+/* The untyped str/sti pick the word-sized store for the build. */
+#  if __WORDSIZE == 32
+#    define str(u, v)                   str_i(u, v)
+#    define str_i(r0, r1)               STI(r1, r0, 0)
+#    define sti(u, v)                   sti_i(u, v)
+#  else
+#    define str(u, v)                   str_l(u, v)
+#    define str_i(r0, r1)               STW(r1, r0, 0)
+#    define str_l(r0, r1)               STX(r1, r0, 0)
+#    define sti(u, v)                   sti_l(u, v)
+#  endif
+#  define sti_i(i0, r0)                 _sti_i(_jit, i0, r0)
+static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
+#  if __WORDSIZE == 64
+#    define sti_l(i0, r0)               _sti_l(_jit, i0, r0)
+static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
+#  endif
+
+/* Indexed stores: register or immediate offset first, then base register,
+ * then the value register. */
+#  define stxr_c(r0, r1, r2)            STB(r2, r1, r0)
+#  define stxi_c(i0, r0, r1)            _stxi_c(_jit, i0, r0, r1)
+static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxr_s(r0, r1, r2)            STH(r2, r1, r0)
+#  define stxi_s(i0, r0, r1)            _stxi_s(_jit, i0, r0, r1)
+static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 32
+#    define stxr(u, v, w)               stxr_i(u, v, w)
+#    define stxr_i(r0, r1, r2)          ST(r2, r1, r0)
+#    define stxi(u, v, w)               stxi_i(u, v, w)
+#  else
+#    define stxr(u, v, w)               stxr_l(u, v, w)
+#    define stxr_i(r0, r1, r2)          STW(r2, r1, r0)
+#    define stxi(u, v, w)               stxi_l(u, v, w)
+#    define stxr_l(r0, r1, r2)          STX(r2, r1, r0)
+#  endif
+#  define stxi_i(i0, r0, r1)            _stxi_i(_jit, i0, r0, r1)
+static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 64
+#    define stxi_l(i0, r0, r1)          _stxi_l(_jit, i0, r0, r1)
+static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  endif
+/* Compare-and-branch.  br() takes two registers and bw() a register plus
+ * a word immediate; both take the condition code first and a jit_word_t
+ * i0 second, and both return a jit_word_t. */
+#  define br(cc, i0, r0, r1)            _br(_jit, cc, i0, r0, r1)
+static jit_word_t
+_br(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bw(cc, i0, r0, i1)            _bw(_jit, cc, i0, r0, i1)
+static jit_word_t
+_bw(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_word_t);
+
+/* As with the comparisons, 32-bit builds use the SPARC_B* condition
+ * codes and 64-bit builds the SPARC_BP* ones. */
+#  if __WORDSIZE == 32
+#    define bltr(i0, r0, r1)            br(SPARC_BL, i0, r0, r1)
+#    define blti(i0, r0, i1)            bw(SPARC_BL, i0, r0, i1)
+#    define bltr_u(i0, r0, r1)          br(SPARC_BLU, i0, r0, r1)
+#    define blti_u(i0, r0, i1)          bw(SPARC_BLU, i0, r0, i1)
+#    define bler(i0, r0, r1)            br(SPARC_BLE, i0, r0, r1)
+#    define blei(i0, r0, i1)            bw(SPARC_BLE, i0, r0, i1)
+#    define bler_u(i0, r0, r1)          br(SPARC_BLEU, i0, r0, r1)
+#    define blei_u(i0, r0, i1)          bw(SPARC_BLEU, i0, r0, i1)
+#    define beqr(i0, r0, r1)            br(SPARC_BE, i0, r0, r1)
+#    define beqi(i0, r0, i1)            bw(SPARC_BE, i0, r0, i1)
+#    define bger(i0, r0, r1)            br(SPARC_BGE, i0, r0, r1)
+#    define bgei(i0, r0, i1)            bw(SPARC_BGE, i0, r0, i1)
+#    define bger_u(i0, r0, r1)          br(SPARC_BGEU, i0, r0, r1)
+#    define bgei_u(i0, r0, i1)          bw(SPARC_BGEU, i0, r0, i1)
+#    define bgtr(i0, r0, r1)            br(SPARC_BG, i0, r0, r1)
+#    define bgti(i0, r0, i1)            bw(SPARC_BG, i0, r0, i1)
+#    define bgtr_u(i0, r0, r1)          br(SPARC_BGU, i0, r0, r1)
+#    define bgti_u(i0, r0, i1)          bw(SPARC_BGU, i0, r0, i1)
+#    define bner(i0, r0, r1)            br(SPARC_BNE, i0, r0, r1)
+#    define bnei(i0, r0, i1)            bw(SPARC_BNE, i0, r0, i1)
+#  else
+#    define bltr(i0, r0, r1)            br(SPARC_BPL, i0, r0, r1)
+#    define blti(i0, r0, i1)            bw(SPARC_BPL, i0, r0, i1)
+#    define bltr_u(i0, r0, r1)          br(SPARC_BPCS, i0, r0, r1)
+#    define blti_u(i0, r0, i1)          bw(SPARC_BPCS, i0, r0, i1)
+#    define bler(i0, r0, r1)            br(SPARC_BPLE, i0, r0, r1)
+#    define blei(i0, r0, i1)            bw(SPARC_BPLE, i0, r0, i1)
+#    define bler_u(i0, r0, r1)          br(SPARC_BPLEU, i0, r0, r1)
+#    define blei_u(i0, r0, i1)          bw(SPARC_BPLEU, i0, r0, i1)
+#    define beqr(i0, r0, r1)            br(SPARC_BPE, i0, r0, r1)
+#    define beqi(i0, r0, i1)            bw(SPARC_BPE, i0, r0, i1)
+#    define bger(i0, r0, r1)            br(SPARC_BPGE, i0, r0, r1)
+#    define bgei(i0, r0, i1)            bw(SPARC_BPGE, i0, r0, i1)
+#    define bger_u(i0, r0, r1)          br(SPARC_BPCC, i0, r0, r1)
+#    define bgei_u(i0, r0, i1)          bw(SPARC_BPCC, i0, r0, i1)
+#    define bgtr(i0, r0, r1)            br(SPARC_BPG, i0, r0, r1)
+#    define bgti(i0, r0, i1)            bw(SPARC_BPG, i0, r0, i1)
+#    define bgtr_u(i0, r0, r1)          br(SPARC_BPGU, i0, r0, r1)
+#    define bgti_u(i0, r0, i1)          bw(SPARC_BPGU, i0, r0, i1)
+#    define bner(i0, r0, r1)            br(SPARC_BPNE, i0, r0, r1)
+#    define bnei(i0, r0, i1)            bw(SPARC_BPNE, i0, r0, i1)
+#  endif
+/* Branches on add/subtract.  b_asr/b_asw take three flags ahead of the
+ * operands (jif, add, sgn); the wrappers below fix them as follows:
+ *   jif: 1 for the bo* forms, 0 for the bx* forms
+ *   add: 1 for *add*,         0 for *sub*
+ *   sgn: 1 for the plain forms, 0 for the _u forms */
+#  define b_asr(jif, add, sgn, i0, r0, r1) \
+       _b_asr(_jit, jif, add, sgn, i0, r0, r1)
+static jit_word_t
+_b_asr(jit_state_t*, jit_bool_t, jit_bool_t, jit_bool_t,
+       jit_word_t, jit_int32_t, jit_int32_t);
+#  define b_asw(jif, add, sgn, i0, r0, i1) \
+       _b_asw(_jit, jif, add, sgn, i0, r0, i1)
+static jit_word_t
+_b_asw(jit_state_t*, jit_bool_t, jit_bool_t, jit_bool_t,
+       jit_word_t, jit_int32_t, jit_word_t);
+#  define boaddr(i0, r0, r1)            b_asr(1, 1, 1, i0, r0, r1)
+#  define boaddi(i0, r0, i1)            b_asw(1, 1, 1, i0, r0, i1)
+#  define boaddr_u(i0, r0, r1)          b_asr(1, 1, 0, i0, r0, r1)
+#  define boaddi_u(i0, r0, i1)          b_asw(1, 1, 0, i0, r0, i1)
+#  define bxaddr(i0, r0, r1)            b_asr(0, 1, 1, i0, r0, r1)
+#  define bxaddi(i0, r0, i1)            b_asw(0, 1, 1, i0, r0, i1)
+#  define bxaddr_u(i0, r0, r1)          b_asr(0, 1, 0, i0, r0, r1)
+#  define bxaddi_u(i0, r0, i1)          b_asw(0, 1, 0, i0, r0, i1)
+#  define bosubr(i0, r0, r1)            b_asr(1, 0, 1, i0, r0, r1)
+#  define bosubi(i0, r0, i1)            b_asw(1, 0, 1, i0, r0, i1)
+#  define bosubr_u(i0, r0, r1)          b_asr(1, 0, 0, i0, r0, r1)
+#  define bosubi_u(i0, r0, i1)          b_asw(1, 0, 0, i0, r0, i1)
+#  define bxsubr(i0, r0, r1)            b_asr(0, 0, 1, i0, r0, r1)
+#  define bxsubi(i0, r0, i1)            b_asw(0, 0, 1, i0, r0, i1)
+#  define bxsubr_u(i0, r0, r1)          b_asr(0, 0, 0, i0, r0, r1)
+#  define bxsubi_u(i0, r0, i1)          b_asw(0, 0, 0, i0, r0, i1)
+
+/* Branches on a bit mask: the leading flag is 1 for bms* and 0 for bmc*. */
+#  define bm_r(set, i0, r0, r1)         _bm_r(_jit, set, i0, r0, r1)
+static jit_word_t
+_bm_r(jit_state_t*, jit_bool_t, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bm_w(set, i0, r0, i1)         _bm_w(_jit, set, i0, r0, i1)
+static jit_word_t
+_bm_w(jit_state_t*, jit_bool_t, jit_word_t, jit_int32_t, jit_word_t);
+#  define bmsr(i0, r0, r1)              bm_r(1, i0, r0, r1)
+#  define bmsi(i0, r0, i1)              bm_w(1, i0, r0, i1)
+#  define bmcr(i0, r0, r1)              bm_r(0, i0, r0, r1)
+#  define bmci(i0, r0, i1)              bm_w(0, i0, r0, i1)
+/* Jumps and calls.  The _p variants return a jit_word_t; the others
+ * return nothing. */
+#  define jmpr(r0)                      _jmpr(_jit, r0)
+static void _jmpr(jit_state_t*, jit_int32_t);
+#  define jmpi(i0)                      _jmpi(_jit, i0)
+static void _jmpi(jit_state_t*, jit_word_t);
+#  define jmpi_p(i0)                    _jmpi_p(_jit, i0)
+static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
+#  define callr(r0)                     _callr(_jit, r0)
+static void _callr(jit_state_t*, jit_int32_t);
+#  define calli(i0)                     _calli(_jit, i0)
+static void _calli(jit_state_t*, jit_word_t);
+#  define calli_p(i0)                   _calli_p(_jit, i0)
+static jit_word_t _calli_p(jit_state_t*, jit_word_t);
+
+/* Function prolog/epilog, varargs support and jump patching. */
+#  define prolog(node)                  _prolog(_jit, node)
+static void _prolog(jit_state_t*, jit_node_t*);
+#  define epilog(node)                  _epilog(_jit, node)
+static void _epilog(jit_state_t*, jit_node_t*);
+#  define vastart(r0)                   _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                 _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define patch_at(jump, label)         _patch_at(_jit, jump, label)
+static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
+#endif
+
+#if CODE
+/*
+ * Assemble an instruction word from op, rd, op2 and imm22 and pass it
+ * to ii().
+ *
+ * The asserts require op to fit in 2 bits, rd in 5 bits and op2 in
+ * 3 bits; imm22 must satisfy s22_p().
+ */
+static void
+_f2r(jit_state_t *_jit,
+     jit_int32_t op, jit_int32_t rd, jit_int32_t op2, jit_int32_t imm22)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op2 & 0xfffffff8));
+    assert(s22_p(imm22));
+    v.op.b    = op;
+    v.rd.b    = rd;
+    v.op2.b   = op2;
+    v.imm22.b = imm22;
+    ii(v.v);
+}
+
+/*
+ * Assemble a branch-style instruction word from op, the a flag, a
+ * condition code, op2 and a displacement, and pass it to ii().
+ *
+ * The asserts require op to fit in 2 bits, a in 1 bit, cond in 4 bits
+ * and op2 in 3 bits; disp22 must satisfy s22_p().
+ */
+static void
+_f2b(jit_state_t *_jit,
+     jit_int32_t op, jit_int32_t a, jit_int32_t cond, jit_int32_t op2,
+     jit_int32_t disp22)
+{
+    jit_instr_t                v;
+    assert(!(op   & 0xfffffffc));
+    assert(!(a    & 0xfffffffe));
+    assert(!(cond & 0xfffffff0));
+    assert(!(op2  & 0xfffffff8));
+    assert(s22_p(disp22));
+    v.op.b     = op;
+    v.a.b      = a;
+    v.cond.b   = cond;
+    v.op2.b    = op2;
+    v.disp22.b = disp22;
+    ii(v.v);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * 64-bit only: like _f2b, but with cc1, cc0 and p fields and a shorter
+ * displacement that must satisfy s19_p().
+ *
+ * op, a, cond and op2 are range-checked by the asserts; cc1, cc0 and p
+ * are stored without a check.
+ */
+static void
+_f2bp(jit_state_t *_jit,
+      jit_int32_t op, jit_int32_t a, jit_int32_t cond, jit_int32_t op2,
+      jit_int32_t cc1, jit_int32_t cc0, jit_int32_t p, jit_int32_t disp19)
+{
+    jit_instr_t                v;
+    assert(!(op   & 0xfffffffc));
+    assert(!(a    & 0xfffffffe));
+    assert(!(cond & 0xfffffff0));
+    assert(!(op2  & 0xfffffff8));
+    assert(s19_p(disp19));
+    v.op.b     = op;
+    v.a.b      = a;
+    v.cond.b   = cond;
+    v.op2.b    = op2;
+    v.cc1.b    = cc1;
+    v.cc0.b    = cc0;
+    v.p.b      = p;
+    v.disp19.b = disp19;
+    ii(v.v);
+}
+#  endif
+
+/*
+ * Assemble a three-register instruction word (op, rd, op3, rs1, rs2)
+ * and pass it to ii().  The i and asi fields are forced to 0.
+ *
+ * The asserts require op to fit in 2 bits, op3 in 6 bits and each
+ * register number in 5 bits.
+ */
+static void
+_f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+     jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b  = op;
+    v.rd.b  = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.i.b   = 0;
+    v.asi.b = 0;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * 64-bit only: same operands and range checks as _f3r, but the x field
+ * is set to 1 and asix (rather than asi) is cleared.  i stays 0.
+ */
+static void
+_f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b   = op;
+    v.rd.b   = rd;
+    v.op3.b  = op3;
+    v.rs1.b  = rs1;
+    v.i.b    = 0;
+    v.x.b    = 1;
+    v.asix.b = 0;
+    v.rs2.b  = rs2;
+    ii(v.v);
+}
+
+/*
+ * 64-bit only: assemble an instruction word whose last operand is an
+ * immediate shift count.  shim must fit in 6 bits (0..63); both the i
+ * and x fields are set to 1 and asis is cleared.
+ */
+static void
+_f3s(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t shim)
+{
+    jit_instr_t                v;
+    assert(!(op   & 0xfffffffc));
+    assert(!(rd   & 0xffffffe0));
+    assert(!(op3  & 0xffffffc0));
+    assert(!(rs1  & 0xffffffe0));
+    assert(!(shim & 0xffffffc0));
+    v.op.b   = op;
+    v.rd.b   = rd;
+    v.op3.b  = op3;
+    v.rs1.b  = rs1;
+    v.i.b    = 1;
+    v.x.b    = 1;
+    v.asis.b = 0;
+    v.shim.b = shim;
+    ii(v.v);
+}
+#  endif
+
+/*
+ * Assemble a register+immediate instruction word (op, rd, op3, rs1,
+ * simm13) and pass it to ii().  The i field is set to 1.
+ *
+ * simm13 must satisfy s13_p(); callers in this file test s13_p() first
+ * and fall back to a temporary register when the value does not fit.
+ */
+static void
+_f3i(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+     jit_int32_t op3, jit_int32_t rs1, jit_int32_t simm13)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(s13_p(simm13));
+    v.op.b     = op;
+    v.rd.b     = rd;
+    v.op3.b    = op3;
+    v.rs1.b    = rs1;
+    v.i.b      = 1;
+    v.simm13.b = simm13;
+    ii(v.v);
+}
+
+/*
+ * Assemble a trap-on-condition instruction word (op = 2, op3 = 58) and
+ * pass it to ii().
+ *
+ * cond     condition code, at most 4 bits; stored in the rd field
+ * rs1      first source register, at most 5 bits
+ * i        0: rs2_imm7 is a register number (at most 5 bits)
+ *          1: rs2_imm7 is an immediate that must satisfy s7_p()
+ *
+ * rs1 has to be stored explicitly: v starts out uninitialised, so any
+ * field that is not assigned would reach ii() with indeterminate bits.
+ */
+static void
+_f3t(jit_state_t *_jit, jit_int32_t cond,
+     jit_int32_t rs1, jit_int32_t i, jit_int32_t rs2_imm7)
+{
+    jit_instr_t                v;
+    assert(!(cond & 0xfffffff0));
+    assert(!(i    & 0xfffffffe));
+    assert(!(rs1 & 0xffffffe0));
+    v.op.b     = 2;
+    v.rd.b     = cond;
+    v.op3.b    = 58;
+    v.rs1.b    = rs1;
+    v.i.b      = i;
+    if (i) {
+       assert(s7_p(rs2_imm7));
+       v.res.b  = 0;
+       v.imm7.b = rs2_imm7;
+    }
+    else {
+       assert(!(rs2_imm7 & 0xffffffe0));
+       v.asi.b = 0;
+       v.rs2.b = rs2_imm7;
+    }
+    ii(v.v);
+}
+
+/*
+ * Same as _f3r but with a caller-supplied asi value instead of 0.
+ * asi must fit in 8 bits; the i field is 0.
+ */
+static void
+_f3a(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+     jit_int32_t op3, jit_int32_t rs1, jit_int32_t asi, jit_int32_t rs2)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(asi & 0xffffff00));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b    = op;
+    v.rd.b    = rd;
+    v.op3.b   = op3;
+    v.rs1.b   = rs1;
+    v.i.b     = 0;
+    v.asi.b   = asi;
+    v.rs2.b   = rs2;
+    ii(v.v);
+}
+
+/*
+ * Assemble an instruction word that holds only op (2 bits) and a
+ * displacement that must satisfy s30_p(), then pass it to ii().
+ */
+static void
+_f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(s30_p(disp30));
+    v.op.b     = op;
+    v.disp30.b = disp30;
+    ii(v.v);
+}
+
+/*
+ * Emit one NOP() for every 4 units of i0.  The trailing assert fails
+ * when i0 is negative or not a multiple of 4.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    while (i0 > 0) {
+        NOP();
+        i0 -= 4;
+    }
+    assert(i0 == 0);
+}
+
+/*
+ * Copy r1 into r0 with ORI(r1, 0, r0).  Nothing is emitted when source
+ * and destination are the same register.
+ */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    ORI(r1, 0, r0);
+}
+
+/*
+ * Load the constant i0 into r0.
+ *
+ *  - Values accepted by s13_p() take a single ORI.
+ *  - On 64-bit builds, a value with any of the upper 32 bits set is
+ *    built in two halves: the upper half goes into a temporary, the
+ *    lower half into r0, then the temporary is shifted left by 32 and
+ *    OR-ed into r0.
+ *  - Everything else uses SETHI, followed by ORI only when LO(i0) is
+ *    non-zero.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (s13_p(i0)) {
+        ORI(0, i0, r0);
+        return;
+    }
+#  if __WORDSIZE == 64
+    if (i0 & 0xffffffff00000000) {
+        jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), (i0 >> 32) & 0xffffffff);
+        movi(r0, i0 & 0xffffffff);
+        lshi(rn(tmp), rn(tmp), 32);
+        OR(rn(tmp), r0, r0);
+        jit_unget_reg(tmp);
+        return;
+    }
+#  endif
+    SETHI(HI((int)i0), r0);
+    if (LO(i0))
+        ORI(r0, LO(i0), r0);
+}
+
+/*
+ * Load i0 into r0 with a fixed instruction sequence and return the
+ * value _jit->pc.w had on entry.
+ *
+ * Unlike _movi, no short form is chosen: SETHI and ORI are always
+ * emitted, and on 64-bit builds the upper half is always built in a
+ * temporary and merged in.  Presumably this keeps the sequence a fixed
+ * size so it can be patched later -- TODO confirm against patch_at.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          start;
+#  if __WORDSIZE == 64
+    jit_int32_t         tmp;
+    jit_word_t          upper;
+    /* Record the position before a register is requested. */
+    start = _jit->pc.w;
+    tmp = jit_get_reg(jit_class_gpr);
+    SETHI(HI((int)i0), r0);
+    ORI(r0, LO(i0), r0);
+    upper = (int)(i0 >> 32);
+    SETHI(HI(upper), rn(tmp));
+    ORI(rn(tmp), LO(upper), rn(tmp));
+    SLLXI(rn(tmp), 32, rn(tmp));
+    OR(rn(tmp), r0, r0);
+    jit_unget_reg(tmp);
+#  else
+    start = _jit->pc.w;
+    SETHI(HI(i0), r0);
+    ORI(r0, LO(i0), r0);
+#  endif
+    return (start);
+}
+
+/*
+ * r0 = r1 + i0.  Uses ADDI when s13_p(i0) holds; otherwise the constant
+ * is loaded into a temporary register and added with addr().
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+    if (s13_p(i0)) {
+        ADDI(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * 64-bit only: r0 = r1 + r2, recording the carry in the jit_carry
+ * register via ltr_u(carry, sum, r1).  jit_carry is allocated on first
+ * use if it is still _NOREG.
+ *
+ * When r0 aliases r1 the sum is built in a temporary so that r1 is
+ * still intact for the comparison.
+ */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         tmp;
+    if (jit_carry == _NOREG)
+        jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+        addr(r0, r1, r2);
+        ltr_u(rn(jit_carry), r0, r1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r1, r2);
+    ltr_u(rn(jit_carry), rn(tmp), r1);
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/*
+ * r0 = r1 + i0 with carry out.
+ *
+ * 32-bit: ADDIcc when s13_p(i0) holds, otherwise the constant goes
+ * through a temporary register and addcr().
+ * 64-bit: same scheme as _addcr, with addi() doing the addition and the
+ * carry recorded in jit_carry via ltr_u().
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+#  if __WORDSIZE == 32
+    if (s13_p(i0)) {
+        ADDIcc(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+#  else
+    if (jit_carry == _NOREG)
+        jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+        addi(r0, r1, i0);
+        ltr_u(rn(jit_carry), r0, r1);
+        return;
+    }
+    /* r0 aliases r1: keep r1 intact until the carry has been derived. */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ltr_u(rn(jit_carry), rn(tmp), r1);
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/*
+ * 64-bit only: r0 = r1 + r2 + incoming carry.
+ *
+ * The incoming carry is copied to a temporary first, because the
+ * addcr() that follows rewrites jit_carry.  A second addcr() then adds
+ * the saved carry to the partial sum.  Requires jit_carry to have been
+ * allocated already.
+ */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         saved;
+    assert(jit_carry != _NOREG);
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(saved));
+    jit_unget_reg(saved);
+}
+#  endif
+
+/*
+ * r0 = r1 + i0 + incoming carry.
+ *
+ * 32-bit: ADDXIcc when s13_p(i0) holds, otherwise the constant goes
+ * through a temporary register and addxr().
+ * 64-bit: the incoming carry is saved in a temporary, addci() adds the
+ * immediate, and addcr() then adds the saved carry.  Requires jit_carry
+ * to have been allocated already.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+#  if __WORDSIZE == 32
+    if (s13_p(i0)) {
+        ADDXIcc(r1, i0, r0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+#  else
+    assert(jit_carry != _NOREG);
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+#  endif
+}
+
+/* Emit r0 = r1 - i0; the immediate form SUBI is used when s13_p(i0)
+ * holds, otherwise i0 is first loaded in a temporary register. */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       subr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SUBI(r1, i0, r0);
+}
+
+#  if __WORDSIZE == 64
+/* Emit r0 = r1 - r2 and leave the borrow (r1 < difference, unsigned)
+ * in the jit_carry register, which is allocated here on first use. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    /* r0 aliases r1: compute in a temporary so that r1 is still
+     * available for the borrow comparison */
+    tmp = jit_get_reg(jit_class_gpr);
+    subr(rn(tmp), r1, r2);
+    ltr_u(rn(jit_carry), r1, rn(tmp));
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/* Emit r0 = r1 - i0 while producing a borrow.
+ * 32 bit: SUBIcc is used when s13_p(i0) holds, otherwise i0 is loaded
+ * in a temporary register and subcr() does the work.
+ * 64 bit: the subtraction is emitted as an add of -i0 and the borrow is
+ * computed explicitly in the jit_carry register. */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+#  if __WORDSIZE == 32
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       subcr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SUBIcc(r1, i0, r0);
+#  else
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, -i0);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    /* destination aliases the source: go through a temporary */
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, -i0);
+    ltr_u(rn(jit_carry), r1, rn(tmp));
+    movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/* Emit r0 = r1 - r2 - borrow, where the incoming borrow is the value
+ * held in the jit_carry register (it must already have been allocated). */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                bin;
+    assert(jit_carry != _NOREG);
+    /* subcr() rewrites jit_carry, so keep a copy of the incoming borrow */
+    bin = jit_get_reg(jit_class_gpr);
+    movr(rn(bin), rn(jit_carry));
+    subcr(r0, r1, r2);
+    /* NOTE(review): this second subcr() replaces the borrow produced by
+     * the first one -- confirm this is acceptable for chained subtracts */
+    subcr(r0, r0, rn(bin));
+    jit_unget_reg(bin);
+}
+#endif
+
+/* Emit r0 = r1 - i0 - borrow.
+ * 32 bit: SUBXIcc is used when s13_p(i0) holds, otherwise i0 is loaded
+ * in a temporary register and subxr() is used.
+ * 64 bit: the incoming borrow is read from the jit_carry register. */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+#  if __WORDSIZE == 32
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       subxr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SUBXIcc(r1, i0, r0);
+#  else
+    assert(jit_carry != _NOREG);
+    /* subci()/subcr() rewrite jit_carry: save the incoming value */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+#  endif
+}
+
+/* Reverse subtract: r0 = i0 - r1, emitted as r0 = r1 - i0 followed by
+ * a negation of r0. */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);
+    negr(r0, r0);
+}
+
+/* Emit r0 = r1 * i0; the immediate multiply (UMULI on 32 bit, MULXI on
+ * 64 bit) is used when s13_p(i0) holds, otherwise i0 is first loaded in
+ * a temporary register and mulr() is used. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       mulr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+#  if __WORDSIZE == 32
+    UMULI(r1, i0, r0);
+#  else
+    MULXI(r1, i0, r0);
+#  endif
+}
+
+#  if __WORDSIZE == 32
+/* 32 bit widening multiply of r2 by r3: SMUL (sign != 0) or UMUL writes
+ * r0, then RDY fills r1 (presumably the high word of the product, read
+ * from the Y register). */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    if (!sign)
+       UMUL(r2, r3, r0);
+    else
+       SMUL(r2, r3, r0);
+    RDY(r1);
+}
+
+/* Immediate variant of iqmulr(): multiplies r2 by i0, writing r0 and
+ * then r1 via RDY.  The immediate instruction forms are used when
+ * s13_p(i0) holds, otherwise i0 is loaded in a temporary register. */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       iqmulr(r0, r1, r2, rn(tmp), sign);
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (!sign)
+       UMULI(r2, i0, r0);
+    else
+       SMULI(r2, i0, r0);
+    RDY(r1);
+}
+
+#  else
+/* Runtime helper called from generated code: signed multiply of two
+ * words with a 128 bit result. */
+static __int128_t __llmul(jit_word_t a, jit_word_t b)
+{
+    __int128_t         wide = a;
+    wide *= b;
+    return wide;
+}
+
+/* Setup shared by the 64 bit qmul* emitters before calling the multiply
+ * helper: reserves the named registers _O0 and _O1 and stores _G2, _G3
+ * and _G4 in frame slots at BIAS(-8), BIAS(-16) and BIAS(-24) from
+ * _FP_REGNO, skipping any of them that is one of the result registers.
+ * Takes no arguments: r0 and r1 are the variables of the enclosing
+ * function. */
+#  define QMUL_PROLOG()                                                \
+    do {                                                       \
+       (void)jit_get_reg(_O0|jit_class_gpr|jit_class_named);   \
+       (void)jit_get_reg(_O1|jit_class_gpr|jit_class_named);   \
+       if (r0 != _G2_REGNO && r1 != _G2_REGNO)                 \
+           stxi(BIAS(-8), _FP_REGNO, _G2_REGNO);               \
+       if (r0 != _G3_REGNO && r1 != _G3_REGNO)                 \
+           stxi(BIAS(-16), _FP_REGNO, _G3_REGNO);              \
+       if (r0 != _G4_REGNO && r1 != _G4_REGNO)                 \
+           stxi(BIAS(-24), _FP_REGNO, _G4_REGNO);              \
+    } while (0)
+
+/* Counterpart of QMUL_PROLOG(): reloads _G2, _G3 and _G4 from the same
+ * frame slots (again skipping any that is a result register) and
+ * releases _O0 and _O1.  r0 and r1 are the variables of the enclosing
+ * function. */
+#  define QMUL_EPILOG()                                                \
+    do {                                                       \
+       if (r0 != _G2_REGNO && r1 != _G2_REGNO)                 \
+           ldxi(_G2_REGNO, _FP_REGNO, BIAS(-8));               \
+       if (r0 != _G3_REGNO && r1 != _G3_REGNO)                 \
+           ldxi(_G3_REGNO, _FP_REGNO, BIAS(-16));              \
+       if (r0 != _G4_REGNO && r1 != _G4_REGNO)                 \
+           ldxi(_G4_REGNO, _FP_REGNO, BIAS(-24));              \
+       (void)jit_unget_reg(_O0);                               \
+       (void)jit_unget_reg(_O1);                               \
+    } while (0)
+
+/* 64 bit signed widening multiply of r2 by r3, implemented as a call to
+ * the __llmul() helper.  After the call r0 is copied from _O1_REGNO and
+ * r1 from _O0_REGNO (presumably the low and high halves of the 128 bit
+ * result -- confirm against the ABI). */
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3)
+{
+    QMUL_PROLOG();
+    /* helper arguments */
+    movr(_O0_REGNO, r3);
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__llmul);
+    /* helper result */
+    movr(r0, _O1_REGNO);
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+/* 64 bit signed widening multiply of r2 by the immediate i0,
+ * implemented as a call to the __llmul() helper.  After the call r0 is
+ * copied from _O1_REGNO and r1 from _O0_REGNO (presumably the low and
+ * high halves of the 128 bit result -- confirm against the ABI). */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0)
+{
+    QMUL_PROLOG();
+    /* helper arguments */
+    movi(_O0_REGNO, i0);
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__llmul);
+    /* helper result */
+    movr(r0, _O1_REGNO);
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+/* Runtime helper called from generated code: unsigned multiply of two
+ * words with a 128 bit result. */
+static __uint128_t __ullmul(jit_uword_t a, jit_uword_t b)
+{
+    __uint128_t                wide = a;
+    wide *= b;
+    return wide;
+}
+
+/* 64 bit unsigned widening multiply of r2 by r3, implemented as a call
+ * to the __ullmul() helper.  After the call r0 is copied from _O1_REGNO
+ * and r1 from _O0_REGNO (presumably the low and high halves of the
+ * 128 bit result -- confirm against the ABI). */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_int32_t r3)
+{
+    QMUL_PROLOG();
+    /* helper arguments */
+    movr(_O0_REGNO, r3);
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__ullmul);
+    /* helper result */
+    movr(r0, _O1_REGNO);
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+/* 64 bit unsigned widening multiply of r2 by the immediate i0,
+ * implemented as a call to the __ullmul() helper.  After the call r0 is
+ * copied from _O1_REGNO and r1 from _O0_REGNO (presumably the low and
+ * high halves of the 128 bit result -- confirm against the ABI). */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_word_t i0)
+{
+    QMUL_PROLOG();
+    /* helper arguments */
+    movi(_O0_REGNO, i0);
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__ullmul);
+    /* helper result */
+    movr(r0, _O1_REGNO);
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+#  endif
+
+/* Emit the signed division r0 = r1 / r2.
+ * 32 bit: before SDIV, r1 shifted right by 31 is written with WRY
+ * (presumably the sign extension of the dividend into the Y register).
+ * 64 bit: a single SDIVX. */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#  if __WORDSIZE == 32
+    jit_int32_t                ext;
+    ext = jit_get_reg(jit_class_gpr);
+    rshi(rn(ext), r1, 31);
+    WRY(rn(ext), 0);
+    SDIV(r1, r2, r0);
+    jit_unget_reg(ext);
+#  else
+    SDIVX(r1, r2, r0);
+#  endif
+}
+
+/* Emit the signed division r0 = r1 / i0.  The immediate instruction
+ * form is used when s13_p(i0) holds, otherwise i0 is loaded in a
+ * temporary register and divr() is used.  On 32 bit the temporary is
+ * always allocated, as the immediate path needs it for the WRY setup. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+#  if __WORDSIZE == 32
+    tmp = jit_get_reg(jit_class_gpr);
+    if (s13_p(i0)) {
+       rshi(rn(tmp), r1, 31);
+       WRY(rn(tmp), 0);
+       SDIVI(r1, i0, r0);
+    }
+    else {
+       movi(rn(tmp), i0);
+       divr(r0, r1, rn(tmp));
+    }
+    jit_unget_reg(tmp);
+#  else
+    if (s13_p(i0)) {
+       SDIVXI(r1, i0, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       divr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+#  endif
+}
+
+/* Emit the unsigned division r0 = r1 / r2.
+ * 32 bit: WRYI(0, 0) is emitted before UDIV (presumably clearing the Y
+ * register, i.e. the upper half of the dividend).
+ * 64 bit: a single UDIVX. */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#  if __WORDSIZE == 32
+    WRYI(0, 0);
+    UDIV(r1, r2, r0);
+#  else
+    UDIVX(r1, r2, r0);
+#  endif
+}
+
+/* Emit the unsigned division r0 = r1 / i0.  The immediate instruction
+ * form is used when s13_p(i0) holds, otherwise i0 is loaded in a
+ * temporary register and divr_u() is used. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       divr_u(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+#  if __WORDSIZE == 32
+    WRYI(0, 0);
+    UDIVI(r1, i0, r0);
+#  else
+    UDIVXI(r1, i0, r0);
+#  endif
+}
+
+/* Emit a combined divide: r0 = r2 / r3 and r1 = r2 - r3 * r0 (the
+ * remainder); `sign' selects signed or unsigned division.  When a
+ * result register aliases one of the operands the value is computed in
+ * a temporary register and moved to its destination at the end. */
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                qtmp, quo;
+    jit_int32_t                rtmp, rem;
+
+    /* pick working registers that do not alias the operands */
+    quo = r0;
+    if (r0 == r2 || r0 == r3) {
+       qtmp = jit_get_reg(jit_class_gpr);
+       quo = rn(qtmp);
+    }
+    rem = r1;
+    if (r1 == r2 || r1 == r3) {
+       rtmp = jit_get_reg(jit_class_gpr);
+       rem = rn(rtmp);
+    }
+
+    if (!sign)
+       divr_u(quo, r2, r3);
+    else
+       divr(quo, r2, r3);
+    /* remainder = dividend - divisor * quotient */
+    mulr(rem, r3, quo);
+    subr(rem, r2, rem);
+
+    /* move results out of the temporaries, if any were used */
+    if (quo != r0) {
+       movr(r0, quo);
+       jit_unget_reg(qtmp);
+    }
+    if (rem != r1) {
+       movr(r1, rem);
+       jit_unget_reg(rtmp);
+    }
+}
+
+/* Immediate variant of iqdivr(): the divisor i0 is always loaded in a
+ * temporary register first. */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                divisor;
+    divisor = jit_get_reg(jit_class_gpr);
+    movi(rn(divisor), i0);
+    iqdivr(r0, r1, r2, rn(divisor), sign);
+    jit_unget_reg(divisor);
+}
+
+/* Emit the signed remainder r0 = r1 - r2 * (r1 / r2).  A temporary
+ * register holds the intermediate values when r0 aliases an operand. */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r0 != r1 && r0 != r2) {
+       /* no aliasing: r0 can be used as scratch */
+       divr(r0, r1, r2);
+       mulr(r0, r2, r0);
+       subr(r0, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    divr(rn(tmp), r1, r2);
+    mulr(rn(tmp), r2, rn(tmp));
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of remr(): the divisor i0 is always loaded in a
+ * temporary register first. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                divisor;
+    divisor = jit_get_reg(jit_class_gpr);
+    movi(rn(divisor), i0);
+    remr(r0, r1, rn(divisor));
+    jit_unget_reg(divisor);
+}
+
+/* Emit the unsigned remainder r0 = r1 - r2 * (r1 / r2).  A temporary
+ * register holds the intermediate values when r0 aliases an operand. */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r0 != r1 && r0 != r2) {
+       /* no aliasing: r0 can be used as scratch */
+       divr_u(r0, r1, r2);
+       mulr(r0, r2, r0);
+       subr(r0, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    divr_u(rn(tmp), r1, r2);
+    mulr(rn(tmp), r2, rn(tmp));
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of remr_u(): the divisor i0 is always loaded in a
+ * temporary register first. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                divisor;
+    divisor = jit_get_reg(jit_class_gpr);
+    movi(rn(divisor), i0);
+    remr_u(r0, r1, rn(divisor));
+    jit_unget_reg(divisor);
+}
+
+/* Emit r0 = r1 & i0; the immediate form ANDI is used when s13_p(i0)
+ * holds, otherwise i0 is first loaded in a temporary register. */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       andr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    ANDI(r1, i0, r0);
+}
+
+/* Emit r0 = r1 | i0; the immediate form ORI is used when s13_p(i0)
+ * holds, otherwise i0 is first loaded in a temporary register. */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       orr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    ORI(r1, i0, r0);
+}
+
+/* Emit r0 = r1 ^ i0; the immediate form XORI is used when s13_p(i0)
+ * holds, otherwise i0 is first loaded in a temporary register. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       xorr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    XORI(r1, i0, r0);
+}
+
+/* Sign extend the low 8 bits of r1 into r0: shift left so that they
+ * become the top bits of the word, then shift right with rshi(). */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = __WORDSIZE - 8;
+    lshi(r0, r1, shift);
+    rshi(r0, r0, shift);
+}
+
+/* Sign extend the low 16 bits of r1 into r0: shift left so that they
+ * become the top bits of the word, then shift right with rshi(). */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = __WORDSIZE - 16;
+    lshi(r0, r1, shift);
+    rshi(r0, r0, shift);
+}
+
+/* Zero extend the low 16 bits of r1 into r0: shift left so that they
+ * become the top bits of the word, then shift right with rshi_u(). */
+static void
+_extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = __WORDSIZE - 16;
+    lshi(r0, r1, shift);
+    rshi_u(r0, r0, shift);
+}
+
+#if __WORDSIZE == 64
+/* Sign extend the low 32 bits of r1 into r0: shift left so that they
+ * become the top bits of the word, then shift right with rshi(). */
+static void
+_extr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = __WORDSIZE - 32;
+    lshi(r0, r1, shift);
+    rshi(r0, r0, shift);
+}
+
+/* Zero extend the low 32 bits of r1 into r0: shift left so that they
+ * become the top bits of the word, then shift right with rshi_u(). */
+static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    const jit_int32_t  shift = __WORDSIZE - 32;
+    lshi(r0, r1, shift);
+    rshi_u(r0, r0, shift);
+}
+#endif
+
+/* Materialize the result of comparing r1 with r2 under condition cc as
+ * 0 or 1 in r0.
+ * NOTE(review): this looks like the annulled-branch idiom -- Ba/BPa
+ * presumably set the annul bit, so that movi(r0, 1) in the delay slot
+ * only takes effect when the branch is taken (skipping movi(r0, 0), as
+ * the displacement is 3 instructions), and movi(r0, 0) runs otherwise.
+ * It relies on each movi() being a single instruction; confirm against
+ * the Ba/BPa and movi definitions. */
+static void
+_cr(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMP(r1, r2);
+#  if __WORDSIZE == 32
+    Ba(cc, 3);
+#  else
+    BPa(cc, 3);
+#  endif
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+/* Immediate variant of cr(): compares r1 with i0 under condition cc and
+ * leaves 0 or 1 in r0.  CMPI is used when s13_p(i0) holds, otherwise i0
+ * is loaded in a temporary register and cr() is used. */
+static void
+_cw(jit_state_t *_jit, jit_int32_t cc,
+    jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!s13_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       cr(cc, r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    CMPI(r1, i0);
+    /* same branch-over sequence as in _cr() */
+#  if __WORDSIZE == 32
+    Ba(cc, 3);
+#  else
+    BPa(cc, 3);
+#  endif
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+/* Load a signed byte from the absolute address i0 into r0.  LDSBI with
+ * base register 0 is used when s13_p(i0) holds, otherwise the address
+ * is loaded in a temporary register and ldr_c() is used. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_c(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDSBI(0, i0, r0);
+}
+
+/* Load an unsigned byte from the absolute address i0 into r0.  LDUBI
+ * with base register 0 is used when s13_p(i0) holds, otherwise the
+ * address is loaded in a temporary register and ldr_uc() is used. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_uc(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDUBI(0, i0, r0);
+}
+
+/* Load a signed halfword from the absolute address i0 into r0.  LDSHI
+ * with base register 0 is used when s13_p(i0) holds, otherwise the
+ * address is loaded in a temporary register and ldr_s() is used. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_s(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDSHI(0, i0, r0);
+}
+
+/* Load an unsigned halfword from the absolute address i0 into r0.
+ * LDUHI with base register 0 is used when s13_p(i0) holds, otherwise
+ * the address is loaded in a temporary register and ldr_us() is used. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_us(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDUHI(0, i0, r0);
+}
+
+/* Load a 32 bit integer from the absolute address i0 into r0.  The
+ * immediate form (LDI on 32 bit, LDSWI on 64 bit) with base register 0
+ * is used when s13_p(i0) holds, otherwise the address is loaded in a
+ * temporary register and ldr_i() is used. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_i(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+#  if __WORDSIZE == 32
+    LDI(0, i0, r0);
+#  else
+    LDSWI(0, i0, r0);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/* Load an unsigned 32 bit integer from the absolute address i0 into r0.
+ * LDUWI with base register 0 is used when s13_p(i0) holds, otherwise
+ * the address is loaded in a temporary register and ldr_ui() is used. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_ui(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDUWI(0, i0, r0);
+}
+
+/* Load a 64 bit word from the absolute address i0 into r0.  LDXI with
+ * base register 0 is used when s13_p(i0) holds, otherwise the address
+ * is loaded in a temporary register and ldr_l() is used. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       ldr_l(r0, rn(addr));
+       jit_unget_reg(addr);
+       return;
+    }
+    LDXI(0, i0, r0);
+}
+#  endif
+
+/* Load a signed byte from address r1 + i0 into r0.  LDSBI is used when
+ * s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and ldxr_c() is used. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_c(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDSBI(r1, i0, r0);
+}
+
+/* Load an unsigned byte from address r1 + i0 into r0.  LDUBI is used
+ * when s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and ldxr_uc() is used. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_uc(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDUBI(r1, i0, r0);
+}
+
+/* Load a signed halfword from address r1 + i0 into r0.  LDSHI is used
+ * when s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and ldxr_s() is used. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_s(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDSHI(r1, i0, r0);
+}
+
+/* Load an unsigned halfword from address r1 + i0 into r0.  LDUHI is
+ * used when s13_p(i0) holds, otherwise the offset is loaded in a
+ * temporary register and ldxr_us() is used. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_us(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDUHI(r1, i0, r0);
+}
+
+/* Load a 32 bit integer from address r1 + i0 into r0.  The immediate
+ * form (LDI on 32 bit, LDSWI on 64 bit) is used when s13_p(i0) holds,
+ * otherwise the offset is loaded in a temporary register and ldxr_i()
+ * is used. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_i(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+#  if __WORDSIZE == 32
+    LDI(r1, i0, r0);
+#  else
+    LDSWI(r1, i0, r0);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/* Load an unsigned 32 bit integer from address r1 + i0 into r0.  LDUWI
+ * is used when s13_p(i0) holds, otherwise the offset is loaded in a
+ * temporary register and ldxr_ui() is used. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_ui(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDUWI(r1, i0, r0);
+}
+
+/* Load a 64 bit word from address r1 + i0 into r0.  LDXI is used when
+ * s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and ldxr_l() is used. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       ldxr_l(r0, r1, rn(off));
+       jit_unget_reg(off);
+       return;
+    }
+    LDXI(r1, i0, r0);
+}
+#  endif
+
+/* Store the low byte of r0 at the absolute address i0.  STBI with base
+ * register 0 is used when s13_p(i0) holds, otherwise the address is
+ * loaded in a temporary register and str_c() is used. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       str_c(rn(addr), r0);
+       jit_unget_reg(addr);
+       return;
+    }
+    STBI(r0, 0, i0);
+}
+
+/* Store the low halfword of r0 at the absolute address i0.  STHI with
+ * base register 0 is used when s13_p(i0) holds, otherwise the address
+ * is loaded in a temporary register and str_s() is used. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       str_s(rn(addr), r0);
+       jit_unget_reg(addr);
+       return;
+    }
+    STHI(r0, 0, i0);
+}
+
+/* Store the low 32 bits of r0 at the absolute address i0.  The
+ * immediate form (STI on 32 bit, STWI on 64 bit) with base register 0
+ * is used when s13_p(i0) holds, otherwise the address is loaded in a
+ * temporary register and str_i() is used. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       str_i(rn(addr), r0);
+       jit_unget_reg(addr);
+       return;
+    }
+#  if __WORDSIZE == 32
+    STI(r0, 0, i0);
+#  else
+    STWI(r0, 0, i0);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/* Store the 64 bit word in r0 at the absolute address i0.  STXI with
+ * base register 0 is used when s13_p(i0) holds, otherwise the address
+ * is loaded in a temporary register and str_l() is used. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr;
+    if (!s13_p(i0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       str_l(rn(addr), r0);
+       jit_unget_reg(addr);
+       return;
+    }
+    STXI(r0, 0, i0);
+}
+#  endif
+
+/* Store the low byte of r1 at address r0 + i0.  STBI is used when
+ * s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and stxr_c() is used. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       stxr_c(r0, rn(off), r1);
+       jit_unget_reg(off);
+       return;
+    }
+    STBI(r1, r0, i0);
+}
+
+/* Store the low halfword of r1 at address r0 + i0.  STHI is used when
+ * s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and stxr_s() is used. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       stxr_s(r0, rn(off), r1);
+       jit_unget_reg(off);
+       return;
+    }
+    STHI(r1, r0, i0);
+}
+
+/* Store the low 32 bits of r1 at address r0 + i0.  The immediate form
+ * (STI on 32 bit, STWI on 64 bit) is used when s13_p(i0) holds,
+ * otherwise the offset is loaded in a temporary register and stxr_i()
+ * is used. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       stxr_i(r0, rn(off), r1);
+       jit_unget_reg(off);
+       return;
+    }
+#  if __WORDSIZE == 32
+    STI(r1, r0, i0);
+#  else
+    STWI(r1, r0, i0);
+#  endif
+}
+
+#  if __WORDSIZE == 64
+/* Store the 64 bit word in r1 at address r0 + i0.  STXI is used when
+ * s13_p(i0) holds, otherwise the offset is loaded in a temporary
+ * register and stxr_l() is used. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                off;
+    if (!s13_p(i0)) {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       stxr_l(r0, rn(off), r1);
+       jit_unget_reg(off);
+       return;
+    }
+    STXI(r1, r0, i0);
+}
+#  endif
+
+/* Emit a compare of r0 with r1 followed by a branch on condition cc to
+ * address i0 and a NOP after it.  Returns the address of the branch
+ * instruction, as needed to patch its displacement later. */
+static jit_word_t
+_br(jit_state_t *_jit, jit_int32_t cc,
+    jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_word_t         disp;
+    CMP(r0, r1);
+    at = _jit->pc.w;
+    /* displacement is counted in instructions from the branch itself */
+    disp = (i0 - at) >> 2;
+#  if __WORDSIZE == 32
+    B(cc, disp);
+#  else
+    BP(cc, disp);
+#  endif
+    NOP();
+    return (at);
+}
+
+/* Emit a compare of r0 with the immediate i1 followed by a branch on
+ * condition cc to address i0 and a NOP after it.  CMPI is used when
+ * s13_p(i1) holds, otherwise i1 is loaded in a temporary register and
+ * br() is used.  Returns the address of the branch instruction, as
+ * needed to patch its displacement later. */
+static jit_word_t
+_bw(jit_state_t *_jit, jit_int32_t cc,
+    jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    if (s13_p(i1)) {
+       CMPI(r0, i1);
+       w = _jit->pc.w;
+#  if __WORDSIZE == 32
+       B(cc, (i0 - w) >> 2);
+#  else
+       /* 64 bit must use the BP() branch form, exactly as _br() does
+        * for the register/register compare */
+       BP(cc, (i0 - w) >> 2);
+#  endif
+       NOP();
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(reg), i1);
+       w = br(cc, i0, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    return (w);
+}
+
+/* Emit r0 = r0 + r1 (add != 0) or r0 = r0 - r1 with the condition codes
+ * updated, followed by a branch to i0 and a NOP after it.  `sgn'
+ * selects the BVS/BVC pair and otherwise the BCS/BCC pair; `jif'
+ * selects the first condition of the pair (the "set" one) instead of
+ * the second.  Returns the address of the branch instruction. */
+static jit_word_t
+_b_asr(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                cond;
+    if (!add)
+       SUBcc(r0, r1, r0);
+    else
+       ADDcc(r0, r1, r0);
+    at = _jit->pc.w;
+#  if __WORDSIZE == 32
+    if (sgn)
+       cond = jif ? SPARC_BVS : SPARC_BVC;
+    else
+       cond = jif ? SPARC_BCS : SPARC_BCC;
+    B(cond, (i0 - at) >> 2);
+#  else
+    if (sgn)
+       cond = jif ? SPARC_BPVS : SPARC_BPVC;
+    else
+       cond = jif ? SPARC_BPCS : SPARC_BPCC;
+    BP(cond, (i0 - at) >> 2);
+#  endif
+    NOP();
+    return (at);
+}
+
+/* Immediate variant of b_asr(): r0 = r0 + i1 (add != 0) or r0 = r0 - i1
+ * with the condition codes updated, followed by a branch to i0 and a
+ * NOP after it.  The immediate instruction forms are used when
+ * s13_p(i1) holds, otherwise i1 is loaded in a temporary register and
+ * b_asr() is used.  Returns the address of the branch instruction. */
+static jit_word_t
+_b_asw(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp;
+    jit_int32_t                cond;
+    if (!s13_p(i1)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(tmp), i1);
+       at = b_asr(jif, add, sgn, i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (at);
+    }
+    if (!add)
+       SUBIcc(r0, i1, r0);
+    else
+       ADDIcc(r0, i1, r0);
+    at = _jit->pc.w;
+#  if __WORDSIZE == 32
+    if (sgn)
+       cond = jif ? SPARC_BVS : SPARC_BVC;
+    else
+       cond = jif ? SPARC_BCS : SPARC_BCC;
+    B(cond, (i0 - at) >> 2);
+#  else
+    if (sgn)
+       cond = jif ? SPARC_BPVS : SPARC_BPVC;
+    else
+       cond = jif ? SPARC_BPCS : SPARC_BPCC;
+    BP(cond, (i0 - at) >> 2);
+#  endif
+    NOP();
+    return (at);
+}
+
+/* Emit a bit test of r0 against the mask in r1 (BTST) followed by a
+ * branch to i0 and a NOP after it.  With `set' the branch uses the
+ * non-zero condition (BNZ/BPNE), otherwise the zero one (BZ/BPE).
+ * Returns the address of the branch instruction. */
+static jit_word_t
+_bm_r(jit_state_t *_jit, jit_bool_t set,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    jit_int32_t                cond;
+    BTST(r0, r1);
+    at = _jit->pc.w;
+#  if __WORDSIZE == 32
+    cond = set ? SPARC_BNZ : SPARC_BZ;
+    B(cond, (i0 - at) >> 2);
+#  else
+    cond = set ? SPARC_BPNE : SPARC_BPE;
+    BP(cond, (i0 - at) >> 2);
+#  endif
+    NOP();
+    return (at);
+}
+
+/* Immediate variant of bm_r(): bit test of r0 against the mask i1
+ * followed by a branch to i0 and a NOP after it.  BTSTI is used when
+ * s13_p(i1) holds, otherwise i1 is loaded in a temporary register and
+ * bm_r() is used.  Returns the address of the branch instruction. */
+static jit_word_t
+_bm_w(jit_state_t *_jit, jit_bool_t set,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp;
+    jit_int32_t                cond;
+    if (!s13_p(i1)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(tmp), i1);
+       at = bm_r(set, i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (at);
+    }
+    BTSTI(r0, i1);
+    at = _jit->pc.w;
+#  if __WORDSIZE == 32
+    cond = set ? SPARC_BNZ : SPARC_BZ;
+    B(cond, (i0 - at) >> 2);
+#  else
+    cond = set ? SPARC_BPNE : SPARC_BPE;
+    BP(cond, (i0 - at) >> 2);
+#  endif
+    NOP();
+    return (at);
+}
+
+/* Emit an indirect jump to the address held in r0: a JMPL with first
+ * and last operands 0 (presumably discarding the link address and using
+ * a zero offset -- confirm against the JMPL macro), followed by a NOP. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    JMPL(0, r0, 0);
+    NOP();
+}
+
+/* Emit an unconditional jump to the address i0.  A pc-relative BA is
+ * used when the instruction displacement satisfies s22_p(), otherwise
+ * the address is loaded in a temporary register and jmpr() is used. */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+    disp = (i0 - _jit->pc.w) >> 2;
+    if (!s22_p(disp)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       movi(rn(tmp), i0);
+       jmpr(rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    BA(disp);
+    NOP();
+}
+
+/* Emit a patchable jump: the target i0 is loaded with movi_p() in a
+ * temporary register and the jump goes through jmpr().  Returns the
+ * value returned by movi_p(), used to patch the target later. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    jit_int32_t                target;
+    target = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    patch = movi_p(rn(target), i0);
+    jmpr(rn(target));
+    jit_unget_reg(target);
+    return (patch);
+}
+
+/* Emit an indirect call to the address held in r0, followed by a NOP. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    CALL(r0);
+    NOP();
+}
+
+/* Emit a pc-relative call to the address i0, followed by a NOP.  The
+ * displacement is counted in instructions from the current pc.
+ * NOTE(review): unlike jmpi(), no range check is made on the
+ * displacement -- confirm that CALLI can always encode it. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         disp;
+    disp = (i0 - _jit->pc.w) >> 2;
+    CALLI(disp);
+    NOP();
+}
+
+/* Emit a patchable call: the target i0 is loaded with movi_p() in a
+ * temporary register and the call goes through callr().  Returns the
+ * value returned by movi_p(), used to patch the target later. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         patch;
+    jit_int32_t                target;
+    target = jit_get_reg(jit_class_gpr);
+    patch = movi_p(rn(target), i0);
+    callr(rn(target));
+    jit_unget_reg(target);
+    return (patch);
+}
+
+/* Biased offset of the n-th word sized save slot */
+#define OFF(n)         BIAS(((n) * sizeof(jit_word_t)))
+/* Emit the function prolog for the function currently being compiled:
+ * computes the frame size, emits SAVEI to adjust _SP_REGNO, stores the
+ * _L0.._L7 registers that are set in the function regset, and handles
+ * the allocar and varargs bookkeeping.  Nothing is emitted when the
+ * function was declared with assume_frame. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       /* the frame is assumed to be already set up: emit nothing */
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
+    /* align at 16 bytes boundary */
+    _jitc->function->stack = ((stack_framesize +
+                             _jitc->function->self.alen -
+                             _jitc->function->self.aoff) + 15) & -16;
+    SAVEI(_SP_REGNO, -_jitc->function->stack, _SP_REGNO);
+
+    /* (most) other backends do not save incoming arguments, so,
+     * only save locals here */
+    if (jit_regset_tstbit(&_jitc->function->regset, _L0))
+       stxi(OFF(0), _SP_REGNO, _L0_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L1))
+       stxi(OFF(1), _SP_REGNO, _L1_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L2))
+       stxi(OFF(2), _SP_REGNO, _L2_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L3))
+       stxi(OFF(3), _SP_REGNO, _L3_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L4))
+       stxi(OFF(4), _SP_REGNO, _L4_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L5))
+       stxi(OFF(5), _SP_REGNO, _L5_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L6))
+       stxi(OFF(6), _SP_REGNO, _L6_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _L7))
+       stxi(OFF(7), _SP_REGNO, _L7_REGNO);
+
+    /* record the current (biased) allocation offset in the frame slot
+     * at aoffoff */
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), BIAS(_jitc->function->self.aoff));
+       /* Already "biased" by allocai */
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
+
+    /* varargs function: store the argument registers from vagp onwards
+     * in their _FP_REGNO relative slots */
+    if (_jitc->function->self.call & jit_call_varargs) {
+       for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
+           stxi(BIAS((16 + (__WORDSIZE == 32)) * sizeof(jit_word_t) +
+                     reg * sizeof(jit_word_t)), _FP_REGNO, rn(_I0 + reg));
+    }
+}
+
+/* Emit the function epilog: reloads the _L0.._L7 registers that are set
+ * in the function regset, then emits RESTOREI, RETL and a NOP.  Nothing
+ * is emitted when the function was declared with assume_frame. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    /* (most) other backends do not save incoming arguments, so,
+     * only restore locals here; the loads are _FP_REGNO relative, at
+     * function->stack plus the slot offset used by the prolog */
+    if (jit_regset_tstbit(&_jitc->function->regset, _L0))
+       ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L1))
+       ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L2))
+       ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L3))
+       ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L4))
+       ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L5))
+       ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L6))
+       ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
+    if (jit_regset_tstbit(&_jitc->function->regset, _L7))
+       ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
+    RESTOREI(0, 0, 0);
+    RETL();
+    NOP();
+}
+
+/* Initialize the va_list register r0: it is set to the frame slot of
+ * the first variadic argument register while vagp still names an
+ * argument register, and to BIAS(self.size) from _FP_REGNO otherwise. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (!jit_arg_reg_p(_jitc->function->vagp)) {
+       /* all argument registers used by named arguments */
+       addi(r0, _FP_REGNO, BIAS(_jitc->function->self.size));
+       return;
+    }
+    /* Initialize stack pointer to the first stack argument. */
+    addi(r0, _FP_REGNO, BIAS((16 + (__WORDSIZE == 32) +
+                             _jitc->function->vagp) *
+                            sizeof(jit_word_t)));
+}
+
+/* Fetch the next variadic argument: loads one word from the address in
+ * r1 into r0 and then advances r1 by sizeof(jit_word_t).  Only valid in
+ * a function declared as varargs. */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Load argument. */
+    ldr(r0, r1);
+
+    /* Update vararg stack pointer. */
+    addi(r1, r1, sizeof(jit_word_t));
+}
+
+/* Patch the already emitted instruction at address `instr' so that it
+ * refers to `label'.  Only instructions with op == 0 are handled:
+ *   - op2 == 2 or 6: the disp22 field is rewritten;
+ *   - op2 == 1 (64 bit only): the disp19 field is rewritten;
+ *   - op2 == 4: start of a movi_p sequence, whose imm22/simm13 fields
+ *     are rewritten with HI()/LO() of the label; on 64 bit a second
+ *     pair of instructions receives the upper 32 bits of the label.
+ * Anything else is a fatal error (abort()). */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t                 i;
+    /* view of the instruction address as an array of 32 bit words */
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+
+    u.w = instr;
+    i.v = u.i[0];
+
+    if (i.op.b == 0) {                         /* conditional branch */
+       if (i.op2.b == 2 || i.op2.b == 6) {     /* int or float condition */
+           /* displacement is counted in instructions */
+           i.disp22.b = (label - instr) >> 2;
+           u.i[0] = i.v;
+       }
+#  if __WORDSIZE == 64
+       else if (i.op2.b == 1) {
+           i.disp19.b = (label - instr) >> 2;
+           u.i[0] = i.v;
+       }
+#  endif
+       else if (i.op2.b == 4) {        /* movi_p */
+           /* SETHI */
+           i.imm22.b = HI((int)label);
+           u.i[0] = i.v;
+           i.v = u.i[1];
+           if (i.op.b == 2 && i.op3.b == 2) {
+               /* ORI */
+               i.simm13.b = LO(label);
+               u.i[1] = i.v;
+#  if __WORDSIZE == 64
+               /* second SETHI/ORI pair: upper 32 bits of the label */
+               i.v = u.i[2];
+               assert(i.op2.b == 4);
+               label = (label >> 32) & 0xffffffff;
+               i.imm22.b = HI((int)label);
+               u.i[2] = i.v;
+               i.v = u.i[3];
+               assert(i.op.b == 2 && i.op3.b == 2);
+               /* ORI */
+               i.simm13.b = LO(label);
+               u.i[3] = i.v;
+#  endif
+           }
+           else
+               abort();
+       }
+       else
+           abort();
+    }
+    else
+       abort();
+}
+#endif
diff --git a/deps/lightning/lib/jit_sparc-fpu.c b/deps/lightning/lib/jit_sparc-fpu.c
new file mode 100644 (file)
index 0000000..ae2cbab
--- /dev/null
@@ -0,0 +1,1499 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/*
+ * Register-number helpers.
+ *
+ * 32-bit build: FPR() is the identity and both register classes are
+ * plain jit_class_fpr.
+ * 64-bit build: single_precision_p(r) holds for numbers 0..31, FPR(r)
+ * rewrites numbers above 31 as r - 31 (the same adjustment _f3f applies
+ * to its operands), and the classes additionally carry jit_class_sng or
+ * jit_class_dbl.
+ * Both single_precision_p() and the 64-bit FPR() evaluate their argument
+ * more than once.
+ */
+#  if __WORDSIZE == 32
+#    define FPR(r)                     (r)
+#    define CLASS_SNG                  jit_class_fpr
+#    define CLASS_DBL                  jit_class_fpr
+#  else
+#    define single_precision_p(r)      ((r) >= 0 && (r) <= 31)
+#    define FPR(r)                     ((r) > 31 ? (r) - 31 : (r))
+#    define CLASS_SNG                  (jit_class_fpr | jit_class_sng)
+#    define CLASS_DBL                  (jit_class_fpr | jit_class_dbl)
+#  endif
+/*
+ * Floating-point load/store encoders.  Each macro forwards to f3r
+ * (register + register address) or f3i (register + immediate address,
+ * macro name ending in I); the third argument is the numeric opcode
+ * selecting the operation (presumably the op3 field -- f3r/f3i are
+ * defined outside this file).  Every register operand goes through
+ * FPR().
+ */
+#  define LDF(rs1, rs2, rd)            f3r(3, FPR(rd), 32, FPR(rs1), FPR(rs2))
+#  define LDFI(rs1, imm, rd)           f3i(3, FPR(rd), 32, FPR(rs1), imm)
+#  define LDDF(rs1, rs2, rd)           f3r(3, FPR(rd), 35, FPR(rs1), FPR(rs2))
+#  define LDDFI(rs1, imm, rd)          f3i(3, FPR(rd), 35, FPR(rs1), imm)
+#  define LDFSR(rs1, rs2, rd)          f3r(3, FPR(rd), 33, FPR(rs1), FPR(rs2))
+#  define LDFSRI(rs1, imm, rd)         f3i(3, FPR(rd), 33, FPR(rs1), imm)
+#  define STF(rd, rs1, rs2)            f3r(3, FPR(rd), 36, FPR(rs1), FPR(rs2))
+#  define STFI(rd, rs1, imm)           f3i(3, FPR(rd), 36, FPR(rs1), imm)
+#  define STDF(rd, rs1, rs2)           f3r(3, FPR(rd), 39, FPR(rs1), FPR(rs2))
+#  define STDFI(rd, rs1, imm)          f3i(3, FPR(rd), 39, FPR(rs1), imm)
+#  define STFSR(rd, rs1, rs2)          f3r(3, FPR(rd), 37, FPR(rs1), FPR(rs2))
+#  define STFSRI(rd, rs1, imm)         f3i(3, FPR(rd), 37, FPR(rs1), imm)
+#  define STDFQ(rd, rs1, rs2)          f3r(3, FPR(rd), 38, FPR(rs1), FPR(rs2))
+#  define STDFQI(rd, rs1, imm)         f3i(3, FPR(rd), 38, FPR(rs1), imm)
+/* Original spelling of the immediate form, kept so existing uses still
+ * compile; every other immediate form is named <NAME>I. */
+#  define STFDFQ(rd, rs1, imm)         STDFQI(rd, rs1, imm)
+/*
+ * Floating-point branch condition codes and the branch encoders built on
+ * them.  FB(cc, imm) and FBa(cc, imm) differ only in the second argument
+ * handed to f2b (0 versus 1); the "a" suffix presumably selects the
+ * annul bit -- f2b is defined outside this file.
+ * Every SPARC_FB* code has a matching FBxx/FBxxa wrapper pair.
+ */
+#  define SPARC_FBA                    8       /* always - 1 */
+#  define SPARC_FBN                    0       /* never - 0 */
+#  define SPARC_FBU                    7       /* unordered - U */
+#  define SPARC_FBG                    6       /* greater - G */
+#  define SPARC_FBUG                   5       /* unordered or greater - G or U */
+#  define SPARC_FBL                    4       /* less - L */
+#  define SPARC_FBUL                   3       /* unordered or less - L or U */
+#  define SPARC_FBLG                   2       /* less or greater - L or G */
+#  define SPARC_FBNE                   1       /* not equal - L or G or U */
+#  define SPARC_FBNZ                   SPARC_FBNE
+#  define SPARC_FBE                    9       /* equal - E */
+#  define SPARC_FBZ                    SPARC_FBE
+#  define SPARC_FBUE                   10      /* unordered or equal - E or U */
+#  define SPARC_FBGE                   11      /* greater or equal - E or G */
+#  define SPARC_FBUGE                  12      /* unordered or greater or equal - E or G or U */
+#  define SPARC_FBLE                   13      /* less or equal - E or L */
+#  define SPARC_FBULE                  14      /* unordered or less or equal - E or L or U */
+#  define SPARC_FBO                    15      /* ordered - E or L or G */
+#  define FB(cc, imm)                  f2b(0, 0, cc, 6, imm)
+#  define FBa(cc, imm)                 f2b(0, 1, cc, 6, imm)
+#  define FBA(imm)                     FB(SPARC_FBA, imm)
+#  define FBAa(imm)                    FBa(SPARC_FBA, imm)
+#  define FBN(imm)                     FB(SPARC_FBN, imm)
+#  define FBNa(imm)                    FBa(SPARC_FBN, imm)
+#  define FBU(imm)                     FB(SPARC_FBU, imm)
+#  define FBUa(imm)                    FBa(SPARC_FBU, imm)
+#  define FBG(imm)                     FB(SPARC_FBG, imm)
+#  define FBGa(imm)                    FBa(SPARC_FBG, imm)
+#  define FBUG(imm)                    FB(SPARC_FBUG, imm)
+#  define FBUGa(imm)                   FBa(SPARC_FBUG, imm)
+#  define FBL(imm)                     FB(SPARC_FBL, imm)
+#  define FBLa(imm)                    FBa(SPARC_FBL, imm)
+#  define FBUL(imm)                    FB(SPARC_FBUL, imm)
+#  define FBULa(imm)                   FBa(SPARC_FBUL, imm)
+#  define FBLG(imm)                    FB(SPARC_FBLG, imm)
+#  define FBLGa(imm)                   FBa(SPARC_FBLG, imm)
+#  define FBNE(imm)                    FB(SPARC_FBNE, imm)
+#  define FBNEa(imm)                   FBa(SPARC_FBNE, imm)
+#  define FBE(imm)                     FB(SPARC_FBE, imm)
+#  define FBEa(imm)                    FBa(SPARC_FBE, imm)
+#  define FBUE(imm)                    FB(SPARC_FBUE, imm)
+#  define FBUEa(imm)                   FBa(SPARC_FBUE, imm)
+#  define FBGE(imm)                    FB(SPARC_FBGE, imm)
+#  define FBGEa(imm)                   FBa(SPARC_FBGE, imm)
+#  define FBUGE(imm)                   FB(SPARC_FBUGE, imm)
+#  define FBUGEa(imm)                  FBa(SPARC_FBUGE, imm)
+#  define FBLE(imm)                    FB(SPARC_FBLE, imm)
+#  define FBLEa(imm)                   FBa(SPARC_FBLE, imm)
+#  define FBULE(imm)                   FB(SPARC_FBULE, imm)
+#  define FBULEa(imm)                  FBa(SPARC_FBULE, imm)
+#  define FBO(imm)                     FB(SPARC_FBO, imm)
+#  define FBOa(imm)                    FBa(SPARC_FBO, imm)
+/*
+ * Floating-point operate encoders.  FPop1 and FPop2 emit through _f3f
+ * with op3 52 and 53 respectively; the macros below supply the opf code
+ * of each conversion, move, negate, absolute-value and square-root
+ * operation.  All of them are unary, so rs1 is passed as 0.
+ */
+#  define FPop1(rd, rs1, opf, rs2)     f3f(rd, 52, rs1, opf, rs2)
+#  define FPop2(rd, rs1, opf, rs2)     f3f(rd, 53, rs1, opf, rs2)
+#  define f3f(rd, op3, rs1, opf, rs2)  _f3f(_jit, rd, op3, rs1, opf, rs2)
+static void
+_f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
+#  define FITOS(rs2, rd)               FPop1(rd, 0, 196, rs2)
+#  define FITOD(rs2, rd)               FPop1(rd, 0, 200, rs2)
+#  define FITOQ(rs2, rd)               FPop1(rd, 0, 204, rs2)
+#  if __WORDSIZE == 64
+#    define FXTOS(rs2, rd)             FPop1(rd, 0, 132, rs2)
+#    define FXTOD(rs2, rd)             FPop1(rd, 0, 136, rs2)
+#    define FXTOQ(rs2, rd)             FPop1(rd, 0, 140, rs2)
+/* Original mixed-case spelling, kept so existing uses still compile. */
+#    define FxTOQ(rs2, rd)             FXTOQ(rs2, rd)
+#  endif
+#  define FSTOI(rs2, rd)               FPop1(rd, 0, 209, rs2)
+#  define FDTOI(rs2, rd)               FPop1(rd, 0, 210, rs2)
+#  define FQTOI(rs2, rd)               FPop1(rd, 0, 211, rs2)
+#  define FSTOX(rs2, rd)               FPop1(rd, 0, 129, rs2)
+#  define FDTOX(rs2, rd)               FPop1(rd, 0, 130, rs2)
+#  define FQTOX(rs2, rd)               FPop1(rd, 0, 131, rs2)
+#  define FSTOD(rs2, rd)               FPop1(rd, 0, 201, rs2)
+#  define FSTOQ(rs2, rd)               FPop1(rd, 0, 205, rs2)
+#  define FDTOS(rs2, rd)               FPop1(rd, 0, 198, rs2)
+#  define FDTOQ(rs2, rd)               FPop1(rd, 0, 206, rs2)
+#  define FQTOS(rs2, rd)               FPop1(rd, 0, 199, rs2)
+#  define FQTOD(rs2, rd)               FPop1(rd, 0, 203, rs2)
+#  define FMOVS(rs2, rd)               FPop1(rd, 0,   1, rs2)
+#  define FMOVD(rs2, rd)               FPop1(rd, 0,   2, rs2)
+#  define FMOVQ(rs2, rd)               FPop1(rd, 0,   3, rs2)
+#  define FNEGS(rs2, rd)               FPop1(rd, 0,   5, rs2)
+#  define FNEGD(rs2, rd)               FPop1(rd, 0,   6, rs2)
+#  define FNEGQ(rs2, rd)               FPop1(rd, 0,   7, rs2)
+#  define FABSS(rs2, rd)               FPop1(rd, 0,   9, rs2)
+#  define FABSD(rs2, rd)               FPop1(rd, 0,  10, rs2)
+#  define FABSQ(rs2, rd)               FPop1(rd, 0,  11, rs2)
+#  define FSQRTS(rs2, rd)              FPop1(rd, 0,  41, rs2)
+#  define FSQRTD(rs2, rd)              FPop1(rd, 0,  42, rs2)
+#  define FSQRTQ(rs2, rd)              FPop1(rd, 0,  43, rs2)
+/*
+ * Binary arithmetic and compare operations.
+ *
+ * The SPARC_F* constants are the opf codes; they are named separately
+ * because fop1f/fop1d/fop2f and friends receive them as a run-time
+ * argument.  The arithmetic macros emit through FPop1 with operand order
+ * (rs1, rs2, rd); the compare macros emit through FPop2 with rd fixed
+ * to 0.
+ */
+#  define SPARC_FADDS                  65
+#  define SPARC_FADDD                  66
+#  define SPARC_FADDQ                  67
+#  define SPARC_FSUBS                  69
+#  define SPARC_FSUBD                  70
+#  define SPARC_FSUBQ                  71
+#  define SPARC_FMULS                  73
+#  define SPARC_FMULD                  74
+#  define SPARC_FMULQ                  75
+#  define SPARC_FSMULD                 105
+#  define SPARC_FDMULQ                 110
+#  define SPARC_FDIVS                  77
+#  define SPARC_FDIVD                  78
+#  define SPARC_FDIVQ                  79
+#  define FADDS(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FADDS, rs2)
+#  define FADDD(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FADDD, rs2)
+#  define FADDQ(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FADDQ, rs2)
+#  define FSUBS(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FSUBS, rs2)
+#  define FSUBD(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FSUBD, rs2)
+#  define FSUBQ(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FSUBQ, rs2)
+#  define FMULS(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FMULS, rs2)
+#  define FMULD(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FMULD, rs2)
+#  define FMULQ(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FMULQ, rs2)
+#  define FSMULD(rs1, rs2, rd)         FPop1(rd, rs1,  SPARC_FSMULD, rs2)
+#  define FDMULQ(rs1, rs2, rd)         FPop1(rd, rs1,  SPARC_FDMULQ, rs2)
+#  define FDIVS(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FDIVS, rs2)
+#  define FDIVD(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FDIVD, rs2)
+#  define FDIVQ(rs1, rs2, rd)          FPop1(rd, rs1,  SPARC_FDIVQ, rs2)
+#  define SPARC_FCMPS                  81
+#  define SPARC_FCMPD                  82
+#  define SPARC_FCMPQ                  83
+#  define SPARC_FCMPES                 85
+#  define SPARC_FCMPED                 86
+#  define SPARC_FCMPEQ                 87
+#  define FCMPS(rs1, rs2)              FPop2(0, rs1, SPARC_FCMPS, rs2)
+#  define FCMPD(rs1, rs2)              FPop2(0, rs1, SPARC_FCMPD, rs2)
+#  define FCMPQ(rs1, rs2)              FPop2(0, rs1, SPARC_FCMPQ, rs2)
+#  define FCMPES(rs1, rs2)             FPop2(0, rs1, SPARC_FCMPES, rs2)
+#  define FCMPED(rs1, rs2)             FPop2(0, rs1, SPARC_FCMPED, rs2)
+#  define FCMPEQ(rs1, rs2)             FPop2(0, rs1, SPARC_FCMPEQ, rs2)
+/*
+ * Coprocessor operate encoders (op3 54 and 55).  The opcode parameter is
+ * named opc and must be the name used in the expansion; the previous
+ * body referenced `opf`, which is not a parameter of these macros.
+ */
+#  define CPop1(rd, rs1, opc, rs2)     f3f(rd, 54, rs1, opc, rs2)
+#  define CPop2(rd, rs1, opc, rs2)     f3f(rd, 55, rs1, opc, rs2)
+#  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
+static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+/*
+ * Word-sized float -> integer truncation: the _i variants on 32-bit
+ * builds, the _l variants on 64-bit builds.  The test must spell
+ * __WORDSIZE exactly: a misspelled macro name is undefined, evaluates
+ * to 0 in #if, and would always select the _l variants, which are only
+ * declared on 64-bit builds.
+ */
+#  if __WORDSIZE == 32
+#    define truncr_f(r0, r1)           truncr_f_i(r0, r1)
+#    define truncr_d(r0, r1)           truncr_d_i(r0, r1)
+#  else
+#    define truncr_f(r0, r1)           truncr_f_l(r0, r1)
+#    define truncr_d(r0, r1)           truncr_d_l(r0, r1)
+#  endif
+/*
+ * Truncation, precision conversion, move and unary operations.
+ *
+ * Single precision: on 32-bit builds movr_f/negr_f/absr_f/sqrtr_f,
+ * extr_d_f and extr_f_d expand directly to one instruction macro; on
+ * 64-bit builds they call helper functions instead.
+ * Double precision: the situation is reversed for movr_d/negr_d/absr_d,
+ * which are helper functions on 32-bit builds and direct
+ * FMOVD/FNEGD/FABSD on 64-bit builds.  sqrtr_d is FSQRTD on both.
+ * The truncr_*_l variants exist only on 64-bit builds.
+ */
+#  define truncr_f_i(r0, r1)           _truncr_f_i(_jit, r0, r1)
+static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 64
+#    define truncr_f_l(r0, r1)         _truncr_f_l(_jit, r0, r1)
+static void _truncr_f_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  if __WORDSIZE == 32
+#    define extr_d_f(r0, r1)           FDTOS(r1, r0)
+#  else
+#    define extr_d_f(r0, r1)           _extr_d_f(_jit, r0, r1)
+static void _extr_d_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  define movi_f(r0, i0)               _movi_f(_jit, r0, i0)
+#  if __WORDSIZE == 32
+#    define movr_f(r0, r1)             FMOVS(r1, r0)
+#  else
+#    define movr_f(r0, r1)             _movr_f(_jit, r0, r1)
+static void _movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+#  if __WORDSIZE == 32
+#    define negr_f(r0, r1)             FNEGS(r1, r0)
+#    define absr_f(r0, r1)             FABSS(r1, r0)
+#    define sqrtr_f(r0, r1)            FSQRTS(r1, r0)
+#  else
+#    define negr_f(r0, r1)             _negr_f(_jit, r0, r1)
+static void _negr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define absr_f(r0, r1)             _absr_f(_jit, r0, r1)
+static void _absr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define sqrtr_f(r0, r1)            _sqrtr_f(_jit, r0, r1)
+static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  define extr_d(r0, r1)               _extr_d(_jit, r0, r1)
+static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define truncr_d_i(r0, r1)           _truncr_d_i(_jit, r0, r1)
+static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 64
+#    define truncr_d_l(r0, r1)         _truncr_d_l(_jit, r0, r1)
+static void _truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  if __WORDSIZE == 32
+#    define extr_f_d(r0, r1)           FSTOD(r1, r0)
+#  else
+#    define extr_f_d(r0, r1)           _extr_f_d(_jit, r0, r1)
+static void _extr_f_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  define movi_d(r0, i0)               _movi_d(_jit, r0, i0)
+static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+#  if __WORDSIZE == 32
+#  define movr_d(r0, r1)               _movr_d(_jit, r0, r1)
+static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define negr_d(r0, r1)               _negr_d(_jit, r0, r1)
+static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define absr_d(r0, r1)               _absr_d(_jit, r0, r1)
+static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  else
+#    define movr_d(r0, r1)             FMOVD(r1, r0)
+#    define negr_d(r0, r1)             FNEGD(r1, r0)
+#    define absr_d(r0, r1)             FABSD(r1, r0)
+#  endif
+#  define sqrtr_d(r0, r1)              FSQRTD(r1, r0)
+/*
+ * Binary arithmetic front ends.
+ *
+ * fop1f/fop1d take a register and a pointer to an immediate operand;
+ * rfop1f/rfop1d are their counterparts used by the reverse-subtract
+ * macros (rsbi_f/rsbi_d).  `op` is one of the SPARC_F* opf codes.
+ * Single-precision register forms expand directly to an instruction
+ * macro on 32-bit builds and go through fop2f on 64-bit builds;
+ * double-precision register forms are direct on both.
+ * rsbr_d and rsbi_d are defined once, alongside the other _d macros.
+ */
+#  define fop1f(op, r0, r1, i0)                _fop1f(_jit, op, r0, r1, i0)
+static void _fop1f(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define rfop1f(op, r0, r1, i0)       _rfop1f(_jit, op, r0, r1, i0)
+static void _rfop1f(jit_state_t*,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define fop1d(op, r0, r1, i0)                _fop1d(_jit, op, r0, r1, i0)
+static void _fop1d(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define rfop1d(op, r0, r1, i0)       _rfop1d(_jit, op, r0, r1, i0)
+static void _rfop1d(jit_state_t*,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_float64_t*);
+#  if __WORDSIZE == 32
+#    define addr_f(r0, r1, r2)         FADDS(r1, r2, r0)
+#    define subr_f(r0, r1, r2)         FSUBS(r1, r2, r0)
+#    define mulr_f(r0, r1, r2)         FMULS(r1, r2, r0)
+#    define divr_f(r0, r1, r2)         FDIVS(r1, r2, r0)
+#  else
+#    define fop2f(op, r0, r1, r2)      _fop2f(_jit, op, r0, r1, r2)
+static void _fop2f(jit_state_t*, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t);
+#    define addr_f(r0, r1, r2)         fop2f(SPARC_FADDS, r0, r1, r2)
+#    define subr_f(r0, r1, r2)         fop2f(SPARC_FSUBS, r0, r1, r2)
+#    define mulr_f(r0, r1, r2)         fop2f(SPARC_FMULS, r0, r1, r2)
+#    define divr_f(r0, r1, r2)         fop2f(SPARC_FDIVS, r0, r1, r2)
+#  endif
+#  define addi_f(r0, r1, i0)           fop1f(SPARC_FADDS, r0, r1, i0)
+#  define subi_f(r0, r1, i0)           fop1f(SPARC_FSUBS, r0, r1, i0)
+#  define rsbr_f(r0, r1, r2)           subr_f(r0, r2, r1)
+#  define rsbi_f(r0, r1, i0)           rfop1f(SPARC_FSUBS, r0, r1, i0)
+#  define muli_f(r0, r1, i0)           fop1f(SPARC_FMULS, r0, r1, i0)
+#  define divi_f(r0, r1, i0)           fop1f(SPARC_FDIVS, r0, r1, i0)
+#  define addr_d(r0, r1, r2)           FADDD(r1, r2, r0)
+#  define addi_d(r0, r1, i0)           fop1d(SPARC_FADDD, r0, r1, i0)
+#  define subr_d(r0, r1, r2)           FSUBD(r1, r2, r0)
+#  define subi_d(r0, r1, i0)           fop1d(SPARC_FSUBD, r0, r1, i0)
+#  define rsbr_d(r0, r1, r2)           subr_d(r0, r2, r1)
+#  define rsbi_d(r0, r1, i0)           rfop1d(SPARC_FSUBD, r0, r1, i0)
+#  define mulr_d(r0, r1, r2)           FMULD(r1, r2, r0)
+#  define muli_d(r0, r1, i0)           fop1d(SPARC_FMULD, r0, r1, i0)
+#  define divr_d(r0, r1, r2)           FDIVD(r1, r2, r0)
+#  define divi_d(r0, r1, i0)           fop1d(SPARC_FDIVD, r0, r1, i0)
+/*
+ * Comparison macros.
+ *
+ * Every <cond>r_f / <cond>r_d macro forwards to fcr / dcr with two
+ * register operands, and every <cond>i_f / <cond>i_d macro forwards to
+ * fcw / dcw with a register and a pointer to the immediate operand.
+ * The first argument is the SPARC_FB* condition code matching the macro
+ * name (e.g. ltr_f uses SPARC_FBL, unordr_d uses SPARC_FBU).
+ */
+#define fcr(cc, r0, r1, r2)            _fcr(_jit, cc, r0, r1, r2)
+static void _fcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define fcw(cc, r0, r1, i0)            _fcw(_jit, cc, r0, r1, i0)
+static void
+_fcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ltr_f(r0, r1, r2)            fcr(SPARC_FBL, r0, r1, r2)
+#  define lti_f(r0, r1, i0)            fcw(SPARC_FBL, r0, r1, i0)
+#  define ler_f(r0, r1, r2)            fcr(SPARC_FBLE, r0, r1, r2)
+#  define lei_f(r0, r1, i0)            fcw(SPARC_FBLE, r0, r1, i0)
+#  define eqr_f(r0, r1, r2)            fcr(SPARC_FBE, r0, r1, r2)
+#  define eqi_f(r0, r1, i0)            fcw(SPARC_FBE, r0, r1, i0)
+#  define ger_f(r0, r1, r2)            fcr(SPARC_FBGE, r0, r1, r2)
+#  define gei_f(r0, r1, i0)            fcw(SPARC_FBGE, r0, r1, i0)
+#  define gtr_f(r0, r1, r2)            fcr(SPARC_FBG, r0, r1, r2)
+#  define gti_f(r0, r1, i0)            fcw(SPARC_FBG, r0, r1, i0)
+#  define ner_f(r0, r1, r2)            fcr(SPARC_FBNE, r0, r1, r2)
+#  define nei_f(r0, r1, i0)            fcw(SPARC_FBNE, r0, r1, i0)
+#  define unltr_f(r0, r1, r2)          fcr(SPARC_FBUL, r0, r1, r2)
+#  define unlti_f(r0, r1, i0)          fcw(SPARC_FBUL, r0, r1, i0)
+#  define unler_f(r0, r1, r2)          fcr(SPARC_FBULE, r0, r1, r2)
+#  define unlei_f(r0, r1, i0)          fcw(SPARC_FBULE, r0, r1, i0)
+#  define uneqr_f(r0, r1, r2)          fcr(SPARC_FBUE, r0, r1, r2)
+#  define uneqi_f(r0, r1, i0)          fcw(SPARC_FBUE, r0, r1, i0)
+#  define unger_f(r0, r1, r2)          fcr(SPARC_FBUGE, r0, r1, r2)
+#  define ungei_f(r0, r1, i0)          fcw(SPARC_FBUGE, r0, r1, i0)
+#  define ungtr_f(r0, r1, r2)          fcr(SPARC_FBUG, r0, r1, r2)
+#  define ungti_f(r0, r1, i0)          fcw(SPARC_FBUG, r0, r1, i0)
+#  define ltgtr_f(r0, r1, r2)          fcr(SPARC_FBLG, r0, r1, r2)
+#  define ltgti_f(r0, r1, i0)          fcw(SPARC_FBLG, r0, r1, i0)
+#  define ordr_f(r0, r1, r2)           fcr(SPARC_FBO, r0, r1, r2)
+#  define ordi_f(r0, r1, i0)           fcw(SPARC_FBO, r0, r1, i0)
+#  define unordr_f(r0, r1, r2)         fcr(SPARC_FBU, r0, r1, r2)
+#  define unordi_f(r0, r1, i0)         fcw(SPARC_FBU, r0, r1, i0)
+#define dcr(cc, r0, r1, r2)            _dcr(_jit, cc, r0, r1, r2)
+static void _dcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define dcw(cc, r0, r1, i0)            _dcw(_jit, cc, r0, r1, i0)
+static void
+_dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltr_d(r0, r1, r2)            dcr(SPARC_FBL, r0, r1, r2)
+#  define lti_d(r0, r1, i0)            dcw(SPARC_FBL, r0, r1, i0)
+#  define ler_d(r0, r1, r2)            dcr(SPARC_FBLE, r0, r1, r2)
+#  define lei_d(r0, r1, i0)            dcw(SPARC_FBLE, r0, r1, i0)
+#  define eqr_d(r0, r1, r2)            dcr(SPARC_FBE, r0, r1, r2)
+#  define eqi_d(r0, r1, i0)            dcw(SPARC_FBE, r0, r1, i0)
+#  define ger_d(r0, r1, r2)            dcr(SPARC_FBGE, r0, r1, r2)
+#  define gei_d(r0, r1, i0)            dcw(SPARC_FBGE, r0, r1, i0)
+#  define gtr_d(r0, r1, r2)            dcr(SPARC_FBG, r0, r1, r2)
+#  define gti_d(r0, r1, i0)            dcw(SPARC_FBG, r0, r1, i0)
+#  define ner_d(r0, r1, r2)            dcr(SPARC_FBNE, r0, r1, r2)
+#  define nei_d(r0, r1, i0)            dcw(SPARC_FBNE, r0, r1, i0)
+#  define unltr_d(r0, r1, r2)          dcr(SPARC_FBUL, r0, r1, r2)
+#  define unlti_d(r0, r1, i0)          dcw(SPARC_FBUL, r0, r1, i0)
+#  define unler_d(r0, r1, r2)          dcr(SPARC_FBULE, r0, r1, r2)
+#  define unlei_d(r0, r1, i0)          dcw(SPARC_FBULE, r0, r1, i0)
+#  define uneqr_d(r0, r1, r2)          dcr(SPARC_FBUE, r0, r1, r2)
+#  define uneqi_d(r0, r1, i0)          dcw(SPARC_FBUE, r0, r1, i0)
+#  define unger_d(r0, r1, r2)          dcr(SPARC_FBUGE, r0, r1, r2)
+#  define ungei_d(r0, r1, i0)          dcw(SPARC_FBUGE, r0, r1, i0)
+#  define ungtr_d(r0, r1, r2)          dcr(SPARC_FBUG, r0, r1, r2)
+#  define ungti_d(r0, r1, i0)          dcw(SPARC_FBUG, r0, r1, i0)
+#  define ltgtr_d(r0, r1, r2)          dcr(SPARC_FBLG, r0, r1, r2)
+#  define ltgti_d(r0, r1, i0)          dcw(SPARC_FBLG, r0, r1, i0)
+#  define ordr_d(r0, r1, r2)           dcr(SPARC_FBO, r0, r1, r2)
+#  define ordi_d(r0, r1, i0)           dcw(SPARC_FBO, r0, r1, i0)
+#  define unordr_d(r0, r1, r2)         dcr(SPARC_FBU, r0, r1, r2)
+#  define unordi_d(r0, r1, i0)         dcw(SPARC_FBU, r0, r1, i0)
+/*
+ * Memory access front ends.
+ *
+ * Single precision: the register-addressed forms (ldr_f, ldxr_f, str_f,
+ * stxr_f) expand directly to LDF/STF on 32-bit builds and call helper
+ * functions on 64-bit builds; the immediate-addressed forms always call
+ * helpers.
+ * Double precision: the register-addressed forms are direct LDDF/STDF
+ * on both word sizes; the immediate-addressed forms call helpers.
+ *
+ * NOTE(review): _ldxi_d and _stxi_d are declared with jit_int32_t
+ * offsets while _ldxi_f and _stxi_f use jit_word_t -- confirm against
+ * the definitions that this difference is intended.
+ */
+#  if __WORDSIZE == 32
+#    define ldr_f(r0, r1)              LDF(r1, 0, r0)
+#  else
+#  define ldr_f(r0, r1)                        _ldr_f(_jit, r0, r1)
+static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define ldi_f(r0, i0)                        _ldi_f(_jit, r0, i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define ldxr_f(r0, r1, r2)         LDF(r1, r2, r0)
+#  else
+#  define ldxr_f(r0, r1, r2)           _ldxr_f(_jit, r0, r1, r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
+#  define ldxi_f(r0, r1, i0)           _ldxi_f(_jit, r0, r1, i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 32
+#    define str_f(r0, r1)              STF(r1, r0, 0)
+#  else
+#  define str_f(r0, r1)                        _str_f(_jit, r0, r1)
+static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define sti_f(r0, i0)                        _sti_f(_jit, r0, i0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  if __WORDSIZE == 32
+#    define stxr_f(r0, r1, r2)         STF(r2, r1, r0)
+#  else
+#  define stxr_f(r0, r1, r2)           _stxr_f(_jit, r0, r1, r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
+#  define stxi_f(r0, r1, i0)           _stxi_f(_jit, r0, r1, i0)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define ldr_d(r0, r1)                        LDDF(r1, 0, r0)
+#  define ldi_d(r0, i0)                        _ldi_d(_jit, r0, i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0, r1, r2)           LDDF(r1, r2, r0)
+#  define ldxi_d(r0, r1, i0)           _ldxi_d(_jit, r0, r1, i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define str_d(r0, r1)                        STDF(r1, r0, 0)
+#  define sti_d(r0, i0)                        _sti_d(_jit, r0, i0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0, r1, r2)           STDF(r2, r1, r0)
+#  define stxi_d(r0, r1, i0)           _stxi_d(_jit, r0, r1, i0)
+static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+/*
+ * Compare-and-branch macros.
+ *
+ * b<cond>r_f / b<cond>r_d forward to fbr / dbr with two register
+ * operands; b<cond>i_f / b<cond>i_d forward to fbw / dbw with a register
+ * and a pointer to the immediate operand.  In every case i0 is passed
+ * through as a jit_word_t and the helper returns a jit_word_t
+ * (presumably the branch target and the address to patch later --
+ * confirm against the definitions).  The condition code is the
+ * SPARC_FB* value matching the macro name.
+ * vaarg_d is the double-precision variadic argument fetch.
+ */
+#  define fbr(cc, i0, r0, r1)          _fbr(_jit, cc, i0, r0, r1)
+static jit_word_t
+_fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define fbw(cc, i0, r0, i1)          _fbw(_jit, cc, i0, r0, i1)
+static jit_word_t
+_fbw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bltr_f(i0, r0, r1)           fbr(SPARC_FBL, i0, r0, r1)
+#  define blti_f(i0, r0, i1)           fbw(SPARC_FBL, i0, r0, i1)
+#  define bler_f(i0, r0, r1)           fbr(SPARC_FBLE, i0, r0, r1)
+#  define blei_f(i0, r0, i1)           fbw(SPARC_FBLE, i0, r0, i1)
+#  define beqr_f(i0, r0, r1)           fbr(SPARC_FBE, i0, r0, r1)
+#  define beqi_f(i0, r0, i1)           fbw(SPARC_FBE, i0, r0, i1)
+#  define bger_f(i0, r0, r1)           fbr(SPARC_FBGE, i0, r0, r1)
+#  define bgei_f(i0, r0, i1)           fbw(SPARC_FBGE, i0, r0, i1)
+#  define bgtr_f(i0, r0, r1)           fbr(SPARC_FBG, i0, r0, r1)
+#  define bgti_f(i0, r0, i1)           fbw(SPARC_FBG, i0, r0, i1)
+#  define bner_f(i0, r0, r1)           fbr(SPARC_FBNE, i0, r0, r1)
+#  define bnei_f(i0, r0, i1)           fbw(SPARC_FBNE, i0, r0, i1)
+#  define bunltr_f(i0, r0, r1)         fbr(SPARC_FBUL, i0, r0, r1)
+#  define bunlti_f(i0, r0, i1)         fbw(SPARC_FBUL, i0, r0, i1)
+#  define bunler_f(i0, r0, r1)         fbr(SPARC_FBULE, i0, r0, r1)
+#  define bunlei_f(i0, r0, i1)         fbw(SPARC_FBULE, i0, r0, i1)
+#  define buneqr_f(i0, r0, r1)         fbr(SPARC_FBUE, i0, r0, r1)
+#  define buneqi_f(i0, r0, i1)         fbw(SPARC_FBUE, i0, r0, i1)
+#  define bunger_f(i0, r0, r1)         fbr(SPARC_FBUGE, i0, r0, r1)
+#  define bungei_f(i0, r0, i1)         fbw(SPARC_FBUGE, i0, r0, i1)
+#  define bungtr_f(i0, r0, r1)         fbr(SPARC_FBUG, i0, r0, r1)
+#  define bungti_f(i0, r0, i1)         fbw(SPARC_FBUG, i0, r0, i1)
+#  define bltgtr_f(i0, r0, r1)         fbr(SPARC_FBLG, i0, r0, r1)
+#  define bltgti_f(i0, r0, i1)         fbw(SPARC_FBLG, i0, r0, i1)
+#  define bordr_f(i0, r0, r1)          fbr(SPARC_FBO, i0, r0, r1)
+#  define bordi_f(i0, r0, i1)          fbw(SPARC_FBO, i0, r0, i1)
+#  define bunordr_f(i0, r0, r1)                fbr(SPARC_FBU, i0, r0, r1)
+#  define bunordi_f(i0, r0, i1)                fbw(SPARC_FBU, i0, r0, i1)
+#  define dbr(cc, i0, r0, r1)          _dbr(_jit, cc, i0, r0, r1)
+static jit_word_t
+_dbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define dbw(cc, i0, r0, i1)          _dbw(_jit, cc, i0, r0, i1)
+static jit_word_t
+_dbw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bltr_d(i0, r0, r1)           dbr(SPARC_FBL, i0, r0, r1)
+#  define blti_d(i0, r0, i1)           dbw(SPARC_FBL, i0, r0, i1)
+#  define bler_d(i0, r0, r1)           dbr(SPARC_FBLE, i0, r0, r1)
+#  define blei_d(i0, r0, i1)           dbw(SPARC_FBLE, i0, r0, i1)
+#  define beqr_d(i0, r0, r1)           dbr(SPARC_FBE, i0, r0, r1)
+#  define beqi_d(i0, r0, i1)           dbw(SPARC_FBE, i0, r0, i1)
+#  define bger_d(i0, r0, r1)           dbr(SPARC_FBGE, i0, r0, r1)
+#  define bgei_d(i0, r0, i1)           dbw(SPARC_FBGE, i0, r0, i1)
+#  define bgtr_d(i0, r0, r1)           dbr(SPARC_FBG, i0, r0, r1)
+#  define bgti_d(i0, r0, i1)           dbw(SPARC_FBG, i0, r0, i1)
+#  define bner_d(i0, r0, r1)           dbr(SPARC_FBNE, i0, r0, r1)
+#  define bnei_d(i0, r0, i1)           dbw(SPARC_FBNE, i0, r0, i1)
+#  define bunltr_d(i0, r0, r1)         dbr(SPARC_FBUL, i0, r0, r1)
+#  define bunlti_d(i0, r0, i1)         dbw(SPARC_FBUL, i0, r0, i1)
+#  define bunler_d(i0, r0, r1)         dbr(SPARC_FBULE, i0, r0, r1)
+#  define bunlei_d(i0, r0, i1)         dbw(SPARC_FBULE, i0, r0, i1)
+#  define buneqr_d(i0, r0, r1)         dbr(SPARC_FBUE, i0, r0, r1)
+#  define buneqi_d(i0, r0, i1)         dbw(SPARC_FBUE, i0, r0, i1)
+#  define bunger_d(i0, r0, r1)         dbr(SPARC_FBUGE, i0, r0, r1)
+#  define bungei_d(i0, r0, i1)         dbw(SPARC_FBUGE, i0, r0, i1)
+#  define bungtr_d(i0, r0, r1)         dbr(SPARC_FBUG, i0, r0, r1)
+#  define bungti_d(i0, r0, i1)         dbw(SPARC_FBUG, i0, r0, i1)
+#  define bltgtr_d(i0, r0, r1)         dbr(SPARC_FBLG, i0, r0, r1)
+#  define bltgti_d(i0, r0, i1)         dbw(SPARC_FBLG, i0, r0, i1)
+#  define bordr_d(i0, r0, r1)          dbr(SPARC_FBO, i0, r0, r1)
+#  define bordi_d(i0, r0, i1)          dbw(SPARC_FBO, i0, r0, i1)
+#  define bunordr_d(i0, r0, r1)                dbr(SPARC_FBU, i0, r0, r1)
+#  define bunordi_d(i0, r0, i1)                dbw(SPARC_FBU, i0, r0, i1)
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * Emit one floating-point operate instruction.
+ *
+ * rd, rs1 and rs2 are register numbers, op3 and opf the opcode fields.
+ * On 64-bit builds a register number above 31 must be even and at most
+ * 63; it is rewritten as (number - 31) before being stored in its 5-bit
+ * field.  After that adjustment every operand is asserted to fit the
+ * width of its field (5 bits for registers, 6 for op3, 9 for opf).
+ */
+static void
+_f3f(jit_state_t *_jit, jit_int32_t rd,
+     jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
+{
+    jit_instr_t         insn;
+
+#  if __WORDSIZE == 64
+    /* Fold register numbers 32..63 into the 5-bit encoding. */
+    if (rd > 31) {
+        assert(rd <= 63 && (rd & 1) == 0);
+        rd -= 31;
+    }
+    if (rs1 > 31) {
+        assert(rs1 <= 63 && (rs1 & 1) == 0);
+        rs1 -= 31;
+    }
+    if (rs2 > 31) {
+        assert(rs2 <= 63 && (rs2 & 1) == 0);
+        rs2 -= 31;
+    }
+#  endif
+
+    /* Each operand must fit its instruction field. */
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(opf & 0xfffffe00));
+    assert(!(rs2 & 0xffffffe0));
+
+    /* Assemble the word; op is always 2 for this format. */
+    insn.op.b  = 2;
+    insn.op3.b = op3;
+    insn.opf.b = opf;
+    insn.rd.b  = rd;
+    insn.rs1.b = rs1;
+    insn.rs2.b = rs2;
+    ii(insn.v);
+}
+
+#  if __WORDSIZE == 64
+/*
+ * Single-precision register move for 64-bit builds; nothing is emitted
+ * when r0 and r1 are the same register.
+ *
+ * FMOVS is only issued on registers for which single_precision_p()
+ * holds.  An operand outside that range is routed through a CLASS_SNG
+ * scratch register: a source is first copied into it with movr_d, and a
+ * destination is filled from it with movr_d afterwards.
+ */
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+    jit_int32_t         src, dst;
+
+    if (r0 == r1)
+        return;
+
+    /* Fast path: both operands are directly usable. */
+    if (single_precision_p(r0) && single_precision_p(r1)) {
+        FMOVS(r1, r0);
+        return;
+    }
+
+    tmp = jit_get_reg(CLASS_SNG);
+    src = r1;
+    dst = r0;
+    if (!single_precision_p(r1)) {
+        /* Bring the source into the scratch register. */
+        movr_d(rn(tmp), r1);
+        src = rn(tmp);
+    }
+    if (!single_precision_p(r0))
+        dst = rn(tmp);
+    FMOVS(src, dst);
+    if (!single_precision_p(r0))
+        /* Hand the result over to the real destination. */
+        movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision negate for 64-bit builds: r0 = -r1.
+ *
+ * FNEGS is only issued on registers for which single_precision_p()
+ * holds.  An operand outside that range is routed through a CLASS_SNG
+ * scratch register: a source is first copied into it with movr_d, and a
+ * destination is filled from it with movr_d afterwards.
+ */
+static void
+_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+    jit_int32_t         src, dst;
+
+    /* Fast path: both operands are directly usable. */
+    if (single_precision_p(r0) && single_precision_p(r1)) {
+        FNEGS(r1, r0);
+        return;
+    }
+
+    tmp = jit_get_reg(CLASS_SNG);
+    src = r1;
+    dst = r0;
+    if (!single_precision_p(r1)) {
+        /* Bring the source into the scratch register. */
+        movr_d(rn(tmp), r1);
+        src = rn(tmp);
+    }
+    if (!single_precision_p(r0))
+        dst = rn(tmp);
+    FNEGS(src, dst);
+    if (!single_precision_p(r0))
+        /* Hand the result over to the real destination. */
+        movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision absolute value for 64-bit builds: r0 = |r1|.
+ *
+ * FABSS is only issued on registers for which single_precision_p()
+ * holds.  An operand outside that range is routed through a CLASS_SNG
+ * scratch register: a source is first copied into it with movr_d, and a
+ * destination is filled from it with movr_d afterwards.
+ */
+static void
+_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+    jit_int32_t         src, dst;
+
+    /* Fast path: both operands are directly usable. */
+    if (single_precision_p(r0) && single_precision_p(r1)) {
+        FABSS(r1, r0);
+        return;
+    }
+
+    tmp = jit_get_reg(CLASS_SNG);
+    src = r1;
+    dst = r0;
+    if (!single_precision_p(r1)) {
+        /* Bring the source into the scratch register. */
+        movr_d(rn(tmp), r1);
+        src = rn(tmp);
+    }
+    if (!single_precision_p(r0))
+        dst = rn(tmp);
+    FABSS(src, dst);
+    if (!single_precision_p(r0))
+        /* Hand the result over to the real destination. */
+        movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision square root for 64-bit builds: r0 = sqrt(r1).
+ *
+ * FSQRTS is only issued on registers for which single_precision_p()
+ * holds.  An operand outside that range is routed through a CLASS_SNG
+ * scratch register: a source is first copied into it with movr_d, and a
+ * destination is filled from it with movr_d afterwards.
+ */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+    jit_int32_t         src, dst;
+
+    /* Fast path: both operands are directly usable. */
+    if (single_precision_p(r0) && single_precision_p(r1)) {
+        FSQRTS(r1, r0);
+        return;
+    }
+
+    tmp = jit_get_reg(CLASS_SNG);
+    src = r1;
+    dst = r0;
+    if (!single_precision_p(r1)) {
+        /* Bring the source into the scratch register. */
+        movr_d(rn(tmp), r1);
+        src = rn(tmp);
+    }
+    if (!single_precision_p(r0))
+        dst = rn(tmp);
+    FSQRTS(src, dst);
+    if (!single_precision_p(r0))
+        /* Hand the result over to the real destination. */
+        movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+#  if __WORDSIZE == 64
+/*
+ * Double -> single conversion for 64-bit builds: r0 = (float)r1.
+ *
+ * When r0 satisfies single_precision_p() the FDTOS result is written to
+ * it directly.  Otherwise the conversion targets a CLASS_SNG scratch
+ * register that is then copied to r0 with movr_d.
+ * NOTE(review): the scratch register is loaded with the current value
+ * of r0 before the conversion -- presumably so that whatever FDTOS does
+ * not overwrite still matches r0 when copied back; confirm.
+ */
+static void
+_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (single_precision_p(r0)) {
+        FDTOS(r1, r0);
+        return;
+    }
+
+    tmp = jit_get_reg(CLASS_SNG);
+    movr_d(rn(tmp), r0);
+    FDTOS(r1, rn(tmp));
+    movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/*
+ * Load the single-precision constant *i0 into r0.
+ *
+ * Normally the value is loaded straight from the address i0.  When the
+ * state has no_data set, the constant is instead built in code: its bit
+ * pattern is moved into a scratch integer register, stored at
+ * BIAS(-8) off the frame pointer and reloaded from there as a float.
+ */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+        jit_float32_t    f;
+        jit_int32_t      i;
+    } bits;
+    jit_int32_t          scratch;
+
+    if (!_jitc->no_data) {
+        ldi_f(r0, (jit_word_t)i0);
+        return;
+    }
+
+    /* Reinterpret the float as its 32-bit pattern. */
+    bits.f = *i0;
+    scratch = jit_get_reg(jit_class_gpr);
+    movi(rn(scratch), bits.i & 0xffffffff);
+    stxi_i(BIAS(-8), _FP_REGNO, rn(scratch));
+    jit_unget_reg(scratch);
+    ldxi_f(r0, _FP_REGNO, BIAS(-8));
+}
+
+#  if __WORDSIZE == 64
+/* Emit a single -> double conversion (FSTOD) of r1 into r0.
+ * A source for which single_precision_p() does not hold is first
+ * copied into a CLASS_SNG scratch register. */
+static void
+_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (single_precision_p(r1)) {
+       FSTOD(r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(CLASS_SNG);
+    movr_d(rn(tmp), r1);
+    FSTOD(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/* Load the double-precision constant *i0 into FP register r0.
+ * With _jitc->no_data set, the constant's bit pattern is built in a
+ * general-purpose scratch register, written to the frame slot starting
+ * at BIAS(-8) (a second store at BIAS(-4) on 32-bit builds) and then
+ * reloaded with ldxi_d; otherwise it is loaded from the address i0. */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    /* Reinterpret the double as one word (64-bit) or two (32-bit). */
+    union {
+#  if __WORDSIZE == 32
+       jit_int32_t      i[2];
+#  else
+       jit_word_t       w;
+#  endif
+       jit_float64_t    d;
+    } data;
+    jit_int32_t                 reg;
+
+    if (_jitc->no_data) {
+       data.d = *i0;
+       reg = jit_get_reg(jit_class_gpr);
+# if __WORDSIZE == 32
+       movi(rn(reg), data.i[0]);
+#  else
+       movi(rn(reg), data.w);
+#  endif
+       stxi(BIAS(-8), _FP_REGNO, rn(reg));
+#  if __WORDSIZE == 32
+       /* Second half of the value on 32-bit builds. */
+       movi(rn(reg), data.i[1]);
+       stxi_i(BIAS(-4), _FP_REGNO, rn(reg));
+#  endif
+       jit_unget_reg(reg);
+       ldxi_d(r0, _FP_REGNO, BIAS(-8));
+    }
+    else
+       ldi_d(r0, (jit_word_t)i0);
+}
+
+#  if __WORDSIZE == 32
+/* Copy the double held in the register pair r1/r1+1 to r0/r0+1 using
+ * two FMOVS instructions.  Both register numbers must be even.
+ * Nothing is emitted when source and destination are the same. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(!(r0 & 1));
+    assert(!(r1 & 1));
+    if (r0 == r1)
+       return;
+    FMOVS(r1, r0);
+    FMOVS(r1 + 1, r0 + 1);
+}
+
+/* Negate the double in register pair r1/r1+1 into r0/r0+1.
+ * FNEGS is applied to the even register; the odd register is copied
+ * unchanged unless the operation is in place.  Both register numbers
+ * must be even. */
+static void
+_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(!(r0 & 1));
+    assert(!(r1 & 1));
+    FNEGS(r1, r0);
+    if (r0 == r1)
+       return;
+    FMOVS(r1 + 1, r0 + 1);
+}
+
+/* Absolute value of the double in register pair r1/r1+1 into r0/r0+1.
+ * FABSS is applied to the even register; the odd register is copied
+ * unchanged unless the operation is in place.  Both register numbers
+ * must be even. */
+static void
+_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(!(r0 & 1));
+    assert(!(r1 & 1));
+    FABSS(r1, r0);
+    if (r0 == r1)
+       return;
+    FMOVS(r1 + 1, r0 + 1);
+}
+#  endif
+
+#  if __WORDSIZE == 64
+/* single_rrr(NAME, CODE) defines a static emitter
+ *     NAME(_jit, r0, r1, r2)
+ * that issues CODE(x1, x2, x0), where x0/x1/x2 stand in for r0/r1/r2.
+ * An operand for which single_precision_p() holds is used directly;
+ * any other operand is replaced by a CLASS_SNG scratch register
+ * (inputs are copied in with movr_d, the result is copied back to r0).
+ * Operands that alias each other share one scratch.
+ * mask records which scratch registers were allocated:
+ *     1 -> t0 (for r0), 2 -> t1 (for r1), 4 -> t2 (for r2). */
+#    define single_rrr(NAME, CODE)                                     \
+static void                                                            \
+NAME(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)        \
+{                                                                      \
+    jit_int32_t                x0, t0, x1, t1, x2, t2, mask = 0;               \
+    if (!single_precision_p(r0)) {                                     \
+       mask |= 1;                                                      \
+       t0 = jit_get_reg(CLASS_SNG);                                    \
+       x0 = rn(t0);                                                    \
+       if (r0 == r1) {                                                 \
+           x1 = x0;                                                    \
+           movr_d(x1, r1);                                             \
+           if (r0 == r2)                                               \
+               x2 = x0;                                                \
+       }                                                               \
+       else if (r0 == r2) {                                            \
+           x2 = x0;                                                    \
+           movr_d(x2, r2);                                             \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x0 = r0;                                                        \
+    if (!single_precision_p(r1)) {                                     \
+       if (r0 != r1) {                                                 \
+           mask |= 2;                                                  \
+           t1 = jit_get_reg(CLASS_SNG);                                \
+           x1 = rn(t1);                                                \
+           movr_d(x1, r1);                                             \
+           if (r1 == r2)                                               \
+               x2 = x1;                                                \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x1 = r1;                                                        \
+    if (!single_precision_p(r2)) {                                     \
+       if (r0 != r2 && r1 != r2) {                                     \
+           mask |= 4;                                                  \
+           t2 = jit_get_reg(CLASS_SNG);                                \
+           x2 = rn(t2);                                                \
+           movr_d(x2, r2);                                             \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x2 = r2;                                                        \
+    CODE(x1, x2, x0);                                                  \
+    if (mask & 1) {                                                    \
+       movr_d(r0, x0);                                                 \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+    if (mask & 2)                                                      \
+       jit_unget_reg(t1);                                              \
+    if (mask & 4)                                                      \
+       jit_unget_reg(t2);                                              \
+}
+
+/* Emit the three-register single-precision operation
+ *     FPop1(x0, x1, op, x2)
+ * where x0/x1/x2 stand in for r0/r1/r2.  An operand for which
+ * single_precision_p() holds is used directly; any other operand is
+ * replaced by a CLASS_SNG scratch register (inputs copied in with
+ * movr_d, the result copied back to r0).  Aliased operands share one
+ * scratch.  mask records the scratch registers in use:
+ *     1 -> t0 (for r0), 2 -> t1 (for r1), 4 -> t2 (for r2). */
+static void
+_fop2f(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                x0, t0, x1, t1, x2, t2, mask = 0;
+    /* Destination: allocate a scratch and preload it with whichever
+     * source aliases r0. */
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       if (r0 == r1) {
+           x1 = x0;
+           movr_d(x1, r1);
+           if (r0 == r2)
+               x2 = x0;
+       }
+       else if (r0 == r2) {
+           x2 = x0;
+           movr_d(x2, r2);
+       }
+    }
+    else
+       x0 = r0;
+    /* First source, unless already covered by the r0 scratch. */
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+           if (r1 == r2)
+               x2 = x1;
+       }
+    }
+    else
+       x1 = r1;
+    /* Second source, unless it aliases r0 or r1. */
+    if (!single_precision_p(r2)) {
+       if (r0 != r2 && r1 != r2) {
+           mask |= 4;
+           t2 = jit_get_reg(CLASS_SNG);
+           x2 = rn(t2);
+           movr_d(x2, r2);
+       }
+    }
+    else
+       x2 = r2;
+    FPop1(x0, x1,  op, x2);
+    /* Copy the result back and release the scratch registers. */
+    if (mask & 1) {
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+    if (mask & 4)
+       jit_unget_reg(t2);
+}
+#  endif
+
+/* Emit the single-precision operation op with a register and an
+ * immediate operand: FPop1(r0, r1, op, <*i0>).
+ * The constant *i0 is loaded into a CLASS_SNG scratch with movi_f.
+ * On 64-bit builds, r0/r1 for which single_precision_p() does not hold
+ * are replaced by further CLASS_SNG scratch registers; mask bit 1
+ * marks t0 (for r0, result copied back), bit 2 marks t1 (for r1). */
+static void
+_fop1f(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
+    movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       /* In-place operation: the r0 scratch doubles as the source. */
+       if (r0 == r1) {
+           x1 = x0;
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x0 = r0;
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x1 = r1;
+    FPop1(x0, x1, op, rn(reg));
+    if (mask & 1) {
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
+    FPop1(r0, r1, op, rn(reg));
+#  endif
+    jit_unget_reg(reg);
+}
+
+/* Reversed-operand variant of the register/immediate single-precision
+ * operation: emits FPop1(r0, <*i0>, op, r1), i.e. the constant is the
+ * first source and r1 the second.
+ * The constant *i0 is loaded into a CLASS_SNG scratch with movi_f.
+ * On 64-bit builds, r0/r1 for which single_precision_p() does not hold
+ * are replaced by further CLASS_SNG scratch registers; mask bit 1
+ * marks t0 (for r0, result copied back), bit 2 marks t1 (for r1). */
+static void
+_rfop1f(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
+    movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       /* In-place operation: the r0 scratch doubles as the source. */
+       if (r0 == r1) {
+           x1 = x0;
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x0 = r0;
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x1 = r1;
+    FPop1(x0, rn(reg), op, x1);
+    if (mask & 1) {
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
+    FPop1(r0, rn(reg), op, r1);
+#  endif
+    jit_unget_reg(reg);
+}
+
+/* Emit the double-precision operation op with a register and an
+ * immediate operand: FPop1(r0, r1, op, <*i0>).  The constant is loaded
+ * into a CLASS_DBL scratch register with movi_d. */
+static void
+_fop1d(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t                imm;
+
+    imm = jit_get_reg(CLASS_DBL);
+    movi_d(rn(imm), i0);
+    FPop1(r0, r1, op, rn(imm));
+    jit_unget_reg(imm);
+}
+
+/* Reversed-operand variant of the register/immediate double-precision
+ * operation: emits FPop1(r0, <*i0>, op, r1).  The constant is loaded
+ * into a CLASS_DBL scratch register with movi_d. */
+static void
+_rfop1d(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t                imm;
+
+    imm = jit_get_reg(CLASS_DBL);
+    movi_d(rn(imm), i0);
+    FPop1(r0, rn(imm), op, r1);
+    jit_unget_reg(imm);
+}
+
+/* Convert the integer in general-purpose register r1 to single
+ * precision in FP register r0.
+ * The integer is passed through the frame slot at BIAS(-8): it is
+ * stored with stxi, reloaded into r0 and converted in the FP unit
+ * (FITOS on 32-bit builds, FXTOS on 64-bit builds).  On 64-bit builds
+ * a destination for which single_precision_p() does not hold is
+ * converted via a CLASS_SNG scratch register. */
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(BIAS(-8), _FP_REGNO, r1);
+#  if __WORDSIZE == 32
+    ldxi_f(r0, _FP_REGNO, BIAS(-8));
+    FITOS(r0, r0);
+#  else
+    ldxi_d(r0, _FP_REGNO, BIAS(-8));
+    if (!single_precision_p(r0)) {
+       jit_int32_t     reg;
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r0);
+       FXTOS(rn(reg), rn(reg));
+       movr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       FXTOS(r0, r0);
+#  endif
+}
+
+/* Convert the single-precision value in FP register r1 to an integer
+ * in general-purpose register r0.
+ * FSTOI is performed into a CLASS_SNG scratch register, which is then
+ * stored to the frame slot at BIAS(-8) and reloaded with ldxi_i.  On
+ * 64-bit builds a source for which single_precision_p() does not hold
+ * is first copied into the scratch. */
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(CLASS_SNG);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FSTOI(rn(reg), rn(reg));
+    }
+    else
+#  endif
+       /* On 64-bit builds this is the else branch of the test above. */
+       FSTOI(r1, rn(reg));
+    stxi_f(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_i(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+/* Convert the single-precision value in FP register r1 to a 64-bit
+ * integer in general-purpose register r0.
+ * FSTOX is performed into a CLASS_SNG scratch register, which is then
+ * stored to the frame slot at BIAS(-8) with stxi_d and reloaded with
+ * ldxi_l.  A source for which single_precision_p() does not hold is
+ * first copied into the scratch.
+ * The whole function is only built for 64-bit targets, so no inner
+ * word-size guard is needed around the register check. */
+static void
+_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(CLASS_SNG);
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FSTOX(rn(reg), rn(reg));
+    }
+    else
+       FSTOX(r1, rn(reg));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_l(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(reg);
+}
+#  endif
+
+/* Single-precision compare of r1 with r2, leaving 1 or 0 in
+ * general-purpose register r0 according to condition cc.
+ * On 64-bit builds, operands for which single_precision_p() does not
+ * hold are copied into CLASS_SNG scratch registers before FCMPS
+ * (mask bit 1 -> t0 for r1, bit 2 -> t1 for r2); when r1 == r2 the
+ * same stand-in is used for both sides. */
+static void
+_fcr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+    if (!single_precision_p(r1)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r1);
+    }
+    else
+       x0 = r1;
+    if (r1 == r2)
+       x1 = x0;
+    else if (!single_precision_p(r2)) {
+       mask |= 2;
+       t1 = jit_get_reg(CLASS_SNG);
+       x1 = rn(t1);
+       movr_d(x1, r2);
+    }
+    else
+       x1 = r2;
+    FCMPS(x0, x1);
+    if (mask & 1)
+       jit_unget_reg(t0);
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
+    FCMPS(r1, r2);
+#  endif
+    /* NOTE(review): presumably FBa is an annulling FP branch with a
+     * displacement of 3 instructions, so movi(r0, 1) runs in the delay
+     * slot only when the branch is taken and movi(r0, 0) only when it
+     * is not -- confirm against the FBa definition.  This relies on
+     * each movi emitting exactly one instruction. */
+    FBa(cc, 3);
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+/* Single-precision compare of r1 with the constant *i0, leaving 1 or 0
+ * in general-purpose register r0 according to condition cc.
+ * The constant is loaded into a CLASS_SNG scratch with movi_f.  On
+ * 64-bit builds, an r1 for which single_precision_p() does not hold is
+ * copied into another CLASS_SNG scratch first (mask bit 1 -> t0). */
+static void
+_fcw(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, mask = 0;
+    if (!single_precision_p(r1)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r1);
+    }
+    else
+       x0 = r1;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
+    movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    FCMPS(x0, rn(reg));
+    if (mask & 1)
+       jit_unget_reg(t0);
+#  else
+    FCMPS(r1, rn(reg));
+#  endif
+    jit_unget_reg(reg);
+    /* NOTE(review): presumably an annulling branch over the second
+     * movi, with the first movi in its delay slot -- confirm against
+     * the FBa definition. */
+    FBa(cc, 3);
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+/* Double-precision compare of r1 with r2 (FCMPD), leaving 1 or 0 in
+ * general-purpose register r0 according to condition cc. */
+static void
+_dcr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPD(r1, r2);
+    /* NOTE(review): presumably an annulling branch over the second
+     * movi, with the first movi in its delay slot -- confirm against
+     * the FBa definition. */
+    FBa(cc, 3);
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+/* Double-precision compare of r1 with the constant *i0 (FCMPD),
+ * leaving 1 or 0 in general-purpose register r0 according to cc.
+ * The constant is loaded into a CLASS_DBL scratch with movi_d. */
+static void
+_dcw(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
+{
+    jit_int32_t                imm;
+
+    imm = jit_get_reg(CLASS_DBL);
+    movi_d(rn(imm), i0);
+    FCMPD(r1, rn(imm));
+    jit_unget_reg(imm);
+
+    /* Same three-instruction 1/0 selection sequence as the other
+     * compare emitters in this file. */
+    FBa(cc, 3);
+    movi(r0, 1);
+    movi(r0, 0);
+}
+
+#  if __WORDSIZE == 64
+/* Load a single-precision value from the address in r1 into FP
+ * register r0 (LDF with a zero index).  A destination for which
+ * single_precision_p() does not hold is loaded via a CLASS_SNG
+ * scratch register and then copied with movr_d. */
+static void
+_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (single_precision_p(r0)) {
+       LDF(r1, 0, r0);
+       return;
+    }
+    tmp = jit_get_reg(CLASS_SNG);
+    LDF(r1, 0, rn(tmp));
+    movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/* Load a single-precision value from the absolute address i0 into FP
+ * register r0.
+ * When s13_p(i0) holds, the address is used as the immediate of LDFI
+ * (via a CLASS_SNG scratch on 64-bit builds if single_precision_p(r0)
+ * does not hold); otherwise the address is first materialised in a
+ * general-purpose scratch register and ldr_f is used. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           LDFI(0, i0, rn(reg));
+           movr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           /* On 64-bit builds this is the else branch of the test above. */
+           LDFI(0, i0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+/* Load a single-precision value from the address formed by r1 and r2
+ * into FP register r0 (LDF).  A destination for which
+ * single_precision_p() does not hold is loaded via a CLASS_SNG
+ * scratch register and then copied with movr_d. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (single_precision_p(r0)) {
+       LDF(r1, r2, r0);
+       return;
+    }
+    tmp = jit_get_reg(CLASS_SNG);
+    LDF(r1, r2, rn(tmp));
+    movr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/* Load a single-precision value from base register r1 plus offset i0
+ * into FP register r0.
+ * When s13_p(i0) holds, the offset is used as the immediate of LDFI
+ * (via a CLASS_SNG scratch on 64-bit builds if single_precision_p(r0)
+ * does not hold); otherwise the offset is first materialised in a
+ * general-purpose scratch register and ldxr_f is used. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           LDFI(r1, i0, rn(reg));
+           movr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           /* On 64-bit builds this is the else branch of the test above. */
+           LDFI(r1, i0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_f(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+/* Store the single-precision value in FP register r1 to the address
+ * in r0 (STF with a zero index).  A source for which
+ * single_precision_p() does not hold is first copied into a CLASS_SNG
+ * scratch register. */
+static void
+_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (single_precision_p(r1)) {
+       STF(r1, r0, 0);
+       return;
+    }
+    tmp = jit_get_reg(CLASS_SNG);
+    movr_d(rn(tmp), r1);
+    STF(rn(tmp), r0, 0);
+    jit_unget_reg(tmp);
+}
+# endif
+
+/* Store the single-precision value in FP register r0 to the absolute
+ * address i0.
+ * When s13_p(i0) holds, the address is used as the immediate of STFI
+ * (via a CLASS_SNG scratch on 64-bit builds if single_precision_p(r0)
+ * does not hold); otherwise the address is first materialised in a
+ * general-purpose scratch register and str_f is used. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           movr_d(rn(reg), r0);
+           STFI(rn(reg), 0, i0);
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           /* On 64-bit builds this is the else branch of the test above. */
+           STFI(r0, 0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_f(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+/* Store the single-precision value in FP register r2 to the address
+ * formed by r1 and r0 (STF).  A source for which single_precision_p()
+ * does not hold is first copied into a CLASS_SNG scratch register. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (single_precision_p(r2)) {
+       STF(r2, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(CLASS_SNG);
+    movr_d(rn(tmp), r2);
+    STF(rn(tmp), r1, r0);
+    jit_unget_reg(tmp);
+}
+# endif
+
+/* Store the single-precision value in FP register r1 to base register
+ * r0 plus offset i0.
+ * When s13_p(i0) holds, the offset is used as the immediate of STFI
+ * (via a CLASS_SNG scratch on 64-bit builds if single_precision_p(r1)
+ * does not hold); otherwise the offset is first materialised in a
+ * general-purpose scratch register and stxr_f is used. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r1)) {
+           reg = jit_get_reg(CLASS_SNG);
+           movr_d(rn(reg), r1);
+           STFI(rn(reg), r0, i0);
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           /* On 64-bit builds this is the else branch of the test above. */
+           STFI(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Convert the integer in general-purpose register r1 to double
+ * precision in FP register r0.
+ * The integer is stored to the frame slot at BIAS(-8), reloaded into
+ * r0 with ldxi_d and converted in place (FITOD on 32-bit builds,
+ * FXTOD on 64-bit builds).  On 32-bit builds register number 0 is also
+ * stored at BIAS(-4) so the full 8-byte slot is written before the
+ * double load. */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(BIAS(-8), _FP_REGNO, r1);
+#  if __WORDSIZE == 32
+    /* NOTE(review): register 0 is presumably the hard-wired zero
+     * register (%g0) -- confirm. */
+    stxi(BIAS(-4), _FP_REGNO, 0);
+#  endif
+    ldxi_d(r0, _FP_REGNO, BIAS(-8));
+#  if __WORDSIZE == 32
+    FITOD(r0, r0);
+#  else
+    FXTOD(r0, r0);
+#  endif
+}
+
+/* Convert the double-precision value in FP register r1 to an integer
+ * in general-purpose register r0.
+ * FDTOI is performed into a scratch register, which is then stored to
+ * the frame slot at BIAS(-8) with stxi_d and reloaded with ldxi_i.  On
+ * 64-bit builds a source for which single_precision_p() does not hold
+ * is first copied into the scratch.
+ * NOTE(review): the scratch is taken from CLASS_SNG yet stored with
+ * the double store stxi_d -- confirm this pairing is intended. */
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(CLASS_SNG);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FDTOI(rn(reg), rn(reg));
+    }
+    else
+#  endif
+       /* On 64-bit builds this is the else branch of the test above. */
+       FDTOI(r1, rn(reg));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_i(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+/* Convert the double-precision value in FP register r1 to a 64-bit
+ * integer in general-purpose register r0.  FDTOX is performed into a
+ * CLASS_DBL scratch register, which is stored to the frame slot at
+ * BIAS(-8) and reloaded with ldxi_l. */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(CLASS_DBL);
+    FDTOX(r1, rn(tmp));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(tmp));
+    ldxi_l(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(tmp);
+}
+#  endif
+
+/* Load a double-precision value from the absolute address i0 into FP
+ * register r0.  When s13_p(i0) holds the address is used as the
+ * immediate of LDDFI; otherwise it is materialised in a
+ * general-purpose scratch register and ldr_d is used. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (s13_p(i0)) {
+       LDDFI(0, i0, r0);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_d(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load a double-precision value from base register r1 plus offset i0
+ * into FP register r0.  When s13_p(i0) holds the offset is used as
+ * the immediate of LDDFI; otherwise it is materialised in a
+ * general-purpose scratch register and ldxr_d is used. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t i0)
+{
+    jit_int32_t                off;
+    if (s13_p(i0)) {
+       LDDFI(r1, i0, r0);
+       return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    ldxr_d(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* Store the double-precision value in FP register r0 to the absolute
+ * address i0.  When s13_p(i0) holds the address is used as the
+ * immediate of STDFI; otherwise it is materialised in a
+ * general-purpose scratch register and str_d is used. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr;
+    if (s13_p(i0)) {
+       STDFI(r0, 0, i0);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_d(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/* Store the double-precision value in FP register r1 to base register
+ * r0 plus offset i0.  When s13_p(i0) holds the offset is used as the
+ * immediate of STDFI; otherwise it is materialised in a
+ * general-purpose scratch register and stxr_d is used. */
+static void
+_stxi_d(jit_state_t *_jit, jit_int32_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                off;
+    if (s13_p(i0)) {
+       STDFI(r1, r0, i0);
+       return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    stxr_d(rn(off), r0, r1);
+    jit_unget_reg(off);
+}
+
+/* Emit a single-precision compare of r0 with r1 followed by an FP
+ * branch on condition cc to address i0 and a NOP.
+ * On 64-bit builds, operands for which single_precision_p() does not
+ * hold are copied into CLASS_SNG scratch registers before FCMPS
+ * (mask bit 1 -> t0 for r0, bit 2 -> t1 for r1); when r0 == r1 the
+ * same stand-in is used for both sides.
+ * Returns the value of _jit->pc.w sampled immediately before the
+ * branch instruction is emitted. */
+static jit_word_t
+_fbr(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0,jit_int32_t r1)
+{
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+#  endif
+    jit_word_t         w;
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r0);
+    }
+    else
+       x0 = r0;
+    if (r0 == r1)
+       x1 = x0;
+    else if (!single_precision_p(r1)) {
+       mask |= 2;
+       t1 = jit_get_reg(CLASS_SNG);
+       x1 = rn(t1);
+       movr_d(x1, r1);
+    }
+    else
+       x1 = r1;
+    FCMPS(x0, x1);
+    if (mask & 1)
+       jit_unget_reg(t0);
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
+    FCMPS(r0, r1);
+#  endif
+    w = _jit->pc.w;
+    /* Displacement is the byte distance to i0 shifted right by 2. */
+    FB(cc, (i0 - w) >> 2);
+    /* NOTE(review): presumably fills the branch delay slot. */
+    NOP();
+    return (w);
+}
+
+/* Emit a single-precision compare of r0 with the constant *i1
+ * followed by an FP branch on condition cc to address i0 and a NOP.
+ * The constant is loaded into a CLASS_SNG scratch with movi_f.  On
+ * 64-bit builds, an r0 for which single_precision_p() does not hold is
+ * copied into another CLASS_SNG scratch first (mask bit 1 -> t0).
+ * Returns the value of _jit->pc.w sampled immediately before the
+ * branch instruction is emitted. */
+static jit_word_t
+_fbw(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float32_t *i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, mask = 0;
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r0);
+    }
+    else
+       x0 = r0;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
+    movi_f(rn(reg), i1);
+#  if __WORDSIZE == 64
+    FCMPS(x0, rn(reg));
+    if (mask & 1)
+       jit_unget_reg(t0);
+#  else
+    FCMPS(r0, rn(reg));
+#  endif
+    jit_unget_reg(reg);
+    w = _jit->pc.w;
+    /* Displacement is the byte distance to i0 shifted right by 2. */
+    FB(cc, (i0 - w) >> 2);
+    /* NOTE(review): presumably fills the branch delay slot. */
+    NOP();
+    return (w);
+}
+
+/* Emit a double-precision compare of r0 with r1 (FCMPD) followed by
+ * an FP branch on condition cc to address i0 and a NOP.
+ * Returns the value of _jit->pc.w sampled immediately before the
+ * branch instruction is emitted. */
+static jit_word_t
+_dbr(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+
+    FCMPD(r0, r1);
+    branch_pc = _jit->pc.w;
+    /* Displacement is the byte distance to i0 shifted right by 2. */
+    FB(cc, (i0 - branch_pc) >> 2);
+    NOP();
+    return (branch_pc);
+}
+
+/* Emit a double-precision compare of r0 with the constant *i1 (FCMPD)
+ * followed by an FP branch on condition cc to address i0 and a NOP.
+ * The constant is loaded into a CLASS_DBL scratch with movi_d.
+ * Returns the value of _jit->pc.w sampled immediately before the
+ * branch instruction is emitted. */
+static jit_word_t
+_dbw(jit_state_t *_jit, jit_int32_t cc,
+     jit_word_t i0, jit_int32_t r0, jit_float64_t *i1)
+{
+    jit_word_t         branch_pc;
+    jit_int32_t                imm;
+
+    imm = jit_get_reg(CLASS_DBL);
+    movi_d(rn(imm), i1);
+    FCMPD(r0, rn(imm));
+    jit_unget_reg(imm);
+
+    branch_pc = _jit->pc.w;
+    /* Displacement is the byte distance to i0 shifted right by 2. */
+    FB(cc, (i0 - branch_pc) >> 2);
+    NOP();
+    return (branch_pc);
+}
+
+/* Fetch the next double-precision variadic argument.
+ * r1 holds the current vararg pointer: the double it points at is
+ * loaded into FP register r0 and r1 is then advanced by 8.
+ * Only valid inside a function flagged with jit_call_varargs. */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Load argument. */
+    ldr_d(r0, r1);
+
+    /* Update vararg stack pointer. */
+    addi(r1, r1, 8);
+}
+#endif
diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c
new file mode 100644 (file)
index 0000000..ac683b6
--- /dev/null
@@ -0,0 +1,803 @@
+#if __WORDSIZE == 32
+/* JIT_INSTR_MAX for 32-bit builds; its value equals the largest entry
+ * in the table that follows (44, the epilog row).
+ * NOTE(review): entries are presumably emitted code sizes in bytes,
+ * one per jit code in declaration order -- confirm. */
+#define JIT_INSTR_MAX 44
+    0, /* data */
+    0, /* live */
+    0, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    36,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    12,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    4, /* addxi */
+    4, /* subr */
+    12,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    4, /* subxi */
+    16,        /* rsbi */
+    4, /* mulr */
+    12,        /* muli */
+    8, /* qmulr */
+    16,        /* qmuli */
+    8, /* qmulr_u */
+    16,        /* qmuli_u */
+    12,        /* divr */
+    20,        /* divi */
+    8, /* divr_u */
+    16,        /* divi_u */
+    28,        /* qdivr */
+    24,        /* qdivi */
+    24,        /* qdivr_u */
+    20,        /* qdivi_u */
+    20,        /* remr */
+    28,        /* remi */
+    16,        /* remr_u */
+    24,        /* remi_u */
+    4, /* andr */
+    12,        /* andi */
+    4, /* orr */
+    12,        /* ori */
+    4, /* xorr */
+    12,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    16,        /* ltr */
+    16,        /* lti */
+    16,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    16,        /* eqr */
+    16,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    16,        /* gtr */
+    16,        /* gti */
+    16,        /* gtr_u */
+    16,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    8, /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    8, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    4, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    4, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    4, /* ldr_i */
+    12,        /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    4, /* ldxr_c */
+    8, /* ldxi_c */
+    4, /* ldxr_uc */
+    8, /* ldxi_uc */
+    4, /* ldxr_s */
+    8, /* ldxi_s */
+    4, /* ldxr_us */
+    8, /* ldxi_us */
+    4, /* ldxr_i */
+    8, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    4, /* str_c */
+    12,        /* sti_c */
+    4, /* str_s */
+    12,        /* sti_s */
+    4, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    8, /* stxi_c */
+    4, /* stxr_s */
+    8, /* stxi_s */
+    4, /* stxr_i */
+    8, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    12,        /* blei */
+    12,        /* bler_u */
+    12,        /* blei_u */
+    12,        /* beqr */
+    20,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    12,        /* bgti */
+    12,        /* bgtr_u */
+    12,        /* bgti_u */
+    12,        /* bner */
+    20,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    12,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    12,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    12,        /* bosubi */
+    12,        /* bosubr_u */
+    12,        /* bosubi_u */
+    12,        /* bxsubr */
+    12,        /* bxsubi */
+    12,        /* bxsubr_u */
+    12,        /* bxsubi_u */
+    8, /* jmpr */
+    16,        /* jmpi */
+    8, /* callr */
+    16,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    44,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    16,        /* addi_f */
+    4, /* subr_f */
+    16,        /* subi_f */
+    16,        /* rsbi_f */
+    4, /* mulr_f */
+    16,        /* muli_f */
+    4, /* divr_f */
+    16,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    16,        /* ltr_f */
+    32,        /* lti_f */
+    16,        /* ler_f */
+    32,        /* lei_f */
+    16,        /* eqr_f */
+    32,        /* eqi_f */
+    16,        /* ger_f */
+    32,        /* gei_f */
+    16,        /* gtr_f */
+    32,        /* gti_f */
+    16,        /* ner_f */
+    32,        /* nei_f */
+    16,        /* unltr_f */
+    32,        /* unlti_f */
+    16,        /* unler_f */
+    32,        /* unlei_f */
+    16,        /* uneqr_f */
+    32,        /* uneqi_f */
+    16,        /* unger_f */
+    32,        /* ungei_f */
+    16,        /* ungtr_f */
+    32,        /* ungti_f */
+    16,        /* ltgtr_f */
+    32,        /* ltgti_f */
+    16,        /* ordr_f */
+    32,        /* ordi_f */
+    16,        /* unordr_f */
+    32,        /* unordi_f */
+    12,        /* truncr_f_i */
+    0, /* truncr_f_l */
+    12,        /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    16,        /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    4, /* ldxr_f */
+    8, /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    4, /* stxr_f */
+    8, /* stxi_f */
+    12,        /* bltr_f */
+    24,        /* blti_f */
+    12,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    24,        /* beqi_f */
+    12,        /* bger_f */
+    24,        /* bgei_f */
+    12,        /* bgtr_f */
+    24,        /* bgti_f */
+    12,        /* bner_f */
+    28,        /* bnei_f */
+    12,        /* bunltr_f */
+    28,        /* bunlti_f */
+    12,        /* bunler_f */
+    28,        /* bunlei_f */
+    12,        /* buneqr_f */
+    28,        /* buneqi_f */
+    12,        /* bunger_f */
+    28,        /* bungei_f */
+    12,        /* bungtr_f */
+    28,        /* bungti_f */
+    12,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    12,        /* bordr_f */
+    24,        /* bordi_f */
+    12,        /* bunordr_f */
+    28,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    8, /* negr_d */
+    8, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    40,        /* lti_d */
+    16,        /* ler_d */
+    40,        /* lei_d */
+    16,        /* eqr_d */
+    40,        /* eqi_d */
+    16,        /* ger_d */
+    40,        /* gei_d */
+    16,        /* gtr_d */
+    40,        /* gti_d */
+    16,        /* ner_d */
+    40,        /* nei_d */
+    16,        /* unltr_d */
+    40,        /* unlti_d */
+    16,        /* unler_d */
+    40,        /* unlei_d */
+    16,        /* uneqr_d */
+    40,        /* uneqi_d */
+    16,        /* unger_d */
+    40,        /* ungei_d */
+    16,        /* ungtr_d */
+    40,        /* ungti_d */
+    16,        /* ltgtr_d */
+    40,        /* ltgti_d */
+    16,        /* ordr_d */
+    40,        /* ordi_d */
+    16,        /* unordr_d */
+    40,        /* unordi_d */
+    12,        /* truncr_d_i */
+    0, /* truncr_d_l */
+    16,        /* extr_d */
+    4, /* extr_f_d */
+    8, /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    4, /* ldxr_d */
+    8, /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    4, /* stxr_d */
+    8, /* stxi_d */
+    12,        /* bltr_d */
+    32,        /* blti_d */
+    12,        /* bler_d */
+    32,        /* blei_d */
+    12,        /* beqr_d */
+    32,        /* beqi_d */
+    12,        /* bger_d */
+    32,        /* bgei_d */
+    12,        /* bgtr_d */
+    32,        /* bgti_d */
+    12,        /* bner_d */
+    36,        /* bnei_d */
+    12,        /* bunltr_d */
+    36,        /* bunlti_d */
+    12,        /* bunler_d */
+    36,        /* bunlei_d */
+    12,        /* buneqr_d */
+    36,        /* buneqi_d */
+    12,        /* bunger_d */
+    36,        /* bungei_d */
+    12,        /* bungtr_d */
+    36,        /* bungti_d */
+    12,        /* bltgtr_d */
+    32,        /* bltgti_d */
+    12,        /* bordr_d */
+    32,        /* bordi_d */
+    12,        /* bunordr_d */
+    36,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+/* Table section selected when __WORDSIZE == 64: one numeric entry per
+ * instruction code, in the order named by the trailing comments.  The
+ * largest entry in this section is 64 (qmuli and qmuli_u), the same value
+ * as JIT_INSTR_MAX.
+ * NOTE(review): the entries are presumably maximum encoded sizes in bytes;
+ * confirm against the code that includes this table. */
+#define JIT_INSTR_MAX 64
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    4, /* label */
+    36,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    28,        /* addi */
+    24,        /* addcr */
+    48,        /* addci */
+    52,        /* addxr */
+    52,        /* addxi */
+    4, /* subr */
+    28,        /* subi */
+    24,        /* subcr */
+    48,        /* subci */
+    52,        /* subxr */
+    52,        /* subxi */
+    32,        /* rsbi */
+    4, /* mulr */
+    28,        /* muli */
+    48,        /* qmulr */
+    64,        /* qmuli */
+    48,        /* qmulr_u */
+    64,        /* qmuli_u */
+    4, /* divr */
+    28,        /* divi */
+    4, /* divr_u */
+    28,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    36,        /* remi */
+    12,        /* remr_u */
+    36,        /* remi_u */
+    4, /* andr */
+    28,        /* andi */
+    4, /* orr */
+    28,        /* ori */
+    4, /* xorr */
+    28,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    16,        /* ltr */
+    16,        /* lti */
+    16,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    16,        /* eqr */
+    16,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    16,        /* gtr */
+    16,        /* gti */
+    16,        /* gtr_u */
+    16,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    24,        /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    8, /* extr_i */
+    8, /* extr_ui */
+    8, /* htonr_us */
+    8, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    28,        /* ldi_c */
+    4, /* ldr_uc */
+    28,        /* ldi_uc */
+    4, /* ldr_s */
+    28,        /* ldi_s */
+    4, /* ldr_us */
+    28,        /* ldi_us */
+    4, /* ldr_i */
+    28,        /* ldi_i */
+    4, /* ldr_ui */
+    28,        /* ldi_ui */
+    4, /* ldr_l */
+    28,        /* ldi_l */
+    4, /* ldxr_c */
+    24,        /* ldxi_c */
+    4, /* ldxr_uc */
+    24,        /* ldxi_uc */
+    4, /* ldxr_s */
+    24,        /* ldxi_s */
+    4, /* ldxr_us */
+    24,        /* ldxi_us */
+    4, /* ldxr_i */
+    24,        /* ldxi_i */
+    4, /* ldxr_ui */
+    24,        /* ldxi_ui */
+    4, /* ldxr_l */
+    24,        /* ldxi_l */
+    4, /* str_c */
+    28,        /* sti_c */
+    4, /* str_s */
+    28,        /* sti_s */
+    4, /* str_i */
+    28,        /* sti_i */
+    4, /* str_l */
+    28,        /* sti_l */
+    4, /* stxr_c */
+    24,        /* stxi_c */
+    4, /* stxr_s */
+    24,        /* stxi_s */
+    4, /* stxr_i */
+    24,        /* stxi_i */
+    4, /* stxr_l */
+    24,        /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    12,        /* blei */
+    12,        /* bler_u */
+    12,        /* blei_u */
+    12,        /* beqr */
+    36,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    12,        /* bgti */
+    12,        /* bgtr_u */
+    12,        /* bgti_u */
+    12,        /* bner */
+    36,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    12,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    12,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    12,        /* bosubi */
+    12,        /* bosubr_u */
+    12,        /* bosubi_u */
+    12,        /* bxsubr */
+    12,        /* bxsubi */
+    12,        /* bxsubr_u */
+    12,        /* bxsubi_u */
+    8, /* jmpr */
+    32,        /* jmpi */
+    8, /* callr */
+    32,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    44,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    16,        /* addr_f */
+    40,        /* addi_f */
+    24,        /* subr_f */
+    40,        /* subi_f */
+    40,        /* rsbi_f */
+    16,        /* mulr_f */
+    40,        /* muli_f */
+    16,        /* divr_f */
+    40,        /* divi_f */
+    12,        /* negr_f */
+    12,        /* absr_f */
+    12,        /* sqrtr_f */
+    24,        /* ltr_f */
+    48,        /* lti_f */
+    24,        /* ler_f */
+    48,        /* lei_f */
+    24,        /* eqr_f */
+    48,        /* eqi_f */
+    24,        /* ger_f */
+    48,        /* gei_f */
+    24,        /* gtr_f */
+    48,        /* gti_f */
+    24,        /* ner_f */
+    48,        /* nei_f */
+    24,        /* unltr_f */
+    48,        /* unlti_f */
+    24,        /* unler_f */
+    48,        /* unlei_f */
+    24,        /* uneqr_f */
+    48,        /* uneqi_f */
+    24,        /* unger_f */
+    48,        /* ungei_f */
+    24,        /* ungtr_f */
+    48,        /* ungti_f */
+    24,        /* ltgtr_f */
+    48,        /* ltgti_f */
+    24,        /* ordr_f */
+    48,        /* ordi_f */
+    24,        /* unordr_f */
+    48,        /* unordi_f */
+    16,        /* truncr_f_i */
+    16,        /* truncr_f_l */
+    20,        /* extr_f */
+    12,        /* extr_d_f */
+    16,        /* movr_f */
+    32,        /* movi_f */
+    8, /* ldr_f */
+    32,        /* ldi_f */
+    8, /* ldxr_f */
+    28,        /* ldxi_f */
+    8, /* str_f */
+    32,        /* sti_f */
+    8, /* stxr_f */
+    28,        /* stxi_f */
+    20,        /* bltr_f */
+    44,        /* blti_f */
+    20,        /* bler_f */
+    44,        /* blei_f */
+    28,        /* beqr_f */
+    60,        /* beqi_f */
+    20,        /* bger_f */
+    44,        /* bgei_f */
+    20,        /* bgtr_f */
+    44,        /* bgti_f */
+    20,        /* bner_f */
+    44,        /* bnei_f */
+    20,        /* bunltr_f */
+    44,        /* bunlti_f */
+    20,        /* bunler_f */
+    44,        /* bunlei_f */
+    20,        /* buneqr_f */
+    44,        /* buneqi_f */
+    20,        /* bunger_f */
+    44,        /* bungei_f */
+    20,        /* bungtr_f */
+    44,        /* bungti_f */
+    20,        /* bltgtr_f */
+    44,        /* bltgti_f */
+    20,        /* bordr_f */
+    44,        /* bordi_f */
+    20,        /* bunordr_f */
+    44,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    32,        /* addi_d */
+    4, /* subr_d */
+    32,        /* subi_d */
+    32,        /* rsbi_d */
+    4, /* mulr_d */
+    32,        /* muli_d */
+    4, /* divr_d */
+    32,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    48,        /* lti_d */
+    16,        /* ler_d */
+    48,        /* lei_d */
+    16,        /* eqr_d */
+    48,        /* eqi_d */
+    16,        /* ger_d */
+    48,        /* gei_d */
+    16,        /* gtr_d */
+    48,        /* gti_d */
+    16,        /* ner_d */
+    48,        /* nei_d */
+    16,        /* unltr_d */
+    48,        /* unlti_d */
+    16,        /* unler_d */
+    48,        /* unlei_d */
+    16,        /* uneqr_d */
+    48,        /* uneqi_d */
+    16,        /* unger_d */
+    48,        /* ungei_d */
+    16,        /* ungtr_d */
+    48,        /* ungti_d */
+    16,        /* ltgtr_d */
+    48,        /* ltgti_d */
+    16,        /* ordr_d */
+    48,        /* ordi_d */
+    16,        /* unordr_d */
+    48,        /* unordi_d */
+    16,        /* truncr_d_i */
+    12,        /* truncr_d_l */
+    12,        /* extr_d */
+    8, /* extr_f_d */
+    4, /* movr_d */
+    32,        /* movi_d */
+    4, /* ldr_d */
+    28,        /* ldi_d */
+    4, /* ldxr_d */
+    24,        /* ldxi_d */
+    4, /* str_d */
+    28,        /* sti_d */
+    4, /* stxr_d */
+    24,        /* stxi_d */
+    12,        /* bltr_d */
+    40,        /* blti_d */
+    12,        /* bler_d */
+    40,        /* blei_d */
+    12,        /* beqr_d */
+    40,        /* beqi_d */
+    12,        /* bger_d */
+    40,        /* bgei_d */
+    12,        /* bgtr_d */
+    40,        /* bgti_d */
+    12,        /* bner_d */
+    44,        /* bnei_d */
+    12,        /* bunltr_d */
+    44,        /* bunlti_d */
+    12,        /* bunler_d */
+    44,        /* bunlei_d */
+    12,        /* buneqr_d */
+    44,        /* buneqi_d */
+    12,        /* bunger_d */
+    44,        /* bungei_d */
+    12,        /* bungtr_d */
+    44,        /* bungti_d */
+    12,        /* bltgtr_d */
+    40,        /* bltgti_d */
+    12,        /* bordr_d */
+    40,        /* bordi_d */
+    12,        /* bunordr_d */
+    44,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c
new file mode 100644 (file)
index 0000000..158c09d
--- /dev/null
@@ -0,0 +1,1924 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* True when argument index i lies in 0..5. */
+#define jit_arg_reg_p(i)               (0 <= (i) && 6 > (i))
+#if __WORDSIZE == 32
+/* 32-bit build: indices 0..4 pass the double test; BIAS() is the identity. */
+#  define BIAS(n)                      (n)
+#  define jit_arg_d_reg_p(i)           (0 <= (i) && 5 > (i))
+#else
+/* Other builds: indices 0..15 pass the double test; BIAS() adds 2047.
+ * NOTE(review): 2047 is presumably the SPARC V9 stack bias -- confirm. */
+#  define BIAS(n)                      (2047 + (n))
+#  define jit_arg_d_reg_p(i)           (0 <= (i) && 16 > (i))
+#endif
+
+/*
+ * Types
+ */
+/* jit_va_list_t is an alias of jit_pointer_t in this backend. */
+typedef jit_pointer_t  jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* patch() forwards to _patch(), supplying the `_jit' variable that is in
+ * scope at the call site as the first argument. */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* The cpu and fpu sources are included while PROTO is defined to 1.
+ * NOTE(review): presumably this makes them contribute declarations only
+ * on this pass -- confirm in jit_sparc-cpu.c / jit_sparc-fpu.c. */
+#define PROTO                          1
+#  include "jit_sparc-cpu.c"
+#  include "jit_sparc-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register table.  Each entry pairs a specification word -- a register
+ * number, optionally ORed with rc() class flags -- with the register's
+ * assembler name.  The first 32 entries are %g0..%i7; the floating-point
+ * entries that follow depend on __WORDSIZE:
+ *   - 32: %f0..%f15 in ascending order, only even numbers carry rc(fpr);
+ *   - otherwise: %f62..%f32 (even only, rc(fpr)|rc(dbl)) followed by
+ *     %f31..%f0 in descending order, where only even numbers carry
+ *     rc(arg)|rc(fpr)|rc(sng).
+ * The table ends with the { _NOREG, "<none>" } entry.
+ * NOTE(review): _jit_init() sets reglen to jit_size(_rvs) - 1, presumably
+ * so that this last entry is not counted -- confirm. */
+jit_register_t         _rvs[] = {
+    { 0x00,                            "%g0" },
+    { 0x01,                            "%g1" },
+    { rc(gpr) | 0x02,                  "%g2" },
+    { rc(gpr) | 0x03,                  "%g3" },
+    { rc(gpr) | 0x04,                  "%g4" },
+    { 0x05,                            "%g5" },
+    { 0x06,                            "%g6" },
+    { 0x07,                            "%g7" },
+    { rc(arg) | rc(gpr) | 0x08,                "%o0" },
+    { rc(arg) | rc(gpr) | 0x09,                "%o1" },
+    { rc(arg) | rc(gpr) | 0x0a,                "%o2" },
+    { rc(arg) | rc(gpr) | 0x0b,                "%o3" },
+    { rc(arg) | rc(gpr) | 0x0c,                "%o4" },
+    { rc(arg) | rc(gpr) | 0x0d,                "%o5" },
+    { rc(sav) | 0x0e,                  "%sp" },
+    { 0x0f,                            "%o7" },
+    { rc(sav) | rc(gpr) | 0x10,                "%l0" },
+    { rc(sav) | rc(gpr) | 0x11,                "%l1" },
+    { rc(sav) | rc(gpr) | 0x12,                "%l2" },
+    { rc(sav) | rc(gpr) | 0x13,                "%l3" },
+    { rc(sav) | rc(gpr) | 0x14,                "%l4" },
+    { rc(sav) | rc(gpr) | 0x15,                "%l5" },
+    { rc(sav) | rc(gpr) | 0x16,                "%l6" },
+    { rc(sav) | rc(gpr) | 0x17,                "%l7" },
+    { 0x18,                            "%i0" },
+    { 0x19,                            "%i1" },
+    { 0x1a,                            "%i2" },
+    { 0x1b,                            "%i3" },
+    { 0x1c,                            "%i4" },
+    { 0x1d,                            "%i5" },
+    { rc(sav) | 0x1e,                  "%fp" },
+    { 0x1f,                            "%i7" },
+#  if __WORDSIZE == 32
+    { rc(fpr) | 0x00,                  "%f0" },
+    { 0x01,                            "%f1" },
+    { rc(fpr) | 0x02,                  "%f2" },
+    { 0x03,                            "%f3" },
+    { rc(fpr) | 0x04,                  "%f4" },
+    { 0x05,                            "%f5" },
+    { rc(fpr) | 0x06,                  "%f6" },
+    { 0x07,                            "%f7" },
+    { rc(fpr) | 0x08,                  "%f8" },
+    { 0x09,                            "%f9" },
+    { rc(fpr) | 0x0a,                  "%f10" },
+    { 0x0b,                            "%f11" },
+    { rc(fpr) | 0x0c,                  "%f12" },
+    { 0x0d,                            "%f13" },
+    { rc(fpr) | 0x0e,                  "%f14" },
+    { 0x0f,                            "%f15" },
+#  else
+    { rc(fpr) | rc(dbl) | 0x3e,                "%f62" },
+    { rc(fpr) | rc(dbl) | 0x3c,                "%f60" },
+    { rc(fpr) | rc(dbl) | 0x3a,                "%f58" },
+    { rc(fpr) | rc(dbl) | 0x38,                "%f56" },
+    { rc(fpr) | rc(dbl) | 0x36,                "%f54" },
+    { rc(fpr) | rc(dbl) | 0x34,                "%f52" },
+    { rc(fpr) | rc(dbl) | 0x32,                "%f50" },
+    { rc(fpr) | rc(dbl) | 0x30,                "%f48" },
+    { rc(fpr) | rc(dbl) | 0x2e,                "%f46" },
+    { rc(fpr) | rc(dbl) | 0x2c,                "%f44" },
+    { rc(fpr) | rc(dbl) | 0x2a,                "%f42" },
+    { rc(fpr) | rc(dbl) | 0x28,                "%f40" },
+    { rc(fpr) | rc(dbl) | 0x26,                "%f38" },
+    { rc(fpr) | rc(dbl) | 0x24,                "%f36" },
+    { rc(fpr) | rc(dbl) | 0x22,                "%f34" },
+    { rc(fpr) | rc(dbl) | 0x20,                "%f32" },
+    { 0x1f,                            "%f31" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1e,    "%f30" },
+    { 0x1d,                            "%f29" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1c,    "%f28" },
+    { 0x1b,                            "%f27" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1a,    "%f26" },
+    { 0x19,                            "%f25" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x18,    "%f24" },
+    { 0x17,                            "%f23" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x16,    "%f22" },
+    { 0x15,                            "%f21" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x14,    "%f20" },
+    { 0x13,                            "%f19" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x12,    "%f18" },
+    { 0x11,                            "%f17" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x10,    "%f16" },
+    { 0x0f,                            "%f15" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0e,    "%f14" },
+    { 0x0d,                            "%f13" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0c,    "%f12" },
+    { 0x0b,                            "%f11" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0a,    "%f10" },
+    { 0x09,                            "%f9" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x08,    "%f8" },
+    { 0x07,                            "%f7" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x06,    "%f6" },
+    { 0x05,                            "%f5" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x04,    "%f4" },
+    { 0x03,                            "%f3" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x02,    "%f2" },
+    { 0x01,                            "%f1" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x00,    "%f0" },
+#  endif
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU probing entry point; this backend's implementation does nothing. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Per-state initialization: on 64-bit builds jit_carry is reset to _NOREG,
+ * and reglen is set to one less than the number of entries in _rvs. */
+void
+_jit_init(jit_state_t *_jit)
+{
+#  if __WORDSIZE == 64
+    jit_carry = _NOREG;
+#  endif
+    _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 offset;
+
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    offset = _jitc->functions.offset;
+    if (offset >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = stack_framesize;
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+    /* float conversion */
+#  if __WORDSIZE == 32
+    _jitc->function->self.aoff = -8;
+#  else
+    /* extra slots in case qmul is called */
+    _jitc->function->self.aoff = -24;
+#  endif
+     _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve `length' bytes below the current function's running frame offset
+ * (self.aoff) and return that offset passed through BIAS().  Before the
+ * reservation the offset is aligned according to the requested length. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_function_t     *func;
+
+    assert(_jitc->function);
+    func = _jitc->function;
+    /* lengths 0 and 1: no alignment; 2: align to 2; 3 and 4: align to 4;
+     * any other value: align to 8 */
+    if (length == 2)
+       func->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       func->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       func->self.aoff &= -8;
+    func->self.aoff -= length;
+    /* the synth node is only recorded while not realizing */
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, func->self.aoff, length);
+       jit_dec_synth();
+    }
+    return (BIAS(func->self.aoff));
+}
+
+/* Emit code for a run-time sized reservation.  The size held in register v
+ * is negated and masked with -16; that amount is added to the offset kept
+ * at JIT_FP + aoffoff (loaded into u and stored back afterwards) and to
+ * _SP.  The aoffoff slot itself is allocated on the first call made for
+ * the current function. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (_jitc->function->allocar == 0) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    /* tmp = -v & -16 */
+    tmp = jit_get_reg(jit_class_gpr);
+    jit_negr(tmp, v);
+    jit_andi(tmp, tmp, -16);
+
+    /* u = saved offset + tmp; the stack pointer moves by the same amount */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, tmp);
+    jit_addr(_SP, _SP, tmp);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+
+    jit_unget_reg(tmp);
+    jit_dec_synth();
+}
+
+/* Return from the current function: emit a jump and patch it to the
+ * function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the value held in register u: copy it to JIT_RET unless it is
+ * already there, mark JIT_RET live and emit the return. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET)
+       jit_movr(JIT_RET, u);
+    /* JIT_RET is marked live in both cases */
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the immediate word u: load it into JIT_RET, then emit the
+ * return via jit_ret().  The whole sequence is wrapped in a reti synth
+ * node. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single-precision value in register u.  When u already is
+ * JIT_FRET it is only marked live; otherwise it is copied to JIT_FRET. */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (u == JIT_FRET)
+       jit_live(JIT_FRET);
+    else
+       jit_movr_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single-precision immediate u: load it into JIT_FRET, then
+ * emit the return via jit_ret(), wrapped in a reti_f synth node. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double-precision value in register u.  When u already is
+ * JIT_FRET it is only marked live; otherwise it is copied to JIT_FRET. */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET)
+       jit_live(JIT_FRET);
+    else
+       jit_movr_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double-precision immediate u: load it into JIT_FRET, then
+ * emit the return via jit_ret(), wrapped in a reti_d synth node. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Close the current function: check that its epilog node has no successor
+ * yet, link that node, and clear the current-function pointer. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether argument node u passes the register-index test for its
+ * kind.  32-bit builds use jit_arg_reg_p() for arg and arg_f nodes and
+ * jit_arg_d_reg_p() for arg_d nodes; other builds use jit_arg_reg_p() for
+ * arg nodes and jit_arg_d_reg_p() for both arg_f and arg_d nodes. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+#  if __WORDSIZE == 32
+    if (u->code != jit_code_arg && u->code != jit_code_arg_f) {
+       assert(u->code == jit_code_arg_d);
+       return (jit_arg_d_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+#  else
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
+       return (jit_arg_d_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+#  endif
+}
+
+/* Mark the start of variadic arguments.  While a call is being prepared
+ * the flag goes on the call being built; otherwise it goes on the function
+ * being defined, and the current integer argument index is remembered in
+ * vagp. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (!_jitc->prepare) {
+       /* declaration side: the function itself is variadic */
+       jit_link_prolog();
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+
+       _jitc->function->vagp = _jitc->function->self.argi;
+    }
+    else {
+       /* call side: the callee is variadic */
+       jit_link_prepare();
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    jit_dec_synth();
+}
+
+/* va_push of register u: implemented as jit_pushargr(u) wrapped in a
+ * va_push synth node. */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the current function and return
+ * its arg node.  The node's first word is either the argument index (when
+ * jit_arg_reg_p() accepts it) or the BIAS()ed frame offset of a newly
+ * reserved word; its second word is the running argument count. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+
+    assert(_jitc->function);
+    func = _jitc->function;
+    if (!jit_arg_reg_p(func->self.argi)) {
+#  if __WORDSIZE == 64
+       /* the index keeps advancing while jit_arg_d_reg_p() accepts it */
+       if (jit_arg_d_reg_p(func->self.argi))
+           func->self.argi++;
+#  endif
+       offset = BIAS(func->self.size);
+       func->self.size += sizeof(jit_word_t);
+    }
+    else
+       offset = func->self.argi++;
+    node = jit_new_node_ww(jit_code_arg, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Declare the next single-precision argument of the current function and
+ * return its arg_f node.  32-bit builds share the integer argument index
+ * and fall back to an unbiased frame offset; other builds accept indices
+ * for which jit_arg_d_reg_p() holds and reserve a frame word whenever the
+ * index is not accepted by jit_arg_reg_p(). */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+#  if __WORDSIZE == 64
+    jit_bool_t          grow;
+#  endif
+
+    assert(_jitc->function);
+    func = _jitc->function;
+#  if __WORDSIZE == 32
+    if (!jit_arg_reg_p(func->self.argi)) {
+       offset = func->self.size;
+       func->self.size += sizeof(jit_word_t);
+    }
+    else
+       offset = func->self.argi++;
+#  else
+    /* decided before the index is advanced below */
+    grow = !jit_arg_reg_p(func->self.argi);
+    offset = jit_arg_d_reg_p(func->self.argi) ?
+       func->self.argi++ : BIAS(func->self.size);
+    if (grow)
+       func->self.size += sizeof(jit_word_t);
+#  endif
+    node = jit_new_node_ww(jit_code_arg_f, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Declare the next double-precision argument of the current function and
+ * return its arg_d node.
+ * 32-bit builds: an index accepted by jit_arg_d_reg_p() consumes two
+ * indices; an index accepted only by jit_arg_reg_p() consumes one index
+ * and additionally grows the frame by sizeof(jit_float32_t); anything
+ * else takes an unbiased frame offset and grows the frame by
+ * sizeof(jit_float64_t).
+ * Other builds follow the same scheme as _jit_arg_f(). */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_function_t     *func;
+    jit_node_t         *node;
+    jit_int32_t                 offset;
+#  if __WORDSIZE == 64
+    jit_bool_t          grow;
+#  endif
+
+    assert(_jitc->function);
+    func = _jitc->function;
+#  if __WORDSIZE == 32
+    if (jit_arg_d_reg_p(func->self.argi)) {
+       offset = func->self.argi;
+       func->self.argi += 2;
+    }
+    else if (!jit_arg_reg_p(func->self.argi)) {
+       offset = func->self.size;
+       func->self.size += sizeof(jit_float64_t);
+    }
+    else {
+       offset = func->self.argi++;
+       func->self.size += sizeof(jit_float32_t);
+    }
+#  else
+    /* decided before the index is advanced below */
+    grow = !jit_arg_reg_p(func->self.argi);
+    offset = jit_arg_d_reg_p(func->self.argi) ?
+       func->self.argi++ : BIAS(func->self.size);
+    if (grow)
+       func->self.size += sizeof(jit_word_t);
+#  endif
+    node = jit_new_node_ww(jit_code_arg_d, offset, ++func->self.argn);
+    jit_link_prolog();
+    return (node);
+}
+
+/* Load integer argument v into register u as a signed 8-bit value: from
+ * _I0 + index when jit_arg_reg_p() accepts the index, otherwise from
+ * JIT_FP at the node's offset plus the word size minus one byte. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_ldxi_c(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int8_t));
+    else
+       jit_extr_c(u, _I0 + v->u.w);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into register u as an unsigned 8-bit value:
+ * from _I0 + index when jit_arg_reg_p() accepts the index, otherwise from
+ * JIT_FP at the node's offset plus the word size minus one byte. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_ldxi_uc(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint8_t));
+    else
+       jit_extr_uc(u, _I0 + v->u.w);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into register u as a signed 16-bit value: from
+ * _I0 + index when jit_arg_reg_p() accepts the index, otherwise from
+ * JIT_FP at the node's offset plus the word size minus two bytes. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_ldxi_s(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int16_t));
+    else
+       jit_extr_s(u, _I0 + v->u.w);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into register u as an unsigned 16-bit value:
+ * from _I0 + index when jit_arg_reg_p() accepts the index, otherwise from
+ * JIT_FP at the node's offset plus the word size minus two bytes. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_ldxi_us(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_uint16_t));
+    else
+       jit_extr_us(u, _I0 + v->u.w);
+    jit_dec_synth();
+}
+
+/* Load integer argument v into register u as a signed 32-bit value.  For
+ * an index accepted by jit_arg_reg_p() the source is _I0 + index (sign
+ * extended with extr_i on 64-bit builds, plainly moved otherwise); for
+ * anything else the value is read from JIT_FP at the node's offset plus
+ * the word size minus four bytes. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_ldxi_i(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
+    else {
+#  if __WORDSIZE == 64
+       jit_extr_i(u, _I0 + v->u.w);
+#  else
+       jit_movr(u, _I0 + v->u.w);
+#  endif
+    }
+    jit_dec_synth();
+}
+
+#  if __WORDSIZE == 64
+/* Load integer argument v into register u as an unsigned 32-bit value:
+ * from _I0 + index (zero extended with extr_ui) when jit_arg_reg_p()
+ * accepts the index, otherwise from JIT_FP at the node's offset plus the
+ * word size minus four bytes.
+ * The synth node is recorded as getarg_ui, matching the operation and the
+ * naming used by the other getarg_* functions (it was previously recorded
+ * as getarg_i). */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_ui(u, _I0 + v->u.w);
+    else
+       jit_ldxi_ui(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
+    jit_dec_synth();
+}
+
+/* Load integer argument v into register u as a full word: moved from
+ * _I0 + index when jit_arg_reg_p() accepts the index, otherwise read from
+ * JIT_FP at the node's offset.
+ * The synth node is recorded as getarg_l, matching the operation and the
+ * naming used by the other getarg_* functions (it was previously recorded
+ * as getarg_i). */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_movr(u, _I0 + v->u.w);
+    else
+       jit_ldxi_l(u, JIT_FP, v->u.w);
+    jit_dec_synth();
+}
+#  endif
+
+/* Overwrite integer argument v with the value of register u: stored to
+ * JIT_FP at the node's offset, or moved to _I0 + index when
+ * jit_arg_reg_p() accepts the index. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+       jit_stxi(v->u.w, JIT_FP, u);
+    else
+       jit_movr(_I0 + v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Overwrite integer argument v with the immediate u.  An index accepted
+ * by jit_arg_reg_p() is loaded directly into _I0 + index; otherwise the
+ * immediate goes through a temporary register and is stored to JIT_FP at
+ * the node's offset. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t                tmp;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (!jit_arg_reg_p(v->u.w)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(v->u.w, JIT_FP, tmp);
+       jit_unget_reg(tmp);
+    }
+    else
+       jit_movi(_I0 + v->u.w, u);
+    jit_dec_synth();
+}
+
+/* Load single-precision argument v into float register u.
+ * Note that the trailing `else' pairs with whichever `if' the
+ * preprocessor selected above it. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    assert(_jitc->function);
+    jit_inc_synth_wp(getarg_f, u, v);
+#  if __WORDSIZE == 32
+    /* value sits in integer register _I0 + index: bounce it through the
+     * word at JIT_FP - 4 to get it into a float register */
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_stxi_i(-4, JIT_FP, _I0 + v->u.w);
+       jit_ldxi_f(u, JIT_FP, -4);
+    }
+#  else
+    /* value is read from the register one below _F0 - 2 * index */
+    if (jit_arg_d_reg_p(v->u.w)) {
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movr_f(u, (_F0 - (v->u.w << 1)) - 1);
+    }
+#  endif
+    else
+       /* otherwise read from JIT_FP at the node's offset plus the word
+        * size minus sizeof(jit_float32_t) */
+       jit_ldxi_f(u, JIT_FP, v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t));
+    jit_dec_synth();
+}
+
+/* Overwrite single-precision argument v with the value of float register
+ * u.  The trailing `else' pairs with whichever `if' the preprocessor
+ * selected above it. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+#  if __WORDSIZE == 32
+    /* destination is integer register _I0 + index: bounce the float
+     * through the word at JIT_FP - 4 */
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_stxi_f(-4, JIT_FP, u);
+       jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4);
+    }
+#  else
+    /* destination is the register one below _F0 - 2 * index */
+    if (jit_arg_d_reg_p(v->u.w)) {
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movr_f((_F0 - (v->u.w << 1)) - 1, u);
+    }
+#  endif
+    else
+       /* otherwise store to JIT_FP at the node's offset plus the word
+        * size minus sizeof(jit_float32_t) */
+       jit_stxi_f(v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t), JIT_FP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite single-precision argument v with the immediate u. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+#  if __WORDSIZE == 32
+    /* the immediate is always materialized in a temporary float register
+     * first */
+    regno = jit_get_reg(jit_class_fpr);
+    jit_movi_f(regno, u);
+    if (jit_arg_reg_p(v->u.w)) {
+       /* integer register destination: bounce through JIT_FP - 4 */
+       jit_stxi_f(-4, JIT_FP, regno);
+       jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4);
+    }
+    else
+       jit_stxi_f(v->u.w, JIT_FP, regno);
+    jit_unget_reg(regno);
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+       /* load directly into the register one below _F0 - 2 * index */
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movi_f((_F0 - (v->u.w << 1)) - 1, u);
+    }
+    else {
+       /* a temporary float register is needed only for the store to
+        * JIT_FP */
+       regno = jit_get_reg(jit_class_fpr | jit_class_sng);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t), JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/* Load double-precision argument v into float register u. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    assert(_jitc->function);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (jit_arg_d_reg_p(v->u.w)) {
+#  if __WORDSIZE == 32
+       /* two consecutive integer registers: spill both to JIT_FP - 8 and
+        * JIT_FP - 4, then reload the pair as one double */
+       jit_stxi(-8, JIT_FP, _I0 + v->u.w);
+       jit_stxi(-4, JIT_FP, _I0 + v->u.w + 1);
+       jit_ldxi_d(u, JIT_FP, -8);
+#  else
+       jit_movr_d(u, _F0 - (v->u.w << 1));
+#  endif
+    }
+#  if __WORDSIZE == 32
+    /* index accepted by jit_arg_reg_p() but not by jit_arg_d_reg_p(): one
+     * half comes from integer register _I0 + index (bounced through
+     * JIT_FP - 8), the other half is read from JIT_FP + stack_framesize */
+    else if (jit_arg_reg_p(v->u.w)) {
+       jit_stxi(-8, JIT_FP, _I0 + v->u.w);
+       jit_ldxi_f(u, JIT_FP, -8);
+       jit_ldxi_f(u + 1, JIT_FP, stack_framesize);
+    }
+#  endif
+    else {
+#  if __WORDSIZE == 32
+       /* read from JIT_FP as two single-precision loads into u and
+        * u + 1 */
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_ldxi_f(u + 1, JIT_FP, v->u.w + 4);
+#  else
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+#  endif
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Write the double held in register `u' back into the incoming double
+ * argument described by `v' (an arg_d node).
+ *
+ * 32-bit: the value is staged at fp-8 whenever it has to be split into
+ * words: both words into integer registers, one word into a register
+ * and the other to fp + stack_framesize, or word by word into a slot
+ * whose offset is not a multiple of 8.  A slot at a multiple of 8 is
+ * written with a single double store.
+ * 64-bit: move into _F0 - 2*index, or store into the frame slot.
+ */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    jit_int32_t         word;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+#  if __WORDSIZE == 32
+    if (jit_arg_d_reg_p(v->u.w)) {
+        jit_stxi_d(-8, JIT_FP, u);
+        jit_ldxi(_I0 + v->u.w, JIT_FP, -8);
+        jit_ldxi(_I0 + v->u.w + 1, JIT_FP, -4);
+    }
+    else if (jit_arg_reg_p(v->u.w)) {
+        word = jit_get_reg(jit_class_gpr);
+        jit_stxi_d(-8, JIT_FP, u);
+        jit_ldxi(_I0 + v->u.w, JIT_FP, -8);
+        /* second word goes to fp + stack_framesize */
+        jit_ldxi(word, JIT_FP, -4);
+        jit_stxi(stack_framesize, JIT_FP, word);
+        jit_unget_reg(word);
+    }
+    else if ((v->u.w & 7) == 0) {
+        jit_stxi_d(v->u.w, JIT_FP, u);
+    }
+    else {
+        /* offset is not a multiple of 8: copy one word at a time */
+        jit_stxi_d(-8, JIT_FP, u);
+        word = jit_get_reg(jit_class_gpr);
+        jit_ldxi(word, JIT_FP, -8);
+        jit_stxi(v->u.w, JIT_FP, word);
+        jit_ldxi(word, JIT_FP, -4);
+        jit_stxi(v->u.w + 4, JIT_FP, word);
+        jit_unget_reg(word);
+    }
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+        jit_movr_d(_F0 - (v->u.w << 1), u);
+    }
+    else {
+        jit_stxi_d(v->u.w, JIT_FP, u);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Write the double immediate `u' into the incoming double argument
+ * described by `v' (an arg_d node).
+ *
+ * 32-bit: the immediate is materialized in a temporary FPR and then
+ * placed exactly as _jit_putargr_d places a register value (staging
+ * at fp-8 whenever the value must be split into words).
+ * 64-bit: a register argument receives the immediate directly in
+ * _F0 - 2*index; a stack argument goes through a temporary
+ * double-precision FPR.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t         fpr;
+#  if __WORDSIZE == 32
+    jit_int32_t         word;
+#  endif
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+#  if __WORDSIZE == 32
+    fpr = jit_get_reg(jit_class_fpr);
+    jit_movi_d(fpr, u);
+    if (jit_arg_d_reg_p(v->u.w)) {
+        jit_stxi_d(-8, JIT_FP, fpr);
+        jit_ldxi(_I0 + v->u.w, JIT_FP, -8);
+        jit_ldxi(_I0 + v->u.w + 1, JIT_FP, -4);
+    }
+    else if (jit_arg_reg_p(v->u.w)) {
+        word = jit_get_reg(jit_class_gpr);
+        jit_stxi_d(-8, JIT_FP, fpr);
+        jit_ldxi(_I0 + v->u.w, JIT_FP, -8);
+        /* second word goes to fp + stack_framesize */
+        jit_ldxi(word, JIT_FP, -4);
+        jit_stxi(stack_framesize, JIT_FP, word);
+        jit_unget_reg(word);
+    }
+    else if ((v->u.w & 7) == 0) {
+        jit_stxi_d(v->u.w, JIT_FP, fpr);
+    }
+    else {
+        /* offset is not a multiple of 8: copy one word at a time */
+        jit_stxi_d(-8, JIT_FP, fpr);
+        word = jit_get_reg(jit_class_gpr);
+        jit_ldxi(word, JIT_FP, -8);
+        jit_stxi(v->u.w, JIT_FP, word);
+        jit_ldxi(word, JIT_FP, -4);
+        jit_stxi(v->u.w + 4, JIT_FP, word);
+        jit_unget_reg(word);
+    }
+    jit_unget_reg(fpr);
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+        jit_movi_d(_F0 - (v->u.w << 1), u);
+    }
+    else {
+        fpr = jit_get_reg(jit_class_fpr | jit_class_dbl);
+        jit_movi_d(fpr, u);
+        jit_stxi_d(v->u.w, JIT_FP, fpr);
+        jit_unget_reg(fpr);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Pass the word held in register `u' as the next outgoing argument of
+ * the call being prepared.
+ *
+ * While integer argument registers remain, the value is moved into
+ * _O0 + call.argi.  Otherwise it is stored at sp + call.size +
+ * stack_framesize (through BIAS) and call.size grows by one word; on
+ * 64-bit, call.argi is still advanced for as long as
+ * jit_arg_d_reg_p() accepts it.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+#  if __WORDSIZE == 64
+        if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
+            _jitc->function->call.argi++;
+        }
+#  endif
+        jit_stxi(BIAS(_jitc->function->call.size + stack_framesize),
+                 JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movr(_O0 + _jitc->function->call.argi, u);
+        _jitc->function->call.argi++;
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass the immediate word `u' as the next outgoing argument of the
+ * call being prepared.
+ *
+ * While integer argument registers remain, the immediate is loaded
+ * straight into _O0 + call.argi.  Otherwise it is materialized in a
+ * temporary GPR and stored at sp + call.size + stack_framesize
+ * (through BIAS), and call.size grows by one word; on 64-bit,
+ * call.argi is still advanced for as long as jit_arg_d_reg_p()
+ * accepts it.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t         tmp;
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+#  if __WORDSIZE == 64
+        if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
+            _jitc->function->call.argi++;
+        }
+#  endif
+        tmp = jit_get_reg(jit_class_gpr);
+        jit_movi(tmp, u);
+        jit_stxi(BIAS(_jitc->function->call.size + stack_framesize),
+                 JIT_SP, tmp);
+        jit_unget_reg(tmp);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movi(_O0 + _jitc->function->call.argi, u);
+        _jitc->function->call.argi++;
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass the single-precision value in register `u' as the next outgoing
+ * argument of the call being prepared.
+ *
+ * 32-bit: with an integer argument register free, the value is bounced
+ * through fp-8 into _O0 + call.argi; otherwise it is stored in the
+ * outgoing area and call.size grows by 4.
+ * 64-bit: a varargs call with a free integer register receives the
+ * value through fp-8 (biased) in _O0 + call.argi; a non-varargs call
+ * with a free float slot receives it in the odd register below
+ * _F0 - 2*call.argi; everything else is stored 4 bytes into an 8-byte
+ * outgoing slot.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+#  if __WORDSIZE == 64
+    int                 variadic;
+#  endif
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+#  if __WORDSIZE == 32
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                   JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_float32_t);
+    }
+    else {
+        jit_stxi_f(-8, JIT_FP, u);
+        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+        _jitc->function->call.argi++;
+    }
+#  else
+    variadic = (_jitc->function->call.call & jit_call_varargs) != 0;
+    if (variadic && jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_f(BIAS(-8), JIT_FP, u);
+        jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+        _jitc->function->call.argi++;
+    }
+    else if (!variadic && jit_arg_d_reg_p(_jitc->function->call.argi)) {
+        /* pair of registers is live */
+        jit_live(_F0 - (_jitc->function->call.argi << 1));
+        jit_movr_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u);
+        /* past the integer registers the outgoing size still grows */
+        if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+            _jitc->function->call.size += sizeof(jit_float64_t);
+        }
+        _jitc->function->call.argi++;
+    }
+    else {
+        jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4),
+                   JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Pass the single-precision immediate `u' as the next outgoing
+ * argument of the call being prepared.
+ *
+ * 32-bit: the immediate is materialized in a temporary FPR, then
+ * either bounced through fp-8 into _O0 + call.argi or stored in the
+ * outgoing area (call.size grows by 4).
+ * 64-bit: the placement rules are those of _jit_pushargr_f; a
+ * temporary single-precision FPR is used whenever the value has to go
+ * through memory, while the float-register case loads the immediate
+ * directly.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t         tmp;
+#  if __WORDSIZE == 64
+    int                 variadic;
+#  endif
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+#  if __WORDSIZE == 32
+    tmp = jit_get_reg(jit_class_fpr);
+    jit_movi_f(tmp, u);
+    if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                   JIT_SP, tmp);
+        _jitc->function->call.size += sizeof(jit_float32_t);
+    }
+    else {
+        jit_stxi_f(-8, JIT_FP, tmp);
+        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+        _jitc->function->call.argi++;
+    }
+    jit_unget_reg(tmp);
+#  else
+    variadic = (_jitc->function->call.call & jit_call_varargs) != 0;
+    if (variadic && jit_arg_reg_p(_jitc->function->call.argi)) {
+        tmp = jit_get_reg(jit_class_fpr | jit_class_sng);
+        jit_movi_f(tmp, u);
+        jit_stxi_f(BIAS(-8), JIT_FP, tmp);
+        jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+        _jitc->function->call.argi++;
+        jit_unget_reg(tmp);
+    }
+    else if (!variadic && jit_arg_d_reg_p(_jitc->function->call.argi)) {
+        /* pair of registers is live */
+        jit_live(_F0 - (_jitc->function->call.argi << 1));
+        jit_movi_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u);
+        /* past the integer registers the outgoing size still grows */
+        if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+            _jitc->function->call.size += sizeof(jit_float64_t);
+        }
+        _jitc->function->call.argi++;
+    }
+    else {
+        tmp = jit_get_reg(jit_class_fpr | jit_class_sng);
+        jit_movi_f(tmp, u);
+        jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4),
+                   JIT_SP, tmp);
+        jit_unget_reg(tmp);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Pass the double held in register `u' as the next outgoing argument
+ * of the call being prepared.
+ *
+ * 32-bit:
+ *   - two integer registers free: stage at fp-8 and load both words
+ *     into _O0 + call.argi and the following register;
+ *   - one integer register free: the first float half goes through
+ *     fp-8 into the register, the second half (`u + 1') is stored at
+ *     sp + stack_framesize and call.size grows by 4;
+ *   - none free: both halves are stored in the outgoing area.
+ * 64-bit: a varargs call with a free integer register receives the
+ * value through fp-8 (biased); a non-varargs call with a free float
+ * slot receives it in _F0 - 2*call.argi; everything else is stored in
+ * the outgoing area.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+#  if __WORDSIZE == 64
+    int                 variadic;
+#  endif
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+#  if __WORDSIZE == 32
+    if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_d(BIAS(-8), JIT_FP, u);
+        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+        jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4);
+        _jitc->function->call.argi += 2;
+    }
+    else if (jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_f(-8, JIT_FP, u);
+        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+        _jitc->function->call.argi++;
+        jit_stxi_f(stack_framesize, JIT_SP, u + 1);
+        _jitc->function->call.size += sizeof(jit_float32_t);
+    }
+    else {
+        jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                   JIT_SP, u);
+        jit_stxi_f(_jitc->function->call.size + stack_framesize + 4,
+                   JIT_SP, u + 1);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  else
+    variadic = (_jitc->function->call.call & jit_call_varargs) != 0;
+    if (variadic && jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_stxi_d(BIAS(-8), JIT_FP, u);
+        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+        _jitc->function->call.argi++;
+    }
+    else if (!variadic && jit_arg_d_reg_p(_jitc->function->call.argi)) {
+        jit_movr_d(_F0 - (_jitc->function->call.argi << 1), u);
+        /* past the integer registers the outgoing size still grows */
+        if (!jit_arg_reg_p(_jitc->function->call.argi)) {
+            _jitc->function->call.size += sizeof(jit_float64_t);
+        }
+        _jitc->function->call.argi++;
+    }
+    else {
+        jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize),
+                   JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Pass the double immediate `u' as the next outgoing argument of the
+ * call being prepared.
+ *
+ * 32-bit: the immediate is materialized in a temporary FPR (`regno',
+ * with `regno + 1' addressing its second float half) and then placed
+ * the same way _jit_pushargr_d places a register value:
+ *   - two integer registers free: stage at fp-8 and load both words;
+ *   - one integer register free: first half through fp-8 into the
+ *     register, second half stored at sp + stack_framesize;
+ *   - none free: both halves stored in the outgoing area.
+ * 64-bit: a varargs call with a free integer register receives the
+ * value through fp-8 (biased); a non-varargs call with a free float
+ * slot receives the immediate directly in _F0 - 2*call.argi;
+ * everything else goes through a temporary FPR to the outgoing area.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                regno;
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+#  if __WORDSIZE == 32
+    regno = jit_get_reg(jit_class_fpr);
+    jit_movi_d(regno, u);
+    if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_d(BIAS(-8), JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4);
+       _jitc->function->call.argi += 2;
+    }
+    else if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_f(-8, JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+       ++_jitc->function->call.argi;
+       /* The second half lives in the register after `regno'; the
+        * immediate `u' itself is a value, not a register number. */
+       jit_stxi_f(stack_framesize, JIT_SP, regno + 1);
+       _jitc->function->call.size += sizeof(jit_float32_t);
+    }
+    else {
+       jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                  JIT_SP, regno);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize + 4,
+                  JIT_SP, regno + 1);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_unget_reg(regno);
+#  else
+    if ((_jitc->function->call.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->call.argi)) {
+       regno = jit_get_reg(jit_class_fpr | jit_class_dbl);
+       jit_movi_d(regno, u);
+       jit_stxi_d(BIAS(-8), JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       ++_jitc->function->call.argi;
+       jit_unget_reg(regno);
+    }
+    else if (!(_jitc->function->call.call & jit_call_varargs) &&
+            jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       jit_movi_d(_F0 - (_jitc->function->call.argi << 1), u);
+       /* past the integer registers the outgoing size still grows */
+       if (!jit_arg_reg_p(_jitc->function->call.argi))
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr | jit_class_dbl);
+       jit_movi_d(regno, u);
+       jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize),
+                  JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+/*
+ * Tell whether register `regno' is one of the argument registers
+ * counted by the call node `node'.
+ *
+ * An integer argument register matches when its distance from _O0 is
+ * in [0, node->v.w).  On 64-bit, a float argument register matches
+ * when _F0 - (regno >> 1) is in [0, node->v.w).  Returns 1 on a
+ * match, 0 otherwise.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t         cls;
+    jit_bool_t          found;
+
+    found = 0;
+    cls = jit_class(_rvs[regno].spec);
+    if ((cls & (jit_class_arg|jit_class_gpr)) ==
+        (jit_class_arg|jit_class_gpr)) {
+        regno -= _O0;
+        found = (regno >= 0 && regno < node->v.w);
+    }
+#  if __WORDSIZE == 64
+    if (!found &&
+        (cls & (jit_class_arg|jit_class_fpr)) ==
+        (jit_class_arg|jit_class_fpr)) {
+        regno = _F0 - (regno >> 1);
+        found = (regno >= 0 && regno < node->v.w);
+    }
+#  endif
+
+    return (found);
+}
+
+/*
+ * Finish the call being prepared with an indirect call through
+ * register `r0'.
+ *
+ * The enclosing function's outgoing-argument area (self.alen) is grown
+ * to cover this call if needed.  The call node records how many
+ * argument slots the call consumed (call.argi in v.w, call.argf in
+ * w.w), the same bookkeeping _jit_finishi performs, so that
+ * _jit_regarg_p() can tell which argument registers belong to the
+ * call.  The per-call counters and the `prepare' marker are then
+ * reset.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_callr(r0);
+    /* Counts of the call being made, not of the enclosing function's
+     * own incoming arguments (self.argi / self.argf). */
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Finish the call being prepared with a direct call to address `i0'.
+ *
+ * Grows the enclosing function's outgoing-argument area (self.alen)
+ * to cover this call if needed, records the call's argument counters
+ * (call.argi in v.w, call.argf in w.w) on the call node, resets the
+ * per-call counters and the `prepare' marker, and returns the call
+ * node.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->call.size > _jitc->function->self.alen) {
+        _jitc->function->self.alen = _jitc->function->call.size;
+    }
+    call = jit_calli(i0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    /* start the next prepare/finish sequence from a clean state */
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (call);
+}
+
+/*
+ * Fetch a call's return value into `r0' as a char: applies
+ * jit_extr_c() from _O0 to `r0', recorded as a retval_c synthesized
+ * instruction.
+ */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, _O0);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0' as an unsigned char: applies
+ * jit_extr_uc() from _O0 to `r0', recorded as a retval_uc synthesized
+ * instruction.
+ */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, _O0);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0' as a short: applies
+ * jit_extr_s() from _O0 to `r0', recorded as a retval_s synthesized
+ * instruction.
+ */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, _O0);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0' as an unsigned short: applies
+ * jit_extr_us() from _O0 to `r0', recorded as a retval_us synthesized
+ * instruction.
+ */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, _O0);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0' as an int.
+ *
+ * 32-bit: a plain move from _O0, skipped when `r0' already is _O0.
+ * 64-bit: always applies jit_extr_i() from _O0 to `r0'.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+#  if __WORDSIZE == 64
+    jit_extr_i(r0, _O0);
+#  else
+    if (r0 != _O0) {
+        jit_movr(r0, _O0);
+    }
+#  endif
+    jit_dec_synth();
+}
+
+#  if __WORDSIZE == 64
+/*
+ * Fetch a call's return value into `r0' as an unsigned int (64-bit
+ * only): applies jit_extr_ui() from _O0 to `r0', recorded as a
+ * retval_ui synthesized instruction.
+ *
+ * The extension is emitted even when `r0' is _O0: unlike a plain move,
+ * it is not a no-op on identical source and destination.  This mirrors
+ * the unconditional jit_extr_i() in the 64-bit _jit_retval_i.
+ */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, _O0);
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's return value into `r0' as a full word (64-bit only):
+ * a plain move from _O0, skipped when `r0' already is _O0, recorded
+ * as a retval_l synthesized instruction.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    if (r0 != _O0)
+       jit_movr(r0, _O0);
+    jit_dec_synth();
+}
+#  endif
+
+/*
+ * Fetch a call's single-precision return value into `r0': a float
+ * move from JIT_FRET, skipped when `r0' already is JIT_FRET.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    if (JIT_FRET != r0) {
+        jit_movr_f(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Fetch a call's double-precision return value into `r0': a double
+ * move from JIT_FRET, skipped when `r0' already is JIT_FRET.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    if (JIT_FRET != r0) {
+        jit_movr_d(r0, JIT_FRET);
+    }
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rf(name)                                                  \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##_f(rn(node->u.w),                                 \
+               (jit_float32_t *)node->v.n->u.w);                       \
+               break
+#define case_rd(name)                                                  \
+           case jit_code_##name##i_d:                                  \
+               assert(node->flag & jit_flag_data);                     \
+               name##_d(rn(node->u.w),                                 \
+                        (jit_float64_t *)node->v.n->u.w);              \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w),                            \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if (node->u.w == sizeof(jit_word_t) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               if ((node->link || (node->flag & jit_flag_use)) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#endif
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+#endif
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+#if __WORDSIZE == 64
+               case_rr(hton, _ul);
+#endif
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+#if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#endif
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(neg,);
+               case_rr(com,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrf(rsb, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrf(rsb, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli_p(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+#if __WORDSIZE == 64
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+#if __WORDSIZE == 64
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           default:
+               abort();
+       }
+#  if __WORDSIZE == 64
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+#  endif
+       jit_regarg_clr(node, value);
+#  if __WORDSIZE == 64
+       if (jit_regset_cmp_ui(&_jitc->regarg, 0) != 0) {
+           assert(jit_regset_scan1(&_jitc->regarg, 0) == jit_carry);
+           assert(jit_regset_scan1(&_jitc->regarg, jit_carry + 1) == ULONG_MAX);
+       }
+       assert(_jitc->synth == 0);
+#  else
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+#  endif
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrrw
+#undef case_rrw
+#undef case_rrrr
+#undef case_rrr
+#undef case_rf
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_sparc-cpu.c"
+#  include "jit_sparc-fpu.c"
+#undef CODE
+
+/*
+ * Called by the emitter once code generation has finished, with the start
+ * of the code buffer (fptr) and the current output position (tptr).
+ * The body is empty, so nothing is done for that range in this port.
+ * NOTE(review): presumably an instruction-cache flush hook that needs no
+ * explicit work on this target -- confirm.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+}
+
+/*
+ * Function wrapper around the ldxi() emitter macro: r0 and r1 are mapped
+ * through rn() before being handed on, i0 is forwarded unchanged.
+ * NOTE(review): presumably loads a word from r1 + i0 into r0 -- confirm
+ * against the ldxi definition.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Function wrapper around the stxi() emitter macro: i0 is forwarded
+ * unchanged, r0 and r1 are mapped through rn() first.
+ * NOTE(review): presumably stores the word in r1 at r0 + i0 -- confirm
+ * against the stxi definition.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Function wrapper around the ldxi_d() emitter macro; the destination r0 is
+ * a jit_fpr_t, the base r1 a jit_gpr_t.  Both are mapped through rn() and
+ * i0 is forwarded unchanged.
+ * NOTE(review): presumably loads a double from r1 + i0 into r0 -- confirm
+ * against the ldxi_d definition.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Function wrapper around the stxi_d() emitter macro; the base r0 is a
+ * jit_gpr_t, the source r1 a jit_fpr_t.  Both are mapped through rn() and
+ * i0 is forwarded unchanged.
+ * NOTE(review): presumably stores the double in r1 at r0 + i0 -- confirm
+ * against the stxi_d definition.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Append an entry to the pending-patch table, pairing the instruction
+ * identified by `instr` with `node`.
+ *
+ * `node` must carry jit_flag_node, and the node it refers to (v.n for a
+ * movi, u.n for anything else) must not be flagged jit_flag_patch yet.
+ * When the table is full it is extended by 1024 zero-filled entries
+ * before the new entry is written.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_patch_t        *entry;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps the referenced node in v, every other code keeps it in u */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+
+    /* no free slot left: make room for another 1024 entries */
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       memset(_jitc->patches.ptr + _jitc->patches.length, 0,
+              1024 * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    /* fill the next free slot and advance the fill index */
+    entry = _jitc->patches.ptr + _jitc->patches.offset;
+    entry->inst = instr;
+    entry->node = node;
+    _jitc->patches.offset++;
+}
diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c
new file mode 100644 (file)
index 0000000..4627783
--- /dev/null
@@ -0,0 +1,3842 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* avoid emitting inc/dec instructions due to partial stalls */
+#define USE_INC_DEC                    0
+
+#if PROTO
+#  if __X32 || __X64_32
+#    define WIDE                       0
+#    define ldi(u, v)                  ldi_i(u, v)
+#    define ldr(u, v)                  ldr_i(u, v)
+#    define ldxr(u, v, w)              ldxr_i(u, v, w)
+#    define ldxi(u, v, w)              ldxi_i(u, v, w)
+#    define sti(u, v)                  sti_i(u, v)
+#    define stxi(u, v, w)              stxi_i(u, v, w)
+#    define can_sign_extend_int_p(im)  1
+#    define can_zero_extend_int_p(im)  1
+#    define fits_uint32_p(im)          1
+#  else
+#    define WIDE                       1
+#    define ldi(u, v)                  ldi_l(u, v)
+#    define ldr(u, v)                  ldr_l(u, v)
+#    define ldxr(u, v, w)              ldxr_l(u, v, w)
+#    define ldxi(u, v, w)              ldxi_l(u, v, w)
+#    define sti(u, v)                  sti_l(u, v)
+#    define stxi(u, v, w)              stxi_l(u, v, w)
+#    define can_sign_extend_int_p(im)                                  \
+       (((im) >= 0 && (long long)(im) <=  0x7fffffffLL) ||             \
+        ((im) <  0 && (long long)(im) >  -0x80000000LL))
+#    define can_zero_extend_int_p(im)                                  \
+       ((im) >= 0 && (im) < 0x80000000LL)
+#    define fits_uint32_p(im)          (((im) & 0xffffffff00000000LL) == 0)
+#  endif
+#  if __X32 || __CYGWIN__ || __X64_32 || _WIN32
+#      define reg8_p(rn)                                               \
+      ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
+#  else
+#      define reg8_p(rn)               1
+#  endif
+#  define _RAX_REGNO                   0
+#  define _RCX_REGNO                   1
+#  define _RDX_REGNO                   2
+#  define _RBX_REGNO                   3
+#  define _RSP_REGNO                   4
+#  define _RBP_REGNO                   5
+#  define _RSI_REGNO                   6
+#  define _RDI_REGNO                   7
+#  define _R8_REGNO                    8
+#  define _R9_REGNO                    9
+#  define _R10_REGNO                   10
+#  define _R11_REGNO                   11
+#  define _R12_REGNO                   12
+#  define _R13_REGNO                   13
+#  define _R14_REGNO                   14
+#  define _R15_REGNO                   15
+#  define r7(reg)                      ((reg) & 7)
+#  define r8(reg)                      ((reg) & 15)
+#  define _SCL1                                0x00
+#  define _SCL2                                0x01
+#  define _SCL4                                0x02
+#  define _SCL8                                0x03
+/*
+ * Operation selectors handed as `code` to alur()/alui().  The shifted values
+ * are parenthesized so each macro expands to a single operand and cannot be
+ * re-associated by a neighbouring operator (e.g. X86_OR + 1).
+ */
+#  define X86_ADD                      0
+#  define X86_OR                       (1 << 3)
+#  define X86_ADC                      (2 << 3)
+#  define X86_SBB                      (3 << 3)
+#  define X86_AND                      (4 << 3)
+#  define X86_SUB                      (5 << 3)
+#  define X86_XOR                      (6 << 3)
+#  define X86_CMP                      (7 << 3)
+#  define X86_ROL                      0
+#  define X86_ROR                      1
+#  define X86_RCL                      2
+#  define X86_RCR                      3
+#  define X86_SHL                      4
+#  define X86_SHR                      5
+#  define X86_SAR                      7
+#  define X86_NOT                      2
+#  define X86_NEG                      3
+#  define X86_MUL                      4
+#  define X86_IMUL                     5
+#  define X86_DIV                      6
+#  define X86_IDIV                     7
+#  define X86_CC_O                     0x0
+#  define X86_CC_NO                    0x1
+#  define X86_CC_NAE                   0x2
+#  define X86_CC_B                     0x2
+#  define X86_CC_C                     0x2
+#  define X86_CC_AE                    0x3
+#  define X86_CC_NB                    0x3
+#  define X86_CC_NC                    0x3
+#  define X86_CC_E                     0x4
+#  define X86_CC_Z                     0x4
+#  define X86_CC_NE                    0x5
+#  define X86_CC_NZ                    0x5
+#  define X86_CC_BE                    0x6
+#  define X86_CC_NA                    0x6
+#  define X86_CC_A                     0x7
+#  define X86_CC_NBE                   0x7
+#  define X86_CC_S                     0x8
+#  define X86_CC_NS                    0x9
+#  define X86_CC_P                     0xa
+#  define X86_CC_PE                    0xa
+#  define X86_CC_NP                    0xb
+#  define X86_CC_PO                    0xb
+#  define X86_CC_L                     0xc
+#  define X86_CC_NGE                   0xc
+#  define X86_CC_GE                    0xd
+#  define X86_CC_NL                    0xd
+#  define X86_CC_LE                    0xe
+#  define X86_CC_NG                    0xe
+#  define X86_CC_G                     0xf
+#  define X86_CC_NLE                   0xf
+#  define mrm(md, r, m)                        *_jit->pc.uc++ = (md<<6) | (r<<3) | m
+#  define sib(sc, i, b)                        *_jit->pc.uc++ = (sc<<6) | (i<<3) | b
+#  define ic(c)                                *_jit->pc.uc++ = c
+#  define is(s)                                *_jit->pc.us++ = s
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  if __X64 && !__X64_32
+#    define il(l)                      *_jit->pc.ul++ = l
+#  else
+#    define il(l)                      ii(l)
+#  endif
+#  define patch_abs(instr, label)                                      \
+       *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
+#  define patch_rel(instr, label)                                      \
+       *(jit_int32_t *)(instr - 4) = label - instr
+#  define patch_rel_char(instr, label)                                 \
+       *(jit_int8_t *)(instr - 1) = label - instr
+#  define rex(l, w, r, x, b)           _rex(_jit, l, w, r, x, b)
+static void
+_rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rx(rd, md, rb, ri, ms)       _rx(_jit, rd, md, rb, ri, ms)
+static void
+_rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nop(n)                       _nop(_jit, n)
+static void _nop(jit_state_t*, jit_int32_t);
+#  define emms()                       is(0x770f)
+#  define lea(md, rb, ri, ms, rd)      _lea(_jit, md, rb, ri, ms, rd)
+static void
+_lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define pushr(r0)                    _pushr(_jit, r0)
+static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
+#  define popr(r0)                     _popr(_jit, r0)
+static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
+#  define xchgr(r0, r1)                        _xchgr(_jit, r0, r1)
+static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define testr(r0, r1)                        _testr(_jit, r0, r1)
+static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define testi(r0, i0)                        _testi(_jit, r0, i0)
+static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
+#  define cc(code, r0)                 _cc(_jit, code, r0)
+static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define icmpr(r0, r1)                        alur(X86_CMP, r0, r1)
+#  define alur(code, r0, r1)           _alur(_jit, code, r0, r1)
+static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define icmpi(r0, i0)                        alui(X86_CMP, r0, i0)
+#  define alui(code, r0, i0)           _alui(_jit, code, r0, i0)
+static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define iaddr(r0, r1)                        alur(X86_ADD, r0, r1)
+#  define save(r0)                     _save(_jit, r0)
+static void _save(jit_state_t*, jit_int32_t);
+#  define load(r0)                     _load(_jit, r0)
+static void _load(jit_state_t*, jit_int32_t);
+#  define addr(r0, r1, r2)             _addr(_jit, r0, r1, r2)
+static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define iaddi(r0, i0)                        alui(X86_ADD, r0, i0)
+#  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
+static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+/* addcr/addci: directive indentation aligned with the sibling macros */
+#  define addcr(r0, r1, r2)            _addcr(_jit, r0, r1, r2)
+static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define addci(r0, r1, i0)            _addci(_jit, r0, r1, i0)
+static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define iaddxr(r0, r1)               alur(X86_ADC, r0, r1)
+#  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define iaddxi(r0, i0)               alui(X86_ADC, r0, i0)
+#  define addxi(r0, r1, i0)            _addxi(_jit, r0, r1, i0)
+static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define isubr(r0, r1)                        alur(X86_SUB, r0, r1)
+#  define subr(r0, r1, r2)             _subr(_jit, r0, r1, r2)
+static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define isubi(r0, i0)                        alui(X86_SUB, r0, i0)
+#  define subi(r0, r1, i0)             _subi(_jit, r0, r1, i0)
+static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define subcr(r0, r1, r2)            _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0, r1, i0)            _subci(_jit, r0, r1, i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define isubxr(r0, r1)               alur(X86_SBB, r0, r1)
+#  define subxr(r0, r1, r2)            _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define isubxi(r0, i0)               alui(X86_SBB, r0, i0)
+#  define subxi(r0, r1, i0)            _subxi(_jit, r0, r1, i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define imulr(r0, r1)                        _imulr(_jit, r0, r1)
+static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define imuli(r0, r1, i0)            _imuli(_jit, r0, r1, i0)
+static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define mulr(r0, r1, r2)             _mulr(_jit, r0, r1, r2)
+static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define muli(r0, r1, i0)             _muli(_jit, r0, r1, i0)
+static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define umulr(r0)                    unr(X86_IMUL, r0)
+#  define umulr_u(r0)                  unr(X86_MUL, r0)
+#  define qmulr(r0, r1, r2, r3)                _iqmulr(_jit, r0, r1, r2, r3, 1)
+#  define qmulr_u(r0, r1, r2, r3)      _iqmulr(_jit, r0, r1, r2, r3, 0)
+#  define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
+static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t,jit_int32_t, jit_bool_t);
+#  define qmuli(r0, r1, r2, i0)                _iqmuli(_jit, r0, r1, r2, i0, 1)
+#  define qmuli_u(r0, r1, r2, i0)      _iqmuli(_jit, r0, r1, r2, i0, 0)
+#  define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
+static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t,jit_word_t, jit_bool_t);
+#  define sign_extend_rdx_rax()                _sign_extend_rdx_rax(_jit)
+static void _sign_extend_rdx_rax(jit_state_t*);
+#  define idivr(r0)                    unr(X86_IDIV, r0)
+#  define idivr_u(r0)                  unr(X86_DIV, r0)
+#  define divremr(r0, r1, r2, i0, i1)  _divremr(_jit, r0, r1, r2, i0, i1)
+static void
+_divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+        jit_bool_t,jit_bool_t);
+#  define divremi(r0, r1, i0, i1, i2)  _divremi(_jit, r0, r1, i0, i1, i2)
+static void
+_divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
+#  define divr(r0, r1, r2)             divremr(r0, r1, r2, 1, 1)
+#  define divi(r0, r1, i0)             divremi(r0, r1, i0, 1, 1)
+#  define divr_u(r0, r1, r2)           divremr(r0, r1, r2, 0, 1)
+#  define divi_u(r0, r1, i0)           divremi(r0, r1, i0, 0, 1)
+#  define qdivr(r0, r1, r2, r3)                _iqdivr(_jit, r0, r1, r2, r3, 1)
+#  define qdivr_u(r0, r1, r2, r3)      _iqdivr(_jit, r0, r1, r2, r3, 0)
+#  define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
+static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t,jit_int32_t, jit_bool_t);
+#  define qdivi(r0, r1, r2, i0)                _iqdivi(_jit, r0, r1, r2, i0, 1)
+#  define qdivi_u(r0, r1, r2, i0)      _iqdivi(_jit, r0, r1, r2, i0, 0)
+#  define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
+static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
+                   jit_int32_t,jit_word_t, jit_bool_t);
+#  define remr(r0, r1, r2)             divremr(r0, r1, r2, 1, 0)
+#  define remi(r0, r1, i0)             divremi(r0, r1, i0, 1, 0)
+#  define remr_u(r0, r1, r2)           divremr(r0, r1, r2, 0, 0)
+#  define remi_u(r0, r1, i0)           divremi(r0, r1, i0, 0, 0)
+#  define iandr(r0, r1)                        alur(X86_AND, r0, r1)
+#  define andr(r0, r1, r2)             _andr(_jit, r0, r1, r2)
+static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define iandi(r0, i0)                        alui(X86_AND, r0, i0)
+#  define andi(r0, r1, i0)             _andi(_jit, r0, r1, i0)
+static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
+#  define iorr(r0, r1)                 alur(X86_OR, r0, r1)
+#  define orr(r0, r1, r2)              _orr(_jit, r0, r1, r2)
+static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
+#  define iori(r0, i0)                 alui(X86_OR, r0, i0)
+#  define ori(r0, r1, i0)              _ori(_jit, r0, r1, i0)
+static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
+#  define ixorr(r0, r1)                        alur(X86_XOR, r0, r1)
+#  define xorr(r0, r1, r2)             _xorr(_jit, r0, r1, r2)
+static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ixori(r0, i0)                        alui(X86_XOR, r0, i0)
+#  define xori(r0, r1, i0)             _xori(_jit, r0, r1, i0)
+static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
+#  define irotshr(code, r0)            _irotshr(_jit, code, r0)
+static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define rotshr(code, r0, r1, r2)     _rotshr(_jit, code, r0, r1, r2)
+static void
+_rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define irotshi(code, r0, i0)                _irotshi(_jit, code, r0, i0)
+static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define rotshi(code, r0, r1, i0)     _rotshi(_jit, code, r0, r1, i0)
+static void
+_rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0, r1, r2)             rotshr(X86_SHL, r0, r1, r2)
+#  define lshi(r0, r1, i0)             _lshi(_jit, r0, r1, i0)
+static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define rshr(r0, r1, r2)             rotshr(X86_SAR, r0, r1, r2)
+#  define rshi(r0, r1, i0)             rotshi(X86_SAR, r0, r1, i0)
+#  define rshr_u(r0, r1, r2)           rotshr(X86_SHR, r0, r1, r2)
+#  define rshi_u(r0, r1, i0)           rotshi(X86_SHR, r0, r1, i0)
+#  define unr(code, r0)                        _unr(_jit, code, r0)
+static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define inegr(r0)                    unr(X86_NEG, r0)
+#  define negr(r0, r1)                 _negr(_jit, r0, r1)
+static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define icomr(r0)                    unr(X86_NOT, r0)
+#  define comr(r0, r1)                 _comr(_jit, r0, r1)
+static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if USE_INC_DEC
+#    define incr(r0, r1)               _incr(_jit, r0, r1)
+static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define decr(r0, r1)               _decr(_jit, r0, r1)
+static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  define cr(code, r0, r1, r2)         _cr(_jit, code, r0, r1, r2)
+static void
+_cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ci(code, r0, r1, i0)         _ci(_jit, code, r0, r1, i0)
+static void
+_ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ci0(code, r0, r1)            _ci0(_jit, code, r0, r1)
+static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ltr(r0, r1, r2)              _ltr(_jit, r0, r1, r2)
+static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define lti(r0, r1, i0)                      _lti(_jit, r0, r1, i0)
+static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+/*
+ * Comparison helpers.  Each lowercase macro supplies the implicit _jit
+ * state to the matching _name() routine; the "r" forms take a register as
+ * last operand, the "i" forms a word-sized immediate.  lti_u and lei have
+ * no routine of their own and expand to ci() with an x86 condition code.
+ * NOTE(review): gti calls _ci(_jit, ...) directly instead of going through
+ * ci() like lti_u/lei -- presumably equivalent, confirm against ci().
+ */
+#  define ltr_u(r0, r1, r2)            _ltr_u(_jit, r0, r1, r2)
+static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define lti_u(r0, r1, i0)            ci(X86_CC_B, r0, r1, i0)
+#  define ler(r0, r1, r2)              _ler(_jit, r0, r1, r2)
+static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define lei(r0, r1, i0)              ci(X86_CC_LE, r0, r1, i0)
+#  define ler_u(r0, r1, r2)            _ler_u(_jit, r0, r1, r2)
+static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define lei_u(r0, r1, i0)            _lei_u(_jit, r0, r1, i0)
+static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define eqr(r0, r1, r2)              _eqr(_jit, r0, r1, r2)
+static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define eqi(r0, r1, i0)              _eqi(_jit, r0, r1, i0)
+static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ger(r0, r1, r2)              _ger(_jit, r0, r1, r2)
+static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define gei(r0, r1, i0)              _gei(_jit, r0, r1, i0)
+static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ger_u(r0, r1, r2)            _ger_u(_jit, r0, r1, r2)
+static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define gei_u(r0, r1, i0)            _gei_u(_jit, r0, r1, i0)
+static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define gtr(r0, r1, r2)              _gtr(_jit, r0, r1, r2)
+static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define gti(r0, r1, i0)              _ci(_jit, X86_CC_G, r0, r1, i0)
+#  define gtr_u(r0, r1, r2)            _gtr_u(_jit, r0, r1, r2)
+static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define gti_u(r0, r1, i0)            _gti_u(_jit, r0, r1, i0)
+static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ner(r0, r1, r2)              _ner(_jit, r0, r1, r2)
+static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define nei(r0, r1, i0)              _nei(_jit, r0, r1, i0)
+static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+/*
+ * Register/immediate moves plus the htonr_* and extr_* helpers.
+ * movi_p is the only routine here that returns a value (a jit_word_t).
+ * movir/movir_u, htonr_ul and extr_i/extr_ui exist only when
+ * __X64 && !__X64_32.  extr_s/extr_us and extr_i/extr_ui are plain
+ * aliases for movsr/movsr_u and movir/movir_u.
+ */
+#  define movr(r0, r1)                 _movr(_jit, r0, r1)
+static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define imovi(r0, i0)                        _imovi(_jit, r0, i0)
+static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
+#  define movi(r0, i0)                 _movi(_jit, r0, i0)
+static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+#  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
+static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
+#  define movcr(r0, r1)                        _movcr(_jit, r0, r1)
+static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movcr_u(r0, r1)              _movcr_u(_jit, r0, r1)
+static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movsr(r0, r1)                        _movsr(_jit, r0, r1)
+static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
+static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __X64 && !__X64_32
+#    define movir(r0, r1)              _movir(_jit, r0, r1)
+static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define movir_u(r0, r1)            _movir_u(_jit, r0, r1)
+static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define htonr_us(r0, r1)             _htonr_us(_jit, r0, r1)
+static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define htonr_ui(r0, r1)             _htonr_ui(_jit, r0, r1)
+static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __X64 && !__X64_32
+#define htonr_ul(r0, r1)               _htonr_ul(_jit, r0, r1)
+static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#endif
+#  define extr_c(r0, r1)               _extr_c(_jit, r0, r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0, r1)              _extr_uc(_jit, r0, r1)
+static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_s(r0, r1)               movsr(r0, r1)
+#  define extr_us(r0, r1)              movsr_u(r0, r1)
+#  if __X64 && !__X64_32
+#    define extr_i(r0, r1)             movir(r0, r1)
+#    define extr_ui(r0, r1)            movir_u(r0, r1)
+#  endif
+/*
+ * Loads.  ldr_* take a register as second operand, ldi_* a word-sized
+ * immediate.  _ldr_i/_ldi_i are declared when __X32 || !__X64_32; on
+ * __X64_32 builds ldr_i/ldi_i are instead mapped onto _ldr_ui/_ldi_ui.
+ * ldr_ui/ldi_ui and ldr_l/ldi_l are only defined for __X64 && !__X64_32.
+ */
+#  define ldr_c(r0, r1)                        _ldr_c(_jit, r0, r1)
+static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ldi_c(r0, i0)                        _ldi_c(_jit, r0, i0)
+static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_uc(r0, r1)               _ldr_uc(_jit, r0, r1)
+static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ldi_uc(r0, i0)               _ldi_uc(_jit, r0, i0)
+static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_s(r0, r1)                        _ldr_s(_jit, r0, r1)
+static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ldi_s(r0, i0)                        _ldi_s(_jit, r0, i0)
+static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
+#  define ldr_us(r0, r1)               _ldr_us(_jit, r0, r1)
+static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ldi_us(r0, i0)               _ldi_us(_jit, r0, i0)
+static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
+#  if __X32 || !__X64_32
+#    define ldr_i(r0, r1)              _ldr_i(_jit, r0, r1)
+static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define ldi_i(r0, i0)              _ldi_i(_jit, r0, i0)
+static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
+#  endif
+#  if __X64
+#    if __X64_32
+#      define ldr_i(r0, r1)            _ldr_ui(_jit, r0, r1)
+#      define ldi_i(r0, i0)            _ldi_ui(_jit, r0, i0)
+#    else
+#      define ldr_ui(r0, r1)           _ldr_ui(_jit, r0, r1)
+#      define ldi_ui(r0, i0)           _ldi_ui(_jit, r0, i0)
+#    endif
+static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
+static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
+#    if !__X64_32
+#      define ldr_l(r0, r1)            _ldr_l(_jit, r0, r1)
+static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#      define ldi_l(r0, i0)            _ldi_l(_jit, r0, i0)
+static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
+#    endif
+#  endif
+/*
+ * Three-operand loads.  ldxr_* take a register as last operand, ldxi_* a
+ * word-sized immediate.  The build-dependent mapping mirrors the
+ * two-operand loads: on __X64_32 builds ldxr_i/ldxi_i are routed to
+ * _ldxr_ui/_ldxi_ui, and the _l forms exist only for __X64 && !__X64_32.
+ */
+#  define ldxr_c(r0, r1, r2)           _ldxr_c(_jit, r0, r1, r2)
+static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_c(r0, r1, i0)           _ldxi_c(_jit, r0, r1, i0)
+static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_uc(r0, r1, r2)          _ldxr_uc(_jit, r0, r1, r2)
+static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_uc(r0, r1, i0)          _ldxi_uc(_jit, r0, r1, i0)
+static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_s(r0, r1, r2)           _ldxr_s(_jit, r0, r1, r2)
+static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_s(r0, r1, i0)           _ldxi_s(_jit, r0, r1, i0)
+static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxr_us(r0, r1, r2)          _ldxr_us(_jit, r0, r1, r2)
+static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxi_us(r0, r1, i0)          _ldxi_us(_jit, r0, r1, i0)
+static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __X32 || !__X64_32
+#    define ldxr_i(r0, r1, r2)         _ldxr_i(_jit, r0, r1, r2)
+static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#    define ldxi_i(r0, r1, i0)         _ldxi_i(_jit, r0, r1, i0)
+static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  endif
+#  if __X64
+#    if __X64_32
+#      define ldxr_i(r0, r1, r2)       _ldxr_ui(_jit, r0, r1, r2)
+#      define ldxi_i(r0, r1, i0)       _ldxi_ui(_jit, r0, r1, i0)
+#    else
+#      define ldxr_ui(r0, r1, r2)      _ldxr_ui(_jit, r0, r1, r2)
+#      define ldxi_ui(r0, r1, i0)      _ldxi_ui(_jit, r0, r1, i0)
+#    endif
+static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#    if !__X64_32
+#      define ldxr_l(r0, r1, r2)       _ldxr_l(_jit, r0, r1, r2)
+static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#      define ldxi_l(r0, r1, i0)       _ldxi_l(_jit, r0, r1, i0)
+static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#    endif
+#  endif
+/*
+ * Stores.  In the sti_* and stxi_* forms the word-sized immediate is the
+ * FIRST operand (i0) and the registers follow, as the prototypes show.
+ * The _l variants are only available when __X64 && !__X64_32.
+ */
+#  define str_c(r0, r1)                        _str_c(_jit, r0, r1)
+static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define sti_c(i0, r0)                        _sti_c(_jit, i0, r0)
+static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
+#  define str_s(r0, r1)                        _str_s(_jit, r0, r1)
+static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define sti_s(i0, r0)                        _sti_s(_jit, i0, r0)
+static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
+#  define str_i(r0, r1)                        _str_i(_jit, r0, r1)
+static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define sti_i(i0, r0)                        _sti_i(_jit, i0, r0)
+static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
+#  if __X64 && !__X64_32
+#    define str_l(r0, r1)              _str_l(_jit, r0, r1)
+static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define sti_l(i0, r0)              _sti_l(_jit, i0, r0)
+static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
+#  endif
+#  define stxr_c(r0, r1, r2)           _stxr_c(_jit, r0, r1, r2)
+static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxi_c(i0, r0, r1)           _stxi_c(_jit, i0, r0, r1)
+static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxr_s(r0, r1, r2)           _stxr_s(_jit, r0, r1, r2)
+static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxi_s(i0, r0, r1)           _stxi_s(_jit, i0, r0, r1)
+static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxr_i(r0, r1, r2)           _stxr_i(_jit, r0, r1, r2)
+static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
+static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  if __X64 && !__X64_32
+#    define stxr_l(r0, r1, r2)         _stxr_l(_jit, r0, r1, r2)
+static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
+static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  endif
+/*
+ * Conditional jumps.  jcc(code, i0) forwards to _jcc(); every jXX macro
+ * below merely fixes the X86_CC_* condition code.  Several mnemonics are
+ * provided per condition-code name (e.g. jnae/jb/jc), each using the
+ * X86_CC_* constant of the same spelling.
+ */
+#  define jcc(code, i0)                        _jcc(_jit, code, i0)
+#  define jo(i0)                       jcc(X86_CC_O, i0)
+#  define jno(i0)                      jcc(X86_CC_NO, i0)
+#  define jnae(i0)                     jcc(X86_CC_NAE, i0)
+#  define jb(i0)                       jcc(X86_CC_B, i0)
+#  define jc(i0)                       jcc(X86_CC_C, i0)
+#  define jae(i0)                      jcc(X86_CC_AE, i0)
+#  define jnb(i0)                      jcc(X86_CC_NB, i0)
+#  define jnc(i0)                      jcc(X86_CC_NC, i0)
+#  define je(i0)                       jcc(X86_CC_E, i0)
+#  define jz(i0)                       jcc(X86_CC_Z, i0)
+#  define jne(i0)                      jcc(X86_CC_NE, i0)
+#  define jnz(i0)                      jcc(X86_CC_NZ, i0)
+#  define jbe(i0)                      jcc(X86_CC_BE, i0)
+#  define jna(i0)                      jcc(X86_CC_NA, i0)
+#  define ja(i0)                       jcc(X86_CC_A, i0)
+#  define jnbe(i0)                     jcc(X86_CC_NBE, i0)
+#  define js(i0)                       jcc(X86_CC_S, i0)
+#  define jns(i0)                      jcc(X86_CC_NS, i0)
+#  define jp(i0)                       jcc(X86_CC_P, i0)
+#  define jpe(i0)                      jcc(X86_CC_PE, i0)
+#  define jnp(i0)                      jcc(X86_CC_NP, i0)
+#  define jpo(i0)                      jcc(X86_CC_PO, i0)
+#  define jl(i0)                       jcc(X86_CC_L, i0)
+#  define jnge(i0)                     jcc(X86_CC_NGE, i0)
+#  define jge(i0)                      jcc(X86_CC_GE, i0)
+#  define jnl(i0)                      jcc(X86_CC_NL, i0)
+#  define jle(i0)                      jcc(X86_CC_LE, i0)
+#  define jng(i0)                      jcc(X86_CC_NG, i0)
+#  define jg(i0)                       jcc(X86_CC_G, i0)
+#  define jnle(i0)                     jcc(X86_CC_NLE, i0)
+static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
+/*
+ * Same set of mnemonics with an "s" suffix, built on _jccs() instead of
+ * _jcc().  NOTE(review): presumably the short (8-bit displacement)
+ * encodings -- confirm against the _jccs() definition.
+ */
+#  define jccs(code, i0)               _jccs(_jit, code, i0)
+#  define jos(i0)                      jccs(X86_CC_O, i0)
+#  define jnos(i0)                     jccs(X86_CC_NO, i0)
+#  define jnaes(i0)                    jccs(X86_CC_NAE, i0)
+#  define jbs(i0)                      jccs(X86_CC_B, i0)
+#  define jcs(i0)                      jccs(X86_CC_C, i0)
+#  define jaes(i0)                     jccs(X86_CC_AE, i0)
+#  define jnbs(i0)                     jccs(X86_CC_NB, i0)
+#  define jncs(i0)                     jccs(X86_CC_NC, i0)
+#  define jes(i0)                      jccs(X86_CC_E, i0)
+#  define jzs(i0)                      jccs(X86_CC_Z, i0)
+#  define jnes(i0)                     jccs(X86_CC_NE, i0)
+#  define jnzs(i0)                     jccs(X86_CC_NZ, i0)
+#  define jbes(i0)                     jccs(X86_CC_BE, i0)
+#  define jnas(i0)                     jccs(X86_CC_NA, i0)
+#  define jas(i0)                      jccs(X86_CC_A, i0)
+#  define jnbes(i0)                    jccs(X86_CC_NBE, i0)
+#  define jss(i0)                      jccs(X86_CC_S, i0)
+#  define jnss(i0)                     jccs(X86_CC_NS, i0)
+#  define jps(i0)                      jccs(X86_CC_P, i0)
+#  define jpes(i0)                     jccs(X86_CC_PE, i0)
+#  define jnps(i0)                     jccs(X86_CC_NP, i0)
+#  define jpos(i0)                     jccs(X86_CC_PO, i0)
+#  define jls(i0)                      jccs(X86_CC_L, i0)
+#  define jnges(i0)                    jccs(X86_CC_NGE, i0)
+#  define jges(i0)                     jccs(X86_CC_GE, i0)
+#  define jnls(i0)                     jccs(X86_CC_NL, i0)
+#  define jles(i0)                     jccs(X86_CC_LE, i0)
+#  define jngs(i0)                     jccs(X86_CC_NG, i0)
+#  define jgs(i0)                      jccs(X86_CC_G, i0)
+#  define jnles(i0)                    jccs(X86_CC_NLE, i0)
+static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
+/*
+ * Compare-and-jump building blocks (jcr, jci, jci0: condition code, a
+ * word-sized i0, then the operands; all return void) followed by the
+ * b* branch helpers.  Every b* routine takes a word-sized i0 first and
+ * returns a jit_word_t.
+ * NOTE(review): the returned word is presumably the address to hand to
+ * patch_at() later -- confirm against the definitions.
+ */
+#  define jcr(code, i0, r0, r1)                _jcr(_jit, code, i0, r0, r1)
+static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define jci(code, i0, r0, i1)                _jci(_jit, code, i0, r0, i1)
+static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define jci0(code, i0, r0)           _jci0(_jit, code, i0, r0)
+static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
+#  define bltr(i0, r0, r1)             _bltr(_jit, i0, r0, r1)
+static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define blti(i0, r0, i1)             _blti(_jit, i0, r0, i1)
+static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bltr_u(i0, r0, r1)           _bltr_u(_jit, i0, r0, r1)
+static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define blti_u(i0, r0, i1)           _blti_u(_jit, i0, r0, i1)
+static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bler(i0, r0, r1)             _bler(_jit, i0, r0, r1)
+static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define blei(i0, r0, i1)             _blei(_jit, i0, r0, i1)
+static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bler_u(i0, r0, r1)           _bler_u(_jit, i0, r0, r1)
+static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define blei_u(i0, r0, i1)           _blei_u(_jit, i0, r0, i1)
+static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define beqr(i0, r0, r1)             _beqr(_jit, i0, r0, r1)
+static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define beqi(i0, r0, i1)             _beqi(_jit, i0, r0, i1)
+static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bger(i0, r0, r1)             _bger(_jit, i0, r0, r1)
+static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bgei(i0, r0, i1)             _bgei(_jit, i0, r0, i1)
+static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bger_u(i0, r0, r1)           _bger_u(_jit, i0, r0, r1)
+static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bgei_u(i0, r0, i1)           _bgei_u(_jit, i0, r0, i1)
+static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bgtr(i0, r0, r1)             _bgtr(_jit, i0, r0, r1)
+static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bgti(i0, r0, i1)             _bgti(_jit, i0, r0, i1)
+static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bgtr_u(i0, r0, r1)           _bgtr_u(_jit, i0, r0, r1)
+static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bgti_u(i0, r0, i1)           _bgti_u(_jit, i0, r0, i1)
+static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bner(i0, r0, r1)             _bner(_jit, i0, r0, r1)
+static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define bnei(i0, r0, i1)             _bnei(_jit, i0, r0, i1)
+static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define bmsr(i0, r0, r1)             _bmsr(_jit, i0, r0, r1)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmsi(i0, r0, i1)             _bmsi(_jit, i0, r0, i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmcr(i0, r0, r1)             _bmcr(_jit, i0, r0, r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmci(i0, r0, i1)             _bmci(_jit, i0, r0, i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr(i0, r0, r1)           _boaddr(_jit, i0, r0, r1)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi(i0, r0, i1)           _boaddi(_jit, i0, r0, i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr_u(i0, r0, r1)         _boaddr_u(_jit, i0, r0, r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi_u(i0, r0, i1)         _boaddi_u(_jit, i0, r0, i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr(i0, r0, r1)           _bxaddr(_jit, i0, r0, r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi(i0, r0, i1)           _bxaddi(_jit, i0, r0, i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr_u(i0, r0, r1)         _bxaddr_u(_jit, i0, r0, r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi_u(i0, r0, i1)         _bxaddi_u(_jit, i0, r0, i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0, r0, r1)           _bosubr(_jit, i0, r0, r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi(i0, r0, i1)           _bosubi(_jit, i0, r0, i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr_u(i0, r0, r1)         _bosubr_u(_jit, i0, r0, r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi_u(i0, r0, i1)         _bosubi_u(_jit, i0, r0, i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr(i0, r0, r1)           _bxsubr(_jit, i0, r0, r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi(i0, r0, i1)           _bxsubi(_jit, i0, r0, i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr_u(i0, r0, r1)         _bxsubr_u(_jit, i0, r0, r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi_u(i0, r0, i1)         _bxsubi_u(_jit, i0, r0, i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+/*
+ * Calls, unconditional jumps, function prolog/epilog, varargs support and
+ * patching.  calli and jmpi return a jit_word_t; jmpsi takes an 8-bit
+ * unsigned argument.
+ */
+#  define callr(r0)                    _callr(_jit, r0)
+static void _callr(jit_state_t*, jit_int32_t);
+#  define calli(i0)                    _calli(_jit, i0)
+static jit_word_t _calli(jit_state_t*, jit_word_t);
+#  define jmpr(r0)                     _jmpr(_jit, r0)
+static void _jmpr(jit_state_t*, jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit, i0)
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
+#  define jmpsi(i0)                    _jmpsi(_jit, i0)
+static void _jmpsi(jit_state_t*, jit_uint8_t);
+#  define prolog(node)                 _prolog(_jit, node)
+static void _prolog(jit_state_t*, jit_node_t*);
+#  define epilog(node)                 _epilog(_jit, node)
+static void _epilog(jit_state_t*, jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define vaarg_d(r0, r1, i0)          _vaarg_d(_jit, r0, r1, i0)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
+#  define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
+static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
+/*
+ * Fallback for platforms without ffsl() (HAVE_FFSL undefined): use the
+ * compiler builtins, __builtin_ffs on __X32 and __builtin_ffsl otherwise.
+ */
+#  if !defined(HAVE_FFSL)
+#    if __X32
+#      define ffsl(i)                  __builtin_ffs(i)
+#    else
+#      define ffsl(l)                  __builtin_ffsl(l)
+#    endif
+#  endif
+#endif
+
+#if CODE
+/*
+ * Emit a REX prefix when one is needed.  Only 64-bit builds (__X64)
+ * generate anything; elsewhere the body is empty.
+ *   l        non-zero forces the prefix even if no extension bit is set
+ *   w        value shifted into bit 3 of the prefix (REX.W)
+ *   r, x, b  register numbers whose bit 3 is folded into prefix bits
+ *            2, 1 and 0 respectively; _NOREG contributes nothing
+ */
+static void
+_rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
+     jit_int32_t r, jit_int32_t x, jit_int32_t b)
+{
+#if __X64
+    jit_int32_t        prefix;
+
+    prefix = 0x40 | (w << 3);
+    /* Move bit 3 of each register number into its slot in the prefix. */
+    prefix |= (r != _NOREG) ? (r & 8) >> 1 : 0;
+    prefix |= (x != _NOREG) ? (x & 8) >> 2 : 0;
+    prefix |= (b != _NOREG) ? (b & 8) >> 3 : 0;
+    /* A bare 0x40 carries no information: skip it unless forced by l. */
+    if (!l && prefix == 0x40)
+       return;
+    ic(prefix);
+#endif
+}
+
+/*
+ * Emit the ModRM byte, the SIB byte when one is needed, and the
+ * displacement of a memory operand.
+ *   rd  register number placed in the middle field of the ModRM byte
+ *   md  displacement
+ *   rb  base register, or _NOREG for none
+ *   ri  index register, or _NOREG for none
+ *   ms  scale value for the SIB byte
+ * The first mrm() argument selects the displacement size: 0x00 none
+ * (except for the absolute forms), 0x01 one byte via ic(), 0x02 four
+ * bytes via ii().  Prints a message and aborts when the stack pointer is
+ * requested as index register.
+ */
+static void
+_rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
+    jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
+{
+    if (ri == _NOREG) {
+       if (rb == _NOREG) {
+           /* Absolute address: no base, no index, 32-bit displacement. */
+#if __X32
+           mrm(0x00, r7(rd), 0x05);
+#else
+           mrm(0x00, r7(rd), 0x04);
+           sib(_SCL1, 0x04, 0x05);
+#endif
+           ii(md);
+       }
+       else if (r7(rb) == _RSP_REGNO) {
+           /* A base whose low bits equal RSP always needs a SIB byte. */
+           if (md == 0) {
+               mrm(0x00, r7(rd), 0x04);
+               sib(ms, 0x04, 0x04);
+           }
+           else if ((jit_int8_t)md == md) {
+               mrm(0x01, r7(rd), 0x04);
+               sib(ms, 0x04, 0x04);
+               ic(md);
+           }
+           else {
+               mrm(0x02, r7(rd), 0x04);
+               sib(ms, 0x04, 0x04);
+               ii(md);
+           }
+       }
+       else {
+           /* Plain base register; RBP cannot use the no-displacement form. */
+           if (md == 0 && r7(rb) != _RBP_REGNO)
+               mrm(0x00, r7(rd), r7(rb));
+           else if ((jit_int8_t)md == md) {
+               mrm(0x01, r7(rd), r7(rb));
+               ic(md);
+           }
+           else {
+               mrm(0x02, r7(rd), r7(rb));
+               ii(md);
+           }
+       }
+    }
+    else if (rb == _NOREG) {
+       /* Index without base: always followed by a 32-bit displacement. */
+       mrm(0x00, r7(rd), 0x04);
+       sib(ms, r7(ri), 0x05);
+       ii(md);
+    }
+    else if (r8(ri) != _RSP_REGNO) {
+       if (md == 0 && r7(rb) != _RBP_REGNO) {
+           mrm(0x00, r7(rd), 0x04);
+           sib(ms, r7(ri), r7(rb));
+       }
+       else if ((jit_int8_t)md == md) {
+           mrm(0x01, r7(rd), 0x04);
+           sib(ms, r7(ri), r7(rb));
+           ic(md);
+       }
+       else {
+           /*
+            * mod 0x02 announces a 32-bit displacement, so all four bytes
+            * must be emitted (this used to emit a single byte with ic(),
+            * leaving the instruction three bytes short).
+            */
+           mrm(0x02, r7(rd), 0x04);
+           sib(ms, r7(ri), r7(rb));
+           ii(md);
+       }
+    }
+    else {
+       fprintf(stderr, "illegal index register");
+       abort();
+    }
+}
+
+/*
+ * Emit one no-op instruction that is exactly `count` bytes long.
+ * Supported lengths are 0 (emits nothing) through 9; any other value
+ * aborts.  Each case comment names the instruction form being encoded;
+ * ii(0x0000) supplies the four zero displacement bytes of the long forms.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t count)
+{
+    switch (count) {
+       case 0:
+           break;
+       case 1:         /* NOP */
+           ic(0x90);   break;
+       case 2:         /* 66 NOP */
+           ic(0x66);   ic(0x90);
+           break;
+       case 3:         /* NOP DWORD ptr [EAX] */
+           ic(0x0f);   ic(0x1f);       ic(0x00);
+           break;
+       case 4:         /* NOP DWORD ptr [EAX + 00H] */
+           ic(0x0f);   ic(0x1f);       ic(0x40);       ic(0x00);
+           break;
+       case 5:         /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
+           ic(0x0f);   ic(0x1f);       ic(0x44);       ic(0x00);
+           ic(0x00);
+           break;
+       case 6:         /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
+           ic(0x66);   ic(0x0f);       ic(0x1f);       ic(0x44);
+           ic(0x00);   ic(0x00);
+           break;
+       case 7:         /* NOP DWORD ptr [EAX + 00000000H] */
+           ic(0x0f);   ic(0x1f);       ic(0x80);       ii(0x0000);
+           break;
+       case 8:         /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+           ic(0x0f);   ic(0x1f);       ic(0x84);       ic(0x00);
+           ii(0x0000);
+           break;
+       case 9:         /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+           ic(0x66);   ic(0x0f);       ic(0x1f);       ic(0x84);
+           ic(0x00);   ii(0x0000);
+           break;
+       default:
+           abort();
+    }
+}
+
+/*
+ * Emit LEA (opcode 0x8d) with rd as destination.  md, rb, ri and ms
+ * describe the memory operand and are encoded by rx(); note that the
+ * destination is the LAST parameter here.
+ */
+static void
+_lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
+     jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
+{
+    rex(0, WIDE, rd, ri, rb);
+    ic(0x8d);
+    rx(rd, md, rb, ri, ms);
+}
+
+/*
+ * Emit PUSH r0 using the single-byte 0x50|reg form.  The r and x
+ * arguments of rex() are passed as 0 (not _NOREG); bit 3 of 0 is clear,
+ * so they add nothing to the prefix.
+ */
+static void
+_pushr(jit_state_t *_jit, jit_int32_t r0)
+{
+    rex(0, WIDE, 0, 0, r0);
+    ic(0x50 | r7(r0));
+}
+
+/*
+ * Emit POP r0 using the single-byte 0x58|reg form.  As in _pushr(), the
+ * r and x arguments of rex() are literal 0 and contribute no prefix bits.
+ */
+static void
+_popr(jit_state_t *_jit, jit_int32_t r0)
+{
+    rex(0, WIDE, 0, 0, r0);
+    ic(0x58 | r7(r0));
+}
+
+/*
+ * Emit XCHG (opcode 0x87) between r0 and r1, register-direct: r1 goes in
+ * the middle ModRM field, r0 in the low one.
+ */
+static void
+_xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r1, _NOREG, r0);
+    ic(0x87);
+    mrm(0x03, r7(r1), r7(r0));
+}
+
+/*
+ * Emit TEST (opcode 0x85) of r0 against r1, register-direct: r1 goes in
+ * the middle ModRM field, r0 in the low one.
+ */
+static void
+_testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r1, _NOREG, r0);
+    ic(0x85);
+    mrm(0x03, r7(r1), r7(r0));
+}
+
+/*
+ * Emit TEST of r0 against the immediate i0.  RAX gets the short 0xa9
+ * opcode without a ModRM byte; any other register uses 0xf7 with a
+ * register-direct ModRM.  Either way the immediate follows via ii().
+ */
+static void
+_testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    if (r0 == _RAX_REGNO)
+       ic(0xa9);
+    else {
+       ic(0xf7);
+       mrm(0x03, 0x00, r7(r0));
+    }
+    ii(i0);
+}
+
+/*
+ * Emit the two-byte opcode 0x0f, 0x90|code followed by a register-direct
+ * ModRM byte naming r0 (the SETcc encoding).  rex() is called with both
+ * l and w equal to 0, so a prefix appears only when r0 has bit 3 set.
+ */
+static void
+_cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
+{
+    rex(0, 0, _NOREG, _NOREG, r0);
+    ic(0x0f);
+    ic(0x90 | code);
+    mrm(0x03, 0x00, r7(r0));
+}
+
+/*
+ * Emit the register-to-register form of an ALU operation.  The opcode is
+ * code|0x01; the ModRM byte is register-direct with r1 in the middle
+ * field and r0 in the low field.
+ */
+static void
+_alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r1, _NOREG, r0);
+    ic(code | 0x01);
+    mrm(0x03, r7(r1), r7(r0));
+}
+
+/*
+ * Emit an ALU operation between r0 and the immediate i0.
+ *   - i0 fits in a signed byte: opcode 0x83 with an 8-bit immediate
+ *   - otherwise, still sign-extendable: code|0x05 for RAX, else opcode
+ *     0x81, followed by the immediate via ii()
+ *   - not sign-extendable: load i0 into a scratch register and fall back
+ *     to the register form alur()
+ */
+static void
+_alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    if (can_sign_extend_int_p(i0)) {
+       rex(0, WIDE, _NOREG, _NOREG, r0);
+       if ((jit_int8_t)i0 != i0) {
+           /* Wide immediate form. */
+           if (r0 == _RAX_REGNO)
+               ic(code | 0x05);
+           else {
+               ic(0x81);
+               ic(0xc0 | code | r7(r0));
+           }
+           ii(i0);
+           return;
+       }
+       /* Byte immediate form. */
+       ic(0x83);
+       ic(0xc0 | code | r7(r0));
+       ic(i0);
+       return;
+    }
+
+    /* Immediate cannot be encoded: go through a temporary register. */
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    alur(code, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Spill register r0 to its slot in the current function's frame.
+ * The slot is allocated on first use with jit_allocai() (word sized), in
+ * which case _jitc->again is set.  r0 must not already be marked in
+ * _jitc->regsav (asserted); it is marked, then stored relative to RBP.
+ */
+static void
+_save(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (!_jitc->function->regoff[r0]) {
+       _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
+       _jitc->again = 1;
+    }
+    assert(!jit_regset_tstbit(&_jitc->regsav, r0));
+    jit_regset_setbit(&_jitc->regsav, r0);
+    stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
+}
+
+/*
+ * Reload register r0 from the frame slot used by _save().  Asserts that
+ * a slot exists and that r0 is currently marked in _jitc->regsav, clears
+ * the mark, then loads relative to RBP.
+ */
+static void
+_load(jit_state_t *_jit, jit_int32_t r0)
+{
+    assert(_jitc->function->regoff[r0]);
+    assert(jit_regset_tstbit(&_jitc->regsav, r0));
+    jit_regset_clrbit(&_jitc->regsav, r0);
+    ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
+}
+
+/*
+ * r0 = r1 + r2.  With three distinct registers a single LEA computes the
+ * sum; when the destination aliases one source, add the other in place.
+ */
+static void
+_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r1 && r0 != r2) {
+       lea(0, r1, r2, _SCL1, r0);
+       return;
+    }
+    if (r0 == r1) {
+       iaddr(r0, r2);
+    }
+    else {
+       iaddr(r0, r1);
+    }
+}
+
+/*
+ * r0 = r1 + i0.
+ *   - i0 == 0 degenerates to a move
+ *   - with USE_INC_DEC, +1 / -1 use incr() / decr()
+ *   - a sign-extendable i0 is added in place (r0 == r1) or via LEA
+ *   - otherwise i0 is materialised with movi(), directly in r0 when that
+ *     does not clobber r1, else in a scratch register
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+#if USE_INC_DEC
+    if (i0 == 1) {
+       incr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       decr(r0, r1);
+       return;
+    }
+#endif
+    if (can_sign_extend_int_p(i0)) {
+       if (r0 == r1) {
+           iaddi(r0, i0);
+       }
+       else {
+           lea(i0, r1, _NOREG, _SCL1, r0);
+       }
+       return;
+    }
+    if (r0 != r1) {
+       /* r0 is free to hold the constant first. */
+       movi(r0, i0);
+       iaddr(r0, r1);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    iaddr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 + r2, always through iaddr() (never LEA, unlike _addr()).
+ * NOTE(review): presumably so the add leaves a carry for a following
+ * addx* operation -- confirm.
+ */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       iaddr(r0, r2);
+       return;
+    }
+    /* Destination aliases r2: add r1 in place. */
+    iaddr(r0, r1);
+}
+
+/*
+ * r0 = r1 + i0 through iaddi()/iaddr() (never LEA).  A sign-extendable
+ * immediate is added after copying r1 to r0; otherwise the constant is
+ * loaded into r0 when r0 != r1, or into a scratch register when the
+ * destination aliases the source.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    if (can_sign_extend_int_p(i0)) {
+       movr(r0, r1);
+       iaddi(r0, i0);
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, i0);
+       iaddr(r0, r1);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    iaddr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 + r2 using iaddxr().  When the destination aliases r2 the
+ * other operand is added in place; otherwise r1 is copied to r0 first.
+ */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 != r2) {
+       movr(r0, r1);
+       iaddxr(r0, r2);
+       return;
+    }
+    iaddxr(r0, r1);
+}
+
+/*
+ * r0 = r1 + i0 using iaddxi()/iaddxr().  Same operand strategy as
+ * _addci(): immediate form when i0 is sign-extendable, else the constant
+ * goes into r0 (r0 != r1) or into a scratch register (r0 == r1).
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    if (can_sign_extend_int_p(i0)) {
+       movr(r0, r1);
+       iaddxi(r0, i0);
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, i0);
+       iaddxr(r0, r1);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    iaddxr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 - r2.
+ *   - identical sources: the result is zero, produced with xor
+ *   - destination aliases r2: compute r2 - r1 in place, then negate
+ *   - otherwise: copy r1 to r0 and subtract r2
+ */
+static void
+_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2) {
+       ixorr(r0, r0);
+       return;
+    }
+    if (r0 == r2) {
+       isubr(r0, r1);
+       inegr(r0);
+       return;
+    }
+    movr(r0, r1);
+    isubr(r0, r2);
+}
+
+/*
+ * r0 = r1 - i0.
+ *   - i0 == 0 degenerates to a move
+ *   - with USE_INC_DEC, 1 / -1 use decr() / incr()
+ *   - a sign-extendable i0 is subtracted in place (r0 == r1) or folded
+ *     into a LEA with displacement -i0
+ *   - otherwise, with r0 != r1, load -i0 into r0 and add r1; with
+ *     r0 == r1, load i0 into a scratch register and subtract it
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+#if USE_INC_DEC
+    if (i0 == 1) {
+       decr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       incr(r0, r1);
+       return;
+    }
+#endif
+    if (can_sign_extend_int_p(i0)) {
+       if (r0 == r1) {
+           isubi(r0, i0);
+       }
+       else {
+           lea(-i0, r1, _NOREG, _SCL1, r0);
+       }
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, -i0);
+       iaddr(r0, r1);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    isubr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 - r2 through isubr().  When the destination aliases only r2,
+ * the old r2 value is parked in a scratch register before r0 is
+ * overwritten with r1; in every other case a copy plus subtract suffices.
+ */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+
+    if (r0 != r2 || r0 == r1) {
+       movr(r0, r1);
+       isubr(r0, r2);
+       return;
+    }
+    /* r0 is r2 but not r1: save the subtrahend before clobbering it. */
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), r0);
+    movr(r0, r1);
+    isubr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 - i0 through isubi()/isubr().  r1 is copied to r0 first; the
+ * immediate is then subtracted directly when sign-extendable, or via a
+ * scratch register otherwise.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    movr(r0, r1);
+    if (can_sign_extend_int_p(i0)) {
+       isubi(r0, i0);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    isubr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 - r2 using isubxr().  Mirrors _subcr(): when the destination
+ * aliases only r2, the subtrahend is preserved in a scratch register
+ * before r0 receives r1.
+ */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+
+    if (r0 != r2 || r0 == r1) {
+       movr(r0, r1);
+       isubxr(r0, r2);
+       return;
+    }
+    /* r0 is r2 but not r1: save the subtrahend before clobbering it. */
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), r0);
+    movr(r0, r1);
+    isubxr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * r0 = r1 - i0 using isubxi()/isubxr().  r1 is copied to r0 first.  A
+ * non-sign-extendable constant is loaded into a scratch register with
+ * imovi() (not movi()) before the register form is used.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+
+    movr(r0, r1);
+    if (can_sign_extend_int_p(i0)) {
+       isubxi(r0, i0);
+       return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    imovi(rn(reg), i0);
+    isubxr(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Reverse subtract with immediate: r0 = i0 - r1, obtained by computing
+ * r1 - i0 with subi() and negating the result in place.
+ */
+static void
+_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);
+    negr(r0, r0);
+}
+
+/*
+ * Emit the two-operand IMUL (opcode 0x0f 0xaf), register-direct, with r0
+ * in the middle ModRM field and r1 in the low field.  Callers in this
+ * file use it as "r0 *= r1".
+ */
+static void
+_imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xaf);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+/*
+ * r0 = r1 * i0.
+ *   - i0 fits in a signed byte: opcode 0x6b with an 8-bit immediate
+ *   - otherwise, still sign-extendable: opcode 0x69 with the immediate
+ *     emitted by ii()
+ *   - not sign-extendable: load i0 into a scratch register and multiply
+ *     with imulr()
+ * The last path used to compute r0 * i0 and silently ignore r1; r1 is now
+ * copied into r0 first when the two differ.  Nothing extra is emitted
+ * when r0 == r1, which is how _muli() reaches this path.
+ */
+static void
+_imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) {
+       rex(0, WIDE, r0, _NOREG, r1);
+       if ((jit_int8_t)i0 == i0) {
+           ic(0x6b);
+           mrm(0x03, r7(r0), r7(r1));
+           ic(i0);
+       }
+       else {
+           ic(0x69);
+           mrm(0x03, r7(r0), r7(r1));
+           ii(i0);
+       }
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       /* imulr() is two-operand: the multiplicand must already be in r0. */
+       if (r0 != r1)
+           movr(r0, r1);
+       imulr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Emit r0 = r1 * r2 with the two-operand multiply.  If r0 already
+ * holds one of the factors it is multiplied in place by the other;
+ * otherwise r1 is copied into r0 first.
+ */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                src;
+
+    src = r2;
+    if (r0 != r1) {
+       if (r0 == r2)
+           src = r1;           /* r0 already holds r2: multiply by r1 */
+       else
+           movr(r0, r1);
+    }
+    imulr(r0, src);
+}
+
+/*
+ * Emit r0 = r1 * i0, picking a cheaper sequence for special constants:
+ * 0 clears r0, 1 copies, -1 negates, 2/4/8 use a scaled lea, other
+ * positive powers of two become a left shift.  Everything else is a
+ * real multiply, by immediate when it passes can_sign_extend_int_p()
+ * and by register otherwise.
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ixorr(r0, r0);
+    else if (i0 == 1)
+       movr(r0, r1);
+    else if (i0 == -1)
+       negr(r0, r1);
+    else if (i0 == 2)
+       lea(0, _NOREG, r1, _SCL2, r0);
+    else if (i0 == 4)
+       lea(0, _NOREG, r1, _SCL4, r0);
+    else if (i0 == 8)
+       lea(0, _NOREG, r1, _SCL8, r0);
+    else if (i0 > 0 && !(i0 & (i0 - 1)))
+       /* single bit set: shift by the bit position */
+       lshi(r0, r1, ffsl(i0) - 1);
+    else if (can_sign_extend_int_p(i0))
+       imuli(r0, r1, i0);
+    else if (r0 == r1)
+       imuli(r0, r0, i0);
+    else {
+       /* r0 is free: load the constant there and multiply by r1 */
+       movi(r0, i0);
+       imulr(r0, r1);
+    }
+}
+
+#define savset(rn) /* r0,r1,r2 form: if rn is not the result, flag it in sav; also in set when it is no operand */ \
+    if (r0 != rn) {                                                    \
+       sav |= 1 << rn;                                                 \
+       if (r1 != rn && r2 != rn)                                       \
+           set |= 1 << rn;                                             \
+    }
+#define isavset(rn) /* r0,r1 (immediate) form of savset: only r1 is checked as an operand */ \
+    if (r0 != rn) {                                                    \
+       sav |= 1 << rn;                                                 \
+       if (r1 != rn)                                                   \
+           set |= 1 << rn;                                             \
+    }
+#define qsavset(rn) /* two-result form: r0 and r1 are results, r2 and r3 are operands */ \
+    if (r0 != rn && r1 != rn) {                                                \
+       sav |= 1 << rn;                                                 \
+       if (r2 != rn && r3 != rn)                                       \
+           set |= 1 << rn;                                             \
+    }
+#define allocr(rn, rv) /* reserve rv if flagged in set; save() it if flagged in sav, live and not already in regsav (else drop the sav flag) */ \
+    if (set & (1 << rn))                                               \
+       (void)jit_get_reg(rv|jit_class_gpr|jit_class_named);            \
+    if (sav & (1 << rn)) {                                             \
+       if ( jit_regset_tstbit(&_jitc->regsav, rv) ||                   \
+           !jit_regset_tstbit(&_jitc->reglive, rv))                    \
+           sav &= ~(1 << rn);                                          \
+       else                                                            \
+           save(rv);                                                   \
+    }
+#define clear(rn, rv) /* undo allocr: release rv if it was reserved, load() it back if it was saved */ \
+    if (set & (1 << rn))                                               \
+       jit_unget_reg(rv);                                              \
+    if (sav & (1 << rn))                                               \
+       load(rv);
+static void                                    /* Multiply r2 by r3 with the one-operand form; the RAX result goes to r0, the RDX result to r1. */
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                mul;            /* register handed to the multiply instruction */
+    jit_int32_t                sav;            /* per-regno bitmask consumed by allocr()/clear(): registers to save/load */
+    jit_int32_t                set;            /* per-regno bitmask consumed by allocr()/clear(): registers to reserve */
+
+    sav = set = 0;
+    qsavset(_RDX_REGNO);
+    qsavset(_RAX_REGNO);
+    allocr(_RDX_REGNO, _RDX);
+    allocr(_RAX_REGNO, _RAX);
+
+    if (r3 == _RAX_REGNO)                      /* one factor is already in RAX: multiply by the other */
+       mul = r2;
+    else {
+       mul = r3;
+       movr(_RAX_REGNO, r2);
+    }
+    if (sign)                                  /* sign selects between umulr() and umulr_u() */
+       umulr(mul);
+    else
+       umulr_u(mul);
+
+    if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)  /* destinations are exactly swapped */
+       xchgr(_RAX_REGNO, _RDX_REGNO);
+    else {
+       if (r0 != _RDX_REGNO)
+           movr(r0, _RAX_REGNO);
+       movr(r1, _RDX_REGNO);
+       if (r0 == _RDX_REGNO)                   /* written last so RDX is read into r1 before being overwritten */
+           movr(r0, _RAX_REGNO);
+    }
+
+    clear(_RDX_REGNO, _RDX);
+    clear(_RAX_REGNO, _RAX);
+}
+
+/*
+ * Immediate form of the two-result multiply: (r0, r1) = r2 * i0.
+ * A zero multiplier clears both results directly; any other value is
+ * loaded into a scratch register and handed to qmulr()/qmulr_u().
+ */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp;
+
+    if (i0 != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if (sign)
+           qmulr(r0, r1, r2, rn(tmp));
+       else
+           qmulr_u(r0, r1, r2, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    /* anything times zero: both result registers are zero */
+    ixorr(r0, r0);
+    ixorr(r1, r1);
+}
+
+static void                                    /* Sign-extend RAX into RDX ahead of a signed divide. */
+_sign_extend_rdx_rax(jit_state_t *_jit)
+{
+    rex(0, WIDE, 0, 0, 0);
+    ic(0x99);                                  /* opcode 0x99: CDQ, or CQO when prefixed with REX.W */
+}
+
+static void                                    /* r0 = quotient (divide != 0) or remainder (divide == 0) of r1 by r2; signed when sign != 0. */
+_divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
+        jit_bool_t sign, jit_bool_t divide)
+{
+    jit_int32_t                div;            /* register that holds the divisor at the divide instruction */
+    jit_int32_t                reg;            /* scratch register handle; only meaningful when use != 0 */
+    jit_int32_t                set;            /* see allocr()/clear() */
+    jit_int32_t                sav;            /* see allocr()/clear() */
+    jit_int32_t                use;            /* nonzero when a scratch register was taken for the divisor */
+
+    sav = set = use = 0;
+    savset(_RDX_REGNO);
+    savset(_RAX_REGNO);
+    allocr(_RDX_REGNO, _RDX);
+    allocr(_RAX_REGNO, _RAX);
+
+    if (r2 == _RAX_REGNO) {                    /* divisor is in RAX, which must receive the dividend */
+       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {     /* r0 cannot host the divisor: use a scratch */
+           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
+               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+                                 jit_class_gpr|jit_class_named);
+           use = 1;
+           div = rn(reg);
+           movr(div, _RAX_REGNO);
+           if (r1 != _RAX_REGNO)
+               movr(_RAX_REGNO, r1);
+       }
+       else {                                  /* r0 is neither RAX nor RDX: park the divisor in r0 */
+           if (r0 == r1)
+               xchgr(r0, _RAX_REGNO);          /* swap: divisor into r0, dividend into RAX */
+           else {
+               if (r0 != _RAX_REGNO)
+                   movr(r0, _RAX_REGNO);
+               if (r1 != _RAX_REGNO)
+                   movr(_RAX_REGNO, r1);
+           }
+           div = r0;
+       }
+    }
+    else if (r2 == _RDX_REGNO) {               /* divisor is in RDX, which is overwritten before the divide */
+       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
+           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
+               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+                                 jit_class_gpr|jit_class_named);
+           use = 1;
+           div = rn(reg);
+           movr(div, _RDX_REGNO);
+           if (r1 != _RAX_REGNO)
+               movr(_RAX_REGNO, r1);
+       }
+       else {
+           if (r1 != _RAX_REGNO)
+               movr(_RAX_REGNO, r1);
+           movr(r0, _RDX_REGNO);
+           div = r0;
+       }
+    }
+    else {                                     /* divisor is in neither RAX nor RDX: use it in place */
+       if (r1 != _RAX_REGNO)
+           movr(_RAX_REGNO, r1);
+       div = r2;
+    }
+
+    if (sign) {
+       sign_extend_rdx_rax();
+       idivr(div);
+    }
+    else {
+       ixorr(_RDX_REGNO, _RDX_REGNO);          /* unsigned: RDX is zeroed instead of sign-extended */
+       idivr_u(div);
+    }
+
+    if (use)
+       jit_unget_reg(reg);
+
+    if (divide)
+       movr(r0, _RAX_REGNO);                   /* result taken from RAX */
+    else
+       movr(r0, _RDX_REGNO);                   /* result taken from RDX */
+
+    clear(_RDX_REGNO, _RDX);
+    clear(_RAX_REGNO, _RAX);
+}
+
+/*
+ * Emit r0 = r1 / i0 (divide != 0) or r0 = r1 % i0 (divide == 0),
+ * signed when sign != 0.
+ *
+ * Cheap sequences are used where they give the same result as the
+ * divide instruction:
+ *   quotient:  i0 == 1 copies, signed i0 == -1 negates, an unsigned
+ *              division by a positive power of two is a logical right
+ *              shift;
+ *   remainder: i0 == 1 (or signed -1) yields 0, an unsigned remainder
+ *              by a positive power of two is a mask with i0 - 1.
+ *
+ * A signed division by a power of two is deliberately NOT turned into
+ * an arithmetic right shift: the shift rounds toward negative infinity
+ * while the divide instruction truncates toward zero, so odd negative
+ * dividends would give a different quotient (e.g. -7 / 2).  Those cases
+ * go through the generic path below.
+ *
+ * Generic path: RAX and RDX are reserved/saved as needed, the divisor
+ * constant is placed in r0 or in a scratch register, and the result is
+ * copied from RAX (quotient) or RDX (remainder).
+ */
+static void
+_divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
+        jit_bool_t sign, jit_bool_t divide)
+{
+    jit_int32_t                reg;
+    jit_int32_t                div;
+    jit_int32_t                sav;
+    jit_int32_t                set;
+    jit_int32_t                use;
+
+    if (divide) {
+       switch (i0) {
+           case 1:
+               movr(r0, r1);
+               return;
+           case -1:
+               if (sign) {
+                   negr(r0, r1);
+                   return;
+               }
+               break;
+           default:
+               /* shift shortcut is exact only for unsigned operands */
+               if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
+                   movr(r0, r1);
+                   rshi_u(r0, r0, ffsl(i0) - 1);
+                   return;
+               }
+               break;
+       }
+    }
+    else if (i0 == 1 || (sign && i0 == -1)) {
+       ixorr(r0, r0);
+       return;
+    }
+    else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
+       if (can_sign_extend_int_p(i0)) {
+           movr(r0, r1);
+           iandi(r0, i0 - 1);
+       }
+       else if (r0 != r1) {
+           movi(r0, i0 - 1);
+           iandr(r0, r1);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0 - 1);
+           iandr(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       return;
+    }
+
+    sav = set = use = 0;
+    isavset(_RDX_REGNO);
+    isavset(_RAX_REGNO);
+    allocr(_RDX_REGNO, _RDX);
+    allocr(_RAX_REGNO, _RAX);
+
+    /* r0 can hold the divisor unless it is RAX, RDX or the dividend */
+    if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
+       if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
+           reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
+                             jit_class_gpr|jit_class_named);
+       use = 1;
+       div = rn(reg);
+    }
+    else
+       div = r0;
+
+    movi(div, i0);
+    movr(_RAX_REGNO, r1);
+
+    if (sign) {
+       sign_extend_rdx_rax();
+       idivr(div);
+    }
+    else {
+       ixorr(_RDX_REGNO, _RDX_REGNO);
+       idivr_u(div);
+    }
+
+    if (use)
+       jit_unget_reg(reg);
+
+    if (divide)
+       movr(r0, _RAX_REGNO);
+    else
+       movr(r0, _RDX_REGNO);
+
+    clear(_RDX_REGNO, _RDX);
+    clear(_RAX_REGNO, _RAX);
+}
+
+static void                                    /* Divide r2 by r3; the RAX result goes to r0 and the RDX result to r1. Signed when sign != 0. */
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                div;            /* register that holds the divisor at the divide instruction */
+    jit_int32_t                reg;            /* scratch register handle; only meaningful when use != 0 */
+    jit_int32_t                sav;            /* see allocr()/clear() */
+    jit_int32_t                set;            /* see allocr()/clear() */
+    jit_int32_t                use;            /* nonzero when a scratch register was taken for the divisor */
+
+    sav = set = use = 0;
+    qsavset(_RDX_REGNO);
+    qsavset(_RAX_REGNO);
+    allocr(_RDX_REGNO, _RDX);
+    allocr(_RAX_REGNO, _RAX);
+    if (r3 == _RAX_REGNO) {                    /* divisor is in RAX, which must receive the dividend */
+       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
+           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
+               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |    /* NOTE(review): tests r1 (an output), not the dividend r2 -- confirm this is intended */
+                                 jit_class_gpr|jit_class_named);
+           use = 1;
+           div = rn(reg);
+           movr(div, _RAX_REGNO);
+           if (r2 != _RAX_REGNO)
+               movr(_RAX_REGNO, r2);
+       }
+       else {                                  /* r0 is neither RAX nor RDX: park the divisor in r0 */
+           if (r0 == r2)
+               xchgr(r0, _RAX_REGNO);
+           else {
+               if (r0 != _RAX_REGNO)
+                   movr(r0, _RAX_REGNO);
+               if (r2 != _RAX_REGNO)
+                   movr(_RAX_REGNO, r2);
+           }
+           div = r0;
+       }
+    }
+    else if (r3 == _RDX_REGNO) {               /* divisor is in RDX, which is overwritten before the divide */
+       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
+           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
+               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |    /* NOTE(review): same r1-vs-r2 question as above */
+                                 jit_class_gpr|jit_class_named);
+           use = 1;
+           div = rn(reg);
+           movr(div, _RDX_REGNO);
+           if (r2 != _RAX_REGNO)
+               movr(_RAX_REGNO, r2);
+       }
+       else {
+           if (r2 != _RAX_REGNO)
+               movr(_RAX_REGNO, r2);
+           movr(r0, _RDX_REGNO);
+           div = r0;
+       }
+    }
+    else {                                     /* divisor is in neither RAX nor RDX: use it in place */
+       if (r2 != _RAX_REGNO)
+           movr(_RAX_REGNO, r2);
+       div = r3;
+    }
+    if (sign) {
+       sign_extend_rdx_rax();
+       idivr(div);
+    }
+    else {
+       ixorr(_RDX_REGNO, _RDX_REGNO);          /* unsigned: RDX is zeroed instead of sign-extended */
+       idivr_u(div);
+    }
+    if (use)
+       jit_unget_reg(reg);
+
+    if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)  /* destinations are exactly swapped */
+       xchgr(_RAX_REGNO, _RDX_REGNO);
+    else {
+       if (r0 != _RDX_REGNO)
+           movr(r0, _RAX_REGNO);
+       movr(r1, _RDX_REGNO);
+       if (r0 == _RDX_REGNO)                   /* written last so RDX is read into r1 before being overwritten */
+           movr(r0, _RAX_REGNO);
+    }
+
+    clear(_RDX_REGNO, _RDX);
+    clear(_RAX_REGNO, _RAX);
+}
+
+/*
+ * Immediate form of the two-result divide: the divisor i0 is loaded
+ * into a scratch register and the work is delegated to qdivr() or
+ * qdivr_u() depending on sign.
+ */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!sign)
+       qdivr_u(r0, r1, r2, rn(tmp));
+    else
+       qdivr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#undef clear
+#undef allocr
+#undef savset
+
+/*
+ * Emit r0 = r1 & r2.  Identical operands reduce to a copy; when r0
+ * already holds one operand it is and-ed in place with the other.
+ */
+static void
+_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2) {
+       /* x & x == x */
+       movr(r0, r1);
+       return;
+    }
+    if (r0 == r2) {
+       iandr(r0, r1);
+       return;
+    }
+    if (r0 != r1)
+       movr(r0, r1);
+    iandr(r0, r2);
+}
+
+/*
+ * Emit r0 = r1 & i0.  A zero mask clears r0 and an all-ones mask is a
+ * plain copy.  When r0 differs from r1 the constant is loaded into r0
+ * and and-ed with r1; otherwise the immediate form is used when the
+ * constant passes can_sign_extend_int_p(), else a scratch register.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ixorr(r0, r0);
+       return;
+    }
+    if (i0 == -1) {
+       movr(r0, r1);
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, i0);
+       iandr(r0, r1);
+       return;
+    }
+    if (can_sign_extend_int_p(i0)) {
+       iandi(r0, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    iandr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit r0 = r1 | r2.  Identical operands reduce to a copy; when r0
+ * already holds one operand it is or-ed in place with the other.
+ */
+static void
+_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2) {
+       /* x | x == x */
+       movr(r0, r1);
+       return;
+    }
+    if (r0 == r2) {
+       iorr(r0, r1);
+       return;
+    }
+    if (r0 != r1)
+       movr(r0, r1);
+    iorr(r0, r2);
+}
+
+/*
+ * Emit r0 = r1 | i0.  Zero is a copy and all-ones loads -1.  Constants
+ * passing can_sign_extend_int_p() use the immediate form; wider ones
+ * are loaded into r0 (when distinct from r1) or a scratch register.
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       movi(r0, -1);
+       return;
+    }
+    if (can_sign_extend_int_p(i0)) {
+       movr(r0, r1);
+       iori(r0, i0);
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, i0);
+       iorr(r0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    iorr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit r0 = r1 ^ r2.  Identical operands clear r0; when r0 already
+ * holds one operand it is xor-ed in place with the other.
+ */
+static void
+_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2) {
+       /* x ^ x == 0 */
+       ixorr(r0, r0);
+       return;
+    }
+    if (r0 == r2) {
+       ixorr(r0, r1);
+       return;
+    }
+    if (r0 != r1)
+       movr(r0, r1);
+    ixorr(r0, r2);
+}
+
+/*
+ * Emit r0 = r1 ^ i0.  Zero is a copy and all-ones is a complement.
+ * Constants passing can_sign_extend_int_p() use the immediate form;
+ * wider ones are loaded into r0 (when distinct from r1) or a scratch
+ * register.
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       comr(r0, r1);
+       return;
+    }
+    if (can_sign_extend_int_p(i0)) {
+       movr(r0, r1);
+       ixori(r0, i0);
+       return;
+    }
+    if (r0 != r1) {
+       movi(r0, i0);
+       ixorr(r0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ixorr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void                                    /* Shift/rotate r0 in place by the count in CL; code selects the operation. */
+_irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
+{
+    rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
+    ic(0xd3);                                  /* 0xD3 /code: shift-group instruction, count in CL */
+    mrm(0x03, code, r7(r0));                   /* code goes in the ModRM reg field as opcode extension */
+}
+
+static void                                    /* r0 = r1 shifted/rotated (per code) by the count in r2; the count has to be moved into RCX. */
+_rotshr(jit_state_t *_jit, jit_int32_t code,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;            /* scratch register handle */
+    jit_int32_t                use;            /* nonzero when RCX was not free and is preserved in reg */
+
+    if (r0 == _RCX_REGNO) {                    /* result register is RCX itself: compute in a scratch */
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       if (r2 != _RCX_REGNO)
+           movr(_RCX_REGNO, r2);
+       irotshr(code, rn(reg));
+       movr(_RCX_REGNO, rn(reg));              /* move the result into r0 (== RCX) */
+       jit_unget_reg(reg);
+    }
+    else if (r2 != _RCX_REGNO) {               /* count must be moved into RCX */
+       use = !jit_reg_free_p(_RCX);
+       if (use) {                              /* RCX is in use: keep its value in a scratch */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), _RCX_REGNO);
+       }
+       else
+           reg = 0;                            /* reg is not read when use == 0 */
+       if (r1 == _RCX_REGNO) {                 /* value to shift is in RCX */
+           if (r0 == r2)
+               xchgr(r0, _RCX_REGNO);          /* swap value and count in one step */
+           else {
+               movr(r0, r1);
+               movr(_RCX_REGNO, r2);
+           }
+       }
+       else {
+           movr(_RCX_REGNO, r2);               /* count is read first, in case r0 == r2 */
+           movr(r0, r1);
+       }
+       irotshr(code, r0);
+       if (use) {
+           movr(_RCX_REGNO, rn(reg));          /* restore the previous RCX value */
+           jit_unget_reg(reg);
+       }
+    }
+    else {                                     /* count already in RCX */
+       movr(r0, r1);
+       irotshr(code, r0);
+    }
+}
+
+/*
+ * Shift/rotate r0 in place by the constant i0; code selects the
+ * operation through the ModRM reg field.  A count of exactly 1 uses
+ * opcode 0xd1 (no immediate byte); any other count uses 0xc1 followed
+ * by the count as one byte.
+ */
+static void
+_irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
+{
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    if (i0 != 1) {
+       ic(0xc1);
+       mrm(0x03, code, r7(r0));
+       ic(i0);
+    }
+    else {
+       /* shift-by-one short form */
+       ic(0xd1);
+       mrm(0x03, code, r7(r0));
+    }
+}
+
+static void                                    /* r0 = r1 shifted/rotated (per code) by the constant i0. */
+_rotshi(jit_state_t *_jit, jit_int32_t code,
+       jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movr(r0, r1);
+    if (i0)                                    /* a zero count needs no instruction beyond the copy */
+       irotshi(code, r0, i0);
+}
+
+/*
+ * Emit r0 = r1 << i0.  A zero count is a copy, counts above 3 use the
+ * SHL immediate form, and the remaining counts are done with a scaled
+ * lea (scale 2 for 1, scale 4 for 2, scale 8 otherwise).
+ */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       movr(r0, r1);
+    else if (i0 > 3)
+       rotshi(X86_SHL, r0, r1, i0);
+    else if (i0 == 1)
+       lea(0, _NOREG, r1, _SCL2, r0);
+    else if (i0 == 2)
+       lea(0, _NOREG, r1, _SCL4, r0);
+    else
+       lea(0, _NOREG, r1, _SCL8, r0);
+}
+
+static void                                    /* Emit a one-operand 0xF7-group instruction on r0; code selects the operation. */
+_unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
+{
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    ic(0xf7);
+    mrm(0x03, code, r7(r0));                   /* code goes in the ModRM reg field as opcode extension */
+}
+
+/*
+ * Emit r0 = -r1.  With distinct registers the result is built as
+ * 0 - r1 so that r1 is left untouched; otherwise r0 is negated in
+ * place.
+ */
+static void
+_negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       ixorr(r0, r0);
+       isubr(r0, r1);
+    }
+    else
+       inegr(r0);
+}
+
+static void                                    /* r0 = complement of r1: copy, then icomr() in place. */
+_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+    icomr(r0);
+}
+
+#if USE_INC_DEC
+static void                                    /* r0 = r1 + 1 using the INC instruction. */
+_incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+#  if __X64
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    ic(0xff);                                  /* 0xFF /0: INC r/m (ModRM byte 0xC0 | reg) */
+    ic(0xc0 | r7(r0));
+#  else
+    ic(0x40 | r7(r0));                         /* one-byte 0x40+reg INC form */
+#  endif
+}
+
+static void                                    /* r0 = r1 - 1 using the DEC instruction. */
+_decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+#  if __X64
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    ic(0xff);                                  /* 0xFF /1: DEC r/m (ModRM byte 0xC8 | reg) */
+    ic(0xc8 | r7(r0));
+#  else
+    ic(0x48 | r7(r0));                         /* one-byte 0x48+reg DEC form */
+#  endif
+}
+#endif
+
+/*
+ * Compare r1 with r2 and materialize condition `code` in r0.
+ *
+ * When r0 passes reg8_p() the condition is set directly in it: r0 is
+ * cleared with xor before the compare, or -- when r0 is also one of
+ * the compared registers -- with imovi() after the compare.  Otherwise
+ * the result is produced in a scratch register obtained with
+ * jit_class_rg8 and copied into r0.
+ */
+static void
+_cr(jit_state_t *_jit,
+    jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         aliased;
+
+    if (!reg8_p(r0)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+       ixorr(rn(tmp), rn(tmp));
+       icmpr(r1, r2);
+       cc(code, rn(tmp));
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    aliased = r0 == r1 || r0 == r2;
+    if (aliased) {
+       /* r0 is an operand: it can only be cleared after the compare */
+       icmpr(r1, r2);
+       imovi(r0, 0);
+    }
+    else {
+       ixorr(r0, r0);
+       icmpr(r1, r2);
+    }
+    cc(code, r0);
+}
+
+/*
+ * Compare r1 with the constant i0 and materialize condition `code` in
+ * r0.  Same strategy as the register/register form: set the condition
+ * directly in r0 when it passes reg8_p(), clearing it before the
+ * compare (xor) or after it (imovi) when r0 is the compared register;
+ * otherwise go through a jit_class_rg8 scratch register.
+ */
+static void
+_ci(jit_state_t *_jit,
+    jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         aliased;
+
+    if (!reg8_p(r0)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+       ixorr(rn(tmp), rn(tmp));
+       icmpi(r1, i0);
+       cc(code, rn(tmp));
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    aliased = r0 == r1;
+    if (aliased) {
+       /* r0 is the compared register: clear it after the compare */
+       icmpi(r1, i0);
+       imovi(r0, 0);
+    }
+    else {
+       ixorr(r0, r0);
+       icmpi(r1, i0);
+    }
+    cc(code, r0);
+}
+
+/*
+ * Test r1 against itself (the compare-with-zero form) and materialize
+ * condition `code` in r0.  r0 is written directly when it passes
+ * reg8_p(), cleared before the test (xor) or after it (imovi) when
+ * r0 == r1; otherwise a jit_class_rg8 scratch register is used.
+ */
+static void
+_ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         aliased;
+
+    if (!reg8_p(r0)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+       ixorr(rn(tmp), rn(tmp));
+       testr(r1, r1);
+       cc(code, rn(tmp));
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    aliased = r0 == r1;
+    if (aliased) {
+       /* r0 is the tested register: clear it after the test */
+       testr(r1, r1);
+       imovi(r0, 0);
+    }
+    else {
+       ixorr(r0, r0);
+       testr(r1, r1);
+    }
+    cc(code, r0);
+}
+
+/* r0 = (r1 < r2) with condition X86_CC_L; a register compared with
+ * itself short-circuits to the constant 0. */
+static void
+_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_L, r0, r1, r2);
+    else
+       movi(r0, 0);
+}
+
+/* r0 = (r1 < i0) with condition X86_CC_L; a zero constant uses the
+ * test form with the sign condition X86_CC_S instead. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_S, r0, r1);
+    else
+       ci(X86_CC_L, r0, r1, i0);
+}
+
+/* r0 = (r1 < r2) with the unsigned condition X86_CC_B; identical
+ * registers short-circuit to the constant 0. */
+static void
+_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_B, r0, r1, r2);
+    else
+       movi(r0, 0);
+}
+
+/* r0 = (r1 <= r2) with condition X86_CC_LE; identical registers
+ * short-circuit to the constant 1. */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_LE, r0, r1, r2);
+    else
+       movi(r0, 1);
+}
+
+/* r0 = (r1 <= r2) with the unsigned condition X86_CC_BE; identical
+ * registers short-circuit to the constant 1. */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_BE, r0, r1, r2);
+    else
+       movi(r0, 1);
+}
+
+/* r0 = (r1 <= i0) with the unsigned condition X86_CC_BE; against a
+ * zero constant this is an equality test (X86_CC_E on the test form). */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_E, r0, r1);
+    else
+       ci(X86_CC_BE, r0, r1, i0);
+}
+
+/* r0 = (r1 == r2) with condition X86_CC_E; identical registers
+ * short-circuit to the constant 1. */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_E, r0, r1, r2);
+    else
+       movi(r0, 1);
+}
+
+/* r0 = (r1 == i0) with condition X86_CC_E; a zero constant uses the
+ * test form instead of a compare with immediate. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_E, r0, r1);
+    else
+       ci(X86_CC_E, r0, r1, i0);
+}
+
+/* r0 = (r1 >= r2) with condition X86_CC_GE; identical registers
+ * short-circuit to the constant 1. */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_GE, r0, r1, r2);
+    else
+       movi(r0, 1);
+}
+
+/* r0 = (r1 >= i0) with condition X86_CC_GE; a zero constant uses the
+ * test form with the not-sign condition X86_CC_NS instead. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_NS, r0, r1);
+    else
+       ci(X86_CC_GE, r0, r1, i0);
+}
+
+/* r0 = (r1 >= r2) with the unsigned condition X86_CC_AE; identical
+ * registers short-circuit to the constant 1. */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_AE, r0, r1, r2);
+    else
+       movi(r0, 1);
+}
+
+/* r0 = (r1 >= i0) with the unsigned condition X86_CC_AE; a zero
+ * constant uses the test form with X86_CC_NB instead. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_NB, r0, r1);
+    else
+       ci(X86_CC_AE, r0, r1, i0);
+}
+
+/* r0 = (r1 > r2) with condition X86_CC_G; identical registers
+ * short-circuit to the constant 0. */
+static void
+_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_G, r0, r1, r2);
+    else
+       movi(r0, 0);
+}
+
+/* r0 = (r1 > r2) with the unsigned condition X86_CC_A; identical
+ * registers short-circuit to the constant 0. */
+static void
+_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_A, r0, r1, r2);
+    else
+       movi(r0, 0);
+}
+
+/* r0 = (r1 > i0) with the unsigned condition X86_CC_A; against a zero
+ * constant this is a not-equal test (X86_CC_NE on the test form). */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_NE, r0, r1);
+    else
+       ci(X86_CC_A, r0, r1, i0);
+}
+
+/* r0 = (r1 != r2) with condition X86_CC_NE; identical registers
+ * short-circuit to the constant 0. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 != r2)
+       cr(X86_CC_NE, r0, r1, r2);
+    else
+       movi(r0, 0);
+}
+
+/* r0 = (r1 != i0) with condition X86_CC_NE; a zero constant uses the
+ * test form instead of a compare with immediate. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       ci0(X86_CC_NE, r0, r1);
+    else
+       ci(X86_CC_NE, r0, r1, i0);
+}
+
+/*
+ * Emit a register-to-register move r0 = r1 (opcode 0x89, source in the
+ * ModRM reg field, destination in r/m).  Nothing is emitted when both
+ * registers are the same.
+ *
+ * The source register number is masked with r7() before being placed
+ * in the ModRM byte, like every other ModRM construction in this file.
+ * The unmasked `r1 << 3` only produced the right byte for registers
+ * 8..15 because the extra bit coincides with a bit already set by
+ * 0xc0.  The emitted bytes are unchanged.
+ */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       rex(0, 1, r1, _NOREG, r0);
+       ic(0x89);
+       ic(0xc0 | (r7(r1) << 3) | r7(r0));
+    }
+}
+
+static void                                    /* Load constant i0 into r0 with a MOV-immediate (0xB8+reg); never emits xor. */
+_imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __X64
+#  if !__X64_32
+    if (fits_uint32_p(i0)) {                   /* value fits the short form with a 32-bit immediate */
+#  endif
+       rex(0, 0, _NOREG, _NOREG, r0);
+       ic(0xb8 | r7(r0));
+       ii(i0);
+#  if !__X64_32
+    }
+    else {                                     /* otherwise: rex with wide=1 and the immediate emitted with il() */
+       rex(0, 1, _NOREG, _NOREG, r0);
+       ic(0xb8 | r7(r0));
+       il(i0);
+    }
+#  endif
+#else
+    ic(0xb8 | r7(r0));                         /* non-x64 build: no rex prefix */
+    ii(i0);
+#endif
+}
+
+/* Load constant i0 into r0; zero is produced by xor-ing r0 with
+ * itself, any other value by a MOV-immediate through imovi(). */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (i0 == 0)
+       ixorr(r0, r0);
+    else
+       imovi(r0, i0);
+}
+
+static jit_word_t                              /* Load i0 into r0, always via il(); returns the code position just past the instruction. */
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    ic(0xb8 | r7(r0));
+    il(i0);                                    /* no short form is tried -- presumably so the immediate can be patched later; confirm with callers */
+    return (_jit->pc.w);
+}
+
+static void                                    /* r0 = sign-extended low byte of r1. */
+_movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F BE /r: MOVSX reg, r/m8 */
+    ic(0xbe);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+static void                                    /* r0 = zero-extended low byte of r1. */
+_movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F B6 /r: MOVZX reg, r/m8 */
+    ic(0xb6);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+static void                                    /* r0 = sign-extended low 16 bits of r1. */
+_movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F BF /r: MOVSX reg, r/m16 */
+    ic(0xbf);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+static void                                    /* r0 = zero-extended low 16 bits of r1. */
+_movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F B7 /r: MOVZX reg, r/m16 */
+    ic(0xb7);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+#if __X64
+static void                                    /* r0 = sign-extended low 32 bits of r1 (x64 builds only). */
+_movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 1, r0, _NOREG, r1);
+    ic(0x63);                                  /* REX.W 63 /r: MOVSXD reg, r/m32 */
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+/*
+ * r0 = low 32 bits of r1: a MOV (opcode 0x89) emitted with the rex
+ * "wide" argument 0, i.e. the 32-bit operand-size form, which on x86-64
+ * clears the upper half of the destination.  x64 builds only.
+ *
+ * The source register number is masked with r7() before being placed
+ * in the ModRM byte, like every other ModRM construction in this file;
+ * the unmasked shift only worked for registers 8..15 because the extra
+ * bit coincides with a bit already set by 0xc0.  Emitted bytes are
+ * unchanged.
+ */
+static void
+_movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x89);
+    ic(0xc0 | (r7(r1) << 3) | r7(r0));
+}
+#endif
+
+static void                                    /* r0 = low 16 bits of r1 with the two bytes swapped. */
+_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    extr_us(r0, r1);
+    ic(0x66);                                  /* operand-size prefix: the rotate below acts on 16 bits */
+    rex(0, 0, _NOREG, _NOREG, r0);
+    ic(0xc1);                                  /* 0xC1 /X86_ROR ib: rotate right by an immediate */
+    mrm(0x03, X86_ROR, r7(r0));
+    ic(8);                                     /* rotating 16 bits by 8 exchanges the two bytes */
+}
+
+static void                                    /* r0 = r1 with the bytes of its low 32 bits reversed. */
+_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+    rex(0, 0, _NOREG, _NOREG, r0);
+    ic(0x0f);                                  /* 0F C8+reg: BSWAP (rex "wide" argument is 0 here) */
+    ic(0xc8 | r7(r0));
+}
+
+#if __X64 && !__X64_32
+static void                                    /* r0 = r1 with all bytes of the full register reversed. */
+_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+    rex(0, 1, _NOREG, _NOREG, r0);
+    ic(0x0f);                                  /* 0F C8+reg: BSWAP, with the rex "wide" argument set to 1 */
+    ic(0xc8 | r7(r0));
+}
+#endif
+
+/*
+ * r0 = sign-extended low byte of r1.  If r1 does not pass reg8_p() it
+ * is first copied into a scratch register obtained with jit_class_rg8.
+ */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!reg8_p(r1)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+       movr(rn(tmp), r1);
+       movcr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    movcr(r0, r1);
+}
+
+/*
+ * r0 = zero-extended low byte of r1.  If r1 does not pass reg8_p() it
+ * is first copied into a scratch register obtained with jit_class_rg8.
+ */
+static void
+_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!reg8_p(r1)) {
+       tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+       movr(rn(tmp), r1);
+       movcr_u(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    movcr_u(r0, r1);
+}
+
+static void                                    /* Load the byte addressed by r1 into r0, sign-extended. */
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F BE /r: MOVSX reg, r/m8 */
+    ic(0xbe);
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the byte at absolute address i0 into r0, sign-extended (0F BE).
+ * Addresses rejected by can_sign_extend_int_p() are loaded into a
+ * scratch register and the register-indirect form is used.
+ */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_c(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, _NOREG);
+    ic(0x0f);
+    ic(0xbe);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+static void                                    /* Load the byte addressed by r1 into r0, zero-extended. */
+_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F B6 /r: MOVZX reg, r/m8 */
+    ic(0xb6);
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the byte at absolute address i0 into r0, zero-extended (0F B6).
+ * Addresses rejected by can_sign_extend_int_p() are loaded into a
+ * scratch register and the register-indirect form is used.
+ */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_uc(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, _NOREG);
+    ic(0x0f);
+    ic(0xb6);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+static void                                    /* Load the 16-bit value addressed by r1 into r0, sign-extended. */
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F BF /r: MOVSX reg, r/m16 */
+    ic(0xbf);
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the 16-bit value at absolute address i0 into r0, sign-extended
+ * (0F BF).  Addresses rejected by can_sign_extend_int_p() are loaded
+ * into a scratch register and the register-indirect form is used.
+ */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_s(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, _NOREG);
+    ic(0x0f);
+    ic(0xbf);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+static void                                    /* Load the 16-bit value addressed by r1 into r0, zero-extended. */
+_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);                                  /* 0F B7 /r: MOVZX reg, r/m16 */
+    ic(0xb7);
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the 16-bit value at absolute address i0 into r0, zero-extended
+ * (0F B7).  Addresses rejected by can_sign_extend_int_p() are loaded
+ * into a scratch register and the register-indirect form is used.
+ */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_us(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, _NOREG);
+    ic(0x0f);
+    ic(0xb7);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+#if __X32 || !__X64_32
+static void                                    /* Load the 32-bit value addressed by r1 into r0 (sign-extending on x64 builds). */
+_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if __X64
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x63);                                  /* 0x63 /r: MOVSXD reg, r/m32 */
+#else
+    ic(0x8b);                                  /* 0x8B /r: MOV reg, r/m */
+#endif
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the 32-bit value at absolute address i0 into r0 (opcode 0x63
+ * with a rex prefix on x64 builds, plain 0x8b otherwise).  Addresses
+ * rejected by can_sign_extend_int_p() are loaded into a scratch
+ * register and the register-indirect form is used.
+ */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_i(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+#if __X64
+    rex(0, WIDE, r0, _NOREG, _NOREG);
+    ic(0x63);
+#else
+    ic(0x8b);
+#endif
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+#endif
+
+#if __X64
+static void                                    /* Load the 32-bit value addressed by r1 into r0 without sign extension. */
+_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 0, r0, _NOREG, r1);                 /* rex "wide" argument is 0, unlike the signed ldr_i form */
+    ic(0x63);                                  /* NOTE(review): relies on opcode 0x63 without REX.W acting as a plain 32-bit load -- confirm */
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the 32-bit value at absolute address i0 into r0 using opcode
+ * 0x63 with the rex "wide" argument 0 (the unsigned form).  Addresses
+ * rejected by can_sign_extend_int_p() are loaded into a scratch
+ * register and the register-indirect form is used.
+ */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_ui(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, 0, r0, _NOREG, _NOREG);
+    ic(0x63);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+#  if !__X64_32
+static void                                    /* Load the full-width word addressed by r1 into r0. */
+_ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 1, r0, _NOREG, r1);
+    ic(0x8b);                                  /* REX.W 8B /r: MOV reg, r/m */
+    rx(r0, 0, r1, _NOREG, _SCL1);              /* memory operand; presumably (reg, disp, base, index, scale) */
+}
+
+/*
+ * Load the full-width word at absolute address i0 into r0 (0x8b with
+ * the rex "wide" argument 1).  Addresses rejected by
+ * can_sign_extend_int_p() are loaded into a scratch register and the
+ * register-indirect form is used.
+ */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldr_l(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, 1, r0, _NOREG, _NOREG);
+    ic(0x8b);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+#  endif
+#endif
+
+static void                                    /* Load the byte at address r1 + r2 into r0, sign-extended. */
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    addr(r0, r1, r2);                          /* this build computes the address into r0 first */
+    ldr_c(r0, r0);
+#else
+    rex(0, WIDE, r0, r1, r2);
+    ic(0x0f);                                  /* 0F BE /r: MOVSX reg, r/m8 */
+    ic(0xbe);
+    rx(r0, 0, r2, r1, _SCL1);                  /* r2 and r1 both passed as address registers, scale 1 */
+#endif
+}
+
+/*
+ * Load the byte at address r1 + i0 into r0, sign-extended (0F BE).
+ * Offsets rejected by can_sign_extend_int_p() are loaded into a
+ * scratch register and the register-indexed form is used.
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_c(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbe);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+
+static void                                    /* Load the byte at address r1 + r2 into r0, zero-extended. */
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    addr(r0, r1, r2);                          /* this build computes the address into r0 first */
+    ldr_uc(r0, r0);
+#else
+    rex(0, WIDE, r0, r1, r2);
+    ic(0x0f);                                  /* 0F B6 /r: MOVZX reg, r/m8 */
+    ic(0xb6);
+    rx(r0, 0, r2, r1, _SCL1);                  /* r2 and r1 both passed as address registers, scale 1 */
+#endif
+}
+
+/*
+ * Load the byte at address r1 + i0 into r0, zero-extended (0F B6).
+ * Offsets rejected by can_sign_extend_int_p() are loaded into a
+ * scratch register and the register-indexed form is used.
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_uc(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xb6);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+
+static void                                    /* Load the 16-bit value at address r1 + r2 into r0, sign-extended. */
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    addr(r0, r1, r2);                          /* this build computes the address into r0 first */
+    ldr_s(r0, r0);
+#else
+    rex(0, WIDE, r0, r1, r2);
+    ic(0x0f);                                  /* 0F BF /r: MOVSX reg, r/m16 */
+    ic(0xbf);
+    rx(r0, 0, r2, r1, _SCL1);                  /* r2 and r1 both passed as address registers, scale 1 */
+#endif
+}
+
+/* Load a sign-extended 16-bit value (opcode 0x0f 0xbf) into r0 from r1
+ * plus the displacement i0.  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * ldxr_s(). */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldxr_s(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbf);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Load a zero-extended 16-bit value (opcode 0x0f 0xb7) into r0,
+ * addressing memory through r1 and r2.  Under __X64_32 the two registers
+ * are first combined into r0 with addr() and loaded with ldr_us(). */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if !__X64_32
+    rex(0, WIDE, r0, r1, r2);
+    ic(0x0f);
+    ic(0xb7);
+    rx(r0, 0, r2, r1, _SCL1);
+#else
+    addr(r0, r1, r2);
+    ldr_us(r0, r0);
+#endif
+}
+
+/* Load a zero-extended 16-bit value (opcode 0x0f 0xb7) into r0 from r1
+ * plus the displacement i0.  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * ldxr_us(). */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldxr_us(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xb7);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+
+#if __X64 || !__X64_32
+/* Load a 32-bit integer into r0, addressing memory through r1 and r2.
+ * With __X64 the opcode is 0x63 behind a WIDE rex prefix (the
+ * sign-extending form); otherwise the plain 0x8b load is emitted. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if !__X64
+    ic(0x8b);
+#else
+    rex(0, WIDE, r0, r1, r2);
+    ic(0x63);
+#endif
+    rx(r0, 0, r2, r1, _SCL1);
+}
+
+/* Load a 32-bit integer into r0 from r1 plus the displacement i0.  With
+ * __X64 the opcode is 0x63 behind a WIDE rex prefix, otherwise 0x8b.  A
+ * displacement rejected by can_sign_extend_int_p() is moved to a scratch
+ * register and handled by ldxr_i(). */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldxr_i(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+#if !__X64
+    ic(0x8b);
+#else
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x63);
+#endif
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+#endif
+
+#if __X64
+/* Load a 32-bit value into r0 with opcode 0x8b and no wide rex bit,
+ * addressing memory through r1 and r2.  Under __X64_32 the registers are
+ * first combined into r0 with addr() and the load goes through
+ * _ldr_ui(). */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if !__X64_32
+    rex(0, 0, r0, r1, r2);
+    ic(0x8b);
+    rx(r0, 0, r2, r1, _SCL1);
+#else
+    addr(r0, r1, r2);
+    /* The function is called by its real name on purpose, so that macro
+     * renames cannot redirect the call. */
+    _ldr_ui(_jit, r0, r0);
+#endif
+}
+
+/* Load a 32-bit value into r0 with opcode 0x8b and no wide rex bit, from
+ * r1 plus the displacement i0.  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * ldxr_ui(). */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldxr_ui(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r0, _NOREG, r1);
+    ic(0x8b);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+
+#  if !__X64_32
+/* Load into r0 with opcode 0x8b and the second rex() argument set to 1
+ * (the long-sized load), addressing memory through r1 and r2. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    rex(0, 1, r0, r1, r2);
+    ic(0x8b);
+    rx(r0, 0, r2, r1, _SCL1);
+}
+
+/* Load into r0 with opcode 0x8b and the second rex() argument set to 1,
+ * from r1 plus the displacement i0.  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * ldxr_l(). */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        ldxr_l(r0, r1, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 1, r0, _NOREG, r1);
+    ic(0x8b);
+    rx(r0, i0, r1, _NOREG, _SCL1);
+}
+#  endif
+#endif
+
+/* Store the low byte of r1 at the address held in r0 (opcode 0x88).
+ * When r1 fails reg8_p() its value is first copied into a scratch
+ * register taken from the jit_class_rg8 class and stored from there. */
+static void
+_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!reg8_p(r1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+        movr(rn(tmp), r1);
+        rex(0, 0, rn(tmp), _NOREG, r0);
+        ic(0x88);
+        rx(rn(tmp), 0, r0, _NOREG, _SCL1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x88);
+    rx(r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store the low byte of r0 at the absolute address i0 (opcode 0x88).
+ * Three cases are handled, in this order:
+ *  - i0 rejected by can_sign_extend_int_p(): the address is moved to a
+ *    scratch register and str_c() does the store;
+ *  - r0 fails reg8_p(): the value is copied into a jit_class_rg8 scratch
+ *    register and stored from there;
+ *  - otherwise r0 is stored directly. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_c(rn(tmp), r0);
+        jit_unget_reg(tmp);
+        return;
+    }
+    if (!reg8_p(r0)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+        movr(rn(tmp), r0);
+        rex(0, 0, rn(tmp), _NOREG, _NOREG);
+        ic(0x88);
+        rx(rn(tmp), i0, _NOREG, _NOREG, _SCL1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r0, _NOREG, _NOREG);
+    ic(0x88);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Store the low 16 bits of r1 at the address held in r0: opcode 0x89
+ * preceded by the 0x66 operand-size prefix. */
+static void
+_str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* The 0x66 prefix is emitted before rex(), ahead of the opcode. */
+    ic(0x66);
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store the low 16 bits of r0 at the absolute address i0 (0x66 prefix
+ * plus opcode 0x89).  An address rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the store is done by str_s(). */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_s(rn(tmp), r0);
+        jit_unget_reg(tmp);
+        return;
+    }
+    ic(0x66);
+    rex(0, 0, r0, _NOREG, _NOREG);
+    ic(0x89);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Store r1 at the address held in r0 with opcode 0x89 and no wide rex
+ * bit (the 32-bit store). */
+static void
+_str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store r0 at the absolute address i0 with opcode 0x89 and no wide rex
+ * bit.  An address rejected by can_sign_extend_int_p() is moved to a
+ * scratch register and the store is done by str_i(). */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_i(rn(tmp), r0);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r0, _NOREG, _NOREG);
+    ic(0x89);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+#if __X64 && !__X64_32
+/* Store r1 at the address held in r0 with opcode 0x89 and the second
+ * rex() argument set to 1 (the long-sized store). */
+static void
+_str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 1, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store r0 at the absolute address i0 with opcode 0x89 and the second
+ * rex() argument set to 1.  An address rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and the store
+ * is done by str_l(). */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        str_l(rn(tmp), r0);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 1, r0, _NOREG, _NOREG);
+    ic(0x89);
+    rx(r0, i0, _NOREG, _NOREG, _SCL1);
+}
+#endif
+
+/* Store the low byte of r2 to memory addressed through r0 and r1
+ * (opcode 0x88).  When r2 fails reg8_p() its value is copied to a
+ * jit_class_rg8 scratch register first.  Under __X64_32 the address is
+ * computed into a scratch register with addr() and str_c() stores. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+#if !__X64_32
+    if (!reg8_p(r2)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+        movr(rn(tmp), r2);
+        rex(0, 0, rn(tmp), r1, r0);
+        ic(0x88);
+        rx(rn(tmp), 0, r0, r1, _SCL1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r2, r1, r0);
+    ic(0x88);
+    rx(r2, 0, r0, r1, _SCL1);
+#else
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_c(rn(tmp), r2);
+    jit_unget_reg(tmp);
+#endif
+}
+
+/* Store the low byte of r1 at r0 plus the displacement i0 (opcode
+ * 0x88).  Three cases are handled, in this order:
+ *  - i0 rejected by can_sign_extend_int_p(): the displacement is moved
+ *    to a scratch register and stxr_c() does the store;
+ *  - r1 fails reg8_p(): the value is copied into a jit_class_rg8 scratch
+ *    register and stored from there;
+ *  - otherwise r1 is stored directly. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        stxr_c(rn(tmp), r0, r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    if (!reg8_p(r1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_rg8);
+        movr(rn(tmp), r1);
+        rex(0, 0, rn(tmp), _NOREG, r0);
+        ic(0x88);
+        rx(rn(tmp), i0, r0, _NOREG, _SCL1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x88);
+    rx(r1, i0, r0, _NOREG, _SCL1);
+}
+
+/* Store the low 16 bits of r2 to memory addressed through r0 and r1
+ * (0x66 prefix plus opcode 0x89).  Under __X64_32 the address is
+ * computed into a scratch register with addr() and str_s() stores. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if !__X64_32
+    ic(0x66);
+    rex(0, 0, r2, r1, r0);
+    ic(0x89);
+    rx(r2, 0, r0, r1, _SCL1);
+#else
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_s(rn(tmp), r2);
+    jit_unget_reg(tmp);
+#endif
+}
+
+/* Store the low 16 bits of r1 at r0 plus the displacement i0 (0x66
+ * prefix plus opcode 0x89).  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * stxr_s(). */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        stxr_s(rn(tmp), r0, r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    ic(0x66);
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, i0, r0, _NOREG, _SCL1);
+}
+
+/* Store r2 to memory addressed through r0 and r1 with opcode 0x89 and
+ * no wide rex bit.  Under __X64_32 the address is computed into a
+ * scratch register with addr() and str_i() stores. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if !__X64_32
+    rex(0, 0, r2, r1, r0);
+    ic(0x89);
+    rx(r2, 0, r0, r1, _SCL1);
+#else
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_i(rn(tmp), r2);
+    jit_unget_reg(tmp);
+#endif
+}
+
+/* Store r1 at r0 plus the displacement i0 with opcode 0x89 and no wide
+ * rex bit.  A displacement rejected by can_sign_extend_int_p() is moved
+ * to a scratch register and handled by stxr_i(). */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        stxr_i(rn(tmp), r0, r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 0, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, i0, r0, _NOREG, _SCL1);
+}
+
+#if __X64 && !__X64_32
+/* Store r2 to memory addressed through r0 and r1 with opcode 0x89 and
+ * the second rex() argument set to 1 (the long-sized store). */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    rex(0, 1, r2, r1, r0);
+    ic(0x89);
+    rx(r2, 0, r0, r1, _SCL1);
+}
+
+/* Store r1 at r0 plus the displacement i0 with opcode 0x89 and the
+ * second rex() argument set to 1.  A displacement rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and handled by
+ * stxr_l(). */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i0)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        stxr_l(rn(tmp), r0, r1);
+        jit_unget_reg(tmp);
+        return;
+    }
+    rex(0, 1, r1, _NOREG, r0);
+    ic(0x89);
+    rx(r1, i0, r0, _NOREG, _SCL1);
+}
+#endif
+
+/* Emit a short conditional jump: opcode byte 0x70 | code followed by a
+ * one-byte displacement towards i0. */
+static void
+_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
+{
+    jit_word_t         w;
+    ic(0x70 | code);
+    /* The displacement is relative to the position after the byte that
+     * is about to be written, hence the + 1.  It is computed into w
+     * before ic() is invoked. */
+    w = i0 - (_jit->pc.w + 1);
+    ic(w);
+}
+
+/* Emit a near conditional jump: bytes 0x0f, 0x80 | code, followed by a
+ * displacement towards i0 written with ii(). */
+static void
+_jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
+{
+    jit_word_t         w;
+    ic(0x0f);
+    ic(0x80 | code);
+    /* The displacement is relative to the position after the 4 bytes
+     * that are about to be written. */
+    w = i0 - (_jit->pc.w + 4);
+    ii(w);
+}
+
+/* Compare r0 with r1 (X86_CMP) and emit a conditional jump to i0 using
+ * condition code `code'. */
+static void
+_jcr(jit_state_t *_jit,
+     jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    alur(X86_CMP, r0, r1);
+    jcc(code, i0);
+}
+
+/* Compare r0 with the immediate i1 (X86_CMP) and emit a conditional
+ * jump to i0 using condition code `code'. */
+static void
+_jci(jit_state_t *_jit,
+     jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    alui(X86_CMP, r0, i1);
+    jcc(code, i0);
+}
+
+/* Variant of the compare-and-jump used when the immediate is zero:
+ * r0 is tested against itself with testr() instead of being compared,
+ * then a conditional jump to i0 is emitted with condition `code'. */
+static void
+_jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
+{
+    testr(r0, r0);
+    jcc(code, i0);
+}
+
+/* Compare r0 with r1 and jump to i0 on the signed "less than" condition
+ * (X86_CC_L).  Returns _jit->pc.w as it stands right after the jump. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jcr(X86_CC_L, i0, r0, r1);
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 is less than the immediate i1 (signed).  For a zero
+ * immediate the register is only tested and the jump is taken on the
+ * sign condition (X86_CC_S).  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_S, i0, r0);
+    }
+    else {
+        jci(X86_CC_L, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Compare r0 with r1 and jump to i0 on the unsigned "below" condition
+ * (X86_CC_B).  Returns _jit->pc.w as it stands right after the jump. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jcr(X86_CC_B, i0, r0, r1);
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 is below the immediate i1 (unsigned, X86_CC_B).
+ * A zero immediate still emits a test plus a jump on X86_CC_B, so a
+ * jump instruction is produced in both cases.  Returns _jit->pc.w after
+ * the jump. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_B, i0, r0);
+    }
+    else {
+        jci(X86_CC_B, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 <= r1 (signed, X86_CC_LE).  When both operands are
+ * the same register the condition always holds, so an unconditional
+ * jmpi() is emitted instead.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+        jcr(X86_CC_LE, i0, r0, r1);
+    }
+    else {
+        jmpi(i0);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 <= i1 (signed, X86_CC_LE).  A zero immediate is
+ * handled by testing r0 against itself rather than comparing.  Returns
+ * _jit->pc.w after the jump. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_LE, i0, r0);
+    }
+    else {
+        jci(X86_CC_LE, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 <= r1 (unsigned, X86_CC_BE).  When both operands
+ * are the same register the condition always holds, so an unconditional
+ * jmpi() is emitted instead.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+        jcr(X86_CC_BE, i0, r0, r1);
+    }
+    else {
+        jmpi(i0);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 <= i1 (unsigned, X86_CC_BE).  A zero immediate is
+ * handled by testing r0 against itself rather than comparing.  Returns
+ * _jit->pc.w after the jump. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_BE, i0, r0);
+    }
+    else {
+        jci(X86_CC_BE, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 == r1 (X86_CC_E).  When both operands are the same
+ * register the condition always holds, so an unconditional jmpi() is
+ * emitted instead.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+        jcr(X86_CC_E, i0, r0, r1);
+    }
+    else {
+        jmpi(i0);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 == i1 (X86_CC_E).  A zero immediate is handled by
+ * testing r0 against itself rather than comparing.  Returns _jit->pc.w
+ * after the jump. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_E, i0, r0);
+    }
+    else {
+        jci(X86_CC_E, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 >= r1 (signed, X86_CC_GE).  When both operands are
+ * the same register the condition always holds, so an unconditional
+ * jmpi() is emitted instead.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+        jcr(X86_CC_GE, i0, r0, r1);
+    }
+    else {
+        jmpi(i0);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 >= i1 (signed, X86_CC_GE).  For a zero immediate
+ * the register is only tested and the jump is taken on the "not sign"
+ * condition (X86_CC_NS).  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_NS, i0, r0);
+    }
+    else {
+        jci(X86_CC_GE, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 >= r1 (unsigned, X86_CC_AE).  When both operands
+ * are the same register the condition always holds, so an unconditional
+ * jmpi() is emitted instead.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+        jcr(X86_CC_AE, i0, r0, r1);
+    }
+    else {
+        jmpi(i0);
+    }
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 >= i1 (unsigned, X86_CC_AE).  Every unsigned value
+ * is >= 0, so a zero immediate turns into an unconditional jmpi().
+ * Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jmpi(i0);
+    }
+    else {
+        jci(X86_CC_AE, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Compare r0 with r1 and jump to i0 on the signed "greater than"
+ * condition (X86_CC_G).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jcr(X86_CC_G, i0, r0, r1);
+    return (_jit->pc.w);
+}
+
+/* Compare r0 with the immediate i1 and jump to i0 on the signed
+ * "greater than" condition (X86_CC_G).  Unlike most immediate branches
+ * here there is no special case for i1 == 0.  Returns _jit->pc.w right
+ * after the jump. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jci(X86_CC_G, i0, r0, i1);
+    return (_jit->pc.w);
+}
+
+/* Compare r0 with r1 and jump to i0 on the unsigned "above" condition
+ * (X86_CC_A).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jcr(X86_CC_A, i0, r0, r1);
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 > i1 (unsigned, X86_CC_A).  An unsigned value is
+ * above zero exactly when it is non-zero, so a zero immediate becomes a
+ * test plus a jump on X86_CC_NE.  Returns _jit->pc.w after the jump. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_NE, i0, r0);
+    }
+    else {
+        jci(X86_CC_A, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* Compare r0 with r1 and jump to i0 on the "not equal" condition
+ * (X86_CC_NE).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jcr(X86_CC_NE, i0, r0, r1);
+    return (_jit->pc.w);
+}
+
+/* Jump to i0 when r0 != i1 (X86_CC_NE).  A zero immediate is handled by
+ * testing r0 against itself rather than comparing.  Returns _jit->pc.w
+ * after the jump. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (i1 == 0) {
+        jci0(X86_CC_NE, i0, r0);
+    }
+    else {
+        jci(X86_CC_NE, i0, r0, i1);
+    }
+    return (_jit->pc.w);
+}
+
+/* "Branch if mask set": test r0 against r1 and jump to i0 with jnz(),
+ * i.e. when the test result is non-zero.  Returns _jit->pc.w right
+ * after the jump. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    testr(r0, r1);
+    jnz(i0);
+    return (_jit->pc.w);
+}
+
+/* "Branch if mask set" with an immediate mask: test r0 against i1 and
+ * jump to i0 with jnz().  A mask rejected by can_zero_extend_int_p() is
+ * moved to a scratch register and tested with testr().  Returns
+ * _jit->pc.w after the jump. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_zero_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        testr(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        testi(r0, i1);
+    }
+    jnz(i0);
+    return (_jit->pc.w);
+}
+
+/* "Branch if mask clear": test r0 against r1 and jump to i0 with jz(),
+ * i.e. when the test result is zero.  Returns _jit->pc.w right after
+ * the jump. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    testr(r0, r1);
+    jz(i0);
+    return (_jit->pc.w);
+}
+
+/* "Branch if mask clear" with an immediate mask: test r0 against i1 and
+ * jump to i0 with jz().  A mask rejected by can_zero_extend_int_p() is
+ * moved to a scratch register and tested with testr().  Returns
+ * _jit->pc.w after the jump. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_zero_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        testr(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        testi(r0, i1);
+    }
+    jz(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddr(r0, r1) followed by a jump to i0 taken on the overflow
+ * condition (jo).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    iaddr(r0, r1);
+    jo(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddi(r0, i1) followed by a jump to i0 taken on the overflow
+ * condition (jo).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to boaddr(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before boaddr() emits
+         * the code that reads it; confirm that boaddr() never allocates
+         * a register of its own. */
+        jit_unget_reg(tmp);
+        return (boaddr(i0, r0, rn(tmp)));
+    }
+    iaddi(r0, i1);
+    jo(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddr(r0, r1) followed by a jump to i0 taken on the carry
+ * condition (jc).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    iaddr(r0, r1);
+    jc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddi(r0, i1) followed by a jump to i0 taken on the carry
+ * condition (jc).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to boaddr_u(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before boaddr_u() emits
+         * the code that reads it; confirm that boaddr_u() never
+         * allocates a register of its own. */
+        jit_unget_reg(tmp);
+        return (boaddr_u(i0, r0, rn(tmp)));
+    }
+    iaddi(r0, i1);
+    jc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddr(r0, r1) followed by a jump to i0 taken on the "no
+ * overflow" condition (jno).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    iaddr(r0, r1);
+    jno(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddi(r0, i1) followed by a jump to i0 taken on the "no
+ * overflow" condition (jno).  An immediate rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and the work is
+ * delegated to bxaddr(), whose result is returned; otherwise _jit->pc.w
+ * after the jump is returned. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bxaddr() emits
+         * the code that reads it; confirm that bxaddr() never allocates
+         * a register of its own. */
+        jit_unget_reg(tmp);
+        return (bxaddr(i0, r0, rn(tmp)));
+    }
+    iaddi(r0, i1);
+    jno(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddr(r0, r1) followed by a jump to i0 taken on the "no carry"
+ * condition (jnc).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    iaddr(r0, r1);
+    jnc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit iaddi(r0, i1) followed by a jump to i0 taken on the "no carry"
+ * condition (jnc).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to bxaddr_u(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bxaddr_u() emits
+         * the code that reads it; confirm that bxaddr_u() never
+         * allocates a register of its own. */
+        jit_unget_reg(tmp);
+        return (bxaddr_u(i0, r0, rn(tmp)));
+    }
+    iaddi(r0, i1);
+    jnc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubr(r0, r1) followed by a jump to i0 taken on the overflow
+ * condition (jo).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    isubr(r0, r1);
+    jo(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubi(r0, i1) followed by a jump to i0 taken on the overflow
+ * condition (jo).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to bosubr(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bosubr() emits
+         * the code that reads it; confirm that bosubr() never allocates
+         * a register of its own. */
+        jit_unget_reg(tmp);
+        return (bosubr(i0, r0, rn(tmp)));
+    }
+    isubi(r0, i1);
+    jo(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubr(r0, r1) followed by a jump to i0 taken on the carry
+ * condition (jc).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    isubr(r0, r1);
+    jc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubi(r0, i1) followed by a jump to i0 taken on the carry
+ * condition (jc).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to bosubr_u(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bosubr_u() emits
+         * the code that reads it; confirm that bosubr_u() never
+         * allocates a register of its own. */
+        jit_unget_reg(tmp);
+        return (bosubr_u(i0, r0, rn(tmp)));
+    }
+    isubi(r0, i1);
+    jc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubr(r0, r1) followed by a jump to i0 taken on the "no
+ * overflow" condition (jno).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    isubr(r0, r1);
+    jno(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubi(r0, i1) followed by a jump to i0 taken on the "no
+ * overflow" condition (jno).  An immediate rejected by
+ * can_sign_extend_int_p() is moved to a scratch register and the work is
+ * delegated to bxsubr(), whose result is returned; otherwise _jit->pc.w
+ * after the jump is returned. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bxsubr() emits
+         * the code that reads it; confirm that bxsubr() never allocates
+         * a register of its own. */
+        jit_unget_reg(tmp);
+        return (bxsubr(i0, r0, rn(tmp)));
+    }
+    isubi(r0, i1);
+    jno(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubr(r0, r1) followed by a jump to i0 taken on the "no carry"
+ * condition (jnc).  Returns _jit->pc.w right after the jump. */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    isubr(r0, r1);
+    jnc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit isubi(r0, i1) followed by a jump to i0 taken on the "no carry"
+ * condition (jnc).  An immediate rejected by can_sign_extend_int_p() is
+ * moved to a scratch register and the work is delegated to bxsubr_u(),
+ * whose result is returned; otherwise _jit->pc.w after the jump is
+ * returned. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+
+    if (!can_sign_extend_int_p(i1)) {
+        tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+        movi(rn(tmp), i1);
+        /* NOTE(review): the register is released before bxsubr_u() emits
+         * the code that reads it; confirm that bxsubr_u() never
+         * allocates a register of its own. */
+        jit_unget_reg(tmp);
+        return (bxsubr_u(i0, r0, rn(tmp)));
+    }
+    isubi(r0, i1);
+    jnc(i0);
+    return (_jit->pc.w);
+}
+
+/* Emit an indirect call through register r0: opcode 0xff with a
+ * register-direct modrm byte (mode 0x03) and opcode extension 0x02. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    rex(0, 0, _NOREG, _NOREG, r0);
+    ic(0xff);
+    mrm(0x03, 0x02, r7(r0));
+}
+
+/* Emit a call to the absolute address i0.
+ * With __X64 the address is loaded into a scratch register with
+ * movi_p() and called through callr(); the value returned is whatever
+ * movi_p() returned.  Otherwise a 0xe8 relative call is emitted and the
+ * value returned is _jit->pc.w after the instruction. */
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+#if !__X64
+    jit_word_t         disp;
+    jit_word_t         after;
+
+    ic(0xe8);
+    /* Relative to the end of the 4 displacement bytes written next. */
+    disp = i0 - (_jit->pc.w + 4);
+    ii(disp);
+    after = _jit->pc.w;
+    return (after);
+#else
+    jit_int32_t                tmp;
+    jit_word_t         imm;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    imm = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (imm);
+#endif
+}
+
+/* Emit an indirect jump through register r0: opcode 0xff with a
+ * register-direct modrm byte (mode 0x03) and opcode extension 0x04.
+ * Note that, unlike _callr(), rex() is passed WIDE here. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    rex(0, WIDE, _NOREG, _NOREG, r0);
+    ic(0xff);
+    mrm(0x03, 0x04, r7(r0));
+}
+
+/* Emit an unconditional relative jump to i0 (opcode 0xe9 followed by a
+ * displacement written with ii()).  Returns _jit->pc.w right after the
+ * instruction. */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    ic(0xe9);
+    /* Relative to the end of the 4 displacement bytes written next. */
+    w = i0 - (_jit->pc.w + 4);
+    ii(w);
+    return (_jit->pc.w);
+}
+
+/* Emit a short unconditional jump: opcode 0xeb followed by the byte i0,
+ * which is written as given (it is the displacement itself, not a
+ * target address). */
+static void
+_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
+{
+    ic(0xeb);
+    ic(i0);
+}
+
+/* Emit the function prologue for the function described by
+ * _jitc->function.  In order:
+ *  - reconcile self.aoff with an explicitly defined/assumed frame, and
+ *    emit nothing at all when assume_frame is set;
+ *  - compute function->stack, rounded up to a multiple of 16, plus
+ *    stack_adjust;
+ *  - reserve stack_framesize - REAL_WORDSIZE bytes and store there the
+ *    callee save registers that are set in function->regset, then the
+ *    frame pointer at offset 0;
+ *  - make the frame pointer equal to the stack pointer and reserve
+ *    function->stack bytes;
+ *  - when allocar is used, store self.aoff at aoffoff from the frame
+ *    pointer;
+ *  - on __X64 other than Cygwin/Windows, for varargs functions, store
+ *    the argument registers in the register save area.
+ * The node argument is not referenced in the body. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+       jit_int32_t     frame = -_jitc->function->frame;
+       assert(_jitc->function->self.aoff >= frame);
+       /* With an assumed frame no prologue code is generated. */
+       if (_jitc->function->assume_frame)
+           return;
+       _jitc->function->self.aoff = frame;
+    }
+    /* Round aoff down to a multiple of 16 when allocar is in use. */
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
+#if __X64 && (__CYGWIN__ || _WIN32)
+    _jitc->function->stack = (((/* first 32 bytes must be allocated */
+                               (_jitc->function->self.alen > 32 ?
+                                _jitc->function->self.alen : 32) -
+                               /* align stack at 16 bytes */
+                               _jitc->function->self.aoff) + 15) & -16) +
+       stack_adjust;
+#else
+    _jitc->function->stack = (((_jitc->function->self.alen -
+                              _jitc->function->self.aoff) + 15) & -16) +
+       stack_adjust;
+#endif
+    subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+    /* callee save registers */
+#if __X32
+    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
+       stxi(12, _RSP_REGNO, _RDI_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
+       stxi( 8, _RSP_REGNO, _RSI_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       stxi( 4, _RSP_REGNO, _RBX_REGNO);
+#else
+#  if __CYGWIN__ || _WIN32
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
+       sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
+       sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
+       sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
+       sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
+       sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
+       sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
+       sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
+       sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
+       sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
+       sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
+       stxi(56, _RSP_REGNO, _R15_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
+       stxi(48, _RSP_REGNO, _R14_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
+       stxi(40, _RSP_REGNO, _R13_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
+       stxi(32, _RSP_REGNO, _R12_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
+       stxi(24, _RSP_REGNO, _RSI_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
+       stxi(16, _RSP_REGNO, _RDI_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       stxi( 8, _RSP_REGNO, _RBX_REGNO);
+#  else
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       stxi(40, _RSP_REGNO, _RBX_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
+       stxi(32, _RSP_REGNO, _R12_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
+       stxi(24, _RSP_REGNO, _R13_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
+       stxi(16, _RSP_REGNO, _R14_REGNO);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
+       stxi( 8, _RSP_REGNO, _R15_REGNO);
+#  endif
+#endif
+    /* Save the caller's frame pointer and establish the new one. */
+    stxi(0, _RSP_REGNO, _RBP_REGNO);
+    movr(_RBP_REGNO, _RSP_REGNO);
+
+    /* alloca */
+    subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
+
+#if __X64 && !(__CYGWIN__ || _WIN32)
+    if (_jitc->function->self.call & jit_call_varargs) {
+       jit_word_t      nofp_code;
+
+       /* Save gp registers in the save area, if any is a vararg */
+       for (reg = first_gp_from_offset(_jitc->function->vagp);
+            jit_arg_reg_p(reg); ++reg)
+           stxi(_jitc->function->vaoff + first_gp_offset +
+                reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
+
+       reg = first_fp_from_offset(_jitc->function->vafp);
+       if (jit_arg_f_reg_p(reg)) {
+           /* Skip over if no float registers were passed as argument */
+           /* test %al, %al */
+           ic(0x84);
+           ic(0xc0);
+           /* Short jump with a placeholder target; nofp_code records
+            * the position used to patch it below. */
+           jes(0);
+           nofp_code = _jit->pc.w;
+
+           /* Save fp registers in the save area, if any is a vararg */
+           /* Note that the full 16 byte xmm is not saved, because
+            * lightning only handles float and double, and, while
+            * attempting to provide a va_list compatible pointer as
+            * jit_va_start return, does not guarantee it (on all ports). */
+           for (; jit_arg_f_reg_p(reg); ++reg)
+               sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
+                          reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
+
+           patch_rel_char(nofp_code, _jit->pc.w);
+       }
+    }
+#endif
+}
+
+/* Emit the function epilogue, mirroring _prolog(): nothing is emitted
+ * when assume_frame is set; otherwise the stack pointer is reset from
+ * the frame pointer, the callee save registers set in function->regset
+ * are reloaded from the same offsets _prolog() stored them at, the
+ * frame pointer is reloaded from offset 0, the stack pointer is moved
+ * up by stack_framesize - REAL_WORDSIZE, and a 0xc3 (ret) byte is
+ * written.  The node argument is not referenced in the body. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->assume_frame)
+       return;
+    /* callee save registers */
+    movr(_RSP_REGNO, _RBP_REGNO);
+#if __X32
+    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
+       ldxi(_RDI_REGNO, _RSP_REGNO, 12);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
+       ldxi(_RSI_REGNO, _RSP_REGNO,  8);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       ldxi(_RBX_REGNO, _RSP_REGNO,  4);
+#else
+#  if __CYGWIN__ || _WIN32
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
+       sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
+       sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
+       sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
+       sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
+       sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
+       sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
+       sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
+       sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
+       sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
+    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
+       sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
+       ldxi(_R15_REGNO, _RSP_REGNO, 56);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
+       ldxi(_R14_REGNO, _RSP_REGNO, 48);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
+       ldxi(_R13_REGNO, _RSP_REGNO, 40);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
+       ldxi(_R12_REGNO, _RSP_REGNO, 32);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
+       ldxi(_RSI_REGNO, _RSP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
+       ldxi(_RDI_REGNO, _RSP_REGNO, 16);
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       ldxi(_RBX_REGNO, _RSP_REGNO,  8);
+#  else
+    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
+       ldxi(_RBX_REGNO, _RSP_REGNO, 40);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
+       ldxi(_R12_REGNO, _RSP_REGNO, 32);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
+       ldxi(_R13_REGNO, _RSP_REGNO, 24);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
+       ldxi(_R14_REGNO, _RSP_REGNO, 16);
+    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
+       ldxi(_R15_REGNO, _RSP_REGNO,  8);
+#  endif
+#endif
+    /* Restore the caller's frame pointer and release the save area. */
+    ldxi(_RBP_REGNO, _RSP_REGNO, 0);
+    addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+    /* ret */
+    ic(0xc3);
+}
+
+/* Emit code that initializes a variable argument list and leaves a
+ * pointer to it in r0.  Asserts that the current function was declared
+ * with jit_call_varargs.
+ * On __X32 and Cygwin/Windows, r0 is simply the frame pointer plus
+ * function->self.size.  Otherwise r0 points at function->vaoff from the
+ * frame pointer and the gpoff, fpoff, over and save fields of the
+ * jit_va_list_t found there are filled in. */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+#if __X32 || __CYGWIN__ || _WIN32
+    assert(_jitc->function->self.call & jit_call_varargs);
+    addi(r0, _RBP_REGNO, _jitc->function->self.size);
+#else
+    jit_int32_t                reg;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Return jit_va_list_t in the register argument */
+    addi(r0, _RBP_REGNO, _jitc->function->vaoff);
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* Initialize gp offset in the save area. */
+    movi(rn(reg), _jitc->function->vagp);
+    stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+
+    /* Initialize fp offset in the save area. */
+    movi(rn(reg), _jitc->function->vafp);
+    stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+
+    /* Initialize overflow pointer to the first stack argument. */
+    addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+    /* Initialize register save area pointer. */
+    addi(rn(reg), r0, first_gp_offset);
+    stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
+
+    jit_unget_reg(reg);
+#endif
+}
+
+/* Emit code that fetches the next integer variable argument into r0,
+ * where r1 holds the value produced by _vastart().  Asserts that the
+ * current function was declared with jit_call_varargs.
+ * On __X32 and Cygwin/Windows the argument is loaded from r1 and r1 is
+ * advanced by va_gp_increment.  Otherwise the generated code checks the
+ * gp offset against va_gp_max_offset at run time and reads either from
+ * the register save area or from the overflow area, updating the
+ * corresponding field of the jit_va_list_t. */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if __X32 || __CYGWIN__ || _WIN32
+    assert(_jitc->function->self.call & jit_call_varargs);
+    ldr(r0, r1);
+    addi(r1, r1, va_gp_increment);
+#else
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the gp offset in save area in the first temporary. */
+    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    icmpi(rn(rg0), va_gp_max_offset);
+    /* Short jump emitted with a placeholder target; ge_code records the
+     * position used to patch it further down. */
+    jaes(0);
+    ge_code = _jit->pc.w;
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Load the vararg argument in the first argument. */
+    ldxr(r0, rn(rg1), rn(rg0));
+
+    /* Update the gp offset. */
+    addi(rn(rg0), rn(rg0), 8);
+    stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg(rg1);
+
+    /* Jump over overflow code. */
+    jmpsi(0);
+    lt_code = _jit->pc.w;
+
+    /* Where to land if argument is in overflow area. */
+    patch_rel_char(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    ldr(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), va_gp_increment);
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_rel_char(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+#endif
+}
+
+/* Emit code that fetches the next double variable argument into r0,
+ * where r1 holds the value produced by _vastart().  Asserts that the
+ * current function was declared with jit_call_varargs.
+ * The x87 boolean argument selects the destination register kind: when
+ * true the result is loaded into an x87 register, otherwise into an sse
+ * register.
+ * On __X32 and Cygwin/Windows the argument is loaded from r1 and r1 is
+ * advanced by 8.  Otherwise the generated code checks the fp offset
+ * against va_fp_max_offset at run time and reads either from the
+ * register save area or from the overflow area, updating the
+ * corresponding field of the jit_va_list_t. */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
+{
+#if __X32 || __CYGWIN__ || _WIN32
+    assert(_jitc->function->self.call & jit_call_varargs);
+    if (x87)
+       x87_ldr_d(r0, r1);
+    else
+       sse_ldr_d(r0, r1);
+    addi(r1, r1, 8);
+#else
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the fp offset in save area in the first temporary. */
+    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    icmpi(rn(rg0), va_fp_max_offset);
+    /* Short jump emitted with a placeholder target; ge_code records the
+     * position used to patch it further down. */
+    jaes(0);
+    ge_code = _jit->pc.w;
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Load the vararg argument in the first argument. */
+    if (x87)
+       x87_ldxr_d(r0, rn(rg1), rn(rg0));
+    else
+       sse_ldxr_d(r0, rn(rg1), rn(rg0));
+
+    /* Update the fp offset. */
+    addi(rn(rg0), rn(rg0), va_fp_increment);
+    stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg(rg1);
+
+    /* Jump over overflow code. */
+    jmpsi(0);
+    lt_code = _jit->pc.w;
+
+    /* Where to land if argument is in overflow area. */
+    patch_rel_char(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    if (x87)
+       x87_ldr_d(r0, rn(rg0));
+    else
+       sse_ldr_d(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), 8);
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_rel_char(lt_code, _jit->pc.w);
+
+    jit_unget_reg(rg0);
+#endif
+}
+
+/* Patch the instruction at `instr' so that it refers to `label'.
+ * jit_code_movi nodes, and with __X64 also jit_code_calli nodes, are
+ * patched with patch_abs(); every other node code is patched with
+ * patch_rel(). */
+static void
+_patch_at(jit_state_t *_jit, jit_node_t *node,
+         jit_word_t instr, jit_word_t label)
+{
+    int                        absolute;
+
+    absolute = node->code == jit_code_movi;
+#  if __X64
+    if (node->code == jit_code_calli)
+        absolute = 1;
+#  endif
+    if (absolute) {
+        patch_abs(instr, label);
+    }
+    else {
+        patch_rel(instr, label);
+    }
+}
+#endif
diff --git a/deps/lightning/lib/jit_x86-sse.c b/deps/lightning/lib/jit_x86-sse.c
new file mode 100644 (file)
index 0000000..d09bda9
--- /dev/null
@@ -0,0 +1,1569 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/*
+ * sse_address_p(i0): predicate on an immediate address i0.
+ *   - __X32:    always 1.
+ *   - __X64_32: true when i0, converted to jit_word_t, is non-negative.
+ *   - otherwise: whatever can_sign_extend_int_p(i0) returns.
+ * NOTE(review): presumably tells whether i0 can be used directly as the
+ * displacement of an SSE memory operand instead of being loaded in a
+ * temporary register first -- confirm at the sse_ldi_* / sse_sti_*
+ * implementations.
+ */
+#  if __X32
+#    define sse_address_p(i0)          1
+#  else
+#    if __X64_32
+#      define sse_address_p(i0)                ((jit_word_t)(i0) >= 0)
+#    else
+#      define sse_address_p(i0)                can_sign_extend_int_p(i0)
+#    endif
+#  endif
+/*
+ * Numeric indices of registers XMM6 through XMM15.
+ * NOTE(review): XMM0-XMM5 get no such constant here; presumably only
+ * these registers need to be referred to by number -- confirm at the
+ * use sites.
+ */
+#  define _XMM6_REGNO                  6
+#  define _XMM7_REGNO                  7
+#  define _XMM8_REGNO                  8
+#  define _XMM9_REGNO                  9
+#  define _XMM10_REGNO                 10
+#  define _XMM11_REGNO                 11
+#  define _XMM12_REGNO                 12
+#  define _XMM13_REGNO                 13
+#  define _XMM14_REGNO                 14
+#  define _XMM15_REGNO                 15
+/*
+ * SSE opcode byte constants.  The instruction macros below pass them as
+ * the opcode argument of the emitters (sser, ssexr, sselxr, ssexrx),
+ * which write that byte immediately after the 0x0f escape byte.
+ */
+#define X86_SSE_MOV                    0x10
+#define X86_SSE_MOV1                   0x11
+#define X86_SSE_MOVLP                  0x12
+#define X86_SSE_MOVHP                  0x16
+#define X86_SSE_MOVA                   0x28
+#define X86_SSE_CVTIS                  0x2a
+#define X86_SSE_CVTTSI                 0x2c
+#define X86_SSE_CVTSI                  0x2d
+#define X86_SSE_UCOMI                  0x2e
+#define X86_SSE_COMI                   0x2f
+#define X86_SSE_ROUND                  0x3a
+#define X86_SSE_SQRT                   0x51
+#define X86_SSE_RSQRT                  0x52
+#define X86_SSE_RCP                    0x53
+#define X86_SSE_AND                    0x54
+#define X86_SSE_ANDN                   0x55
+#define X86_SSE_OR                     0x56
+#define X86_SSE_XOR                    0x57
+#define X86_SSE_ADD                    0x58
+#define X86_SSE_MUL                    0x59
+#define X86_SSE_CVTSD                  0x5a
+#define X86_SSE_CVTDT                  0x5b
+#define X86_SSE_SUB                    0x5c
+#define X86_SSE_MIN                    0x5d
+#define X86_SSE_DIV                    0x5e
+#define X86_SSE_MAX                    0x5f
+#define X86_SSE_X2G                    0x6e
+#define X86_SSE_EQB                    0x74
+#define X86_SSE_EQW                    0x75
+#define X86_SSE_EQD                    0x76
+#define X86_SSE_G2X                    0x7e
+#define X86_SSE_MOV2                   0xd6
+#  define sser(c,r0,r1)                        _sser(_jit,c,r0,r1)
+static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ssexr(p,c,r0,r1)             _ssexr(_jit,p,c,r0,r1)
+static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ssexi(c,r0,m,i)              _ssexi(_jit,c,r0,m,i)
+static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addssr(r0, r1)               ssexr(0xf3, X86_SSE_ADD, r0, r1)
+#  define addsdr(r0, r1)               ssexr(0xf2, X86_SSE_ADD, r0, r1)
+#  define subssr(r0, r1)               ssexr(0xf3, X86_SSE_SUB, r0, r1)
+#  define subsdr(r0, r1)               ssexr(0xf2, X86_SSE_SUB, r0, r1)
+#  define mulssr(r0, r1)               ssexr(0xf3, X86_SSE_MUL, r0, r1)
+#  define mulsdr(r0, r1)               ssexr(0xf2, X86_SSE_MUL, r0, r1)
+#  define divssr(r0, r1)               ssexr(0xf3, X86_SSE_DIV, r0, r1)
+#  define divsdr(r0, r1)               ssexr(0xf2, X86_SSE_DIV, r0, r1)
+#  define andpsr(r0, r1)               sser(       X86_SSE_AND, r0, r1)
+#  define andpdr(r0, r1)               ssexr(0x66, X86_SSE_AND, r0, r1)
+#  define sse_truncr_f_i(r0, r1)       ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
+#  define sse_truncr_d_i(r0, r1)       ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
+#  if __X64
+#    define sse_truncr_f_l(r0, r1)     sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
+#    define sse_truncr_d_l(r0, r1)     sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
+#    define sse_extr_f(r0, r1)         sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
+#    define sse_extr_d(r0, r1)         sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
+#  else
+#    define sse_extr_f(r0, r1)         ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
+#    define sse_extr_d(r0, r1)         ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
+#  endif
+#  define sse_extr_f_d(r0, r1)         ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
+#  define sse_extr_d_f(r0, r1)         ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
+#  define ucomissr(r0,r1)              sser(X86_SSE_UCOMI,r0,r1)
+#  define ucomisdr(r0,r1)              ssexr(0x66,X86_SSE_UCOMI,r0,r1)
+#  define xorpsr(r0,r1)                        sser(X86_SSE_XOR,r0,r1)
+#  define xorpdr(r0,r1)                        ssexr(0x66,X86_SSE_XOR,r0,r1)
+#  define movdlxr(r0,r1)               ssexr(0x66, X86_SSE_X2G,r0,r1)
+#  define pcmpeqlr(r0, r1)             ssexr(0x66, X86_SSE_EQD, r0, r1)
+#  define psrl(r0, i0)                 ssexi(0x72, r0, 0x02, i0)
+#  define psrq(r0, i0)                 ssexi(0x73, r0, 0x02, i0)
+#  define psll(r0, i0)                 ssexi(0x72, r0, 0x06, i0)
+#  define pslq(r0, i0)                 ssexi(0x73, r0, 0x06, i0)
+#  define movdqxr(r0,r1)               sselxr(0x66,X86_SSE_X2G,r0,r1)
+/*
+ * sselxr(p,c,r0,r1): variant of ssexr used by the *_l conversions,
+ * sse_extr_* on __X64, and movdqxr.  Only builds with __X64 set and
+ * __X64_32 clear route it to the dedicated _sselxr() emitter; every
+ * other build makes it a plain alias of ssexr().
+ */
+#  if __X64 && !__X64_32
+#    define sselxr(p,c,r0,r1)          _sselxr(_jit,p,c,r0,r1)
+static void
+_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  else
+#    define sselxr(p,c,r0,r1)          ssexr(p,c,r0,r1)
+#  endif
+#  define ssexrx(p,c,md,rb,ri,ms,rd)   _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
+#  define movssmr(md,rb,ri,ms,rd)      ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
+#  define movsdmr(md,rb,ri,ms,rd)      ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
+#  define movssrm(rs,md,mb,mi,ms)      ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
+#  define movsdrm(rs,md,mb,mi,ms)      ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
+static void
+_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
+       jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_addr_f(r0, r1, r2)       _sse_addr_f(_jit, r0, r1, r2)
+static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_addi_f(r0, r1, i0)       _sse_addi_f(_jit, r0, r1, i0)
+static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_addr_d(r0, r1, r2)       _sse_addr_d(_jit, r0, r1, r2)
+static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_addi_d(r0, r1, i0)       _sse_addi_d(_jit, r0, r1, i0)
+static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_subr_f(r0, r1, r2)       _sse_subr_f(_jit, r0, r1, r2)
+static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_subi_f(r0, r1, i0)       _sse_subi_f(_jit, r0, r1, i0)
+static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_subr_d(r0, r1, r2)       _sse_subr_d(_jit, r0, r1, r2)
+static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_subi_d(r0, r1, i0)       _sse_subi_d(_jit, r0, r1, i0)
+static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_rsbr_f(r0, r1, r2)       sse_subr_f(r0, r2, r1)
+#  define sse_rsbi_f(r0, r1, i0)       _sse_rsbi_f(_jit, r0, r1, i0)
+static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_rsbr_d(r0, r1, r2)       sse_subr_d(r0, r2, r1)
+#  define sse_rsbi_d(r0, r1, i0)       _sse_rsbi_d(_jit, r0, r1, i0)
+static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_mulr_f(r0, r1, r2)       _sse_mulr_f(_jit, r0, r1, r2)
+static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_muli_f(r0, r1, i0)       _sse_muli_f(_jit, r0, r1, i0)
+static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_mulr_d(r0, r1, r2)       _sse_mulr_d(_jit, r0, r1, r2)
+static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_muli_d(r0, r1, i0)       _sse_muli_d(_jit, r0, r1, i0)
+static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_divr_f(r0, r1, r2)       _sse_divr_f(_jit, r0, r1, r2)
+static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_divi_f(r0, r1, i0)       _sse_divi_f(_jit, r0, r1, i0)
+static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_divr_d(r0, r1, r2)       _sse_divr_d(_jit, r0, r1, r2)
+static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_divi_d(r0, r1, i0)       _sse_divi_d(_jit, r0, r1, i0)
+static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_absr_f(r0, r1)           _sse_absr_f(_jit, r0, r1)
+static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sse_absr_d(r0, r1)           _sse_absr_d(_jit, r0, r1)
+static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sse_negr_f(r0, r1)           _sse_negr_f(_jit, r0, r1)
+static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sse_negr_d(r0, r1)           _sse_negr_d(_jit, r0, r1)
+static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sse_sqrtr_f(r0, r1)          ssexr(0xf3, X86_SSE_SQRT, r0, r1)
+#  define sse_sqrtr_d(r0, r1)          ssexr(0xf2, X86_SSE_SQRT, r0, r1)
+#  define ssecmpf(code, r0, r1, r2)    _ssecmp(_jit, 0, code, r0, r1, r2)
+#  define ssecmpd(code, r0, r1, r2)    _ssecmp(_jit, 1, code, r0, r1, r2)
+static void
+_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
+       jit_int32_t, jit_int32_t, jit_int32_t);
+#define sse_movr_f(r0,r1)              _sse_movr_f(_jit,r0,r1)
+static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#define sse_movi_f(r0,i0)              _sse_movi_f(_jit,r0,i0)
+static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+#  define sse_lti_f(r0, r1, i0)                _sse_lti_f(_jit, r0, r1, i0)
+static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ltr_f(r0, r1, r2)                ssecmpf(X86_CC_A, r0, r1, r2)
+#  define sse_lei_f(r0, r1, i0)                _sse_lei_f(_jit, r0, r1, i0)
+static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ler_f(r0, r1, r2)                ssecmpf(X86_CC_AE, r0, r1, r2)
+#  define sse_eqi_f(r0, r1, i0)                _sse_eqi_f(_jit, r0, r1, i0)
+static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_eqr_f(r0, r1, r2)                _sse_eqr_f(_jit, r0, r1, r2)
+static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_gei_f(r0, r1, i0)                _sse_gei_f(_jit, r0, r1, i0)
+static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ger_f(r0, r1, r2)                ssecmpf(X86_CC_AE, r0, r2, r1)
+#  define sse_gti_f(r0, r1, i0)                _sse_gti_f(_jit, r0, r1, i0)
+static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_gtr_f(r0, r1, r2)                ssecmpf(X86_CC_A, r0, r2, r1)
+#  define sse_nei_f(r0, r1, i0)                _sse_nei_f(_jit, r0, r1, i0)
+static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ner_f(r0, r1, r2)                _sse_ner_f(_jit, r0, r1, r2)
+static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_unlti_f(r0, r1, i0)      _sse_unlti_f(_jit, r0, r1, i0)
+static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_unltr_f(r0, r1, r2)      ssecmpf(X86_CC_NAE, r0, r2, r1)
+#  define sse_unlei_f(r0, r1, i0)      _sse_unlei_f(_jit, r0, r1, i0)
+static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_unler_f(r0, r1, r2)      _sse_unler_f(_jit, r0, r1, r2)
+#  define sse_uneqi_f(r0, r1, i0)      _sse_uneqi_f(_jit, r0, r1, i0)
+static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_uneqr_f(r0, r1, r2)      _sse_uneqr_f(_jit, r0, r1, r2)
+static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ungei_f(r0, r1, i0)      _sse_ungei_f(_jit, r0, r1, i0)
+static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_unger_f(r0, r1, r2)      _sse_unger_f(_jit, r0, r1, r2)
+static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ungti_f(r0, r1, i0)      _sse_ungti_f(_jit, r0, r1, i0)
+static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ungtr_f(r0, r1, r2)      ssecmpf(X86_CC_NAE, r0, r1, r2)
+#  define sse_ltgti_f(r0, r1, i0)      _sse_ltgti_f(_jit, r0, r1, i0)
+static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ltgtr_f(r0, r1, r2)      _sse_ltgtr_f(_jit, r0, r1, r2)
+static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ordi_f(r0, r1, i0)       _sse_ordi_f(_jit, r0, r1, i0)
+static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_ordr_f(r0, r1, r2)       ssecmpf(X86_CC_NP, r0, r2, r1)
+#  define sse_unordi_f(r0, r1, i0)     _sse_unordi_f(_jit, r0, r1, i0)
+static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define sse_unordr_f(r0, r1, r2)     ssecmpf(X86_CC_P, r0, r2, r1)
+#  define sse_ldr_f(r0, r1)            movssmr(0, r1, _NOREG, _SCL1, r0)
+#  define sse_ldi_f(r0, i0)            _sse_ldi_f(_jit, r0, i0)
+static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
+#  define sse_ldxr_f(r0, r1, r2)       _sse_ldxr_f(_jit, r0, r1, r2)
+static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ldxi_f(r0, r1, i0)       _sse_ldxi_f(_jit, r0, r1, i0)
+static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define sse_str_f(r0, r1)            movssrm(r1, 0, r0, _NOREG, _SCL1)
+#  define sse_sti_f(i0, r0)            _sse_sti_f(_jit, i0, r0)
+static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
+#  define sse_stxr_f(r0, r1, r2)       _sse_stxr_f(_jit, r0, r1, r2)
+static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_stxi_f(i0, r0, r1)       _sse_stxi_f(_jit, i0, r0, r1)
+static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bltr_f(i0, r0, r1)       _sse_bltr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_blti_f(i0, r0, i1)       _sse_blti_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bler_f(i0, r0, r1)       _sse_bler_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_blei_f(i0, r0, i1)       _sse_blei_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_beqr_f(i0, r0, r1)       _sse_beqr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_beqi_f(i0, r0, i1)       _sse_beqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bger_f(i0, r0, r1)       _sse_bger_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bgei_f(i0, r0, i1)       _sse_bgei_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bgtr_f(i0, r0, r1)       _sse_bgtr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bgti_f(i0, r0, i1)       _sse_bgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bner_f(i0, r0, r1)       _sse_bner_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bnei_f(i0, r0, i1)       _sse_bnei_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bunltr_f(i0, r0, r1)     _sse_bunltr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunlti_f(i0, r0, i1)     _sse_bunlti_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bunler_f(i0, r0, r1)     _sse_bunler_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunlei_f(i0, r0, i1)     _sse_bunlei_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_buneqr_f(i0, r0, r1)     _sse_buneqr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_buneqi_f(i0, r0, i1)     _sse_buneqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bunger_f(i0, r0, r1)     _sse_bunger_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bungei_f(i0, r0, i1)     _sse_bungei_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bungtr_f(i0, r0, r1)     _sse_bungtr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bungti_f(i0, r0, i1)     _sse_bungti_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bltgtr_f(i0, r0, r1)     _sse_bltgtr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bltgti_f(i0, r0, i1)     _sse_bltgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bordr_f(i0, r0, r1)      _sse_bordr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bordi_f(i0, r0, i1)      _sse_bordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define sse_bunordr_f(i0, r0, r1)    _sse_bunordr_f(_jit, i0, r0, r1)
+static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunordi_f(i0, r0, i1)    _sse_bunordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#define sse_movr_d(r0,r1)              _sse_movr_d(_jit,r0,r1)
+static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#define sse_movi_d(r0,i0)              _sse_movi_d(_jit,r0,i0)
+static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+#  define sse_ltr_d(r0, r1, r2)                ssecmpd(X86_CC_A, r0, r1, r2)
+#  define sse_lti_d(r0, r1, i0)                _sse_lti_d(_jit, r0, r1, i0)
+static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ler_d(r0, r1, r2)                ssecmpd(X86_CC_AE, r0, r1, r2)
+#  define sse_lei_d(r0, r1, i0)                _sse_lei_d(_jit, r0, r1, i0)
+static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_eqr_d(r0, r1, r2)                _sse_eqr_d(_jit, r0, r1, r2)
+static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_eqi_d(r0, r1, i0)                _sse_eqi_d(_jit, r0, r1, i0)
+static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ger_d(r0, r1, r2)                ssecmpd(X86_CC_AE, r0, r2, r1)
+#  define sse_gei_d(r0, r1, i0)                _sse_gei_d(_jit, r0, r1, i0)
+static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_gtr_d(r0, r1, r2)                ssecmpd(X86_CC_A, r0, r2, r1)
+#  define sse_gti_d(r0, r1, i0)                _sse_gti_d(_jit, r0, r1, i0)
+static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ner_d(r0, r1, r2)                _sse_ner_d(_jit, r0, r1, r2)
+static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_nei_d(r0, r1, i0)                _sse_nei_d(_jit, r0, r1, i0)
+static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_unltr_d(r0, r1, r2)      ssecmpd(X86_CC_NAE, r0, r2, r1)
+#  define sse_unlti_d(r0, r1, i0)      _sse_unlti_d(_jit, r0, r1, i0)
+static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_unler_d(r0, r1, r2)      _sse_unler_d(_jit, r0, r1, r2)
+static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_unlei_d(r0, r1, i0)      _sse_unlei_d(_jit, r0, r1, i0)
+static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_uneqr_d(r0, r1, r2)      _sse_uneqr_d(_jit, r0, r1, r2)
+static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_uneqi_d(r0, r1, i0)      _sse_uneqi_d(_jit, r0, r1, i0)
+static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_unger_d(r0, r1, r2)      _sse_unger_d(_jit, r0, r1, r2)
+static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ungei_d(r0, r1, i0)      _sse_ungei_d(_jit, r0, r1, i0)
+static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ungtr_d(r0, r1, r2)      ssecmpd(X86_CC_NAE, r0, r1, r2)
+#  define sse_ungti_d(r0, r1, i0)      _sse_ungti_d(_jit, r0, r1, i0)
+static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ltgtr_d(r0, r1, r2)      _sse_ltgtr_d(_jit, r0, r1, r2)
+static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ltgti_d(r0, r1, i0)      _sse_ltgti_d(_jit, r0, r1, i0)
+static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ordr_d(r0, r1, r2)       ssecmpd(X86_CC_NP, r0, r2, r1)
+#  define sse_ordi_d(r0, r1, i0)       _sse_ordi_d(_jit, r0, r1, i0)
+static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_unordr_d(r0, r1, r2)     ssecmpd(X86_CC_P, r0, r2, r1)
+#  define sse_unordi_d(r0, r1, i0)     _sse_unordi_d(_jit, r0, r1, i0)
+static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define sse_ldr_d(r0, r1)            movsdmr(0, r1, _NOREG, _SCL1, r0)
+#  define sse_ldi_d(r0, i0)            _sse_ldi_d(_jit, r0, i0)
+static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
+#  define sse_ldxr_d(r0, r1, r2)       _sse_ldxr_d(_jit, r0, r1, r2)
+static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define sse_ldxi_d(r0, r1, i0)       _sse_ldxi_d(_jit, r0, r1, i0)
+static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define sse_bltr_d(i0, r0, r1)       _sse_bltr_d(_jit, i0, r0, r1)
+#  define sse_str_d(r0, r1)            movsdrm(r1, 0, r0, _NOREG, _SCL1)
+#  define sse_sti_d(i0, r0)            _sse_sti_d(_jit, i0, r0)
+static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
+#  define sse_stxr_d(r0, r1, r2)       _sse_stxr_d(_jit, r0, r1, r2)
+static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define sse_stxi_d(i0, r0, r1)       _sse_stxi_d(_jit, i0, r0, r1)
+static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_blti_d(i0, r0, i1)       _sse_blti_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bler_d(i0, r0, r1)       _sse_bler_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_blei_d(i0, r0, i1)       _sse_blei_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_beqr_d(i0, r0, r1)       _sse_beqr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_beqi_d(i0, r0, i1)       _sse_beqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bger_d(i0, r0, r1)       _sse_bger_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bgei_d(i0, r0, i1)       _sse_bgei_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bgtr_d(i0, r0, r1)       _sse_bgtr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bgti_d(i0, r0, i1)       _sse_bgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bner_d(i0, r0, r1)       _sse_bner_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bnei_d(i0, r0, i1)       _sse_bnei_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bunltr_d(i0, r0, r1)     _sse_bunltr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunlti_d(i0, r0, i1)     _sse_bunlti_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bunler_d(i0, r0, r1)     _sse_bunler_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunlei_d(i0, r0, i1)     _sse_bunlei_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_buneqr_d(i0, r0, r1)     _sse_buneqr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_buneqi_d(i0, r0, i1)     _sse_buneqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bunger_d(i0, r0, r1)     _sse_bunger_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bungei_d(i0, r0, i1)     _sse_bungei_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bungtr_d(i0, r0, r1)     _sse_bungtr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bungti_d(i0, r0, i1)     _sse_bungti_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bltgtr_d(i0, r0, r1)     _sse_bltgtr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bltgti_d(i0, r0, i1)     _sse_bltgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bordr_d(i0, r0, r1)      _sse_bordr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bordi_d(i0, r0, i1)      _sse_bordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define sse_bunordr_d(i0, r0, r1)    _sse_bunordr_d(_jit, i0, r0, r1)
+static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define sse_bunordi_d(i0, r0, i1)    _sse_bunordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#endif
+
+#if CODE
+/*
+ * fpr_opi(name, type, size): generate _sse_<name>i_<type>(), the
+ * "register op immediate" form of a three-operand SSE operation.
+ *
+ * The generated function takes the immediate by pointer
+ * (jit_float<size>_t *i0), loads it in a scratch register with
+ * sse_movi_<type>(), runs the register form sse_<name>r_<type>(r0, r1,
+ * scratch) and then releases the scratch register.  The assert checks
+ * that the register handed out by jit_get_reg() satisfies
+ * jit_sse_reg_p().
+ */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_sse_##name##i_##type(jit_state_t *_jit,                               \
+                     jit_int32_t r0, jit_int32_t r1,                   \
+                     jit_float##size##_t *i0)                          \
+{                                                                      \
+    jit_int32_t                scratch;                                        \
+                                                                       \
+    scratch = jit_get_reg(jit_class_fpr | jit_class_xpr);              \
+    assert(jit_sse_reg_p(scratch));                                    \
+    sse_movi_##type(rn(scratch), i0);                                  \
+    sse_##name##r_##type(r0, r1, rn(scratch));                         \
+    jit_unget_reg(scratch);                                            \
+}
+/*
+ * fpr_bopi(name, type, size): generate _sse_b<name>i_<type>(), the
+ * "compare register with immediate and branch" form.
+ *
+ * The generated function loads the immediate pointed to by i1 in a
+ * scratch register (requested with jit_class_nospill in addition to the
+ * fpr/xpr classes), forwards to the register form
+ * sse_b<name>r_<type>(i0, r0, scratch), releases the scratch register
+ * and returns whatever the register form returned.
+ */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_sse_b##name##i_##type(jit_state_t *_jit,                              \
+                      jit_word_t i0, jit_int32_t r0,                   \
+                      jit_float##size##_t *i1)                         \
+{                                                                      \
+    jit_int32_t                scratch;                                        \
+    jit_word_t         result;                                         \
+                                                                       \
+    scratch = jit_get_reg(jit_class_fpr | jit_class_xpr |              \
+                         jit_class_nospill);                           \
+    assert(jit_sse_reg_p(scratch));                                    \
+    sse_movi_##type(rn(scratch), i1);                                  \
+    result = sse_b##name##r_##type(i0, r0, rn(scratch));               \
+    jit_unget_reg(scratch);                                            \
+    return (result);                                                   \
+}
+/*
+ * Precision-specific shorthands for fpr_opi/fpr_bopi:
+ *   fopi/fbopi -> type suffix "f", immediate type jit_float32_t
+ *   dopi/dbopi -> type suffix "d", immediate type jit_float64_t
+ */
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+/*
+ * Emit a register-to-register SSE instruction that has no mandatory
+ * prefix byte (used by andpsr, ucomissr and xorpsr).
+ *
+ * Emission order: rex(0, 0, r0, 0, r1), the 0x0f escape byte, the
+ * opcode byte `c`, then mrm(0x03, r7(r0), r7(r1)).
+ * NOTE(review): rex/mrm/r7 are defined elsewhere; presumably rex emits
+ * a REX prefix only when one is needed and mrm builds a register-direct
+ * ModRM byte with r0 in the reg field and r1 in the r/m field --
+ * confirm.
+ */
+static void
+_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
+{
+    rex(0, 0, r0, 0, r1);
+    ic(0x0f);
+    ic(c);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+/*
+ * Same as _sser() but with a leading prefix byte `p`, written before
+ * the rex() call.  The instruction macros in this file pass 0x66, 0xf2
+ * or 0xf3 as `p`.
+ *
+ * Emission order: p, rex(0, 0, r0, 0, r1), 0x0f, opcode byte `c`,
+ * mrm(0x03, r7(r0), r7(r1)).
+ */
+static void
+_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
+       jit_int32_t r0, jit_int32_t r1)
+{
+    ic(p);
+    rex(0, 0, r0, 0, r1);
+    ic(0x0f);
+    ic(c);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+/*
+ * Emit a 0x66-prefixed SSE instruction taking one register and an
+ * immediate (used by psrl, psrq, psll and pslq).
+ *
+ *   c   opcode byte written after 0x0f (callers pass 0x72 or 0x73)
+ *   r0  the register operand, placed last in the mrm() call
+ *   m   value placed where the two-register forms put their first
+ *       register (callers pass 0x02 or 0x06)
+ *   i   immediate, written last with ic()
+ * NOTE(review): `m` presumably is the opcode-extension digit in the
+ * ModRM reg field, and ic() presumably writes a single byte so only
+ * the low 8 bits of `i` are kept -- confirm.
+ */
+static void
+_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
+       jit_int32_t m, jit_int32_t i)
+{
+    ic(0x66);
+    rex(0, 0, 0, 0, r0);
+    ic(0x0f);
+    ic(c);
+    mrm(0x03, r7(m), r7(r0));
+    ic(i);
+}
+
+/*
+ * Variant of _ssexr() that passes 1 instead of 0 as the second
+ * argument of rex(); everything else is emitted in the same order:
+ * prefix byte `p`, rex(0, 1, r0, 0, r1), 0x0f, opcode byte `c`,
+ * mrm(0x03, r7(r0), r7(r1)).
+ * NOTE(review): the second rex() argument presumably selects a 64-bit
+ * operand size (REX.W) -- confirm against the rex() definition.
+ *
+ * The guard matches the one around the prototype and the sselxr()
+ * macro in the PROTO section: when __X64_32 is set sselxr() is an alias
+ * of ssexr(), so this function would be compiled without a prototype
+ * and never referenced.
+ */
+#if __X64 && !__X64_32
+static void
+_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
+       jit_int32_t r0, jit_int32_t r1)
+{
+    ic(p);
+    rex(0, 1, r0, 0, r1);
+    ic(0x0f);
+    ic(c);
+    mrm(0x03, r7(r0), r7(r1));
+}
+#endif
+
+/*
+ * Emit a prefixed SSE instruction whose second operand is described by
+ * (md, rb, ri, ms) rather than by a register; this backs the
+ * movssmr/movsdmr loads and the movssrm/movsdrm stores.
+ *
+ *   px    prefix byte, written first (callers pass 0xf3 or 0xf2)
+ *   code  opcode byte, written after 0x0f
+ *   rd    the SSE register operand
+ *   md, rb, ri, ms  forwarded unchanged to rx(); sse_ldr_* and
+ *         sse_str_* pass md = 0, ri = _NOREG and ms = _SCL1
+ * NOTE(review): by their names these presumably are displacement, base
+ * register, index register and scale of a memory operand -- confirm
+ * against the rx() definition.
+ */
+static void
+_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
+       jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
+{
+    ic(px);
+    rex(0, 0, rd, ri, rb);
+    ic(0x0f);
+    ic(code);
+    rx(rd, md, rb, ri, ms);
+}
+
+/*
+ * Single precision r0 = r1 + r2.
+ *
+ * addssr() adds its second operand into its first, so r0 must already
+ * hold one of the two addends when the add is emitted.
+ */
+static void
+_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                addend = r2;
+
+    if (r0 != r1) {
+       if (r0 == r2) {
+           /* r0 already holds r2: add r1 to it instead. */
+           addend = r1;
+       }
+       else {
+           /* r0 is unrelated to both sources: seed it with r1. */
+           sse_movr_f(r0, r1);
+       }
+    }
+    addssr(r0, addend);
+}
+
+/* Instantiate _sse_addi_f(): the float-immediate form of the add. */
+fopi(add)
+
+static void /* Emit double r0 = r1 + r2 using the two-operand addsdr. */
+_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 == r1)
+       addsdr(r0, r2); /* r0 already holds r1 */
+    else if (r0 == r2)
+       addsdr(r0, r1); /* r0 already holds r2; operand order does not matter for add */
+    else {
+       sse_movr_d(r0, r1); /* copy first, then accumulate */
+       addsdr(r0, r2);
+    }
+}
+
+dopi(add) /* expands to fpr_opi(add, d, 64) */
+
+static void /* Emit float r0 = r1 - r2 using the two-operand subssr. */
+_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r1)
+       subssr(r0, r2);
+    else if (r0 == r2) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* scratch register */
+       sse_movr_f(rn(reg), r0); /* save r2's value before r0 is overwritten */
+       sse_movr_f(r0, r1);
+       subssr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       sse_movr_f(r0, r1);
+       subssr(r0, r2);
+    }
+}
+
+fopi(sub) /* expands to fpr_opi(sub, f, 32) */
+
+static void /* Emit double r0 = r1 - r2 using the two-operand subsdr. */
+_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r1)
+       subsdr(r0, r2);
+    else if (r0 == r2) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* scratch register */
+       sse_movr_d(rn(reg), r0); /* save r2's value before r0 is overwritten */
+       sse_movr_d(r0, r1);
+       subsdr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       sse_movr_d(r0, r1);
+       subsdr(r0, r2);
+    }
+}
+
+dopi(sub) /* expands to fpr_opi(sub, d, 64) */
+
+fopi(rsb) /* expands to fpr_opi(rsb, f, 32); the register form it relies on is not defined in this section */
+
+dopi(rsb) /* expands to fpr_opi(rsb, d, 64) */
+
+static void /* Emit float r0 = r1 * r2 using the two-operand mulssr. */
+_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 == r1)
+       mulssr(r0, r2); /* r0 already holds r1 */
+    else if (r0 == r2)
+       mulssr(r0, r1); /* r0 already holds r2; operand order does not matter for multiply */
+    else {
+       sse_movr_f(r0, r1); /* copy first, then multiply in place */
+       mulssr(r0, r2);
+    }
+}
+
+fopi(mul) /* expands to fpr_opi(mul, f, 32) */
+
+static void /* Emit double r0 = r1 * r2 using the two-operand mulsdr. */
+_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 == r1)
+       mulsdr(r0, r2); /* r0 already holds r1 */
+    else if (r0 == r2)
+       mulsdr(r0, r1); /* r0 already holds r2; operand order does not matter for multiply */
+    else {
+       sse_movr_d(r0, r1); /* copy first, then multiply in place */
+       mulsdr(r0, r2);
+    }
+}
+
+dopi(mul) /* expands to fpr_opi(mul, d, 64) */
+
+static void /* Emit float r0 = r1 / r2 using the two-operand divssr. */
+_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r1)
+       divssr(r0, r2);
+    else if (r0 == r2) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* scratch register */
+       sse_movr_f(rn(reg), r0); /* save the divisor before r0 is overwritten */
+       sse_movr_f(r0, r1);
+       divssr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       sse_movr_f(r0, r1);
+       divssr(r0, r2);
+    }
+}
+
+fopi(div) /* expands to fpr_opi(div, f, 32) */
+
+static void /* Emit double r0 = r1 / r2 using the two-operand divsdr. */
+_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r1)
+       divsdr(r0, r2);
+    else if (r0 == r2) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* scratch register */
+       sse_movr_d(rn(reg), r0); /* save the divisor before r0 is overwritten */
+       sse_movr_d(r0, r1);
+       divsdr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       sse_movr_d(r0, r1);
+       divsdr(r0, r2);
+    }
+}
+
+dopi(div) /* expands to fpr_opi(div, d, 64) */
+
+static void /* Emit float r0 = |r1| by ANDing with a mask built in a register. */
+_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* in-place: the mask needs a scratch register */
+       pcmpeqlr(rn(reg), rn(reg)); /* compare-equal with itself; presumably yields all ones -- confirm */
+       psrl(rn(reg), 1); /* shift right by 1; presumably clears the sign bit of each 32-bit lane */
+       andpsr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       pcmpeqlr(r0, r0); /* build the mask directly in the destination */
+       psrl(r0, 1);
+       andpsr(r0, r1);
+    }
+}
+
+static void /* Emit double r0 = |r1| by ANDing with a mask built in a register. */
+_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* in-place: the mask needs a scratch register */
+       pcmpeqlr(rn(reg), rn(reg)); /* compare-equal with itself; presumably yields all ones -- confirm */
+       psrq(rn(reg), 1); /* shift right by 1; presumably clears the sign bit of each 64-bit lane */
+       andpdr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       pcmpeqlr(r0, r0); /* build the mask directly in the destination */
+       psrq(r0, 1);
+       andpdr(r0, r1);
+    }
+}
+
+static void /* Emit float r0 = -r1 by XORing with the constant 0x80000000. */
+_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                freg, ireg;
+    ireg = jit_get_reg(jit_class_gpr); /* integer scratch used to materialize the constant */
+    imovi(rn(ireg), 0x80000000); /* only bit 31 set (the float sign bit) */
+    if (r0 == r1) {
+       freg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* in-place: the mask needs its own SSE register */
+       movdlxr(rn(freg), rn(ireg));
+       xorpsr(r0, rn(freg));
+       jit_unget_reg(freg);
+    }
+    else {
+       movdlxr(r0, rn(ireg)); /* load the mask into the destination, then XOR the source into it */
+       xorpsr(r0, r1);
+    }
+    jit_unget_reg(ireg);
+}
+
+static void /* Emit double r0 = -r1 by XORing with 0x80000000 shifted left by 32. */
+_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                freg, ireg;
+    ireg = jit_get_reg(jit_class_gpr); /* integer scratch used to materialize the constant */
+    imovi(rn(ireg), 0x80000000);
+    if (r0 == r1) {
+       freg = jit_get_reg(jit_class_fpr|jit_class_xpr); /* in-place: the mask needs its own SSE register */
+       movdlxr(rn(freg), rn(ireg));
+       pslq(rn(freg), 32); /* shift by 32 so the set bit becomes bit 63 (the double sign bit) */
+       xorpdr(r0, rn(freg));
+       jit_unget_reg(freg);
+    }
+    else {
+       movdlxr(r0, rn(ireg)); /* build the mask in the destination, then XOR the source into it */
+       pslq(r0, 32);
+       xorpdr(r0, r1);
+    }
+    jit_unget_reg(ireg);
+}
+
+static void /* Emit r0 = condition `code` after comparing r2 with r1; d selects ucomisdr (double) over ucomissr. */
+_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t         rc;
+    jit_int32_t                reg;
+    if ((rc = reg8_p(r0))) /* presumably: r0 is usable as a byte register -- confirm */
+       reg = r0;
+    else {
+       reg = _RAX_REGNO; /* otherwise work in RAX ... */
+       movr(r0, reg); /* ... after parking RAX's current value in r0 */
+    }
+    ixorr(reg, reg); /* zero the result register before the compare */
+    if (d)
+       ucomisdr(r2, r1);
+    else
+       ucomissr(r2, r1);
+    cc(code, reg);
+    if (!rc)
+       xchgr(r0, reg); /* result goes to r0 and RAX gets its parked value back */
+}
+
+static void /* Emit a float register move r0 <- r1; emits nothing when r0 == r1. */
+_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1)
+       ssexr(0xf3, X86_SSE_MOV, r0, r1); /* 0xf3 prefix; presumably the scalar-single form -- confirm */
+}
+
+static void /* Emit code loading the float constant *i0 into r0. */
+_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union { /* gives access to the bit pattern of the float */
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } data;
+    jit_int32_t                 reg;
+    jit_bool_t          ldi;
+
+    data.f = *i0;
+    if (data.f == 0.0 && !(data.i & 0x80000000)) /* +0.0 only: -0.0 also compares equal to 0.0 but has bit 31 set */
+       xorpsr(r0, r0);
+    else {
+       ldi = !_jitc->no_data; /* load from memory only when no_data is not set */
+#if __X64
+       /* if will allocate a register for offset, just use immediate */
+       if (ldi && !sse_address_p(i0))
+           ldi = 0;
+#endif
+       if (ldi)
+           sse_ldi_f(r0, (jit_word_t)i0);
+       else {
+           reg = jit_get_reg(jit_class_gpr); /* go through an integer register with the raw bits */
+           movi(rn(reg), data.i);
+           movdlxr(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+fopi(lt) /* expands to fpr_opi(lt, f, 32) */
+fopi(le) /* expands to fpr_opi(le, f, 32) */
+
+static void /* Emit r0 = (r1 == r2) for floats; the result stays 0 when the jpes skip is taken. */
+_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t         rc;
+    jit_int32_t                reg;
+    jit_word_t         jp_code;
+    if ((rc = reg8_p(r0)))
+       reg = r0;
+    else {
+       reg = _RAX_REGNO; /* work in RAX, parking its value in r0 */
+       movr(r0, _RAX_REGNO);
+    }
+    ixorr(reg, reg); /* default result 0 */
+    ucomissr(r2, r1);
+    jpes(0); /* placeholder short jump; presumably taken on parity (unordered) -- confirm */
+    jp_code = _jit->pc.w;
+    cc(X86_CC_E, reg);
+    patch_rel_char(jp_code, _jit->pc.w); /* make the jump land just past the cc() */
+    if (!rc)
+       xchgr(r0, reg); /* result to r0, RAX restored */
+}
+
+fopi(eq) /* expands to fpr_opi(eq, f, 32) */
+fopi(ge) /* expands to fpr_opi(ge, f, 32) */
+fopi(gt) /* expands to fpr_opi(gt, f, 32) */
+
+static void /* Emit r0 = (r1 != r2) for floats; the result stays 1 when the jpes skip is taken. */
+_sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t         rc;
+    jit_int32_t                reg;
+    jit_word_t         jp_code;
+    if ((rc = reg8_p(r0)))
+       reg = r0;
+    else {
+       reg = _RAX_REGNO; /* work in RAX, parking its value in r0 */
+       movr(r0, _RAX_REGNO);
+    }
+    imovi(reg, 1); /* default result 1 */
+    ucomissr(r2, r1);
+    jpes(0); /* placeholder short jump; presumably taken on parity (unordered) -- confirm */
+    jp_code = _jit->pc.w;
+    cc(X86_CC_NE, reg);
+    patch_rel_char(jp_code, _jit->pc.w); /* make the jump land just past the cc() */
+    if (!rc)
+       xchgr(r0, reg); /* result to r0, RAX restored */
+}
+
+fopi(ne) /* expands to fpr_opi(ne, f, 32) */
+fopi(unlt) /* expands to fpr_opi(unlt, f, 32) */
+
+static void /* Emit r0 = (r1 unordered-or-<= r2) for floats. */
+_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpf(X86_CC_NA, r0, r2, r1); /* note r2, r1: operands swapped relative to _sse_unger_f */
+}
+
+fopi(unle) /* expands to fpr_opi(unle, f, 32) */
+
+static void /* Emit r0 = (r1 unordered-or-== r2) for floats. */
+_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpf(X86_CC_E, r0, r1, r2); /* no parity skip here, unlike _sse_eqr_f */
+}
+
+fopi(uneq) /* expands to fpr_opi(uneq, f, 32) */
+
+static void /* Emit r0 = (r1 unordered-or->= r2) for floats. */
+_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpf(X86_CC_NA, r0, r1, r2);
+}
+
+fopi(unge) /* expands to fpr_opi(unge, f, 32) */
+fopi(ungt) /* expands to fpr_opi(ungt, f, 32) */
+
+static void /* Emit r0 = (r1 < r2 or r1 > r2) for floats. */
+_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       ixorr(r0, r0); /* same register: result is constant 0, no compare emitted */
+    else
+       ssecmpf(X86_CC_NE, r0, r1, r2);
+}
+
+fopi(ltgt) /* expands to fpr_opi(ltgt, f, 32) */
+fopi(ord) /* expands to fpr_opi(ord, f, 32) */
+fopi(unord) /* expands to fpr_opi(unord, f, 32) */
+
+static void /* Emit a float load into r0 from the absolute address i0. */
+_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (sse_address_p(i0)) /* presumably: i0 can be encoded directly in the instruction -- confirm */
+       movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* otherwise put the address in a scratch register */
+       movi(rn(reg), i0);
+       sse_ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a float load into r0 from address r1 + r2. */
+_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr); /* on __X64_32 the sum is computed explicitly first */
+    addr(rn(reg), r1, r2);
+    sse_ldr_f(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    movssmr(0, r1, r2, _SCL1, r0); /* base r1, index r2, zero displacement */
+#endif
+}
+
+static void /* Emit a float load into r0 from address r1 + i0. */
+_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) /* offset usable directly as the displacement */
+       movssmr(i0, r1, _NOREG, _SCL1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* large offset: go through a scratch register */
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       sse_ldr_f(r0, rn(reg));
+#else
+       movi(rn(reg), i0);
+       sse_ldxr_f(r0, r1, rn(reg));
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a float store of r0 to the absolute address i0. */
+_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (sse_address_p(i0)) /* presumably: i0 can be encoded directly in the instruction -- confirm */
+       movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* otherwise put the address in a scratch register */
+       movi(rn(reg), i0);
+       sse_str_f(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a float store of r2 to address r0 + r1. */
+_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr); /* on __X64_32 the sum is computed explicitly first */
+    addr(rn(reg), r0, r1);
+    sse_str_f(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
+    movssrm(r2, 0, r0, r1, _SCL1); /* base r0, index r1, zero displacement */
+#endif
+}
+
+static void /* Emit a float store of r1 to address r0 + i0. */
+_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) /* offset usable directly as the displacement */
+       movssrm(r1, i0, r0, _NOREG, _SCL1);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* large offset: go through a scratch register */
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       sse_str_f(rn(reg), r1);
+#else
+       movi(rn(reg), i0);
+       sse_stxr_f(rn(reg), r0, r1); /* address = reg + r0, value = r1 */
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 < r1; returns _jit->pc.w after the jump. */
+_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r1, r0); /* operands swapped so that "above" expresses r0 < r1 */
+    ja(i0);
+    return (_jit->pc.w);
+}
+fbopi(lt) /* immediate-operand variant, generated by fpr_bopi(lt, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 <= r1; returns _jit->pc.w after the jump. */
+_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r1, r0); /* operands swapped so that "above or equal" expresses r0 <= r1 */
+    jae(i0);
+    return (_jit->pc.w);
+}
+fbopi(le) /* immediate-operand variant, generated by fpr_bopi(le, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 == r1; returns _jit->pc.w after the sequence. */
+_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         jp_code;
+    ucomissr(r0, r1);
+    jps(0); /* placeholder short jump on parity; patched below to skip the je */
+    jp_code = _jit->pc.w;
+    je(i0);
+    patch_rel_char(jp_code, _jit->pc.w);
+    return (_jit->pc.w);
+}
+fbopi(eq) /* immediate-operand variant, generated by fpr_bopi(eq, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 >= r1; returns _jit->pc.w after the jump. */
+_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    jae(i0);
+    return (_jit->pc.w);
+}
+fbopi(ge) /* immediate-operand variant, generated by fpr_bopi(ge, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 > r1; returns _jit->pc.w after the jump. */
+_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    ja(i0);
+    return (_jit->pc.w);
+}
+fbopi(gt) /* immediate-operand variant, generated by fpr_bopi(gt, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 != r1, including the parity case; returns _jit->pc.w after the sequence. */
+_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         jp_code;
+    jit_word_t         jz_code;
+    ucomissr(r0, r1);
+    jps(0); /* on parity: jump straight to the jmpi below */
+    jp_code = _jit->pc.w;
+    jzs(0); /* on zero (equal): jump past the jmpi, branch not taken */
+    jz_code = _jit->pc.w;
+    patch_rel_char(jp_code, _jit->pc.w); /* parity jump lands on the jmpi */
+    jmpi(i0);
+    patch_rel_char(jz_code, _jit->pc.w); /* zero jump lands after the jmpi */
+    return (_jit->pc.w);
+}
+fbopi(ne) /* immediate-operand variant, generated by fpr_bopi(ne, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 is unordered-or-< r1; returns _jit->pc.w after the jump. */
+_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    jnae(i0);
+    return (_jit->pc.w);
+}
+fbopi(unlt) /* immediate-operand variant, generated by fpr_bopi(unlt, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 is unordered-or-<= r1; returns _jit->pc.w after the jump. */
+_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomissr(r0, r1);
+       jna(i0);
+    }
+    return (_jit->pc.w);
+}
+fbopi(unle) /* immediate-operand variant, generated by fpr_bopi(unle, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 is unordered-or-== r1; returns _jit->pc.w after the jump. */
+_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomissr(r0, r1);
+       je(i0); /* no parity skip here, unlike _sse_beqr_f */
+    }
+    return (_jit->pc.w);
+}
+fbopi(uneq) /* immediate-operand variant, generated by fpr_bopi(uneq, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 is unordered-or->= r1; returns _jit->pc.w after the jump. */
+_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomissr(r1, r0); /* operands swapped so that "not above" expresses r0 >= r1 */
+       jna(i0);
+    }
+    return (_jit->pc.w);
+}
+fbopi(unge) /* immediate-operand variant, generated by fpr_bopi(unge, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 is unordered-or-> r1; returns _jit->pc.w after the jump. */
+_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r1, r0); /* operands swapped so that "not above or equal" expresses r0 > r1 */
+    jnae(i0);
+    return (_jit->pc.w);
+}
+fbopi(ungt) /* immediate-operand variant, generated by fpr_bopi(ungt, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when float r0 < r1 or r0 > r1; returns _jit->pc.w after the jump. */
+_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    jne(i0);
+    return (_jit->pc.w);
+}
+fbopi(ltgt) /* immediate-operand variant, generated by fpr_bopi(ltgt, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when floats r0 and r1 are ordered; returns _jit->pc.w after the jump. */
+_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    jnp(i0); /* jump when parity is clear */
+    return (_jit->pc.w);
+}
+fbopi(ord) /* immediate-operand variant, generated by fpr_bopi(ord, f, 32) */
+
+static jit_word_t /* Emit a branch to i0 taken when floats r0 and r1 are unordered; returns _jit->pc.w after the jump. */
+_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomissr(r0, r1);
+    jp(i0); /* jump when parity is set */
+    return (_jit->pc.w);
+}
+fbopi(unord) /* immediate-operand variant, generated by fpr_bopi(unord, f, 32) */
+
+dopi(lt) /* expands to fpr_opi(lt, d, 64) */
+dopi(le) /* expands to fpr_opi(le, d, 64) */
+
+static void /* Emit r0 = (r1 == r2) for doubles; the result stays 0 when the jpes skip is taken. */
+_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t         rc;
+    jit_int32_t                reg;
+    jit_word_t         jp_code;
+    if ((rc = reg8_p(r0)))
+       reg = r0;
+    else {
+       reg = _RAX_REGNO; /* work in RAX, parking its value in r0 */
+       movr(r0, _RAX_REGNO);
+    }
+    ixorr(reg, reg); /* default result 0 */
+    ucomisdr(r2, r1);
+    jpes(0); /* placeholder short jump; presumably taken on parity (unordered) -- confirm */
+    jp_code = _jit->pc.w;
+    cc(X86_CC_E, reg);
+    patch_rel_char(jp_code, _jit->pc.w); /* make the jump land just past the cc() */
+    if (!rc)
+       xchgr(r0, reg); /* result to r0, RAX restored */
+}
+
+dopi(eq) /* expands to fpr_opi(eq, d, 64) */
+dopi(ge) /* expands to fpr_opi(ge, d, 64) */
+dopi(gt) /* expands to fpr_opi(gt, d, 64) */
+
+static void /* Emit r0 = (r1 != r2) for doubles; the result stays 1 when the jpes skip is taken. */
+_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t         rc;
+    jit_int32_t                reg;
+    jit_word_t         jp_code;
+    if ((rc = reg8_p(r0)))
+       reg = r0;
+    else {
+       reg = _RAX_REGNO; /* work in RAX, parking its value in r0 */
+       movr(r0, _RAX_REGNO);
+    }
+    imovi(reg, 1); /* default result 1 */
+    ucomisdr(r2, r1);
+    jpes(0); /* placeholder short jump; presumably taken on parity (unordered) -- confirm */
+    jp_code = _jit->pc.w;
+    cc(X86_CC_NE, reg);
+    patch_rel_char(jp_code, _jit->pc.w); /* make the jump land just past the cc() */
+    if (!rc)
+       xchgr(r0, reg); /* result to r0, RAX restored */
+}
+
+dopi(ne) /* expands to fpr_opi(ne, d, 64) */
+dopi(unlt) /* expands to fpr_opi(unlt, d, 64) */
+
+static void /* Emit r0 = (r1 unordered-or-<= r2) for doubles. */
+_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpd(X86_CC_NA, r0, r2, r1); /* note r2, r1: operands swapped relative to _sse_unger_d */
+}
+
+dopi(unle) /* expands to fpr_opi(unle, d, 64) */
+
+static void /* Emit r0 = (r1 unordered-or-== r2) for doubles. */
+_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpd(X86_CC_E, r0, r1, r2); /* no parity skip here, unlike _sse_eqr_d */
+}
+
+dopi(uneq) /* expands to fpr_opi(uneq, d, 64) */
+
+static void /* Emit r0 = (r1 unordered-or->= r2) for doubles. */
+_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 1); /* same register: result is constant 1, no compare emitted */
+    else
+       ssecmpd(X86_CC_NA, r0, r1, r2);
+}
+
+dopi(unge) /* expands to fpr_opi(unge, d, 64) */
+dopi(ungt) /* expands to fpr_opi(ungt, d, 64) */
+
+static void /* Emit r0 = (r1 < r2 or r1 > r2) for doubles. */
+_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       ixorr(r0, r0); /* same register: result is constant 0, no compare emitted */
+    else
+       ssecmpd(X86_CC_NE, r0, r1, r2);
+}
+
+dopi(ltgt) /* expands to fpr_opi(ltgt, d, 64) */
+dopi(ord) /* expands to fpr_opi(ord, d, 64) */
+dopi(unord) /* expands to fpr_opi(unord, d, 64) */
+
+static void /* Emit a double register move r0 <- r1; emits nothing when r0 == r1. */
+_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1)
+       ssexr(0xf2, X86_SSE_MOV, r0, r1); /* 0xf2 prefix; presumably the scalar-double form -- confirm */
+}
+
+static void /* Emit code loading the double constant *i0 into r0. */
+_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union { /* gives access to the bit pattern of the double */
+       jit_int32_t      ii[2];
+       jit_word_t       w;
+       jit_float64_t    d;
+    } data;
+    jit_int32_t                 reg;
+    jit_bool_t          ldi;
+
+    data.d = *i0;
+    if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) /* +0.0 only; ii[1] is the sign-carrying half on little-endian x86 */
+       xorpdr(r0, r0);
+    else {
+       ldi = !_jitc->no_data; /* load from memory only when no_data is not set */
+#if __X64
+       /* if will allocate a register for offset, just use immediate */
+       if (ldi && !sse_address_p(i0))
+           ldi = 0;
+#endif
+       if (ldi)
+           sse_ldi_d(r0, (jit_word_t)i0);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+#if __X64 && !__X64_32
+           movi(rn(reg), data.w); /* whole 64-bit pattern through one integer register */
+           movdqxr(r0, rn(reg));
+           jit_unget_reg(reg);
+#else
+           movi(rn(reg), data.ii[0]); /* otherwise write the two 32-bit halves to the RBP-relative CVT_OFFSET slot ... */
+           stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
+           movi(rn(reg), data.ii[1]);
+           stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
+           jit_unget_reg(reg);
+           sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); /* ... and load them back as one double */
+#endif
+       }
+    }
+}
+
+static void /* Emit a double load into r0 from the absolute address i0. */
+_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (sse_address_p(i0)) /* presumably: i0 can be encoded directly in the instruction -- confirm */
+       movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* otherwise put the address in a scratch register */
+       movi(rn(reg), i0);
+       sse_ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a double load into r0 from address r1 + r2. */
+_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr); /* on __X64_32 the sum is computed explicitly first */
+    addr(rn(reg), r1, r2);
+    sse_ldr_d(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    movsdmr(0, r1, r2, _SCL1, r0); /* base r1, index r2, zero displacement */
+#endif
+}
+
+static void /* Emit a double load into r0 from address r1 + i0. */
+_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) /* offset usable directly as the displacement */
+       movsdmr(i0, r1, _NOREG, _SCL1, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* large offset: go through a scratch register */
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       sse_ldr_d(r0, rn(reg));
+#else
+       movi(rn(reg), i0);
+       sse_ldxr_d(r0, r1, rn(reg));
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a double store of r0 to the absolute address i0. */
+_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (sse_address_p(i0)) /* presumably: i0 can be encoded directly in the instruction -- confirm */
+       movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* otherwise put the address in a scratch register */
+       movi(rn(reg), i0);
+       sse_str_d(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+static void /* Emit a double store of r2 to address r0 + r1. */
+_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr); /* on __X64_32 the sum is computed explicitly first */
+    addr(rn(reg), r0, r1);
+    sse_str_d(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
+    movsdrm(r2, 0, r0, r1, _SCL1); /* base r0, index r1, zero displacement */
+#endif
+}
+
+static void /* Emit a double store of r1 to address r0 + i0. */
+_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) /* offset usable directly as the displacement */
+       movsdrm(r1, i0, r0, _NOREG, _SCL1);
+    else {
+       reg = jit_get_reg(jit_class_gpr); /* large offset: go through a scratch register */
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       sse_str_d(rn(reg), r1);
+#else
+       movi(rn(reg), i0);
+       sse_stxr_d(rn(reg), r0, r1); /* must be the _d store: the _f variant writes only a 32-bit float */
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 < r1; returns _jit->pc.w after the jump. */
+_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r1, r0); /* operands swapped so that "above" expresses r0 < r1 */
+    ja(i0);
+    return (_jit->pc.w);
+}
+dbopi(lt) /* immediate-operand variant, generated by fpr_bopi(lt, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 <= r1; returns _jit->pc.w after the jump. */
+_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r1, r0); /* operands swapped so that "above or equal" expresses r0 <= r1 */
+    jae(i0);
+    return (_jit->pc.w);
+}
+dbopi(le) /* immediate-operand variant, generated by fpr_bopi(le, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 == r1; returns _jit->pc.w after the sequence. */
+_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         jp_code;
+    ucomisdr(r0, r1);
+    jps(0); /* placeholder short jump on parity; patched below to skip the je */
+    jp_code = _jit->pc.w;
+    je(i0);
+    patch_rel_char(jp_code, _jit->pc.w);
+    return (_jit->pc.w);
+}
+dbopi(eq) /* immediate-operand variant, generated by fpr_bopi(eq, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 >= r1; returns _jit->pc.w after the jump. */
+_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    jae(i0);
+    return (_jit->pc.w);
+}
+dbopi(ge) /* immediate-operand variant, generated by fpr_bopi(ge, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 > r1; returns _jit->pc.w after the jump. */
+_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    ja(i0);
+    return (_jit->pc.w);
+}
+dbopi(gt) /* immediate-operand variant, generated by fpr_bopi(gt, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 != r1, including the parity case; returns _jit->pc.w after the sequence. */
+_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         jp_code;
+    jit_word_t         jz_code;
+    ucomisdr(r0, r1);
+    jps(0); /* on parity: jump straight to the jmpi below */
+    jp_code = _jit->pc.w;
+    jzs(0); /* on zero (equal): jump past the jmpi, branch not taken */
+    jz_code = _jit->pc.w;
+    patch_rel_char(jp_code, _jit->pc.w); /* parity jump lands on the jmpi */
+    jmpi(i0);
+    patch_rel_char(jz_code, _jit->pc.w); /* zero jump lands after the jmpi */
+    return (_jit->pc.w);
+}
+dbopi(ne) /* immediate-operand variant, generated by fpr_bopi(ne, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 is unordered-or-< r1; returns _jit->pc.w after the jump. */
+_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    jnae(i0);
+    return (_jit->pc.w);
+}
+dbopi(unlt) /* immediate-operand variant, generated by fpr_bopi(unlt, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 is unordered-or-<= r1; returns _jit->pc.w after the jump. */
+_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomisdr(r0, r1);
+       jna(i0);
+    }
+    return (_jit->pc.w);
+}
+dbopi(unle) /* immediate-operand variant, generated by fpr_bopi(unle, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 is unordered-or-== r1; returns _jit->pc.w after the jump. */
+_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomisdr(r0, r1);
+       je(i0); /* no parity skip here, unlike _sse_beqr_d */
+    }
+    return (_jit->pc.w);
+}
+dbopi(uneq) /* immediate-operand variant, generated by fpr_bopi(uneq, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 is unordered-or->= r1; returns _jit->pc.w after the jump. */
+_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       jmpi(i0); /* same register: always taken, so emit an unconditional jump */
+    else {
+       ucomisdr(r1, r0); /* operands swapped so that "not above" expresses r0 >= r1 */
+       jna(i0);
+    }
+    return (_jit->pc.w);
+}
+dbopi(unge) /* immediate-operand variant, generated by fpr_bopi(unge, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 is unordered-or-> r1; returns _jit->pc.w after the jump. */
+_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r1, r0); /* operands swapped so that "not above or equal" expresses r0 > r1 */
+    jnae(i0);
+    return (_jit->pc.w);
+}
+dbopi(ungt) /* immediate-operand variant, generated by fpr_bopi(ungt, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when double r0 < r1 or r0 > r1; returns _jit->pc.w after the jump. */
+_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    jne(i0);
+    return (_jit->pc.w);
+}
+dbopi(ltgt) /* immediate-operand variant, generated by fpr_bopi(ltgt, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when doubles r0 and r1 are ordered; returns _jit->pc.w after the jump. */
+_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    jnp(i0); /* jump when parity is clear */
+    return (_jit->pc.w);
+}
+dbopi(ord) /* immediate-operand variant, generated by fpr_bopi(ord, d, 64) */
+
+static jit_word_t /* Emit a branch to i0 taken when doubles r0 and r1 are unordered; returns _jit->pc.w after the jump. */
+_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    ucomisdr(r0, r1);
+    jp(i0); /* jump when parity is set */
+    return (_jit->pc.w);
+}
+dbopi(unord) /* immediate-operand variant, generated by fpr_bopi(unord, d, 64) */
+#  undef fopi /* drop the section-local generator macros so they do not leak past this section */
+#  undef fbopi
+#  undef dopi /* was "bopi", which is never defined here, leaving dopi defined */
+#  undef dbopi
+#  undef fpr_bopi
+#  undef fpr_opi
+#endif
diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c
new file mode 100644 (file)
index 0000000..663b840
--- /dev/null
@@ -0,0 +1,1610 @@
+
+#if __X32
+#define JIT_INSTR_MAX 42
+    0, /* data */
+    0, /* live */
+    3, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    3, /* label */
+    34,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    3, /* va_start */
+    5, /* va_arg */
+    7, /* va_arg_d */
+    0, /* va_end */
+    3, /* addr */
+    6, /* addi */
+    4, /* addcr */
+    6, /* addci */
+    4, /* addxr */
+    5, /* addxi */
+    4, /* subr */
+    6, /* subi */
+    6, /* subcr */
+    6, /* subci */
+    6, /* subxr */
+    5, /* subxi */
+    8, /* rsbi */
+    5, /* mulr */
+    7, /* muli */
+    20,        /* qmulr */
+    25,        /* qmuli */
+    20,        /* qmulr_u */
+    25,        /* qmuli_u */
+    21,        /* divr */
+    24,        /* divi */
+    22,        /* divr_u */
+    25,        /* divi_u */
+    23,        /* qdivr */
+    26,        /* qdivi */
+    24,        /* qdivr_u */
+    27,        /* qdivi_u */
+    21,        /* remr */
+    24,        /* remi */
+    22,        /* remr_u */
+    25,        /* remi_u */
+    4, /* andr */
+    7, /* andi */
+    4, /* orr */
+    8, /* ori */
+    4, /* xorr */
+    8, /* xori */
+    16,        /* lshr */
+    7, /* lshi */
+    16,        /* rshr */
+    5, /* rshi */
+    16,        /* rshr_u */
+    5, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    15,        /* ltr */
+    16,        /* lti */
+    15,        /* ltr_u */
+    16,        /* lti_u */
+    15,        /* ler */
+    16,        /* lei */
+    15,        /* ler_u */
+    16,        /* lei_u */
+    15,        /* eqr */
+    16,        /* eqi */
+    15,        /* ger */
+    16,        /* gei */
+    15,        /* ger_u */
+    16,        /* gei_u */
+    15,        /* gtr */
+    16,        /* gti */
+    15,        /* gtr_u */
+    16,        /* gti_u */
+    15,        /* ner */
+    16,        /* nei */
+    2, /* movr */
+    5, /* movi */
+    11,        /* extr_c */
+    11,        /* extr_uc */
+    3, /* extr_s */
+    3, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    7, /* htonr_us */
+    4, /* htonr_ui */
+    0, /* htonr_ul */
+    3, /* ldr_c */
+    7, /* ldi_c */
+    3, /* ldr_uc */
+    7, /* ldi_uc */
+    3, /* ldr_s */
+    7, /* ldi_s */
+    3, /* ldr_us */
+    7, /* ldi_us */
+    2, /* ldr_i */
+    6, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    4, /* ldxr_c */
+    7, /* ldxi_c */
+    4, /* ldxr_uc */
+    7, /* ldxi_uc */
+    4, /* ldxr_s */
+    7, /* ldxi_s */
+    4, /* ldxr_us */
+    7, /* ldxi_us */
+    3, /* ldxr_i */
+    6, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    10,        /* str_c */
+    14,        /* sti_c */
+    3, /* str_s */
+    7, /* sti_s */
+    2, /* str_i */
+    6, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    11,        /* stxr_c */
+    11,        /* stxi_c */
+    4, /* stxr_s */
+    7, /* stxi_s */
+    3, /* stxr_i */
+    6, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    8, /* bltr */
+    9, /* blti */
+    8, /* bltr_u */
+    9, /* blti_u */
+    8, /* bler */
+    12,        /* blei */
+    8, /* bler_u */
+    9, /* blei_u */
+    8, /* beqr */
+    12,        /* beqi */
+    8, /* bger */
+    9, /* bgei */
+    8, /* bger_u */
+    9, /* bgei_u */
+    8, /* bgtr */
+    9, /* bgti */
+    8, /* bgtr_u */
+    9, /* bgti_u */
+    8, /* bner */
+    12,        /* bnei */
+    8, /* bmsr */
+    12,        /* bmsi */
+    8, /* bmcr */
+    12,        /* bmci */
+    8, /* boaddr */
+    9, /* boaddi */
+    8, /* boaddr_u */
+    9, /* boaddi_u */
+    8, /* bxaddr */
+    9, /* bxaddi */
+    8, /* bxaddr_u */
+    9, /* bxaddi_u */
+    8, /* bosubr */
+    9, /* bosubi */
+    8, /* bosubr_u */
+    9, /* bosubi_u */
+    8, /* bxsubr */
+    9, /* bxsubi */
+    8, /* bxsubr_u */
+    9, /* bxsubi_u */
+    2, /* jmpr */
+    5, /* jmpi */
+    2, /* callr */
+    5, /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    24,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    8, /* addr_f */
+    19,        /* addi_f */
+    12,        /* subr_f */
+    19,        /* subi_f */
+    21,        /* rsbi_f */
+    8, /* mulr_f */
+    19,        /* muli_f */
+    12,        /* divr_f */
+    19,        /* divi_f */
+    12,        /* negr_f */
+    12,        /* absr_f */
+    6, /* sqrtr_f */
+    13,        /* ltr_f */
+    27,        /* lti_f */
+    13,        /* ler_f */
+    27,        /* lei_f */
+    15,        /* eqr_f */
+    29,        /* eqi_f */
+    13,        /* ger_f */
+    27,        /* gei_f */
+    13,        /* gtr_f */
+    27,        /* gti_f */
+    18,        /* ner_f */
+    32,        /* nei_f */
+    13,        /* unltr_f */
+    27,        /* unlti_f */
+    13,        /* unler_f */
+    27,        /* unlei_f */
+    13,        /* uneqr_f */
+    27,        /* uneqi_f */
+    13,        /* unger_f */
+    27,        /* ungei_f */
+    13,        /* ungtr_f */
+    27,        /* ungti_f */
+    13,        /* ltgtr_f */
+    27,        /* ltgti_f */
+    13,        /* ordr_f */
+    27,        /* ordi_f */
+    13,        /* unordr_f */
+    27,        /* unordi_f */
+    8, /* truncr_f_i */
+    0, /* truncr_f_l */
+    8, /* extr_f */
+    4, /* extr_d_f */
+    10,        /* movr_f */
+    19,        /* movi_f */
+    4, /* ldr_f */
+    8, /* ldi_f */
+    5, /* ldxr_f */
+    8, /* ldxi_f */
+    6, /* str_f */
+    10,        /* sti_f */
+    7, /* stxr_f */
+    8, /* stxi_f */
+    10,        /* bltr_f */
+    23,        /* blti_f */
+    10,        /* bler_f */
+    23,        /* blei_f */
+    12,        /* beqr_f */
+    25,        /* beqi_f */
+    10,        /* bger_f */
+    23,        /* bgei_f */
+    10,        /* bgtr_f */
+    23,        /* bgti_f */
+    13,        /* bner_f */
+    26,        /* bnei_f */
+    10,        /* bunltr_f */
+    23,        /* bunlti_f */
+    10,        /* bunler_f */
+    23,        /* bunlei_f */
+    10,        /* buneqr_f */
+    23,        /* buneqi_f */
+    10,        /* bunger_f */
+    23,        /* bungei_f */
+    10,        /* bungtr_f */
+    23,        /* bungti_f */
+    10,        /* bltgtr_f */
+    23,        /* bltgti_f */
+    10,        /* bordr_f */
+    23,        /* bordi_f */
+    10,        /* bunordr_f */
+    23,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    10,        /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    8, /* addr_d */
+    26,        /* addi_d */
+    12,        /* subr_d */
+    26,        /* subi_d */
+    30,        /* rsbi_d */
+    8, /* mulr_d */
+    26,        /* muli_d */
+    12,        /* divr_d */
+    26,        /* divi_d */
+    18,        /* negr_d */
+    13,        /* absr_d */
+    6, /* sqrtr_d */
+    13,        /* ltr_d */
+    37,        /* lti_d */
+    13,        /* ler_d */
+    37,        /* lei_d */
+    15,        /* eqr_d */
+    39,        /* eqi_d */
+    13,        /* ger_d */
+    37,        /* gei_d */
+    13,        /* gtr_d */
+    37,        /* gti_d */
+    18,        /* ner_d */
+    42,        /* nei_d */
+    13,        /* unltr_d */
+    37,        /* unlti_d */
+    13,        /* unler_d */
+    37,        /* unlei_d */
+    13,        /* uneqr_d */
+    37,        /* uneqi_d */
+    13,        /* unger_d */
+    37,        /* ungei_d */
+    13,        /* ungtr_d */
+    37,        /* ungti_d */
+    13,        /* ltgtr_d */
+    37,        /* ltgti_d */
+    13,        /* ordr_d */
+    37,        /* ordi_d */
+    13,        /* unordr_d */
+    37,        /* unordi_d */
+    8, /* truncr_d_i */
+    0, /* truncr_d_l */
+    8, /* extr_d */
+    4, /* extr_f_d */
+    10,        /* movr_d */
+    24,        /* movi_d */
+    4, /* ldr_d */
+    8, /* ldi_d */
+    5, /* ldxr_d */
+    8, /* ldxi_d */
+    6, /* str_d */
+    10,        /* sti_d */
+    7, /* stxr_d */
+    8, /* stxi_d */
+    10,        /* bltr_d */
+    28,        /* blti_d */
+    10,        /* bler_d */
+    28,        /* blei_d */
+    12,        /* beqr_d */
+    30,        /* beqi_d */
+    10,        /* bger_d */
+    28,        /* bgei_d */
+    10,        /* bgtr_d */
+    28,        /* bgti_d */
+    13,        /* bner_d */
+    31,        /* bnei_d */
+    10,        /* bunltr_d */
+    28,        /* bunlti_d */
+    10,        /* bunler_d */
+    28,        /* bunlei_d */
+    10,        /* buneqr_d */
+    28,        /* buneqi_d */
+    10,        /* bunger_d */
+    28,        /* bungei_d */
+    10,        /* bungtr_d */
+    28,        /* bungti_d */
+    10,        /* bltgtr_d */
+    28,        /* bltgti_d */
+    10,        /* bordr_d */
+    28,        /* bordi_d */
+    10,        /* bunordr_d */
+    28,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    10,        /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif
+
+#if __X64
+#if __CYGWIN__ || _WIN32
+#define JIT_INSTR_MAX 130
+    0, /* data */
+    0, /* live */
+    6, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    7, /* label */
+    130,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    7, /* va_start */
+    7, /* va_arg */
+    9, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    13,        /* addi */
+    6, /* addcr */
+    13,        /* addci */
+    6, /* addxr */
+    7, /* addxi */
+    6, /* subr */
+    13,        /* subi */
+    9, /* subcr */
+    13,        /* subci */
+    9, /* subxr */
+    7, /* subxi */
+    16,        /* rsbi */
+    7, /* mulr */
+    14,        /* muli */
+    20,        /* qmulr */
+    30,        /* qmuli */
+    20,        /* qmulr_u */
+    30,        /* qmuli_u */
+    22,        /* divr */
+    29,        /* divi */
+    23,        /* divr_u */
+    30,        /* divi_u */
+    25,        /* qdivr */
+    32,        /* qdivi */
+    26,        /* qdivr_u */
+    33,        /* qdivi_u */
+    22,        /* remr */
+    29,        /* remi */
+    23,        /* remr_u */
+    30,        /* remi_u */
+    6, /* andr */
+    13,        /* andi */
+    6, /* orr */
+    13,        /* ori */
+    6, /* xorr */
+    13,        /* xori */
+    9, /* lshr */
+    8, /* lshi */
+    9, /* rshr */
+    7, /* rshi */
+    9, /* rshr_u */
+    7, /* rshi_u */
+    6, /* negr */
+    6, /* comr */
+    13,        /* ltr */
+    14,        /* lti */
+    13,        /* ltr_u */
+    14,        /* lti_u */
+    13,        /* ler */
+    14,        /* lei */
+    13,        /* ler_u */
+    14,        /* lei_u */
+    13,        /* eqr */
+    14,        /* eqi */
+    13,        /* ger */
+    14,        /* gei */
+    13,        /* ger_u */
+    14,        /* gei_u */
+    13,        /* gtr */
+    14,        /* gti */
+    13,        /* gtr_u */
+    14,        /* gti_u */
+    13,        /* ner */
+    14,        /* nei */
+    3, /* movr */
+    10,        /* movi */
+    7, /* extr_c */
+    7, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    3, /* extr_i */
+    3, /* extr_ui */
+    9, /* htonr_us */
+    6, /* htonr_ui */
+    6, /* htonr_ul */
+    4, /* ldr_c */
+    15,        /* ldi_c */
+    4, /* ldr_uc */
+    15,        /* ldi_uc */
+    4, /* ldr_s */
+    15,        /* ldi_s */
+    4, /* ldr_us */
+    15,        /* ldi_us */
+    3, /* ldr_i */
+    14,        /* ldi_i */
+    3, /* ldr_ui */
+    14,        /* ldi_ui */
+    3, /* ldr_l */
+    14,        /* ldi_l */
+    5, /* ldxr_c */
+    8, /* ldxi_c */
+    5, /* ldxr_uc */
+    8, /* ldxi_uc */
+    5, /* ldxr_s */
+    8, /* ldxi_s */
+    5, /* ldxr_us */
+    8, /* ldxi_us */
+    4, /* ldxr_i */
+    7, /* ldxi_i */
+    4, /* ldxr_ui */
+    6, /* ldxi_ui */
+    4, /* ldxr_l */
+    7, /* ldxi_l */
+    6, /* str_c */
+    17,        /* sti_c */
+    4, /* str_s */
+    15,        /* sti_s */
+    3, /* str_i */
+    14,        /* sti_i */
+    3, /* str_l */
+    14,        /* sti_l */
+    7, /* stxr_c */
+    7, /* stxi_c */
+    5, /* stxr_s */
+    7, /* stxi_s */
+    4, /* stxr_i */
+    6, /* stxi_i */
+    4, /* stxr_l */
+    8, /* stxi_l */
+    9, /* bltr */
+    10,        /* blti */
+    9, /* bltr_u */
+    10,        /* blti_u */
+    9, /* bler */
+    13,        /* blei */
+    9, /* bler_u */
+    10,        /* blei_u */
+    9, /* beqr */
+    19,        /* beqi */
+    9, /* bger */
+    10,        /* bgei */
+    9, /* bger_u */
+    10,        /* bgei_u */
+    9, /* bgtr */
+    10,        /* bgti */
+    9, /* bgtr_u */
+    10,        /* bgti_u */
+    9, /* bner */
+    19,        /* bnei */
+    9, /* bmsr */
+    13,        /* bmsi */
+    9, /* bmcr */
+    13,        /* bmci */
+    9, /* boaddr */
+    10,        /* boaddi */
+    9, /* boaddr_u */
+    10,        /* boaddi_u */
+    9, /* bxaddr */
+    10,        /* bxaddi */
+    9, /* bxaddr_u */
+    10,        /* bxaddi_u */
+    9, /* bosubr */
+    10,        /* bosubi */
+    9, /* bosubr_u */
+    10,        /* bosubi_u */
+    9, /* bxsubr */
+    10,        /* bxsubi */
+    9, /* bxsubr_u */
+    10,        /* bxsubi_u */
+    3, /* jmpr */
+    5, /* jmpi */
+    3, /* callr */
+    13,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    124,       /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    10,        /* addr_f */
+    21,        /* addi_f */
+    15,        /* subr_f */
+    21,        /* subi_f */
+    27,        /* rsbi_f */
+    10,        /* mulr_f */
+    21,        /* muli_f */
+    15,        /* divr_f */
+    21,        /* divi_f */
+    15,        /* negr_f */
+    15,        /* absr_f */
+    5, /* sqrtr_f */
+    16,        /* ltr_f */
+    31,        /* lti_f */
+    16,        /* ler_f */
+    31,        /* lei_f */
+    18,        /* eqr_f */
+    33,        /* eqi_f */
+    16,        /* ger_f */
+    31,        /* gei_f */
+    16,        /* gtr_f */
+    31,        /* gti_f */
+    20,        /* ner_f */
+    35,        /* nei_f */
+    16,        /* unltr_f */
+    31,        /* unlti_f */
+    16,        /* unler_f */
+    31,        /* unlei_f */
+    16,        /* uneqr_f */
+    31,        /* uneqi_f */
+    16,        /* unger_f */
+    31,        /* ungei_f */
+    16,        /* ungtr_f */
+    31,        /* ungti_f */
+    16,        /* ltgtr_f */
+    31,        /* ltgti_f */
+    16,        /* ordr_f */
+    31,        /* ordi_f */
+    16,        /* unordr_f */
+    31,        /* unordi_f */
+    5, /* truncr_f_i */
+    5, /* truncr_f_l */
+    5, /* extr_f */
+    5, /* extr_d_f */
+    5, /* movr_f */
+    15,        /* movi_f */
+    5, /* ldr_f */
+    16,        /* ldi_f */
+    6, /* ldxr_f */
+    8, /* ldxi_f */
+    5, /* str_f */
+    16,        /* sti_f */
+    6, /* stxr_f */
+    9, /* stxi_f */
+    10,        /* bltr_f */
+    21,        /* blti_f */
+    10,        /* bler_f */
+    24,        /* blei_f */
+    12,        /* beqr_f */
+    27,        /* beqi_f */
+    10,        /* bger_f */
+    25,        /* bgei_f */
+    10,        /* bgtr_f */
+    25,        /* bgti_f */
+    13,        /* bner_f */
+    28,        /* bnei_f */
+    10,        /* bunltr_f */
+    25,        /* bunlti_f */
+    10,        /* bunler_f */
+    25,        /* bunlei_f */
+    10,        /* buneqr_f */
+    25,        /* buneqi_f */
+    10,        /* bunger_f */
+    25,        /* bungei_f */
+    10,        /* bungtr_f */
+    25,        /* bungti_f */
+    10,        /* bltgtr_f */
+    25,        /* bltgti_f */
+    10,        /* bordr_f */
+    25,        /* bordi_f */
+    10,        /* bunordr_f */
+    25,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    10,        /* addr_d */
+    25,        /* addi_d */
+    15,        /* subr_d */
+    25,        /* subi_d */
+    27,        /* rsbi_d */
+    10,        /* mulr_d */
+    25,        /* muli_d */
+    15,        /* divr_d */
+    25,        /* divi_d */
+    22,        /* negr_d */
+    16,        /* absr_d */
+    5, /* sqrtr_d */
+    17,        /* ltr_d */
+    32,        /* lti_d */
+    17,        /* ler_d */
+    32,        /* lei_d */
+    19,        /* eqr_d */
+    34,        /* eqi_d */
+    17,        /* ger_d */
+    32,        /* gei_d */
+    17,        /* gtr_d */
+    32,        /* gti_d */
+    21,        /* ner_d */
+    36,        /* nei_d */
+    17,        /* unltr_d */
+    32,        /* unlti_d */
+    17,        /* unler_d */
+    32,        /* unlei_d */
+    17,        /* uneqr_d */
+    32,        /* uneqi_d */
+    17,        /* unger_d */
+    32,        /* ungei_d */
+    17,        /* ungtr_d */
+    32,        /* ungti_d */
+    17,        /* ltgtr_d */
+    32,        /* ltgti_d */
+    17,        /* ordr_d */
+    32,        /* ordi_d */
+    17,        /* unordr_d */
+    32,        /* unordi_d */
+    5, /* truncr_d_i */
+    5, /* truncr_d_l */
+    5, /* extr_d */
+    5, /* extr_f_d */
+    5, /* movr_d */
+    15,        /* movi_d */
+    5, /* ldr_d */
+    16,        /* ldi_d */
+    6, /* ldxr_d */
+    8, /* ldxi_d */
+    5, /* str_d */
+    16,        /* sti_d */
+    6, /* stxr_d */
+    9, /* stxi_d */
+    11,        /* bltr_d */
+    26,        /* blti_d */
+    11,        /* bler_d */
+    26,        /* blei_d */
+    13,        /* beqr_d */
+    28,        /* beqi_d */
+    11,        /* bger_d */
+    26,        /* bgei_d */
+    11,        /* bgtr_d */
+    26,        /* bgti_d */
+    14,        /* bner_d */
+    29,        /* bnei_d */
+    11,        /* bunltr_d */
+    26,        /* bunlti_d */
+    11,        /* bunler_d */
+    26,        /* bunlei_d */
+    11,        /* buneqr_d */
+    26,        /* buneqi_d */
+    11,        /* bunger_d */
+    26,        /* bungei_d */
+    11,        /* bungtr_d */
+    26,        /* bungti_d */
+    11,        /* bltgtr_d */
+    26,        /* bltgti_d */
+    11,        /* bordr_d */
+    26,        /* bordi_d */
+    11,        /* bunordr_d */
+    26,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#else
+
+#  if __X64_32
+#define JIT_INSTR_MAX 108
+    0, /* data */
+    0, /* live */
+    3, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    3, /* label */
+    108,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    41,        /* va_start */
+    45,        /* va_arg */
+    54,        /* va_arg_d */
+    0, /* va_end */
+    5, /* addr */
+    7, /* addi */
+    6, /* addcr */
+    7, /* addci */
+    6, /* addxr */
+    7, /* addxi */
+    6, /* subr */
+    7, /* subi */
+    9, /* subcr */
+    7, /* subci */
+    9, /* subxr */
+    7, /* subxi */
+    10,        /* rsbi */
+    7, /* mulr */
+    8, /* muli */
+    18,        /* qmulr */
+    24,        /* qmuli */
+    18,        /* qmulr_u */
+    24,        /* qmuli_u */
+    19,        /* divr */
+    22,        /* divi */
+    20,        /* divr_u */
+    23,        /* divi_u */
+    22,        /* qdivr */
+    25,        /* qdivi */
+    23,        /* qdivr_u */
+    26,        /* qdivi_u */
+    19,        /* remr */
+    22,        /* remi */
+    20,        /* remr_u */
+    23,        /* remi_u */
+    6, /* andr */
+    9, /* andi */
+    6, /* orr */
+    10,        /* ori */
+    6, /* xorr */
+    10,        /* xori */
+    9, /* lshr */
+    8, /* lshi */
+    9, /* rshr */
+    7, /* rshi */
+    9, /* rshr_u */
+    7, /* rshi_u */
+    6, /* negr */
+    6, /* comr */
+    13,        /* ltr */
+    14,        /* lti */
+    13,        /* ltr_u */
+    14,        /* lti_u */
+    13,        /* ler */
+    14,        /* lei */
+    13,        /* ler_u */
+    14,        /* lei_u */
+    13,        /* eqr */
+    14,        /* eqi */
+    13,        /* ger */
+    14,        /* gei */
+    13,        /* ger_u */
+    14,        /* gei_u */
+    13,        /* gtr */
+    14,        /* gti */
+    13,        /* gtr_u */
+    14,        /* gti_u */
+    13,        /* ner */
+    14,        /* nei */
+    3, /* movr */
+    6, /* movi */
+    7, /* extr_c */
+    7, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    9, /* htonr_us */
+    6, /* htonr_ui */
+    0, /* htonr_ul */
+    5, /* ldr_c */
+    9, /* ldi_c */
+    5, /* ldr_uc */
+    9, /* ldi_uc */
+    5, /* ldr_s */
+    9, /* ldi_s */
+    5, /* ldr_us */
+    9, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    9, /* ldxr_c */
+    7, /* ldxi_c */
+    9, /* ldxr_uc */
+    7, /* ldxi_uc */
+    9, /* ldxr_s */
+    7, /* ldxi_s */
+    9, /* ldxr_us */
+    7, /* ldxi_us */
+    8, /* ldxr_i */
+    7, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    7, /* str_c */
+    11,        /* sti_c */
+    5, /* str_s */
+    9, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    12,        /* stxr_c */
+    7, /* stxi_c */
+    10,        /* stxr_s */
+    7, /* stxi_s */
+    9, /* stxr_i */
+    6, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    9, /* bltr */
+    10,        /* blti */
+    9, /* bltr_u */
+    10,        /* blti_u */
+    9, /* bler */
+    12,        /* blei */
+    9, /* bler_u */
+    10,        /* blei_u */
+    9, /* beqr */
+    13,        /* beqi */
+    9, /* bger */
+    10,        /* bgei */
+    9, /* bger_u */
+    10,        /* bgei_u */
+    9, /* bgtr */
+    10,        /* bgti */
+    9, /* bgtr_u */
+    10,        /* bgti_u */
+    9, /* bner */
+    13,        /* bnei */
+    9, /* bmsr */
+    13,        /* bmsi */
+    9, /* bmcr */
+    13,        /* bmci */
+    9, /* boaddr */
+    10,        /* boaddi */
+    9, /* boaddr_u */
+    10,        /* boaddi_u */
+    9, /* bxaddr */
+    10,        /* bxaddi */
+    9, /* bxaddr_u */
+    10,        /* bxaddi_u */
+    9, /* bosubr */
+    10,        /* bosubi */
+    9, /* bosubr_u */
+    10,        /* bosubi_u */
+    9, /* bxsubr */
+    10,        /* bxsubi */
+    9, /* bxsubr_u */
+    10,        /* bxsubi_u */
+    2, /* jmpr */
+    5, /* jmpi */
+    3, /* callr */
+    9, /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    34,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    10,        /* addr_f */
+    21,        /* addi_f */
+    15,        /* subr_f */
+    21,        /* subi_f */
+    26,        /* rsbi_f */
+    10,        /* mulr_f */
+    21,        /* muli_f */
+    15,        /* divr_f */
+    21,        /* divi_f */
+    15,        /* negr_f */
+    15,        /* absr_f */
+    5, /* sqrtr_f */
+    15,        /* ltr_f */
+    26,        /* lti_f */
+    15,        /* ler_f */
+    26,        /* lei_f */
+    17,        /* eqr_f */
+    28,        /* eqi_f */
+    15,        /* ger_f */
+    26,        /* gei_f */
+    15,        /* gtr_f */
+    26,        /* gti_f */
+    20,        /* ner_f */
+    31,        /* nei_f */
+    15,        /* unltr_f */
+    26,        /* unlti_f */
+    15,        /* unler_f */
+    26,        /* unlei_f */
+    15,        /* uneqr_f */
+    26,        /* uneqi_f */
+    15,        /* unger_f */
+    26,        /* ungei_f */
+    15,        /* ungtr_f */
+    26,        /* ungti_f */
+    15,        /* ltgtr_f */
+    26,        /* ltgti_f */
+    15,        /* ordr_f */
+    26,        /* ordi_f */
+    15,        /* unordr_f */
+    26,        /* unordi_f */
+    5, /* truncr_f_i */
+    0, /* truncr_f_l */
+    5, /* extr_f */
+    5, /* extr_d_f */
+    5, /* movr_f */
+    11,        /* movi_f */
+    6, /* ldr_f */
+    10,        /* ldi_f */
+    11,        /* ldxr_f */
+    9, /* ldxi_f */
+    6, /* str_f */
+    10,        /* sti_f */
+    11,        /* stxr_f */
+    9, /* stxi_f */
+    10,        /* bltr_f */
+    21,        /* blti_f */
+    10,        /* bler_f */
+    21,        /* blei_f */
+    12,        /* beqr_f */
+    23,        /* beqi_f */
+    10,        /* bger_f */
+    21,        /* bgei_f */
+    10,        /* bgtr_f */
+    21,        /* bgti_f */
+    13,        /* bner_f */
+    24,        /* bnei_f */
+    10,        /* bunltr_f */
+    21,        /* bunlti_f */
+    10,        /* bunler_f */
+    21,        /* bunlei_f */
+    10,        /* buneqr_f */
+    21,        /* buneqi_f */
+    10,        /* bunger_f */
+    21,        /* bungei_f */
+    10,        /* bungtr_f */
+    21,        /* bungti_f */
+    10,        /* bltgtr_f */
+    21,        /* bltgti_f */
+    10,        /* bordr_f */
+    21,        /* bordi_f */
+    10,        /* bunordr_f */
+    21,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    10,        /* addr_d */
+    33,        /* addi_d */
+    15,        /* subr_d */
+    33,        /* subi_d */
+    38,        /* rsbi_d */
+    10,        /* mulr_d */
+    33,        /* muli_d */
+    15,        /* divr_d */
+    33,        /* divi_d */
+    22,        /* negr_d */
+    16,        /* absr_d */
+    5, /* sqrtr_d */
+    16,        /* ltr_d */
+    39,        /* lti_d */
+    16,        /* ler_d */
+    39,        /* lei_d */
+    18,        /* eqr_d */
+    41,        /* eqi_d */
+    16,        /* ger_d */
+    39,        /* gei_d */
+    16,        /* gtr_d */
+    39,        /* gti_d */
+    21,        /* ner_d */
+    44,        /* nei_d */
+    16,        /* unltr_d */
+    39,        /* unlti_d */
+    16,        /* unler_d */
+    39,        /* unlei_d */
+    16,        /* uneqr_d */
+    39,        /* uneqi_d */
+    16,        /* unger_d */
+    39,        /* ungei_d */
+    16,        /* ungtr_d */
+    39,        /* ungti_d */
+    16,        /* ltgtr_d */
+    39,        /* ltgti_d */
+    16,        /* ordr_d */
+    39,        /* ordi_d */
+    16,        /* unordr_d */
+    39,        /* unordi_d */
+    5, /* truncr_d_i */
+    0, /* truncr_d_l */
+    5, /* extr_d */
+    5, /* extr_f_d */
+    5, /* movr_d */
+    23,        /* movi_d */
+    6, /* ldr_d */
+    10,        /* ldi_d */
+    11,        /* ldxr_d */
+    9, /* ldxi_d */
+    6, /* str_d */
+    10,        /* sti_d */
+    11,        /* stxr_d */
+    9, /* stxi_d */
+    11,        /* bltr_d */
+    34,        /* blti_d */
+    11,        /* bler_d */
+    34,        /* blei_d */
+    13,        /* beqr_d */
+    36,        /* beqi_d */
+    11,        /* bger_d */
+    34,        /* bgei_d */
+    11,        /* bgtr_d */
+    34,        /* bgti_d */
+    14,        /* bner_d */
+    37,        /* bnei_d */
+    11,        /* bunltr_d */
+    34,        /* bunlti_d */
+    11,        /* bunler_d */
+    34,        /* bunlei_d */
+    11,        /* buneqr_d */
+    34,        /* buneqi_d */
+    11,        /* bunger_d */
+    34,        /* bungei_d */
+    11,        /* bungtr_d */
+    34,        /* bungti_d */
+    11,        /* bltgtr_d */
+    34,        /* bltgti_d */
+    11,        /* bordr_d */
+    34,        /* bordi_d */
+    11,        /* bunordr_d */
+    34,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+
+#  else
+#define JIT_INSTR_MAX 115
+    0, /* data */
+    0, /* live */
+    6, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    7, /* label */
+    115,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    42,        /* va_start */
+    41,        /* va_arg */
+    50,        /* va_arg_d */
+    0, /* va_end */
+    5, /* addr */
+    13,        /* addi */
+    6, /* addcr */
+    13,        /* addci */
+    6, /* addxr */
+    7, /* addxi */
+    6, /* subr */
+    13,        /* subi */
+    9, /* subcr */
+    13,        /* subci */
+    9, /* subxr */
+    7, /* subxi */
+    16,        /* rsbi */
+    7, /* mulr */
+    14,        /* muli */
+    20,        /* qmulr */
+    30,        /* qmuli */
+    20,        /* qmulr_u */
+    30,        /* qmuli_u */
+    22,        /* divr */
+    29,        /* divi */
+    23,        /* divr_u */
+    30,        /* divi_u */
+    25,        /* qdivr */
+    32,        /* qdivi */
+    26,        /* qdivr_u */
+    33,        /* qdivi_u */
+    22,        /* remr */
+    29,        /* remi */
+    23,        /* remr_u */
+    30,        /* remi_u */
+    6, /* andr */
+    13,        /* andi */
+    6, /* orr */
+    13,        /* ori */
+    6, /* xorr */
+    13,        /* xori */
+    9, /* lshr */
+    8, /* lshi */
+    9, /* rshr */
+    7, /* rshi */
+    9, /* rshr_u */
+    7, /* rshi_u */
+    6, /* negr */
+    6, /* comr */
+    13,        /* ltr */
+    14,        /* lti */
+    13,        /* ltr_u */
+    14,        /* lti_u */
+    13,        /* ler */
+    14,        /* lei */
+    13,        /* ler_u */
+    14,        /* lei_u */
+    13,        /* eqr */
+    14,        /* eqi */
+    13,        /* ger */
+    14,        /* gei */
+    13,        /* ger_u */
+    14,        /* gei_u */
+    13,        /* gtr */
+    14,        /* gti */
+    13,        /* gtr_u */
+    14,        /* gti_u */
+    13,        /* ner */
+    14,        /* nei */
+    3, /* movr */
+    10,        /* movi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    3, /* extr_i */
+    3, /* extr_ui */
+    9, /* htonr_us */
+    6, /* htonr_ui */
+    6, /* htonr_ul */
+    5, /* ldr_c */
+    9, /* ldi_c */
+    5, /* ldr_uc */
+    9, /* ldi_uc */
+    5, /* ldr_s */
+    9, /* ldi_s */
+    5, /* ldr_us */
+    9, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    4, /* ldr_ui */
+    8, /* ldi_ui */
+    4, /* ldr_l */
+    8, /* ldi_l */
+    6, /* ldxr_c */
+    8, /* ldxi_c */
+    6, /* ldxr_uc */
+    8, /* ldxi_uc */
+    6, /* ldxr_s */
+    8, /* ldxi_s */
+    6, /* ldxr_us */
+    8, /* ldxi_us */
+    5, /* ldxr_i */
+    7, /* ldxi_i */
+    5, /* ldxr_ui */
+    6, /* ldxi_ui */
+    5, /* ldxr_l */
+    7, /* ldxi_l */
+    4, /* str_c */
+    8, /* sti_c */
+    5, /* str_s */
+    9, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    4, /* str_l */
+    8, /* sti_l */
+    5, /* stxr_c */
+    6, /* stxi_c */
+    6, /* stxr_s */
+    7, /* stxi_s */
+    5, /* stxr_i */
+    6, /* stxi_i */
+    5, /* stxr_l */
+    7, /* stxi_l */
+    9, /* bltr */
+    10,        /* blti */
+    9, /* bltr_u */
+    10,        /* blti_u */
+    9, /* bler */
+    13,        /* blei */
+    9, /* bler_u */
+    10,        /* blei_u */
+    9, /* beqr */
+    19,        /* beqi */
+    9, /* bger */
+    10,        /* bgei */
+    9, /* bger_u */
+    10,        /* bgei_u */
+    9, /* bgtr */
+    10,        /* bgti */
+    9, /* bgtr_u */
+    10,        /* bgti_u */
+    9, /* bner */
+    19,        /* bnei */
+    9, /* bmsr */
+    13,        /* bmsi */
+    9, /* bmcr */
+    13,        /* bmci */
+    9, /* boaddr */
+    10,        /* boaddi */
+    9, /* boaddr_u */
+    10,        /* boaddi_u */
+    9, /* bxaddr */
+    10,        /* bxaddi */
+    9, /* bxaddr_u */
+    10,        /* bxaddi_u */
+    9, /* bosubr */
+    10,        /* bosubi */
+    9, /* bosubr_u */
+    10,        /* bosubi_u */
+    9, /* bxsubr */
+    10,        /* bxsubi */
+    9, /* bxsubr_u */
+    10,        /* bxsubi_u */
+    3, /* jmpr */
+    5, /* jmpi */
+    3, /* callr */
+    13,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    37,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    10,        /* addr_f */
+    21,        /* addi_f */
+    15,        /* subr_f */
+    21,        /* subi_f */
+    30,        /* rsbi_f */
+    10,        /* mulr_f */
+    21,        /* muli_f */
+    15,        /* divr_f */
+    21,        /* divi_f */
+    15,        /* negr_f */
+    15,        /* absr_f */
+    5, /* sqrtr_f */
+    11,        /* ltr_f */
+    26,        /* lti_f */
+    11,        /* ler_f */
+    26,        /* lei_f */
+    13,        /* eqr_f */
+    28,        /* eqi_f */
+    11,        /* ger_f */
+    26,        /* gei_f */
+    11,        /* gtr_f */
+    26,        /* gti_f */
+    16,        /* ner_f */
+    31,        /* nei_f */
+    11,        /* unltr_f */
+    26,        /* unlti_f */
+    11,        /* unler_f */
+    26,        /* unlei_f */
+    11,        /* uneqr_f */
+    26,        /* uneqi_f */
+    11,        /* unger_f */
+    26,        /* ungei_f */
+    11,        /* ungtr_f */
+    26,        /* ungti_f */
+    11,        /* ltgtr_f */
+    26,        /* ltgti_f */
+    11,        /* ordr_f */
+    26,        /* ordi_f */
+    11,        /* unordr_f */
+    26,        /* unordi_f */
+    5, /* truncr_f_i */
+    5, /* truncr_f_l */
+    5, /* extr_f */
+    5, /* extr_d_f */
+    5, /* movr_f */
+    15,        /* movi_f */
+    6, /* ldr_f */
+    10,        /* ldi_f */
+    7, /* ldxr_f */
+    9, /* ldxi_f */
+    6, /* str_f */
+    10,        /* sti_f */
+    7, /* stxr_f */
+    9, /* stxi_f */
+    10,        /* bltr_f */
+    21,        /* blti_f */
+    10,        /* bler_f */
+    25,        /* blei_f */
+    12,        /* beqr_f */
+    27,        /* beqi_f */
+    10,        /* bger_f */
+    25,        /* bgei_f */
+    10,        /* bgtr_f */
+    25,        /* bgti_f */
+    13,        /* bner_f */
+    28,        /* bnei_f */
+    10,        /* bunltr_f */
+    25,        /* bunlti_f */
+    10,        /* bunler_f */
+    25,        /* bunlei_f */
+    10,        /* buneqr_f */
+    25,        /* buneqi_f */
+    10,        /* bunger_f */
+    25,        /* bungei_f */
+    10,        /* bungtr_f */
+    25,        /* bungti_f */
+    10,        /* bltgtr_f */
+    25,        /* bltgti_f */
+    10,        /* bordr_f */
+    25,        /* bordi_f */
+    10,        /* bunordr_f */
+    25,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    10,        /* addr_d */
+    25,        /* addi_d */
+    15,        /* subr_d */
+    25,        /* subi_d */
+    30,        /* rsbi_d */
+    10,        /* mulr_d */
+    25,        /* muli_d */
+    15,        /* divr_d */
+    25,        /* divi_d */
+    22,        /* negr_d */
+    16,        /* absr_d */
+    5, /* sqrtr_d */
+    12,        /* ltr_d */
+    27,        /* lti_d */
+    12,        /* ler_d */
+    27,        /* lei_d */
+    14,        /* eqr_d */
+    29,        /* eqi_d */
+    12,        /* ger_d */
+    27,        /* gei_d */
+    12,        /* gtr_d */
+    27,        /* gti_d */
+    17,        /* ner_d */
+    32,        /* nei_d */
+    12,        /* unltr_d */
+    27,        /* unlti_d */
+    12,        /* unler_d */
+    27,        /* unlei_d */
+    12,        /* uneqr_d */
+    27,        /* uneqi_d */
+    12,        /* unger_d */
+    27,        /* ungei_d */
+    12,        /* ungtr_d */
+    27,        /* ungti_d */
+    12,        /* ltgtr_d */
+    27,        /* ltgti_d */
+    12,        /* ordr_d */
+    27,        /* ordi_d */
+    12,        /* unordr_d */
+    27,        /* unordi_d */
+    5, /* truncr_d_i */
+    5, /* truncr_d_l */
+    5, /* extr_d */
+    5, /* extr_f_d */
+    5, /* movr_d */
+    15,        /* movi_d */
+    6, /* ldr_d */
+    10,        /* ldi_d */
+    7, /* ldxr_d */
+    9, /* ldxi_d */
+    6, /* str_d */
+    10,        /* sti_d */
+    7, /* stxr_d */
+    9, /* stxi_d */
+    11,        /* bltr_d */
+    26,        /* blti_d */
+    11,        /* bler_d */
+    26,        /* blei_d */
+    13,        /* beqr_d */
+    28,        /* beqi_d */
+    11,        /* bger_d */
+    26,        /* bgei_d */
+    11,        /* bgtr_d */
+    26,        /* bgti_d */
+    14,        /* bner_d */
+    29,        /* bnei_d */
+    11,        /* bunltr_d */
+    26,        /* bunlti_d */
+    11,        /* bunler_d */
+    26,        /* bunlei_d */
+    11,        /* buneqr_d */
+    26,        /* buneqi_d */
+    11,        /* bunger_d */
+    26,        /* bungei_d */
+    11,        /* bungtr_d */
+    26,        /* bungti_d */
+    11,        /* bltgtr_d */
+    26,        /* bltgti_d */
+    11,        /* bordr_d */
+    26,        /* bordi_d */
+    11,        /* bunordr_d */
+    26,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#  endif /* __X64_32 */
+#endif /* __CYGWIN__ || _WIN32 */
+#endif /* __X64 */
diff --git a/deps/lightning/lib/jit_x86-x87.c b/deps/lightning/lib/jit_x86-x87.c
new file mode 100644 (file)
index 0000000..4453bf3
--- /dev/null
@@ -0,0 +1,1344 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/*
+ * x87_address_p(i0): compile-time selected predicate on an immediate
+ * address i0.  It is the constant 1 on __X32, tests that i0 is
+ * non-negative as a jit_word_t on __X64_32, and defers to
+ * can_sign_extend_int_p() otherwise.  Presumably used by the immediate
+ * address load/store emitters to decide whether i0 can be encoded
+ * directly -- confirm against the callers.
+ */
+#  if __X32
+#    define x87_address_p(i0)          1
+#  else
+#    if __X64_32
+#      define x87_address_p(i0)                ((jit_word_t)(i0) >= 0)
+#    else
+#      define x87_address_p(i0)                can_sign_extend_int_p(i0)
+#    endif
+#  endif
+/*
+ * Numeric values of the x87 stack registers ST0..ST6 as passed in the
+ * register operands of the emitters in this file.  The code compares
+ * operands against _ST0_REGNO to pick an instruction form.
+ */
+#  define _ST0_REGNO                   0
+#  define _ST1_REGNO                   1
+#  define _ST2_REGNO                   2
+#  define _ST3_REGNO                   3
+#  define _ST4_REGNO                   4
+#  define _ST5_REGNO                   5
+#  define _ST6_REGNO                   6
+/*
+ * Memory operand x87 instructions.
+ *
+ * `code' is a two digit octal value: _x87rx() emits the opcode byte
+ * 0xd8 | (code >> 3) and hands (code & 7) to rx() together with the
+ * md/rb/ri/ms addressing operands.  fstcwm has its own function because
+ * _fstcwm() emits a 0x9b byte before delegating to x87rx(017, ...).
+ */
+#  define x87rx(code, md, rb, ri, ms)  _x87rx(_jit, code, md, rb, ri, ms)
+#  define fldcwm(md, rb, ri, ms)       x87rx(015, md, rb, ri, ms)
+#  define fstcwm(md, rb, ri, ms)       _fstcwm(_jit, md, rb, ri, ms)
+static void
+_fstcwm(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define fldsm(md, rb, ri, ms)                x87rx(010, md, rb, ri, ms)
+#  define fstsm(md, rb, ri, ms)                x87rx(012, md, rb, ri, ms)
+#  define fldlm(md, rb, ri, ms)                x87rx(050, md, rb, ri, ms)
+#  define fstlm(md, rb, ri, ms)                x87rx(052, md, rb, ri, ms)
+#  define fisttplm(md, rb, ri, ms)     x87rx(031, md, rb, ri, ms)
+#  define fistlm(md, rb, ri, ms)       x87rx(032, md, rb, ri, ms)
+/*
+ * NOTE(review): 071 selects opcode byte 0xdf with /1.  The Intel SDM
+ * lists FISTTP m64int as 0xdd /1 (which would be 051 here) and 0xdf /1
+ * as the 16 bit form -- verify before relying on x87_truncr_d_l.
+ */
+#  define fisttpqm(md, rb, ri, ms)     x87rx(071, md, rb, ri, ms)
+#  define fildlm(md, rb, ri, ms)       x87rx(030, md, rb,ri, ms)
+#  define fildqm(md, rb, ri, ms)       x87rx(075, md, rb,ri, ms)
+static void
+_x87rx(jit_state_t*, jit_int32_t, jit_int32_t,
+       jit_int32_t, jit_int32_t, jit_int32_t);
+/*
+ * Register operand (or operand-less) x87 instructions.
+ *
+ * As with x87rx, the first argument is a two digit octal value:
+ * _x87ri() emits 0xd8 | (code >> 3) followed by mrm(0x03, code & 7, r0).
+ * The macros taking no argument pass a fixed value in place of r0.
+ */
+#  define x87ri(cc,r0)                 _x87ri(_jit,cc,r0)
+#  define fchs_()                      x87ri(014, 0)
+#  define fabs_()                      x87ri(014, 1)
+#  define fld1()                       x87ri(015, 0)
+#  define fldl2t()                     x87ri(015, 1)
+#  define fldl2e()                     x87ri(015, 2)
+#  define fldpi()                      x87ri(015, 3)
+#  define fldlg2()                     x87ri(015, 4)
+#  define fldln2()                     x87ri(015, 5)
+#  define fldz()                       x87ri(015, 6)
+#  define fsqrt_()                     x87ri(017, 2)
+#  define fldr(r0)                     x87ri(010, r0)
+#  define fxchr(r0)                    x87ri(011, r0)
+#  define fstr(r0)                     x87ri(052, r0)
+#  define fstpr(r0)                    x87ri(053, r0)
+#  define fucomir(r0)                  x87ri(035, r0)
+#  define fucomipr(r0)                 x87ri(075, r0)
+static void _x87ri(jit_state_t*, jit_int32_t, jit_int32_t);
+/*
+ * Two register arithmetic.  _x87rri() requires one of the operands to
+ * be ST0: when r1 is ST0 it emits x87ri(code | 040, r0), otherwise it
+ * asserts that r0 is ST0 and emits x87ri(code, r1).
+ */
+#  define faddr(r0, r1)                        x87rri(000, r0, r1)
+#  define fmulr(r0, r1)                        x87rri(001, r0, r1)
+#  define fsubr(r0, r1)                        x87rri(004, r0, r1)
+#  define fsubrr(r0, r1)               x87rri(005, r0, r1)
+#  define fdivr(r0, r1)                        x87rri(006, r0, r1)
+#  define fdivrr(r0, r1)               x87rri(007, r0, r1)
+#  define x87rri(cc, r0, r1)           _x87rri(_jit, cc, r0, r1)
+static void _x87rri(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+/*
+ * Arithmetic on x87 registers (add, sub, rsb, mul, div).
+ *
+ * The register forms of the _f operations expand to the same _d
+ * functions as the double forms.  rsbr is subr with the two source
+ * registers swapped.  The immediate forms take a pointer to the
+ * constant and have separate _f and _d functions, which are generated
+ * by the fopi()/dopi() macros in the CODE section.
+ */
+#  define x87_addr_f(r0, r1, r2)       _x87_addr_d(_jit, r0, r1, r2)
+#  define x87_addi_f(r0, r1, i0)       _x87_addi_f(_jit, r0, r1, i0)
+static void _x87_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_addr_d(r0, r1, r2)       _x87_addr_d(_jit, r0, r1, r2)
+static void _x87_addr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_addi_d(r0, r1, i0)       _x87_addi_d(_jit, r0, r1, i0)
+static void _x87_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_subr_f(r0, r1, r2)       _x87_subr_d(_jit, r0, r1, r2)
+#  define x87_subi_f(r0, r1, i0)       _x87_subi_f(_jit, r0, r1, i0)
+static void _x87_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_subr_d(r0, r1, r2)       _x87_subr_d(_jit, r0, r1, r2)
+static void _x87_subr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_subi_d(r0, r1, i0)       _x87_subi_d(_jit, r0, r1, i0)
+static void _x87_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_rsbr_f(r0, r1, r2)       x87_subr_f(r0, r2, r1)
+#  define x87_rsbi_f(r0, r1, i0)       _x87_rsbi_f(_jit, r0, r1, i0)
+static void _x87_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_rsbr_d(r0, r1, r2)       x87_subr_d(r0, r2, r1)
+#  define x87_rsbi_d(r0, r1, i0)       _x87_rsbi_d(_jit, r0, r1, i0)
+static void _x87_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_mulr_f(r0, r1, r2)       _x87_mulr_d(_jit, r0, r1, r2)
+#  define x87_muli_f(r0, r1, i0)       _x87_muli_f(_jit, r0, r1, i0)
+static void _x87_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_mulr_d(r0, r1, r2)       _x87_mulr_d(_jit, r0, r1, r2)
+static void _x87_mulr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_muli_d(r0, r1, i0)       _x87_muli_d(_jit, r0, r1, i0)
+static void _x87_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_divr_f(r0, r1, r2)       _x87_divr_d(_jit, r0, r1, r2)
+#  define x87_divi_f(r0, r1, i0)       _x87_divi_f(_jit, r0, r1, i0)
+static void _x87_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_divr_d(r0, r1, r2)       _x87_divr_d(_jit, r0, r1, r2)
+static void _x87_divr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_divi_d(r0, r1, i0)       _x87_divi_d(_jit, r0, r1, i0)
+static void _x87_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/*
+ * Unary operations (abs, neg, sqrt) and conversions.
+ *
+ * Every _f macro here shares the _d implementation.  The conversions
+ * between float and double (extr_f_d, extr_d_f) expand to a plain
+ * x87_movr_d.  The truncation to a long is only declared when __X64 is
+ * set.
+ */
+#  define x87_absr_f(r0, r1)           _x87_absr_d(_jit, r0, r1)
+#  define x87_absr_d(r0, r1)           _x87_absr_d(_jit, r0, r1)
+static void _x87_absr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define x87_negr_f(r0, r1)           _x87_negr_d(_jit, r0, r1)
+#  define x87_negr_d(r0, r1)           _x87_negr_d(_jit, r0, r1)
+static void _x87_negr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define x87_sqrtr_f(r0, r1)          _x87_sqrtr_d(_jit, r0, r1)
+#  define x87_sqrtr_d(r0, r1)          _x87_sqrtr_d(_jit, r0, r1)
+static void _x87_sqrtr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define x87_truncr_f_i(r0, r1)       _x87_truncr_d_i(_jit, r0, r1)
+#  define x87_truncr_d_i(r0, r1)       _x87_truncr_d_i(_jit, r0, r1)
+static void _x87_truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __X64
+#    define x87_truncr_f_l(r0, r1)     _x87_truncr_d_l(_jit, r0, r1)
+#    define x87_truncr_d_l(r0, r1)     _x87_truncr_d_l(_jit, r0, r1)
+static void _x87_truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  define x87_extr_f(r0, r1)           _x87_extr_d(_jit, r0, r1)
+#  define x87_extr_d(r0, r1)           _x87_extr_d(_jit, r0, r1)
+#  define x87_extr_f_d(r0, r1)         x87_movr_d(r0, r1)
+#  define x87_extr_d_f(r0, r1)         x87_movr_d(r0, r1)
+static void _x87_extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+/*
+ * Shared helpers behind the comparison and branch macros below.
+ * x87cmp/x87cmp2 take a condition code, a destination register and two
+ * x87 registers; x87jcc/x87jcc2 take a condition code, a jit_word_t i0
+ * and two x87 registers, and return a jit_word_t.
+ */
+#  define x87cmp(code, r0, r1, r2)     _x87cmp(_jit, code, r0, r1, r2)
+static void
+_x87cmp(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87cmp2(code, r0, r1, r2)    _x87cmp2(_jit, code, r0, r1, r2)
+static void
+_x87cmp2(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87jcc(code, i0, r0, r1)     _x87jcc(_jit, code, i0, r0, r1)
+static jit_word_t
+_x87jcc(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_int32_t);
+#  define x87jcc2(code, i0, r0, r1)    _x87jcc2(_jit, code, i0, r0, r1)
+static jit_word_t
+_x87jcc2(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_int32_t);
+/*
+ * Single precision immediate move, loads and stores.  The store macros
+ * with an immediate (sti, stxi) take the address or offset i0 as their
+ * first argument.
+ */
+#define x87_movi_f(r0,i0)              _x87_movi_f(_jit,r0,i0)
+static void _x87_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+#  define x87_ldr_f(r0, r1)            _x87_ldr_f(_jit, r0, r1)
+static void _x87_ldr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define x87_ldi_f(r0, i0)            _x87_ldi_f(_jit, r0, i0)
+static void _x87_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
+#  define x87_ldxr_f(r0, r1, r2)       _x87_ldxr_f(_jit, r0, r1, r2)
+static void _x87_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_ldxi_f(r0, r1, i0)       _x87_ldxi_f(_jit, r0, r1, i0)
+static void _x87_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define x87_str_f(r0, r1)            _x87_str_f(_jit, r0, r1)
+static void _x87_str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define x87_sti_f(i0, r0)            _x87_sti_f(_jit, i0, r0)
+static void _x87_sti_f(jit_state_t*,jit_word_t, jit_int32_t);
+#  define x87_stxr_f(r0, r1, r2)       _x87_stxr_f(_jit, r0, r1, r2)
+static void _x87_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define x87_stxi_f(i0, r0, r1)       _x87_stxi_f(_jit, i0, r0, r1)
+static void _x87_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/*
+ * Single precision comparisons storing a result in r0.
+ *
+ * Most register forms expand directly to x87cmp()/x87cmp2() with a
+ * condition code; lt, le, unge and ungt pass the two source registers
+ * swapped (r2, r1).  eq, ne and ltgt forward to the _d macros.  The
+ * immediate forms take a pointer to the constant and are separate
+ * functions.
+ */
+#  define x87_ltr_f(r0, r1, r2)                x87cmp(X86_CC_A, r0, r2, r1)
+#  define x87_lti_f(r0, r1, i0)                _x87_lti_f(_jit, r0, r1, i0)
+static void _x87_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ler_f(r0, r1, r2)                x87cmp(X86_CC_AE, r0, r2, r1)
+#  define x87_lei_f(r0, r1, i0)                _x87_lei_f(_jit, r0, r1, i0)
+static void _x87_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_eqr_f(r0, r1, r2)                x87_eqr_d(r0, r2, r1)
+#  define x87_eqi_f(r0, r1, i0)                _x87_eqi_f(_jit, r0, r1, i0)
+static void _x87_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ger_f(r0, r1, r2)                x87cmp(X86_CC_AE, r0, r1, r2)
+#  define x87_gei_f(r0, r1, i0)                _x87_gei_f(_jit, r0, r1, i0)
+static void _x87_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_gtr_f(r0, r1, r2)                x87cmp(X86_CC_A, r0, r1, r2)
+#  define x87_gti_f(r0, r1, i0)                _x87_gti_f(_jit, r0, r1, i0)
+static void _x87_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ner_f(r0, r1, r2)                x87_ner_d(r0, r2, r1)
+#  define x87_nei_f(r0, r1, i0)                _x87_nei_f(_jit, r0, r1, i0)
+static void _x87_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_unltr_f(r0, r1, r2)      x87cmp(X86_CC_NAE, r0, r1, r2)
+#  define x87_unlti_f(r0, r1, i0)      _x87_unlti_f(_jit, r0, r1, i0)
+static void _x87_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_unler_f(r0, r1, r2)      x87cmp(X86_CC_NA, r0, r1, r2)
+#  define x87_unlei_f(r0, r1, i0)      _x87_unlei_f(_jit, r0, r1, i0)
+static void _x87_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_uneqr_f(r0, r1, r2)      x87cmp2(X86_CC_E, r0, r1, r2)
+#  define x87_uneqi_f(r0, r1, i0)      _x87_uneqi_f(_jit, r0, r1, i0)
+static void _x87_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_unger_f(r0, r1, r2)      x87cmp(X86_CC_NA, r0, r2, r1)
+#  define x87_ungei_f(r0, r1, i0)      _x87_ungei_f(_jit, r0, r1, i0)
+static void _x87_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ungtr_f(r0, r1, r2)      x87cmp(X86_CC_NAE, r0, r2, r1)
+#  define x87_ungti_f(r0, r1, i0)      _x87_ungti_f(_jit, r0, r1, i0)
+static void _x87_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ltgtr_f(r0, r1, r2)      x87_ltgtr_d(r0, r1, r2)
+#  define x87_ltgti_f(r0, r1, i0)      _x87_ltgti_f(_jit, r0, r1, i0)
+static void _x87_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_ordr_f(r0, r1, r2)       x87cmp2(X86_CC_NP, r0, r2, r1)
+#  define x87_ordi_f(r0, r1, i0)       _x87_ordi_f(_jit, r0, r1, i0)
+static void _x87_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define x87_unordr_f(r0, r1, r2)     x87cmp2(X86_CC_P, r0, r2, r1)
+#  define x87_unordi_f(r0, r1, i0)     _x87_unordi_f(_jit, r0, r1, i0)
+static void _x87_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+/*
+ * Double precision comparisons storing a result in r0.
+ *
+ * Same scheme as the single precision macros: most register forms map
+ * to x87cmp()/x87cmp2() with a condition code, some of them with the
+ * two source registers swapped.  eq, ne and ltgt have dedicated
+ * functions; note that the eq and ne macros pass (r0, r2, r1) to them.
+ * The immediate forms take a pointer to the constant.
+ */
+#  define x87_ltr_d(r0, r1, r2)                x87cmp(X86_CC_A, r0, r2, r1)
+#  define x87_lti_d(r0, r1, i0)                _x87_lti_d(_jit, r0, r1, i0)
+static void _x87_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ler_d(r0, r1, r2)                x87cmp(X86_CC_AE, r0, r2, r1)
+#  define x87_lei_d(r0, r1, i0)                _x87_lei_d(_jit, r0, r1, i0)
+static void _x87_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_eqr_d(r0, r1, r2)                _x87_eqr_d(_jit, r0, r2, r1)
+static void _x87_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_eqi_d(r0, r1, i0)                _x87_eqi_d(_jit, r0, r1, i0)
+static void _x87_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ger_d(r0, r1, r2)                x87cmp(X86_CC_AE, r0, r1, r2)
+#  define x87_gei_d(r0, r1, i0)                _x87_gei_d(_jit, r0, r1, i0)
+static void _x87_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_gtr_d(r0, r1, r2)                x87cmp(X86_CC_A, r0, r1, r2)
+#  define x87_gti_d(r0, r1, i0)                _x87_gti_d(_jit, r0, r1, i0)
+static void _x87_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ner_d(r0, r1, r2)                _x87_ner_d(_jit, r0, r2, r1)
+static void _x87_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_nei_d(r0, r1, i0)                _x87_nei_d(_jit, r0, r1, i0)
+static void _x87_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_unltr_d(r0, r1, r2)      x87cmp(X86_CC_NAE, r0, r1, r2)
+#  define x87_unlti_d(r0, r1, i0)      _x87_unlti_d(_jit, r0, r1, i0)
+static void _x87_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_unler_d(r0, r1, r2)      x87cmp(X86_CC_NA, r0, r1, r2)
+#  define x87_unlei_d(r0, r1, i0)      _x87_unlei_d(_jit, r0, r1, i0)
+static void _x87_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_uneqr_d(r0, r1, r2)      x87cmp2(X86_CC_E, r0, r1, r2)
+#  define x87_uneqi_d(r0, r1, i0)      _x87_uneqi_d(_jit, r0, r1, i0)
+static void _x87_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_unger_d(r0, r1, r2)      x87cmp(X86_CC_NA, r0, r2, r1)
+#  define x87_ungei_d(r0, r1, i0)      _x87_ungei_d(_jit, r0, r1, i0)
+static void _x87_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ungtr_d(r0, r1, r2)      x87cmp(X86_CC_NAE, r0, r2, r1)
+#  define x87_ungti_d(r0, r1, i0)      _x87_ungti_d(_jit, r0, r1, i0)
+static void _x87_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ltgtr_d(r0, r1, r2)      _x87_ltgtr_d(_jit, r0, r1, r2)
+static void _x87_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_ltgti_d(r0, r1, i0)      _x87_ltgti_d(_jit, r0, r1, i0)
+static void _x87_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_ordr_d(r0, r1, r2)       x87cmp2(X86_CC_NP, r0, r2, r1)
+#  define x87_ordi_d(r0, r1, i0)       _x87_ordi_d(_jit, r0, r1, i0)
+static void _x87_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define x87_unordr_d(r0, r1, r2)     x87cmp2(X86_CC_P, r0, r2, r1)
+#  define x87_unordi_d(r0, r1, i0)     _x87_unordi_d(_jit, r0, r1, i0)
+static void _x87_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/*
+ * Register moves (the _f and _d macros share _x87_movr_d), the double
+ * precision immediate move, and double precision loads and stores.  As
+ * for the _f variants, the immediate store macros take the address or
+ * offset i0 first.
+ */
+#define x87_movr_f(r0,r1)              _x87_movr_d(_jit,r0,r1)
+#define x87_movr_d(r0,r1)              _x87_movr_d(_jit,r0,r1)
+static void _x87_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#define x87_movi_d(r0,i0)              _x87_movi_d(_jit,r0,i0)
+static void _x87_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+#  define x87_ldr_d(r0, r1)            _x87_ldr_d(_jit, r0, r1)
+static void _x87_ldr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define x87_ldi_d(r0, i0)            _x87_ldi_d(_jit, r0, i0)
+static void _x87_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
+#  define x87_ldxr_d(r0, r1, r2)       _x87_ldxr_d(_jit, r0, r1, r2)
+static void _x87_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define x87_ldxi_d(r0, r1, i0)       _x87_ldxi_d(_jit, r0, r1, i0)
+static void _x87_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define x87_str_d(r0, r1)            _x87_str_d(_jit, r0, r1)
+static void _x87_str_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define x87_sti_d(i0, r0)            _x87_sti_d(_jit, i0, r0)
+static void _x87_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define x87_stxr_d(r0, r1, r2)       _x87_stxr_d(_jit, r0, r1, r2)
+static void _x87_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define x87_stxi_d(i0, r0, r1)       _x87_stxi_d(_jit, i0, r0, r1)
+static void _x87_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/*
+ * Single precision compare and branch.
+ *
+ * Register forms expand to x87jcc()/x87jcc2() with a condition code,
+ * except beq and bne which call the _d functions directly; blt, ble,
+ * bunge and bungt pass the two registers swapped (r1, r0).  Every form
+ * yields a jit_word_t.  The immediate forms take a pointer to the
+ * constant and are generated by fbopi() in the CODE section, beyond
+ * the portion of the file visible here.
+ */
+#  define x87_bltr_f(i0, r0, r1)       x87jcc(X86_CC_A, i0, r1, r0)
+#  define x87_blti_f(i0, r0, i1)       _x87_blti_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bler_f(i0, r0, r1)       x87jcc(X86_CC_AE, i0, r1, r0)
+#  define x87_blei_f(i0, r0, i1)       _x87_blei_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_beqr_f(i0, r0, r1)       _x87_beqr_d(_jit, i0, r0, r1)
+#  define x87_beqi_f(i0, r0, i1)       _x87_beqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bger_f(i0, r0, r1)       x87jcc(X86_CC_AE, i0, r0, r1)
+#  define x87_bgei_f(i0, r0, i1)       _x87_bgei_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bgtr_f(i0, r0, r1)       x87jcc(X86_CC_A, i0, r0, r1)
+#  define x87_bgti_f(i0, r0, i1)       _x87_bgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bner_f(i0, r0, r1)       _x87_bner_d(_jit, i0, r0, r1)
+#  define x87_bnei_f(i0, r0, i1)       _x87_bnei_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bunltr_f(i0, r0, r1)     x87jcc(X86_CC_NAE, i0, r0, r1)
+#  define x87_bunlti_f(i0, r0, i1)     _x87_bunlti_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bunler_f(i0, r0, r1)     x87jcc(X86_CC_NA, i0, r0, r1)
+#  define x87_bunlei_f(i0, r0, i1)     _x87_bunlei_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_buneqr_f(i0, r0, r1)     x87jcc2(X86_CC_E, i0, r0, r1)
+#  define x87_buneqi_f(i0, r0, i1)     _x87_buneqi_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bunger_f(i0, r0, r1)     x87jcc(X86_CC_NA, i0, r1, r0)
+#  define x87_bungei_f(i0, r0, i1)     _x87_bungei_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bungtr_f(i0, r0, r1)     x87jcc(X86_CC_NAE, i0, r1, r0)
+#  define x87_bungti_f(i0, r0, i1)     _x87_bungti_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bltgtr_f(i0, r0, r1)     x87jcc2(X86_CC_NE, i0, r0, r1)
+#  define x87_bltgti_f(i0, r0, i1)     _x87_bltgti_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bordr_f(i0, r0, r1)      x87jcc2(X86_CC_NP, i0, r0, r1)
+#  define x87_bordi_f(i0, r0, i1)      _x87_bordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+#  define x87_bunordr_f(i0, r0, r1)    x87jcc2(X86_CC_P, i0, r0, r1)
+#  define x87_bunordi_f(i0, r0, i1)    _x87_bunordi_f(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
+/*
+ * Double precision compare and branch.
+ *
+ * Mirrors the single precision set: register forms expand to
+ * x87jcc()/x87jcc2() with a condition code (blt, ble, bunge and bungt
+ * with the registers swapped), while beq and bne have dedicated
+ * functions.  Every form yields a jit_word_t.  The immediate forms
+ * take a pointer to the constant.
+ */
+#  define x87_bltr_d(i0, r0, r1)       x87jcc(X86_CC_A, i0, r1, r0)
+#  define x87_blti_d(i0, r0, i1)       _x87_blti_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bler_d(i0, r0, r1)       x87jcc(X86_CC_AE, i0, r1, r0)
+#  define x87_blei_d(i0, r0, i1)       _x87_blei_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_beqr_d(i0, r0, r1)       _x87_beqr_d(_jit, i0, r0, r1)
+static jit_word_t
+_x87_beqr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define x87_beqi_d(i0, r0, i1)       _x87_beqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bger_d(i0, r0, r1)       x87jcc(X86_CC_AE, i0, r0, r1)
+#  define x87_bgei_d(i0, r0, i1)       _x87_bgei_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bgtr_d(i0, r0, r1)       x87jcc(X86_CC_A, i0, r0, r1)
+#  define x87_bgti_d(i0, r0, i1)       _x87_bgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bner_d(i0, r0, r1)       _x87_bner_d(_jit, i0, r0, r1)
+static jit_word_t
+_x87_bner_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define x87_bnei_d(i0, r0, i1)       _x87_bnei_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bunltr_d(i0, r0, r1)     x87jcc(X86_CC_NAE, i0, r0, r1)
+#  define x87_bunlti_d(i0, r0, i1)     _x87_bunlti_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bunler_d(i0, r0, r1)     x87jcc(X86_CC_NA, i0, r0, r1)
+#  define x87_bunlei_d(i0, r0, i1)     _x87_bunlei_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_buneqr_d(i0, r0, r1)     x87jcc2(X86_CC_E, i0, r0, r1)
+#  define x87_buneqi_d(i0, r0, i1)     _x87_buneqi_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bunger_d(i0, r0, r1)     x87jcc(X86_CC_NA, i0, r1, r0)
+#  define x87_bungei_d(i0, r0, i1)     _x87_bungei_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bungtr_d(i0, r0, r1)     x87jcc(X86_CC_NAE, i0, r1, r0)
+#  define x87_bungti_d(i0, r0, i1)     _x87_bungti_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bltgtr_d(i0, r0, r1)     x87jcc2(X86_CC_NE, i0, r0, r1)
+#  define x87_bltgti_d(i0, r0, i1)     _x87_bltgti_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bordr_d(i0, r0, r1)      x87jcc2(X86_CC_NP, i0, r0, r1)
+#  define x87_bordi_d(i0, r0, i1)      _x87_bordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#  define x87_bunordr_d(i0, r0, r1)    x87jcc2(X86_CC_P, i0, r0, r1)
+#  define x87_bunordi_d(i0, r0, i1)    _x87_bunordi_d(_jit, i0, r0, i1)
+static jit_word_t
+_x87_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
+#endif
+
+#if CODE
+/*
+ * fpr_opi(name, type, size) defines _x87_<name>i_<type>(), the
+ * "register op immediate" form of a binary operation.  The generated
+ * function obtains a scratch register of class jit_class_fpr (asserted
+ * to be an x87 register), loads the constant pointed to by i0 into it
+ * with x87_movi_<type>, applies x87_<name>r_<type>(r0, r1, scratch)
+ * and releases the scratch register.  `size' (32 or 64) selects the
+ * jit_float<size>_t pointee type of i0.
+ */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_x87_##name##i_##type(jit_state_t *_jit, jit_int32_t r0,               \
+                     jit_int32_t r1, jit_float##size##_t *i0)          \
+{                                                                      \
+    jit_int32_t                tmp;                                            \
+                                                                       \
+    tmp = jit_get_reg(jit_class_fpr);                                  \
+    assert(jit_x87_reg_p(tmp));                                                \
+    x87_movi_##type(rn(tmp), i0);                                      \
+    x87_##name##r_##type(r0, r1, rn(tmp));                             \
+    jit_unget_reg(tmp);                                                        \
+}
+/*
+ * fpr_bopi(name, type, size) defines _x87_b<name>i_<type>(), the
+ * "branch on register vs. immediate" form of a comparison.  The
+ * generated function obtains a scratch register of class
+ * jit_class_fpr|jit_class_nospill (asserted to be an x87 register),
+ * loads the constant pointed to by i1 into it, emits
+ * x87_b<name>r_<type>(i0, r0, scratch), releases the scratch register
+ * and returns the value the register form returned.
+ */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_x87_b##name##i_##type(jit_state_t *_jit, jit_word_t i0,               \
+                      jit_int32_t r0, jit_float##size##_t *i1)         \
+{                                                                      \
+    jit_int32_t                tmp;                                            \
+    jit_word_t         result;                                         \
+                                                                       \
+    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);                        \
+    assert(jit_x87_reg_p(tmp));                                                \
+    x87_movi_##type(rn(tmp), i1);                                      \
+    result = x87_b##name##r_##type(i0, r0, rn(tmp));                   \
+    jit_unget_reg(tmp);                                                        \
+    return (result);                                                   \
+}
+/*
+ * Shorthands instantiating the two generator macros above for single
+ * precision (type f, jit_float32_t) and double precision (type d,
+ * jit_float64_t).
+ */
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+
+/*
+ * Emit the memory operand instruction selected by x87rx(017, ...),
+ * preceded by a 0x9b byte, for the address described by md/rb/ri/ms.
+ * The fstcwm() macro name suggests this stores the x87 control word.
+ *
+ * NOTE(review): rex() is invoked here and again at the start of
+ * _x87rx(); confirm that emitting the prefix twice is intended.
+ */
+static void
+_fstcwm(jit_state_t *_jit, jit_int32_t md,
+       jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
+{
+    ic(0x9b);
+    rex(0, 1, rb, ri, _NOREG);
+    x87rx(017, md, rb, ri, ms);
+}
+
+/*
+ * Emit an x87 instruction with a memory operand.
+ *
+ * `code' is a two digit octal value: its high digit selects the opcode
+ * byte in the 0xd8..0xdf range and its low digit is passed as the
+ * first argument of rx().  md/rb/ri/ms describe the memory operand and
+ * are forwarded unchanged to rex() and rx().
+ */
+static void
+_x87rx(jit_state_t *_jit, jit_int32_t code, jit_int32_t md,
+       jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
+{
+    jit_int32_t                opcode = 0xd8 | (code >> 3);
+    jit_int32_t                ext = code & 7;
+
+    rex(0, 1, rb, ri, _NOREG);
+    ic(opcode);
+    rx(ext, md, rb, ri, ms);
+}
+
+/*
+ * Emit a two byte x87 instruction with a register operand.
+ *
+ * The high octal digit of `code' selects the opcode byte in the
+ * 0xd8..0xdf range; the second byte is built by mrm() from the
+ * constant 0x03, the low octal digit of `code' and r0.
+ */
+static void
+_x87ri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
+{
+    jit_int32_t                opcode = 0xd8 | (code >> 3);
+    jit_int32_t                ext = code & 7;
+
+    ic(opcode);
+    mrm(0x03, ext, r0);
+}
+
+/*
+ * Emit a two register arithmetic instruction where one operand must
+ * be ST0.
+ *
+ * When r1 is ST0 the instruction is emitted with `code | 040' and r0
+ * as the encoded register.  Otherwise r0 is asserted to be ST0 and the
+ * instruction is emitted with `code' and r1 as the encoded register.
+ */
+static void
+_x87rri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r1 != _ST0_REGNO) {
+       assert(r0 == _ST0_REGNO);
+       x87ri(code, r1);
+       return;
+    }
+    x87ri(code | 040, r0);
+}
+
+/*
+ * Single precision immediate forms: each line defines
+ * _x87_<op>i_f() through fpr_opi(), i.e. the constant is loaded into a
+ * scratch register and the x87_<op>r_f macro is applied to it.
+ */
+fopi(add)
+fopi(sub)
+fopi(rsb)
+fopi(mul)
+fopi(div)
+
+/*
+ * Emit x87 code for r0 = r1 + r2, all three being x87 stack registers.
+ *
+ * When r0 is also one of the sources the addition is done in place,
+ * exchanging r0 with ST0 around the operation if neither operand is
+ * already ST0.  Otherwise a copy of r1 is pushed, r2 is added to it
+ * and the result is popped into r0; the "+ 1" adjustments account for
+ * the register numbering being shifted by the push.
+ */
+static void
+_x87_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                other;
+
+    if (r0 != r1 && r0 != r2) {
+       fldr(r1);
+       faddr(_ST0_REGNO, r2 + 1);
+       fstpr(r0 + 1);
+       return;
+    }
+    /* r0 is a source too; `other' is the remaining source operand,
+     * which is r0 itself when all three registers are the same */
+    other = r0 == r1 ? r2 : r1;
+    if (other == _ST0_REGNO)
+       faddr(r0, _ST0_REGNO);
+    else if (r0 == _ST0_REGNO)
+       faddr(_ST0_REGNO, other);
+    else {
+       /* after the exchange the value of r0 lives in ST0 */
+       fxchr(r0);
+       faddr(_ST0_REGNO, other == r0 ? _ST0_REGNO : other);
+       fxchr(r0);
+    }
+}
+
+/* Defines _x87_addi_d() through fpr_opi(): add a double constant. */
+dopi(add)
+
+/*
+ * Emit x87 code for r0 = r1 - r2, all three being x87 stack registers.
+ *
+ * Subtraction is not commutative, so the in-place cases pick between
+ * fsubr and fsubrr depending on whether r0 is the first or the second
+ * source and on which operand is ST0.  With three distinct registers a
+ * copy of r1 is pushed, r2 is subtracted from it and the result is
+ * popped into r0; the "+ 1" adjustments account for the register
+ * numbering being shifted by the push.
+ */
+static void
+_x87_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                src;
+
+    if (r0 != r1 && r0 != r2) {
+       fldr(r1);
+       fsubr(_ST0_REGNO, r2 + 1);
+       fstpr(r0 + 1);
+       return;
+    }
+    if (r0 == r1) {
+       /* r0 is the first source */
+       if (r2 == _ST0_REGNO) {
+           fsubrr(r0, _ST0_REGNO);
+           return;
+       }
+       if (r0 == _ST0_REGNO) {
+           fsubr(_ST0_REGNO, r2);
+           return;
+       }
+       /* after the exchange the value of r0 lives in ST0 */
+       src = r0 == r2 ? _ST0_REGNO : r2;
+       fxchr(r0);
+       fsubr(_ST0_REGNO, src);
+       fxchr(r0);
+       return;
+    }
+    /* r0 is the second source only */
+    if (r1 == _ST0_REGNO) {
+       fsubr(r0, _ST0_REGNO);
+       return;
+    }
+    if (r0 != _ST0_REGNO)
+       fxchr(r0);
+    fsubrr(_ST0_REGNO, r1);
+    if (r0 != _ST0_REGNO)
+       fxchr(r0);
+}
+
+/*
+ * Define _x87_subi_d() and _x87_rsbi_d() through fpr_opi().  The rsb
+ * variant ends up in x87_rsbr_d, which is x87_subr_d with the two
+ * source registers swapped.
+ */
+dopi(sub)
+
+dopi(rsb)
+
+/*
+ * Emit x87 code for r0 = r1 * r2, all three being x87 stack registers.
+ *
+ * When r0 is also one of the sources the multiplication is done in
+ * place, exchanging r0 with ST0 around the operation if neither
+ * operand is already ST0.  Otherwise a copy of r1 is pushed, multiplied
+ * by r2 and popped into r0; the "+ 1" adjustments account for the
+ * register numbering being shifted by the push.
+ */
+static void
+_x87_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                other;
+
+    if (r0 != r1 && r0 != r2) {
+       fldr(r1);
+       fmulr(_ST0_REGNO, r2 + 1);
+       fstpr(r0 + 1);
+       return;
+    }
+    /* r0 is a source too; `other' is the remaining source operand,
+     * which is r0 itself when all three registers are the same */
+    other = r0 == r1 ? r2 : r1;
+    if (other == _ST0_REGNO)
+       fmulr(r0, _ST0_REGNO);
+    else if (r0 == _ST0_REGNO)
+       fmulr(_ST0_REGNO, other);
+    else {
+       /* after the exchange the value of r0 lives in ST0 */
+       fxchr(r0);
+       fmulr(_ST0_REGNO, other == r0 ? _ST0_REGNO : other);
+       fxchr(r0);
+    }
+}
+
+/* Defines _x87_muli_d() through fpr_opi(): multiply by a double constant. */
+dopi(mul)
+
+/*
+ * Emit x87 code for r0 = r1 / r2, all three being x87 stack registers.
+ *
+ * Division is not commutative, so the in-place cases pick between
+ * fdivr and fdivrr depending on whether r0 is the dividend or the
+ * divisor and on which operand is ST0; the selection mirrors the one
+ * made by _x87_subr_d() between fsubr and fsubrr.  With three distinct
+ * registers a copy of r1 is pushed, divided by r2 and popped into r0;
+ * the "+ 1" adjustments account for the register numbering being
+ * shifted by the push.
+ */
+static void
+_x87_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r0 == r1) {
+       /* r0 is the dividend */
+       if (r2 == _ST0_REGNO)
+           fdivrr(r0, _ST0_REGNO);
+       else if (r0 == _ST0_REGNO)
+           fdivr(_ST0_REGNO, r2);
+       else {
+           fxchr(r0);
+           fdivr(_ST0_REGNO, r0 == r2 ? _ST0_REGNO : r2);
+           fxchr(r0);
+       }
+    }
+    else if (r0 == r2) {
+       /* r0 is the divisor */
+       if (r1 == _ST0_REGNO)
+           fdivr(r0, _ST0_REGNO);
+       else if (r0 == _ST0_REGNO)
+           /* ST0 = r1 / ST0; this used to emit fsubrr, i.e. a
+            * reverse subtraction instead of a reverse division */
+           fdivrr(_ST0_REGNO, r1);
+       else {
+           fxchr(r0);
+           fdivrr(_ST0_REGNO, r1);
+           fxchr(r0);
+       }
+    }
+    else {
+       fldr(r1);
+       fdivr(_ST0_REGNO, r2 + 1);
+       fstpr(r0 + 1);
+    }
+}
+
+/* Defines _x87_divi_d() through fpr_opi(): divide by a double constant. */
+dopi(div)
+
+/*
+ * Emit x87 code for r0 = abs(r1).
+ *
+ * fabs_() takes no register operand, so an in-place operation on a
+ * register other than ST0 is bracketed by two exchanges with ST0.
+ * With distinct registers a copy of r1 is pushed, processed and popped
+ * into r0 ("+ 1" because the push shifts the register numbering).
+ */
+static void
+_x87_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       fldr(r1);
+       fabs_();
+       fstpr(r0 + 1);
+       return;
+    }
+    if (r0 == _ST0_REGNO) {
+       fabs_();
+       return;
+    }
+    fxchr(r0);
+    fabs_();
+    fxchr(r0);
+}
+
+/*
+ * Emit x87 code for r0 = -r1.
+ *
+ * fchs_() takes no register operand, so an in-place operation on a
+ * register other than ST0 is bracketed by two exchanges with ST0.
+ * With distinct registers a copy of r1 is pushed, processed and popped
+ * into r0 ("+ 1" because the push shifts the register numbering).
+ */
+static void
+_x87_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       fldr(r1);
+       fchs_();
+       fstpr(r0 + 1);
+       return;
+    }
+    if (r0 == _ST0_REGNO) {
+       fchs_();
+       return;
+    }
+    fxchr(r0);
+    fchs_();
+    fxchr(r0);
+}
+
+/*
+ * Emit x87 code for r0 = sqrt(r1).
+ *
+ * fsqrt_() takes no register operand, so an in-place operation on a
+ * register other than ST0 is bracketed by two exchanges with ST0.
+ * With distinct registers a copy of r1 is pushed, processed and popped
+ * into r0 ("+ 1" because the push shifts the register numbering).
+ */
+static void
+_x87_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       fldr(r1);
+       fsqrt_();
+       fstpr(r0 + 1);
+       return;
+    }
+    if (r0 == _ST0_REGNO) {
+       fsqrt_();
+       return;
+    }
+    fxchr(r0);
+    fsqrt_();
+    fxchr(r0);
+}
+
+/*
+ * Emit code converting the x87 register r1 to an integer in the
+ * general purpose register r0, going through the stack slot at
+ * CVT_OFFSET from _RBP_REGNO.
+ *
+ * Two sequences exist.  When `sun' is defined the control word is
+ * saved at -4(_RBP), a copy with the FPCW_CHOP bits set is built at
+ * -8(_RBP) and loaded, the value is stored with fistlm (exchanging r1
+ * with ST0 around the store when r1 is not ST0) and the saved control
+ * word is reloaded.  Otherwise a copy of r1 is pushed and stored with
+ * fisttplm.
+ */
+static void
+_x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if defined(sun)
+    /* for the sake of passing test cases in x87 mode, otherwise only sse
+     * is supported */
+    fstcwm(-4, _RBP_REGNO, _NOREG, _SCL1);
+    /* r0 doubles as scratch while the modified control word is built */
+    ldxi_s(r0, _RBP_REGNO, -4);
+    extr_uc(r0, r0);
+#  define FPCW_CHOP    0xc00
+    ori(r0, r0, FPCW_CHOP);
+    stxi_s(-8, _RBP_REGNO, r0);
+    fldcwm(-8, _RBP_REGNO, _NOREG, _SCL1);
+    if (r1 == _ST0_REGNO)
+       fistlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+    else {
+       fxchr(r1);
+       fistlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+       fxchr(r1);
+    }
+    /* put back the control word saved on entry */
+    fldcwm(-4, _RBP_REGNO, _NOREG, _SCL1);
+    ldxi(r0, _RBP_REGNO, CVT_OFFSET);
+#else
+    /* work on a pushed copy; presumably fisttplm pops it again, as no
+     * explicit pop follows -- confirm against the FISTTP description */
+    fldr(r1);
+    fisttplm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+    ldxi_i(r0, _RBP_REGNO, CVT_OFFSET);
+#endif
+}
+
+#  if __X64
+/* Emit code converting the x87 value in r1 to a word-sized integer in the
+ * general purpose register r0, going through the scratch slot at
+ * CVT_OFFSET(%rbp).  Only compiled when __X64 is set. */
+static void
+_x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Operate on a loaded copy; fisttpqm() presumably pops it again, so
+     * r1 itself is left unchanged -- confirm against the macro. */
+    fldr(r1);
+    fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+    ldxi(r0, _RBP_REGNO, CVT_OFFSET);
+}
+#  endif
+
+/* Emit code converting the integer in general purpose register r1 to a
+ * floating point value in x87 register r0.  The integer is spilled to
+ * the scratch slot at CVT_OFFSET(%rbp) and loaded from there. */
+static void
+_x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(CVT_OFFSET, _RBP_REGNO, r1);
+    /* Pick the integer load that matches the width just stored. */
+#  if __X32
+    fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+#  else
+    fildqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
+#  endif
+    /* r0 + 1: presumably compensates for the entry pushed by the load. */
+    fstpr(r0 + 1);
+}
+
+/* Emit a comparison of x87 registers r1 and r2 and materialize condition
+ * `code` in general purpose register r0.
+ *
+ * cc() is applied to a register accepted by reg8_p(); when r0 is not
+ * one, %rax is borrowed: its contents are moved aside into r0 first and
+ * exchanged back at the end. */
+static void
+_x87cmp(jit_state_t *_jit, jit_int32_t code,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                dst;
+    jit_bool_t         direct;
+
+    direct = reg8_p(r0);
+    dst = r0;
+    if (!direct) {
+       dst = _RAX_REGNO;
+       movr(r0, dst);
+    }
+    /* Clear the result register before the compare. */
+    ixorr(dst, dst);
+    if (r1 != _ST0_REGNO) {
+       /* Compare against a loaded copy of r1; the popping compare takes
+        * r2 + 1, presumably to account for the pushed entry. */
+       fldr(r1);
+       fucomipr(r2 + 1);
+    }
+    else
+       fucomir(r2);
+    cc(code, dst);
+    if (!direct)
+       xchgr(r0, dst);
+}
+
+/* Same as _x87cmp(), but the operands may be exchanged: when r2 is the
+ * top of stack it becomes the first operand, which avoids loading a
+ * copy.  Intended for conditions where operand order does not matter. */
+static void
+_x87cmp2(jit_state_t *_jit, jit_int32_t code,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                        dst;
+    jit_int32_t                        lhs, rhs;
+    jit_bool_t                 direct;
+
+    lhs = (r2 == _ST0_REGNO) ? r2 : r1;
+    rhs = (r2 == _ST0_REGNO) ? r1 : r2;
+    direct = reg8_p(r0);
+    dst = r0;
+    if (!direct) {
+       /* Borrow %rax, parking its contents in r0 until the end. */
+       dst = _RAX_REGNO;
+       movr(r0, dst);
+    }
+    ixorr(dst, dst);
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    cc(code, dst);
+    if (!direct)
+       xchgr(r0, dst);
+}
+
+/* Emit a comparison of x87 registers r0 and r1 followed by a conditional
+ * jump on `code` to i0.  Returns the code position right after the jump. */
+static jit_word_t
+_x87jcc(jit_state_t *_jit, jit_int32_t code,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != _ST0_REGNO) {
+       /* Compare against a loaded copy of r0; the popping compare takes
+        * r1 + 1, presumably to account for the pushed entry. */
+       fldr(r0);
+       fucomipr(r1 + 1);
+    }
+    else
+       fucomir(r1);
+    jcc(code, i0);
+    return (_jit->pc.w);
+}
+
+/* Same as _x87jcc(), but the operands may be exchanged: when r1 is the
+ * top of stack it becomes the first operand, which avoids loading a
+ * copy.  Returns the code position right after the jump. */
+static jit_word_t
+_x87jcc2(jit_state_t *_jit, jit_int32_t code,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                        lhs, rhs;
+
+    lhs = (r1 == _ST0_REGNO) ? r1 : r0;
+    rhs = (r1 == _ST0_REGNO) ? r0 : r1;
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    jcc(code, i0);
+    return (_jit->pc.w);
+}
+
+/* Instantiate the fopi() and fbopi() helper macros once per comparison
+ * kind.  Both macros are defined outside this excerpt and #undef'd at the
+ * end of the file; presumably they generate the float immediate-operand
+ * compare and branch variants -- TODO confirm. */
+fopi(lt)
+fopi(le)
+fopi(eq)
+fopi(ge)
+fopi(gt)
+fopi(ne)
+fopi(unlt)
+fopi(unle)
+fopi(uneq)
+fopi(unge)
+fopi(ungt)
+fopi(ltgt)
+fopi(ord)
+fopi(unord)
+fbopi(lt)
+fbopi(le)
+fbopi(eq)
+fbopi(ge)
+fbopi(gt)
+fbopi(ne)
+fbopi(unlt)
+fbopi(unle)
+fbopi(uneq)
+fbopi(unge)
+fbopi(ungt)
+fbopi(ltgt)
+fbopi(ord)
+fbopi(unord)
+
+/* Emit code loading the single precision constant *i0 into x87 register
+ * r0.
+ *
+ * Values matching one of the built-in x87 constants are produced with
+ * the dedicated load instruction.  Anything else is either read from the
+ * address i0 or, when _jitc->no_data is set, built in a scratch general
+ * purpose register and bounced through CVT_OFFSET(%rbp). */
+static void
+_x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_int32_t      i;
+       jit_float32_t    f;
+    } bits;
+    jit_int32_t                 tmp;
+
+    bits.f = *i0;
+    /* The sign bit test keeps -0.0 away from fldz(). */
+    if (bits.f == 0.0 && !(bits.i & 0x80000000))
+       fldz();
+    else if (bits.f == 1.0)
+       fld1();
+    else if (bits.f == 3.3219280948873623478703195458468f)
+       fldl2t();
+    else if (bits.f == 1.4426950408889634073599246886656f)
+       fldl2e();
+    else if (bits.f == 3.1415926535897932384626421096161f)
+       fldpi();
+    else if (bits.f == 0.3010299956639811952137387498515f)
+       fldlg2();
+    else if (bits.f == 0.6931471805599453094172323683399f)
+       fldln2();
+    else if (!_jitc->no_data) {
+       /* The load helper stores into r0 itself. */
+       x87_ldi_f(r0, (jit_word_t)i0);
+       return;
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), bits.i);
+       stxi_i(CVT_OFFSET, _RBP_REGNO, rn(tmp));
+       jit_unget_reg(tmp);
+       x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
+       return;
+    }
+    /* Only the built-in constant paths reach here: move the value just
+     * loaded into r0. */
+    fstpr(r0 + 1);
+}
+
+/* Emit code loading a single precision value from the address held in
+ * general purpose register r1 into x87 register r0. */
+static void
+_x87_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    fldsm(0, r1, _NOREG, _SCL1);
+    /* r0 + 1: presumably compensates for the entry pushed by the load. */
+    fstpr(r0 + 1);
+}
+
+/* Emit code loading a single precision value from absolute address i0
+ * into x87 register r0.  Addresses rejected by x87_address_p() are first
+ * materialized in a scratch general purpose register. */
+static void
+_x87_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!x87_address_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       x87_ldr_f(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    fldsm(i0, _NOREG, _NOREG, _SCL1);
+    fstpr(r0 + 1);
+}
+
+/* Emit code loading a single precision value from address r1 + r2 (both
+ * general purpose registers) into x87 register r0. */
+static void
+_x87_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    /* Compute the address explicitly in a scratch register instead of
+     * using base + index addressing. */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    x87_ldr_f(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    fldsm(0, r1, r2, _SCL1);
+    fstpr(r0 + 1);
+#endif
+}
+
+static void
+_x87_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) {
+       fldsm(i0, r1, _NOREG, _SCL1);
+       fstpr(r0 + 1);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       x87_ldr_f(r0, rn(reg));
+#else
+       movi(rn(reg), i0);
+       x87_ldxr_f(r0, r1, rn(reg));
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+/* Emit code storing x87 register r1 as single precision to the address
+ * held in general purpose register r0. */
+static void
+_x87_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_bool_t         swap;
+
+    /* The store is issued with the source at the top of stack; swap it
+     * in and back out when it lives elsewhere. */
+    swap = r1 != _ST0_REGNO;
+    if (swap)
+       fxchr(r1);
+    fstsm(0, r0, _NOREG, _SCL1);
+    if (swap)
+       fxchr(r1);
+}
+
+/* Emit code storing x87 register r0 as single precision to absolute
+ * address i0.  Addresses rejected by x87_address_p() are first
+ * materialized in a scratch general purpose register. */
+static void
+_x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (x87_address_p(i0)) {
+       /* Store with the source at the top of stack, swapping it in and
+        * back out when needed. */
+       if (r0 != _ST0_REGNO)
+           fxchr(r0);
+       fstsm(i0, _NOREG, _NOREG, _SCL1);
+       if (r0 != _ST0_REGNO)
+           fxchr(r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    x87_str_f(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Emit code storing x87 register r2 as single precision to address
+ * r0 + r1 (both general purpose registers). */
+static void
+_x87_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    /* Compute the address explicitly in a scratch register instead of
+     * using base + index addressing. */
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    x87_str_f(rn(tmp), r2);
+    jit_unget_reg(tmp);
+#else
+    /* Store with the source at the top of stack, swapping it in and back
+     * out when needed. */
+    if (r2 != _ST0_REGNO)
+       fxchr(r2);
+    fstsm(0, r0, r1, _SCL1);
+    if (r2 != _ST0_REGNO)
+       fxchr(r2);
+#endif
+}
+
+/* Emit code storing x87 register r1 as single precision to address
+ * r0 + i0.  Offsets rejected by can_sign_extend_int_p() go through a
+ * scratch general purpose register. */
+static void
+_x87_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_int_p(i0)) {
+       /* Store with the source at the top of stack, swapping it in and
+        * back out when needed. */
+       if (r1 != _ST0_REGNO)
+           fxchr(r1);
+       fstsm(i0, r0, _NOREG, _SCL1);
+       if (r1 != _ST0_REGNO)
+           fxchr(r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+#if __X64_32
+    addi(rn(tmp), r0, i0);
+    x87_str_f(rn(tmp), r1);
+#else
+    movi(rn(tmp), i0);
+    x87_stxr_f(rn(tmp), r0, r1);
+#endif
+    jit_unget_reg(tmp);
+}
+
+/* Emit code copying x87 register r1 into x87 register r0.  Nothing is
+ * emitted when both are the same register.
+ *
+ * r0 and r1 are x87 register numbers, so the top-of-stack shortcuts test
+ * against _ST0_REGNO like every other emitter in this file. */
+static void
+_x87_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1) {
+       if (r1 == _ST0_REGNO)
+           /* Source already on top: a plain store is enough. */
+           fstr(r0);
+       else if (r0 == _ST0_REGNO) {
+           /* Destination is the top of stack: swap the source in, then
+            * store it back so both slots hold the source value. */
+           fxchr(r1);
+           fstr(r1);
+       }
+       else {
+           /* General case: copy through a loaded temporary.  r0 + 1
+            * presumably compensates for the entry pushed by fldr(). */
+           fldr(r1);
+           fstpr(r0 + 1);
+       }
+    }
+}
+
+static void
+_x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_int32_t      ii[2];
+       jit_word_t       w;
+       jit_float64_t    d;
+    } data;
+    jit_int32_t                 reg;
+
+    data.d = *i0;
+    if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
+       fldz();
+    else if (data.d == 1.0)
+       fld1();
+    else if (data.d == 3.3219280948873623478703195458468)
+       fldl2t();
+    else if (data.d == 1.4426950408889634073599246886656)
+       fldl2e();
+    else if (data.d == 3.1415926535897932384626421096161)
+       fldpi();
+    else if (data.d == 0.3010299956639811952137387498515)
+       fldlg2();
+    else if (data.d == 0.6931471805599453094172323683399)
+       fldln2();
+    else {
+       if (_jitc->no_data) {
+           reg = jit_get_reg(jit_class_gpr);
+#if __X32 || __X64_32
+           movi(rn(reg), data.ii[0]);
+           stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
+           movi(rn(reg), data.ii[1]);
+           stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
+#else
+           movi(rn(reg), data.w);
+           stxi_l(CVT_OFFSET, _RBP_REGNO, rn(reg));
+#endif
+           jit_unget_reg(reg);
+           x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
+       }
+       else
+           x87_ldi_d(r0, (jit_word_t)i0);
+       return;
+    }
+    fstpr(r0 + 1);
+}
+
+/* Instantiate the dopi() helper macro (defined outside this excerpt) for
+ * "lt" and "le"; presumably the immediate-operand variants -- confirm. */
+dopi(lt)
+dopi(le)
+
+/* Emit code setting general purpose register r0 to the result of the
+ * equality test between x87 registers r1 and r2.
+ *
+ * The result register is cleared first; when the compare sets the parity
+ * flag the cc() step is jumped over, so the result stays 0. */
+static void
+_x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t                 direct;
+    jit_word_t                 skip;
+    jit_int32_t                        dst, lhs, rhs;
+
+    /* Operand order is irrelevant here, so prefer the top of stack as
+     * first operand to avoid a load. */
+    lhs = (r2 == _ST0_REGNO) ? r2 : r1;
+    rhs = (r2 == _ST0_REGNO) ? r1 : r2;
+    direct = reg8_p(r0);
+    dst = r0;
+    if (!direct) {
+       /* Borrow %rax, parking its contents in r0 until the end. */
+       dst = _RAX_REGNO;
+       movr(r0, dst);
+    }
+    ixorr(dst, dst);
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    /* Short forward jump, patched below to land after cc(). */
+    jpes(0);
+    skip = _jit->pc.w;
+    cc(X86_CC_E, dst);
+    patch_rel_char(skip, _jit->pc.w);
+    if (!direct)
+       xchgr(r0, dst);
+}
+
+/* Instantiate the dopi() helper macro (defined outside this excerpt) for
+ * "eq", "ge" and "gt"; presumably the immediate-operand variants --
+ * confirm. */
+dopi(eq)
+dopi(ge)
+dopi(gt)
+
+/* Emit code setting general purpose register r0 to the result of the
+ * inequality test between x87 registers r1 and r2.
+ *
+ * The result register is preset to 1; when the compare sets the parity
+ * flag the cc() step is jumped over, so the result stays 1. */
+static void
+_x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_bool_t                 direct;
+    jit_word_t                 skip;
+    jit_int32_t                        dst, lhs, rhs;
+
+    /* Operand order is irrelevant here, so prefer the top of stack as
+     * first operand to avoid a load. */
+    lhs = (r2 == _ST0_REGNO) ? r2 : r1;
+    rhs = (r2 == _ST0_REGNO) ? r1 : r2;
+    direct = reg8_p(r0);
+    dst = r0;
+    if (!direct) {
+       /* Borrow %rax, parking its contents in r0 until the end. */
+       dst = _RAX_REGNO;
+       movr(r0, dst);
+    }
+    imovi(dst, 1);
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    /* Short forward jump, patched below to land after cc(). */
+    jpes(0);
+    skip = _jit->pc.w;
+    cc(X86_CC_NE, dst);
+    patch_rel_char(skip, _jit->pc.w);
+    if (!direct)
+       xchgr(r0, dst);
+}
+
+/* Instantiate the dopi() helper macro (defined outside this excerpt) for
+ * "ne" and the unordered comparisons; presumably the immediate-operand
+ * variants -- confirm. */
+dopi(ne)
+dopi(unlt)
+dopi(unle)
+dopi(uneq)
+dopi(unge)
+dopi(ungt)
+
+/* Emit code setting general purpose register r0 to the "less than or
+ * greater than" test between x87 registers r1 and r2.
+ *
+ * When both operands are the same register the answer is known without
+ * comparing: a value is never less than or greater than itself, and the
+ * general path below (X86_CC_NE after the compare) would also yield 0
+ * for identical operands.  The shortcut therefore loads 0. */
+static void
+_x87_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (r1 == r2)
+       movi(r0, 0);
+    else
+       x87cmp2(X86_CC_NE, r0, r1, r2);
+}
+
+/* Instantiate the dopi() helper macro (defined outside this excerpt) for
+ * "ltgt", "ord" and "unord"; presumably the immediate-operand variants --
+ * confirm. */
+dopi(ltgt)
+dopi(ord)
+dopi(unord)
+
+/* Emit code loading a double precision value from the address held in
+ * general purpose register r1 into x87 register r0. */
+static void
+_x87_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    fldlm(0, r1, _NOREG, _SCL1);
+    /* r0 + 1: presumably compensates for the entry pushed by the load. */
+    fstpr(r0 + 1);
+}
+
+/* Emit code loading a double precision value from absolute address i0
+ * into x87 register r0.  Addresses rejected by x87_address_p() are first
+ * materialized in a scratch general purpose register. */
+static void
+_x87_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (!x87_address_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       x87_ldr_d(r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    fldlm(i0, _NOREG, _NOREG, _SCL1);
+    fstpr(r0 + 1);
+}
+
+/* Emit code loading a double precision value from address r1 + r2 (both
+ * general purpose registers) into x87 register r0. */
+static void
+_x87_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    /* Compute the address explicitly in a scratch register instead of
+     * using base + index addressing. */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    x87_ldr_d(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    fldlm(0, r1, r2, _SCL1);
+    fstpr(r0 + 1);
+#endif
+}
+
+static void
+_x87_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_int_p(i0)) {
+       fldlm(i0, r1, _NOREG, _SCL1);
+       fstpr(r0 + 1);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       x87_ldr_d(r0, rn(reg));
+#else
+       movi(rn(reg), i0);
+       x87_ldxr_d(r0, r1, rn(reg));
+#endif
+       jit_unget_reg(reg);
+    }
+}
+
+/* Emit code storing x87 register r1 as double precision to the address
+ * held in general purpose register r0. */
+static void
+_x87_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_bool_t         swap;
+
+    /* The store is issued with the source at the top of stack; swap it
+     * in and back out when it lives elsewhere. */
+    swap = r1 != _ST0_REGNO;
+    if (swap)
+       fxchr(r1);
+    fstlm(0, r0, _NOREG, _SCL1);
+    if (swap)
+       fxchr(r1);
+}
+
+/* Emit code storing x87 register r0 as double precision to absolute
+ * address i0.  Addresses rejected by x87_address_p() are first
+ * materialized in a scratch general purpose register. */
+static void
+_x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (x87_address_p(i0)) {
+       /* Store with the source at the top of stack, swapping it in and
+        * back out when needed. */
+       if (r0 != _ST0_REGNO)
+           fxchr(r0);
+       fstlm(i0, _NOREG, _NOREG, _SCL1);
+       if (r0 != _ST0_REGNO)
+           fxchr(r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    x87_str_d(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Emit code storing x87 register r2 as double precision to address
+ * r0 + r1 (both general purpose registers). */
+static void
+_x87_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    /* Compute the address explicitly in a scratch register instead of
+     * using base + index addressing. */
+    jit_int32_t                tmp;
+    tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    x87_str_d(rn(tmp), r2);
+    jit_unget_reg(tmp);
+#else
+    /* Store with the source at the top of stack, swapping it in and back
+     * out when needed. */
+    if (r2 != _ST0_REGNO)
+       fxchr(r2);
+    fstlm(0, r0, r1, _SCL1);
+    if (r2 != _ST0_REGNO)
+       fxchr(r2);
+#endif
+}
+
+/* Emit code storing x87 register r1 as double precision to address
+ * r0 + i0.  Offsets rejected by can_sign_extend_int_p() go through a
+ * scratch general purpose register. */
+static void
+_x87_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_int_p(i0)) {
+       /* Store with the source at the top of stack, swapping it in and
+        * back out when needed. */
+       if (r1 != _ST0_REGNO)
+           fxchr(r1);
+       fstlm(i0, r0, _NOREG, _SCL1);
+       if (r1 != _ST0_REGNO)
+           fxchr(r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+#if __X64_32
+    addi(rn(tmp), r0, i0);
+    x87_str_d(rn(tmp), r1);
+#else
+    movi(rn(tmp), i0);
+    x87_stxr_d(rn(tmp), r0, r1);
+#endif
+    jit_unget_reg(tmp);
+}
+
+/* Instantiate the dbopi() helper macro (defined outside this excerpt) for
+ * "lt" and "le"; presumably the immediate-operand branch variants --
+ * confirm. */
+dbopi(lt)
+dbopi(le)
+
+/* Emit a branch to i0 taken when x87 registers r0 and r1 compare equal.
+ * When the compare sets the parity flag the conditional jump is skipped.
+ * Returns the code position right after the emitted sequence. */
+static jit_word_t
+_x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                        lhs, rhs;
+    jit_word_t                 skip;
+
+    /* Operand order is irrelevant here, so prefer the top of stack as
+     * first operand to avoid a load. */
+    lhs = (r1 == _ST0_REGNO) ? r1 : r0;
+    rhs = (r1 == _ST0_REGNO) ? r0 : r1;
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    /* Short forward jump, patched below to land after the jcc(). */
+    jpes(0);
+    skip = _jit->pc.w;
+    jcc(X86_CC_E, i0);
+    patch_rel_char(skip, _jit->pc.w);
+    return (_jit->pc.w);
+}
+/* Instantiate the dbopi() helper macro (defined outside this excerpt) for
+ * "eq", "ge" and "gt"; presumably the immediate-operand branch variants --
+ * confirm. */
+dbopi(eq)
+dbopi(ge)
+dbopi(gt)
+
+/* Emit a branch to i0 taken when x87 registers r0 and r1 compare not
+ * equal.
+ *
+ * Layout of the emitted sequence:
+ *     compare
+ *     jp  take        (parity set: go straight to the jump)
+ *     jz  done        (zero set: skip the jump)
+ *   take:
+ *     jmp i0
+ *   done:
+ * Returns the code position of `done`. */
+static jit_word_t
+_x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                        lhs, rhs;
+    jit_word_t                 to_take;
+    jit_word_t                 to_done;
+
+    /* Operand order is irrelevant here, so prefer the top of stack as
+     * first operand to avoid a load. */
+    lhs = (r1 == _ST0_REGNO) ? r1 : r0;
+    rhs = (r1 == _ST0_REGNO) ? r0 : r1;
+    if (lhs != _ST0_REGNO) {
+       fldr(lhs);
+       fucomipr(rhs + 1);
+    }
+    else
+       fucomir(rhs);
+    jpes(0);
+    to_take = _jit->pc.w;
+    jzs(0);
+    to_done = _jit->pc.w;
+    patch_rel_char(to_take, _jit->pc.w);
+    jmpi(i0);
+    patch_rel_char(to_done, _jit->pc.w);
+    return (_jit->pc.w);
+}
+/* Instantiate the dbopi() helper macro (defined outside this excerpt) for
+ * the remaining comparison kinds; presumably the immediate-operand branch
+ * variants -- confirm. */
+dbopi(ne)
+dbopi(unlt)
+dbopi(unle)
+dbopi(uneq)
+dbopi(unge)
+dbopi(ungt)
+dbopi(ltgt)
+dbopi(ord)
+dbopi(unord)
+/* The generator macros are local to this file: drop them so they do not
+ * leak into whatever includes it. */
+#  undef fopi
+#  undef fbopi
+#  undef dopi
+#  undef dbopi
+#  undef fpr_bopi
+#  undef fpr_opi
+#endif
diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c
new file mode 100644 (file)
index 0000000..c34a117
--- /dev/null
@@ -0,0 +1,2264 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+
+/*
+ * ABI dependent constants.  Three configurations are distinguished:
+ * __X32, 64-bit __CYGWIN__/_WIN32, and every other 64-bit target.
+ *   jit_arg_reg_p(i) / jit_arg_f_reg_p(i)
+ *             whether integer / float argument number i is passed in a
+ *             register
+ *   CVT_OFFSET
+ *             frame offset of the scratch slot used for sse/x87
+ *             conversions
+ */
+#if __X32
+/* No argument is passed in registers. */
+#  define jit_arg_reg_p(i)             0
+#  define jit_arg_f_reg_p(i)           0
+#  define stack_framesize              20
+#  define stack_adjust                 12
+#  define CVT_OFFSET                   -12
+#  define REAL_WORDSIZE                        4
+#  define va_gp_increment              4
+#  define va_fp_increment              8
+#else
+#  if __CYGWIN__ || _WIN32
+/* Four argument registers, shared by integer and float arguments. */
+#    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 4)
+#    define jit_arg_f_reg_p(i)         jit_arg_reg_p(i)
+#    define stack_framesize            152
+#    define va_fp_increment            8
+#  else
+/* Six integer and eight float argument registers. */
+#    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 6)
+#    define jit_arg_f_reg_p(i)         ((i) >= 0 && (i) < 8)
+#    define stack_framesize            56
+/* Offsets below are relative to the jit_va_list_t register save area,
+ * which starts at its rdi member. */
+#    define first_gp_argument          rdi
+#    define first_gp_offset            offsetof(jit_va_list_t, rdi)
+#    define first_gp_from_offset(gp)   ((gp) / 8)
+#    define last_gp_argument           r9
+#    define va_gp_max_offset                                           \
+       (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
+#    define first_fp_argument          xmm0
+#    define first_fp_offset            offsetof(jit_va_list_t, xmm0)
+#    define last_fp_argument           xmm7
+#    define va_fp_max_offset                                           \
+       (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
+#    define va_fp_increment            16
+#    define first_fp_from_offset(fp)   (((fp) - va_gp_max_offset) / 16)
+#  endif
+#  define va_gp_increment              8
+#  define stack_adjust                 8
+#  define CVT_OFFSET                   -8
+#  define REAL_WORDSIZE                        8
+#endif
+
+/*
+ * Types
+ */
+/* Variable argument list state.  On __X32 and Windows targets it is a
+ * plain pointer; elsewhere it is a structure holding two offsets, two
+ * pointers and a save area for the six integer and eight float argument
+ * registers. */
+#if __X32 || __CYGWIN__ || _WIN32
+typedef jit_pointer_t jit_va_list_t;
+#else
+typedef struct jit_va_list {
+    /* Current offsets into the integer and float parts of the save area
+     * (see va_gp_max_offset / va_fp_max_offset). */
+    jit_int32_t                gpoff;
+    jit_int32_t                fpoff;
+    /* NOTE(review): presumably the overflow (stack) argument pointer and
+     * the save area pointer, as in the SysV va_list -- confirm. */
+    jit_pointer_t      over;
+    jit_pointer_t      save;
+    /* Declared explicitly as int64 for the x32 abi */
+    jit_int64_t                rdi;
+    jit_int64_t                rsi;
+    jit_int64_t                rdx;
+    jit_int64_t                rcx;
+    jit_int64_t                r8;
+    jit_int64_t                r9;
+    /* Each float register takes a 16 byte slot (va_fp_increment): the
+     * value itself followed by an unused _upN filler. */
+    jit_float64_t      xmm0;
+    jit_float64_t      _up0;
+    jit_float64_t      xmm1;
+    jit_float64_t      _up1;
+    jit_float64_t      xmm2;
+    jit_float64_t      _up2;
+    jit_float64_t      xmm3;
+    jit_float64_t      _up3;
+    jit_float64_t      xmm4;
+    jit_float64_t      _up4;
+    jit_float64_t      xmm5;
+    jit_float64_t      _up5;
+    jit_float64_t      xmm6;
+    jit_float64_t      _up6;
+    jit_float64_t      xmm7;
+    jit_float64_t      _up7;
+} jit_va_list_t;
+#endif
+
+/*
+ * Prototypes
+ */
+/* Each helper is declared as _name(jit_state_t*, ...) together with a
+ * name(...) macro that passes the local `_jit` implicitly. */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+/* Transfers between sse and x87 registers, float and double flavours. */
+#define sse_from_x87_f(r0, r1)         _sse_from_x87_f(_jit, r0, r1)
+static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define sse_from_x87_d(r0, r1)         _sse_from_x87_d(_jit, r0, r1)
+static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define x87_from_sse_f(r0, r1)         _x87_from_sse_f(_jit, r0, r1)
+static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define x87_from_sse_d(r0, r1)         _x87_from_sse_d(_jit, r0, r1)
+static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
+
+/* The backend sources are included with PROTO defined; presumably this
+ * pass only pulls in their macros and prototypes -- confirm against the
+ * `#if PROTO` sections of those files. */
+#define PROTO                          1
+#  include "jit_x86-cpu.c"
+#  include "jit_x86-sse.c"
+#  include "jit_x86-x87.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Feature flags filled in by jit_get_cpu(). */
+jit_cpu_t              jit_cpu;
+/* Register table.  Each entry pairs a spec word -- register class flags
+ * built with rc() OR'ed with the hardware register number -- with the
+ * printable register name.  The row order differs per ABI, and the table
+ * is closed by a _NOREG sentinel that _jit_init() leaves out of reglen. */
+jit_register_t         _rvs[] = {
+#if __X32
+    { rc(gpr) | rc(rg8) | 0,           "%eax" },
+    { rc(gpr) | rc(rg8) | 1,           "%ecx" },
+    { rc(gpr) | rc(rg8) | 2,           "%edx" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" },
+    { rc(sav) | rc(gpr) | 6,           "%esi" },
+    { rc(sav) | rc(gpr) | 7,           "%edi" },
+    { rc(sav) | 4,                     "%esp" },
+    { rc(sav) | 5,                     "%ebp" },
+    { rc(xpr) | rc(fpr) | 0,           "%xmm0" },
+    { rc(xpr) | rc(fpr) | 1,           "%xmm1" },
+    { rc(xpr) | rc(fpr) | 2,           "%xmm2" },
+    { rc(xpr) | rc(fpr) | 3,           "%xmm3" },
+    { rc(xpr) | rc(fpr) | 4,           "%xmm4" },
+    { rc(xpr) | rc(fpr) | 5,           "%xmm5" },
+    { rc(xpr) | rc(fpr) | 6,           "%xmm6" },
+    { rc(xpr) | rc(fpr) | 7,           "%xmm7" },
+    { rc(fpr) | 0,                     "st(0)" },
+    { rc(fpr) | 1,                     "st(1)" },
+    { rc(fpr) | 2,                     "st(2)" },
+    { rc(fpr) | 3,                     "st(3)" },
+    { rc(fpr) | 4,                     "st(4)" },
+    { rc(fpr) | 5,                     "st(5)" },
+    { rc(fpr) | 6,                     "st(6)" },
+    { rc(fpr) | 7,                     "st(7)" },
+#else
+#  if __CYGWIN__ || _WIN32
+    { rc(gpr) | rc(rg8) | 0,           "%rax" },
+    { rc(gpr) | rc(rg8) | rc(rg8) | 10,        "%r10" },
+    { rc(gpr) | rc(rg8) | rc(rg8) | 11,        "%r11" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
+    { rc(sav) | rc(gpr) | 7,           "%rdi" },
+    { rc(sav) | rc(gpr) | 6,           "%rsi" },
+    { rc(sav) | rc(gpr) | 12,          "%r12" },
+    { rc(sav) | rc(gpr) | 13,          "%r13" },
+    { rc(sav) | rc(gpr) | 14,          "%r14" },
+    { rc(sav) | rc(gpr) | 15,          "%r15" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
+    { rc(sav) | 4,                     "%rsp" },
+    { rc(sav) | 5,                     "%rbp" },
+    { rc(xpr) | rc(fpr) | 4,           "%xmm4" },
+    { rc(xpr) | rc(fpr) | 5,           "%xmm5" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 10,        "%xmm10" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 11,        "%xmm11" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 12,        "%xmm12" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 13,        "%xmm13" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 14,        "%xmm14" },
+    { rc(sav) | rc(xpr) | rc(fpr) | 15,        "%xmm15" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
+#else
+    /* %rax is a pseudo flag argument for varargs functions */
+    { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" },
+    { rc(gpr) | rc(rg8) | 10,          "%r10" },
+    { rc(gpr) | rc(rg8) | 11,          "%r11" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 13,        "%r13" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 14,        "%r14" },
+    { rc(sav) | rc(rg8) | rc(gpr) | 15,        "%r15" },
+    { rc(sav) | rc(gpr) | rc(rg8) | 12,        "%r12" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" },
+    { rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" },
+    { rc(sav) | 4,                     "%rsp" },
+    { rc(sav) | 5,                     "%rbp" },
+    { rc(xpr) | rc(fpr) | 8,           "%xmm8" },
+    { rc(xpr) | rc(fpr) | 9,           "%xmm9" },
+    { rc(xpr) | rc(fpr) | 10,          "%xmm10" },
+    { rc(xpr) | rc(fpr) | 11,          "%xmm11" },
+    { rc(xpr) | rc(fpr) | 12,          "%xmm12" },
+    { rc(xpr) | rc(fpr) | 13,          "%xmm13" },
+    { rc(xpr) | rc(fpr) | 14,          "%xmm14" },
+    { rc(xpr) | rc(fpr) | 15,          "%xmm15" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
+    { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
+#  endif
+    { rc(fpr) | 0,                     "st(0)" },
+    { rc(fpr) | 1,                     "st(1)" },
+    { rc(fpr) | 2,                     "st(2)" },
+    { rc(fpr) | 3,                     "st(3)" },
+    { rc(fpr) | 4,                     "st(4)" },
+    { rc(fpr) | 5,                     "st(5)" },
+    { rc(fpr) | 6,                     "st(6)" },
+    { rc(fpr) | 7,                     "st(7)" },
+#endif
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* Probe the host processor with the cpuid instruction and record the
+ * supported features in the global jit_cpu.
+ *
+ * The two local unions overlay the raw %ecx / %edx words returned by
+ * cpuid function 1 with one-bit fields, so individual feature bits can
+ * be read by name.  On __X32 the function returns early, leaving jit_cpu
+ * untouched, when the processor does not support cpuid at all. */
+void
+jit_get_cpu(void)
+{
+    union {
+       struct {
+           jit_uint32_t sse3           : 1;
+           jit_uint32_t pclmulqdq      : 1;
+           jit_uint32_t dtes64         : 1;    /* amd reserved */
+           jit_uint32_t monitor        : 1;
+           jit_uint32_t ds_cpl         : 1;    /* amd reserved */
+           jit_uint32_t vmx            : 1;    /* amd reserved */
+           jit_uint32_t smx            : 1;    /* amd reserved */
+           jit_uint32_t est            : 1;    /* amd reserved */
+           jit_uint32_t tm2            : 1;    /* amd reserved */
+           jit_uint32_t ssse3          : 1;
+           jit_uint32_t cntx_id        : 1;    /* amd reserved */
+           jit_uint32_t __reserved0    : 1;
+           jit_uint32_t fma            : 1;
+           jit_uint32_t cmpxchg16b     : 1;
+           jit_uint32_t xtpr           : 1;    /* amd reserved */
+           jit_uint32_t pdcm           : 1;    /* amd reserved */
+           jit_uint32_t __reserved1    : 1;
+           jit_uint32_t pcid           : 1;    /* amd reserved */
+           jit_uint32_t dca            : 1;    /* amd reserved */
+           jit_uint32_t sse4_1         : 1;
+           jit_uint32_t sse4_2         : 1;
+           jit_uint32_t x2apic         : 1;    /* amd reserved */
+           jit_uint32_t movbe          : 1;    /* amd reserved */
+           jit_uint32_t popcnt         : 1;
+           jit_uint32_t tsc            : 1;    /* amd reserved */
+           jit_uint32_t aes            : 1;
+           jit_uint32_t xsave          : 1;
+           jit_uint32_t osxsave        : 1;
+           jit_uint32_t avx            : 1;
+           jit_uint32_t __reserved2    : 1;    /* amd F16C */
+           jit_uint32_t __reserved3    : 1;
+           jit_uint32_t __alwayszero   : 1;    /* amd RAZ */
+       } bits;
+       jit_uword_t     cpuid;
+    } ecx;
+    union {
+       struct {
+           jit_uint32_t fpu            : 1;
+           jit_uint32_t vme            : 1;
+           jit_uint32_t de             : 1;
+           jit_uint32_t pse            : 1;
+           jit_uint32_t tsc            : 1;
+           jit_uint32_t msr            : 1;
+           jit_uint32_t pae            : 1;
+           jit_uint32_t mce            : 1;
+           jit_uint32_t cmpxchg8b      : 1;
+           jit_uint32_t apic           : 1;
+           jit_uint32_t __reserved0    : 1;
+           jit_uint32_t sep            : 1;
+           jit_uint32_t mtrr           : 1;
+           jit_uint32_t pge            : 1;
+           jit_uint32_t mca            : 1;
+           jit_uint32_t cmov           : 1;
+           jit_uint32_t pat            : 1;
+           jit_uint32_t pse36          : 1;
+           jit_uint32_t psn            : 1;    /* amd reserved */
+           jit_uint32_t clfsh          : 1;
+           jit_uint32_t __reserved1    : 1;
+           jit_uint32_t ds             : 1;    /* amd reserved */
+           jit_uint32_t acpi           : 1;    /* amd reserved */
+           jit_uint32_t mmx            : 1;
+           jit_uint32_t fxsr           : 1;
+           jit_uint32_t sse            : 1;
+           jit_uint32_t sse2           : 1;
+           jit_uint32_t ss             : 1;    /* amd reserved */
+           jit_uint32_t htt            : 1;
+           jit_uint32_t tm             : 1;    /* amd reserved */
+           jit_uint32_t __reserved2    : 1;
+           jit_uint32_t pbe            : 1;    /* amd reserved */
+       } bits;
+       jit_uword_t     cpuid;
+    } edx;
+#if __X32
+    int                        ac, flags;
+#endif
+    jit_uword_t                eax, ebx;
+
+#if __X32
+    /* adapted from glibc __sysconf */
+    /* Try to toggle the bits in mask 0x240000 of the flags register:
+     * `flags` receives the original value, `ac` the bits that actually
+     * changed; the original flags are restored afterwards. */
+    __asm__ volatile ("pushfl;\n\t"
+                     "popl %0;\n\t"
+                     "movl $0x240000, %1;\n\t"
+                     "xorl %0, %1;\n\t"
+                     "pushl %1;\n\t"
+                     "popfl;\n\t"
+                     "pushfl;\n\t"
+                     "popl %1;\n\t"
+                     "xorl %0, %1;\n\t"
+                     "pushl %0;\n\t"
+                     "popfl"
+                     : "=r" (flags), "=r" (ac));
+
+    /* i386 or i486 without cpuid */
+    if ((ac & (1 << 21)) == 0)
+       /* probably without x87 as well */
+       return;
+#endif
+
+    /* query %eax = 1 function */
+    /* %ebx/%rbx is exchanged with a scratch operand around cpuid so the
+     * register holds its original value again once the asm completes. */
+#if __X32 || __X64_32
+    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+#else
+    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+#endif
+                     : "=a" (eax), "=r" (ebx),
+                     "=c" (ecx.cpuid), "=d" (edx.cpuid)
+                     : "0" (1));
+
+    /* Copy the feature bits of interest into the global descriptor. */
+    jit_cpu.fpu                = edx.bits.fpu;
+    jit_cpu.cmpxchg8b  = edx.bits.cmpxchg8b;
+    jit_cpu.cmov       = edx.bits.cmov;
+    jit_cpu.mmx                = edx.bits.mmx;
+    jit_cpu.sse                = edx.bits.sse;
+    jit_cpu.sse2       = edx.bits.sse2;
+    jit_cpu.sse3       = ecx.bits.sse3;
+    jit_cpu.pclmulqdq  = ecx.bits.pclmulqdq;
+    jit_cpu.ssse3      = ecx.bits.ssse3;
+    jit_cpu.fma                = ecx.bits.fma;
+    jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
+    jit_cpu.sse4_1     = ecx.bits.sse4_1;
+    jit_cpu.sse4_2     = ecx.bits.sse4_2;
+    jit_cpu.movbe      = ecx.bits.movbe;
+    jit_cpu.popcnt     = ecx.bits.popcnt;
+    jit_cpu.aes                = ecx.bits.aes;
+    jit_cpu.avx                = ecx.bits.avx;
+
+    /* query %eax = 0x80000001 function */
+    /* NOTE(review): the extended function is queried without first
+     * checking that the processor implements it -- confirm this is safe
+     * for all supported 64-bit processors. */
+#if __X64
+#  if __X64_32
+    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+#  else
+    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+#  endif
+                     : "=a" (eax), "=r" (ebx),
+                     "=c" (ecx.cpuid), "=d" (edx.cpuid)
+                     : "0" (0x80000001));
+    /* Bit 0 of %ecx of this function is recorded as the lahf flag. */
+    jit_cpu.lahf       = ecx.cpuid & 1;
+#endif
+}
+
+/* Per-state backend initialisation: record the number of usable entries
+ * of _rvs and, on __X32 only, adjust the shared register table once for
+ * processors without sse2. */
+void
+_jit_init(jit_state_t *_jit)
+{
+#if __X32
+    jit_int32_t                regno;
+    /* Guards the one-time edit of the global _rvs table below. */
+    static jit_bool_t  first = 1;
+#endif
+
+    /* The trailing _NOREG sentinel is not counted. */
+    _jitc->reglen = jit_size(_rvs) - 1;
+#if __X32
+    if (first) {
+       if (!jit_cpu.sse2) {
+           /* Without sse2, clear the spec of every register in the xpr
+            * class. */
+           for (regno = _jitc->reglen; regno >= 0; regno--) {
+               if (_rvs[regno].spec & jit_class_xpr)
+                   _rvs[regno].spec = 0;
+           }
+       }
+       first = 0;
+    }
+#endif
+}
+
+/* Start the definition of a new jit function: close the one currently
+ * open (if any), reserve a slot in the functions vector, reset its frame
+ * bookkeeping and create its prolog and epilog nodes. */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                offset;
+
+    /* A still open function is implicitly terminated. */
+    if (_jitc->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+    offset = _jitc->functions.offset;
+    /* Grow the functions vector in steps of 16 entries when full. */
+    if (offset >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+    _jitc->function->self.size = stack_framesize;
+    _jitc->function->self.argi = _jitc->function->self.argf =
+       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+    /* sse/x87 conversion */
+    /* aoff starts at CVT_OFFSET, so the conversion scratch slot sits
+     * above anything handed out later by jit_allocai(). */
+    _jitc->function->self.aoff = CVT_OFFSET;
+    _jitc->function->self.call = jit_call_default;
+    /* One frame offset per register of the register table. */
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* _no_link here does not mean the jit_link() call can be removed
+     * by rewriting as:
+     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+     */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = offset;
+    /* The epilog node is created now but not linked here. */
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = offset;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve `length' bytes below the current self.aoff of the function
+ * being built and return the new self.aoff.
+ * Before subtracting, self.aoff is masked according to the size:
+ * no mask for 0 or 1, -2 for 2, -4 for 3 or 4, -8 for anything else. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jitc->function);
+
+    if (length == 0 || length == 1)
+       ;                                       /* no alignment needed */
+    else if (length == 2)
+       _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jitc->function->self.aoff &= -4;
+    else
+       _jitc->function->self.aoff &= -8;
+    _jitc->function->self.aoff -= length;
+
+    /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
+     * generation restart on some conditions: div/rem and qmul/qdiv, due
+     * to registers constraints.
+     * Only record the allocai node while still building the IR; during
+     * code generation a jit_xyz() call must not try to append a node
+     * to the tail of the IR. */
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+
+    return (_jitc->function->self.aoff);
+}
+
+/* Allocate a run-time sized area: `v' holds the size, `u' receives the
+ * resulting offset.
+ * On first use in a function, a jit_int32_t slot is reserved with
+ * jit_allocai() and its offset kept in function->aoffoff.  The emitted
+ * code negates the size, masks it with -16, adds it to the value
+ * loaded from that slot and to JIT_SP, and stores the updated value
+ * back into the slot. */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t        tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+    /* tmp = -v & -16 */
+    jit_negr(tmp, v);
+    jit_andi(tmp, tmp, -16);
+    /* u = slot value + tmp; the stack pointer moves by the same amount */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, tmp);
+    jit_addr(JIT_SP, JIT_SP, tmp);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(tmp);
+
+    jit_dec_synth();
+}
+
+/* Return from the function being built: emit a jump and patch it to
+ * the function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function);
+    jit_inc_synth(ret);
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/* Return the value held in register `u': copy it to JIT_RET when it
+ * is a different register, mark JIT_RET live and emit jit_ret(). */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr, u);
+    if (u != JIT_RET) {
+       /* movr(%ret, %ret) would be optimized out, so it is skipped */
+       jit_movr(JIT_RET, u);
+    }
+    /* explicitly tell it is live */
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{   /* Return the immediate word `u': load it into JIT_RET, then emit
+     * jit_ret().  The sequence is wrapped between jit_inc_synth_w()
+     * and jit_dec_synth(). */
+    jit_inc_synth_w(reti, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the single precision value in register `u'.  When `u' is
+ * already JIT_FRET it is only marked live; otherwise it is copied to
+ * JIT_FRET.  Ends with jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_f, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_f(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{   /* Return the immediate single precision value `u': load it into
+     * JIT_FRET with jit_movi_f(), then emit jit_ret(). */
+    jit_inc_synth_f(reti_f, u);
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/* Return the double precision value in register `u'.  When `u' is
+ * already JIT_FRET it is only marked live; otherwise it is copied to
+ * JIT_FRET.  Ends with jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_inc_synth_w(retr_d, u);
+    if (u == JIT_FRET) {
+       jit_live(JIT_FRET);
+    }
+    else {
+       jit_movr_d(JIT_FRET, u);
+    }
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{   /* Return the immediate double precision value `u': load it into
+     * JIT_FRET with jit_movi_d(), then emit jit_ret(). */
+    jit_inc_synth_d(reti_d, u);
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+void
+_jit_epilog(jit_state_t *_jit)
+{   /* Close the function being built: link its epilog node (which
+     * must not be linked yet, see the second assertion) and clear
+     * _jitc->function so that no function is open afterwards. */
+    assert(_jitc->function);
+    assert(_jitc->function->epilog->next == NULL);
+    jit_link(_jitc->function->epilog);
+    _jitc->function = NULL;
+}
+
+/* Tell whether argument node `u' satisfies the register predicate for
+ * its kind: jit_arg_reg_p() for jit_code_arg nodes, jit_arg_f_reg_p()
+ * for jit_code_arg_f and jit_code_arg_d nodes. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    jit_bool_t         in_register;
+
+    if (u->code != jit_code_arg) {
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+       in_register = jit_arg_f_reg_p(u->u.w);
+    }
+    else
+       in_register = jit_arg_reg_p(u->u.w);
+    return (in_register);
+}
+
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare) {
+       jit_link_prepare();
+       /* Remember that a varargs function call is being constructed. */
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    else {
+       jit_link_prolog();
+       /* Remember the current function is varargs. */
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+
+#if __X64 && !(__CYGWIN__ || _WIN32)
+       /* Allocate va_list like object in the stack.
+        * If applicable, with enough space to save all argument
+        * registers, and use fixed offsets for them. */
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+
+       /* Initialize gp offset in save area. */
+       if (jit_arg_reg_p(_jitc->function->self.argi))
+           _jitc->function->vagp = _jitc->function->self.argi * 8;
+       else
+           _jitc->function->vagp = va_gp_max_offset;
+
+       /* Initialize fp offset in save area. */
+       if (jit_arg_f_reg_p(_jitc->function->self.argf))
+           _jitc->function->vafp = _jitc->function->self.argf * 16 +
+                                   va_gp_max_offset;
+       else
+           _jitc->function->vafp = va_fp_max_offset;
+#endif
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{   /* Pass register `u' as a call argument: this simply forwards to
+     * jit_pushargr() inside its own va_push synthetic node. */
+    jit_inc_synth_w(va_push, u);
+    jit_pushargr(u);
+    jit_dec_synth();
+}
+
+/* Declare the next integer argument of the function being built and
+ * return its jit_code_arg node.
+ * The node's first word is either the argument register index (taken
+ * from self.argi while jit_arg_reg_p() holds, x86-64 only) or the
+ * current self.size, which then grows by REAL_WORDSIZE.  Must not be
+ * called after the function was flagged jit_call_varargs. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t        slot;
+    jit_node_t         *arg;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if __X64
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+#  if __CYGWIN__ || _WIN32
+       /* a stack slot is still accounted for the register argument */
+       _jitc->function->self.size += sizeof(jit_word_t);
+#  endif
+       slot = _jitc->function->self.argi++;
+    }
+    else
+#endif
+    {
+       slot = _jitc->function->self.size;
+       _jitc->function->self.size += REAL_WORDSIZE;
+    }
+    arg = jit_new_node_ww(jit_code_arg, slot,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next single precision argument of the function being
+ * built and return its jit_code_arg_f node.
+ * On x86-64 the node's first word is a register index while the
+ * relevant predicate holds: jit_arg_reg_p(self.argi) on Cygwin/Windows
+ * (where self.size also grows by one word), jit_arg_f_reg_p(self.argf)
+ * elsewhere.  Otherwise it is the current self.size, which then grows
+ * by REAL_WORDSIZE. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_int32_t        slot;
+    jit_node_t         *arg;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       _jitc->function->self.size += sizeof(jit_word_t);
+       slot = _jitc->function->self.argi++;
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+       slot = _jitc->function->self.argf++;
+    }
+#  endif
+    else
+#endif
+    {
+       slot = _jitc->function->self.size;
+       _jitc->function->self.size += REAL_WORDSIZE;
+    }
+    arg = jit_new_node_ww(jit_code_arg_f, slot,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Declare the next double precision argument of the function being
+ * built and return its jit_code_arg_d node.
+ * Same layout rules as jit_arg_f(), except that a stack argument
+ * grows self.size by sizeof(jit_float64_t). */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t        slot;
+    jit_node_t         *arg;
+
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       _jitc->function->self.size += sizeof(jit_word_t);
+       slot = _jitc->function->self.argi++;
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+       slot = _jitc->function->self.argf++;
+    }
+#  endif
+    else
+#endif
+    {
+       slot = _jitc->function->self.size;
+       _jitc->function->self.size += sizeof(jit_float64_t);
+    }
+    arg = jit_new_node_ww(jit_code_arg_d, slot,
+                         ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return (arg);
+}
+
+/* Fetch integer argument `v' into register `u' with jit_extr_c() from
+ * argument register JIT_RA0 - v->u.w when v->u.w is a register slot
+ * (x86-64 only), else with jit_ldxi_c() from _RBP + v->u.w. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_extr_c(u, JIT_RA0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_c(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' with jit_extr_uc() from
+ * argument register JIT_RA0 - v->u.w when v->u.w is a register slot
+ * (x86-64 only), else with jit_ldxi_uc() from _RBP + v->u.w. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_extr_uc(u, JIT_RA0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_uc(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' with jit_extr_s() from
+ * argument register JIT_RA0 - v->u.w when v->u.w is a register slot
+ * (x86-64 only), else with jit_ldxi_s() from _RBP + v->u.w. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_extr_s(u, JIT_RA0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_s(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' with jit_extr_us() from
+ * argument register JIT_RA0 - v->u.w when v->u.w is a register slot
+ * (x86-64 only), else with jit_ldxi_us() from _RBP + v->u.w. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_extr_us(u, JIT_RA0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_us(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' as an int.
+ * A register argument (x86-64 only) is copied with jit_movr() on
+ * __X64_32 builds and with jit_extr_i() otherwise; a stack argument is
+ * loaded with jit_ldxi_i() from _RBP + v->u.w. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+#  if !__X64_32
+       jit_extr_i(u, JIT_RA0 - v->u.w);
+#  else
+       jit_movr(u, JIT_RA0 - v->u.w);
+#  endif
+    }
+    else
+#endif
+    {
+       jit_ldxi_i(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+#if __X64 && !__X64_32
+/* Fetch integer argument `v' into register `u' with jit_ldxi_ui() from
+ * _RBP + v->u.w for a stack argument, or with jit_extr_ui() from
+ * argument register JIT_RA0 - v->u.w for a register argument. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!(jit_arg_reg_p(v->u.w))) {
+       jit_ldxi_ui(u, _RBP, v->u.w);
+    }
+    else {
+       jit_extr_ui(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch integer argument `v' into register `u' with jit_ldxi_l() from
+ * _RBP + v->u.w for a stack argument, or with a plain jit_movr() from
+ * argument register JIT_RA0 - v->u.w for a register argument. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!(jit_arg_reg_p(v->u.w))) {
+       jit_ldxi_l(u, _RBP, v->u.w);
+    }
+    else {
+       jit_movr(u, JIT_RA0 - v->u.w);
+    }
+    jit_dec_synth();
+}
+#endif
+
+/* Overwrite incoming integer argument `v' with register `u': copy to
+ * argument register JIT_RA0 - v->u.w for a register argument (x86-64
+ * only), else store at _RBP + v->u.w. */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_movr(JIT_RA0 - v->u.w, u);
+    }
+    else
+#endif
+    {
+       jit_stxi(v->u.w, _RBP, u);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite incoming integer argument `v' with immediate `u'.
+ * A register argument (x86-64 only) is loaded directly; a stack
+ * argument goes through a temporary general purpose register that is
+ * stored at _RBP + v->u.w and then released. */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    jit_int32_t        tmp;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_movi(JIT_RA0 - v->u.w, u);
+    }
+    else
+#endif
+    {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(v->u.w, _RBP, tmp);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch single precision argument `v' into register `u': copy from
+ * _XMM0 - v->u.w when jit_arg_f_reg_p(v->u.w) holds (x86-64 only),
+ * else load from _RBP + v->u.w. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movr_f(u, _XMM0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_f(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite incoming single precision argument `v' with register `u'.
+ * A register argument (x86-64 only) is copied to _XMM0 - v->u.w, a
+ * stack argument is stored at _RBP + v->u.w.
+ * The register test is jit_arg_f_reg_p(), the same predicate
+ * jit_getarg_f() applies to v->u.w, so reads and writes of one
+ * argument always agree on its location.  On ABIs where the float and
+ * integer argument register counts differ, testing with the integer
+ * predicate would send a register argument to the stack path. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(_XMM0 - v->u.w, u);
+    else
+#endif
+       jit_stxi_f(v->u.w, _RBP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite incoming single precision argument `v' with immediate `u'.
+ * A register argument (x86-64 only) is loaded directly into
+ * _XMM0 - v->u.w; a stack argument goes through a temporary register
+ * that is stored at _RBP + v->u.w and then released.
+ * The register test is jit_arg_f_reg_p(), matching jit_getarg_f().
+ * The temporary is taken from jit_class_fpr, as jit_pushargi_f() does,
+ * because it is the operand of jit_movi_f() and jit_stxi_f(). */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_f(_XMM0 - v->u.w, u);
+    else
+#endif
+    {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w, _RBP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Fetch double precision argument `v' into register `u': copy from
+ * _XMM0 - v->u.w when jit_arg_f_reg_p(v->u.w) holds (x86-64 only),
+ * else load from _RBP + v->u.w. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w)) {
+       jit_movr_d(u, _XMM0 - v->u.w);
+    }
+    else
+#endif
+    {
+       jit_ldxi_d(u, _RBP, v->u.w);
+    }
+    jit_dec_synth();
+}
+
+/* Overwrite incoming double precision argument `v' with register `u'.
+ * A register argument (x86-64 only) is copied to _XMM0 - v->u.w, a
+ * stack argument is stored at _RBP + v->u.w.
+ * The register test is jit_arg_f_reg_p(), the same predicate
+ * jit_getarg_d() applies to v->u.w, so reads and writes of one
+ * argument always agree on its location. */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(_XMM0 - v->u.w, u);
+    else
+#endif
+       jit_stxi_d(v->u.w, _RBP, u);
+    jit_dec_synth();
+}
+
+/* Overwrite incoming double precision argument `v' with immediate `u'.
+ * A register argument (x86-64 only) is loaded directly into
+ * _XMM0 - v->u.w; a stack argument goes through a temporary register
+ * that is stored at _RBP + v->u.w and then released.
+ * The register test is jit_arg_f_reg_p(), matching jit_getarg_d().
+ * The temporary is taken from jit_class_fpr, as jit_pushargi_d() does,
+ * because it is the operand of jit_movi_d() and jit_stxi_d(). */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    jit_int32_t                regno;
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+#if __X64
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movi_d(_XMM0 - v->u.w, u);
+    else
+#endif
+    {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, _RBP, regno);
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register `u' as the next integer argument of the call being
+ * prepared.
+ * On x86-64, while jit_arg_reg_p(call.argi) holds, the value is copied
+ * to JIT_RA0 - call.argi; on Cygwin/Windows call.size also grows by one
+ * word and, for varargs calls, the value is stored on the stack too.
+ * Otherwise the value is stored at _RSP + call.size, which then grows
+ * by REAL_WORDSIZE. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+#if __X64
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       _jitc->function->call.argi++;
+#  if __CYGWIN__ || _WIN32
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi(_jitc->function->call.size, _RSP, u);
+       }
+       _jitc->function->call.size += sizeof(jit_word_t);
+#  endif
+    }
+    else
+#endif
+    {
+       jit_stxi(_jitc->function->call.size, _RSP, u);
+       _jitc->function->call.size += REAL_WORDSIZE;
+    }
+    jit_dec_synth();
+}
+
+/* Pass immediate `u' as the next integer argument of the call being
+ * prepared.
+ * Register arguments (x86-64, while jit_arg_reg_p(call.argi) holds)
+ * are loaded directly into JIT_RA0 - call.argi; on Cygwin/Windows
+ * call.size also grows by one word and, for varargs calls, the
+ * register is stored on the stack too.  Stack arguments go through a
+ * temporary general purpose register. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t        tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+#if __X64
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+#  if __CYGWIN__ || _WIN32
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi(_jitc->function->call.size, _RSP,
+                    JIT_RA0 - _jitc->function->call.argi);
+       }
+       _jitc->function->call.size += sizeof(jit_word_t);
+#  endif
+       _jitc->function->call.argi++;
+    }
+    else
+#endif
+    {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(_jitc->function->call.size, _RSP, tmp);
+       _jitc->function->call.size += REAL_WORDSIZE;
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register `u' as the next single precision argument of the call
+ * being prepared.
+ * Cygwin/Windows x86-64: while jit_arg_reg_p(call.argi) holds, the
+ * value is copied to _XMM0 - call.argi; for varargs calls it is also
+ * stored on the stack and reloaded into the matching integer argument
+ * register.  call.argi and call.size are both advanced.
+ * Other x86-64: while jit_arg_f_reg_p(call.argf) holds, the value is
+ * copied to _XMM0 - call.argf and call.argf is advanced.  The test
+ * must look at the float register count of the call being prepared
+ * (call.argf), as jit_pushargi_f(), jit_pushargr_d() and
+ * jit_pushargi_d() do, and not at the count of incoming arguments of
+ * the enclosing function (self.argf).
+ * Otherwise the value is stored at _RSP + call.size, which then grows
+ * by REAL_WORDSIZE. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi_f(_jitc->function->call.size, _RSP,
+                      _XMM0 - _jitc->function->call.argi);
+           jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
+                      _jitc->function->call.size);
+       }
+       ++_jitc->function->call.argi;
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
+       ++_jitc->function->call.argf;
+    }
+#  endif
+    else
+#endif
+    {
+       jit_stxi_f(_jitc->function->call.size, _RSP, u);
+       _jitc->function->call.size += REAL_WORDSIZE;
+    }
+    jit_dec_synth();
+}
+
+/* Pass immediate `u' as the next single precision argument of the
+ * call being prepared.
+ * Cygwin/Windows x86-64: register arguments are selected with
+ * jit_arg_reg_p(call.argi); for varargs calls the value is also stored
+ * on the stack and reloaded into the matching integer register.
+ * Other x86-64: register arguments are selected with
+ * jit_arg_f_reg_p(call.argf).
+ * Stack arguments go through a temporary jit_class_fpr register. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t        tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi_f(_jitc->function->call.size, _RSP,
+                      _XMM0 - _jitc->function->call.argi);
+           jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
+                      _jitc->function->call.size);
+       }
+       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.argi++;
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
+       _jitc->function->call.argf++;
+    }
+#  endif
+    else
+#endif
+    {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_f(tmp, u);
+       jit_stxi_f(_jitc->function->call.size, _RSP, tmp);
+       _jitc->function->call.size += REAL_WORDSIZE;
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Pass register `u' as the next double precision argument of the call
+ * being prepared.
+ * Cygwin/Windows x86-64: register arguments are selected with
+ * jit_arg_reg_p(call.argi); for varargs calls the value is also stored
+ * on the stack and reloaded into the matching integer register.
+ * Other x86-64: register arguments are selected with
+ * jit_arg_f_reg_p(call.argf).
+ * Otherwise the value is stored at _RSP + call.size, which then grows
+ * by sizeof(jit_float64_t). */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi_d(_jitc->function->call.size, _RSP,
+                      _XMM0 - _jitc->function->call.argi);
+           jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
+                      _jitc->function->call.size);
+       }
+       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.argi++;
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
+       _jitc->function->call.argf++;
+    }
+#  endif
+    else
+#endif
+    {
+       jit_stxi_d(_jitc->function->call.size, _RSP, u);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+/* Pass immediate `u' as the next double precision argument of the
+ * call being prepared.
+ * Cygwin/Windows x86-64: register arguments are selected with
+ * jit_arg_reg_p(call.argi); for varargs calls the value is also stored
+ * on the stack and reloaded into the matching integer register.
+ * Other x86-64: register arguments are selected with
+ * jit_arg_f_reg_p(call.argf).
+ * Stack arguments go through a temporary jit_class_fpr register. */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t        tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+#if __X64
+#  if __CYGWIN__ || _WIN32
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
+       if (_jitc->function->call.call & jit_call_varargs) {
+           jit_stxi_d(_jitc->function->call.size, _RSP,
+                      _XMM0 - _jitc->function->call.argi);
+           jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
+                      _jitc->function->call.size);
+       }
+       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.argi++;
+    }
+#  else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+       jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
+       _jitc->function->call.argf++;
+    }
+#  endif
+    else
+#endif
+    {
+       tmp = jit_get_reg(jit_class_fpr);
+       jit_movi_d(tmp, u);
+       jit_stxi_d(_jitc->function->call.size, _RSP, tmp);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_unget_reg(tmp);
+    }
+    jit_dec_synth();
+}
+
+/* Tell whether register `regno' is one of the argument registers used
+ * by call node `node'.
+ * Only meaningful on x86-64: the register must carry jit_class_arg;
+ * its index relative to JIT_RA0 (gpr) or _XMM0 (fpr) is then compared
+ * against the count stored in node->v.w or node->w.w respectively.
+ * Always 0 on other builds. */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+#if __X64
+    jit_int32_t        kind;
+    jit_int32_t        index;
+
+    kind = jit_class(_rvs[regno].spec);
+    if (!(kind & jit_class_arg))
+       return (0);
+    if (kind & jit_class_gpr) {
+       index = JIT_RA0 - regno;
+       return (index >= 0 && index < node->v.w);
+    }
+    if (kind & jit_class_fpr) {
+       index = _XMM0 - regno;
+       return (index >= 0 && index < node->w.w);
+    }
+#endif
+    return (0);
+}
+
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{   /* Finish the call being prepared, calling through register `r0'.
+     * Records the argument register counts on the call node, then
+     * resets the per-call bookkeeping and clears _jitc->prepare. */
+    jit_int32_t                 reg;
+    jit_node_t         *call;
+    assert(_jitc->function);
+    reg = r0;
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size) /* keep
+       * self.alen at the largest call.size seen so far */
+       _jitc->function->self.alen = _jitc->function->call.size;
+#if __X64
+#  if !(__CYGWIN__ || _WIN32)
+    if (_jitc->function->call.call & jit_call_varargs) { /* varargs call:
+       * _RAX is loaded with call.argf, so a target address living in
+       * _RAX is first moved to another register */
+       if (jit_regno(reg) == _RAX) {
+           reg = jit_get_reg(jit_class_gpr);
+           jit_movr(reg, _RAX);
+       }
+       if (_jitc->function->call.argf)
+           jit_movi(_RAX, _jitc->function->call.argf);
+       else
+           jit_movi(_RAX, 0);
+       if (reg != r0) /* release the substitute register, if one was taken */
+           jit_unget_reg(reg);
+    }
+#  endif
+#endif
+    call = jit_callr(reg);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{   /* Finish the call being prepared, calling the address `i0'.
+     * On x86-64 the address is loaded into a temporary register and the
+     * call goes through jit_finishr(); the returned node is that load.
+     * On other builds a jit_calli() node is emitted, annotated with the
+     * argument register counts, and returned. */
+#if __X64
+    jit_int32_t                reg;
+#endif
+    jit_node_t         *node;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->self.alen < _jitc->function->call.size) /* keep
+       * self.alen at the largest call.size seen so far */
+       _jitc->function->self.alen = _jitc->function->call.size;
+#if __X64
+    /* FIXME preventing %rax allocation is good enough, but for consistency
+     * it should automatically detect that %rax is dead, in case it has run
+     * out of registers, and not save/restore it, which would be wrong if
+     * using the return value; otherwise, just a needless noop */
+    /* >> prevent %rax from being allocated as the function pointer */
+    jit_regset_setbit(&_jitc->regarg, _RAX);
+    reg = jit_get_reg(jit_class_gpr);
+    node = jit_movi(reg, (jit_word_t)i0);
+    jit_finishr(reg);
+    jit_unget_reg(reg);
+    /* << prevent %rax from being allocated as the function pointer */
+    jit_regset_clrbit(&_jitc->regarg, _RAX);
+#else
+    node = jit_calli(i0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+#endif
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{   /* Fetch a call's return value into `r0' by applying jit_extr_c()
+     * to JIT_RET. */
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{   /* Fetch a call's return value into `r0' by applying jit_extr_uc()
+     * to JIT_RET. */
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{   /* Fetch a call's return value into `r0' by applying jit_extr_s()
+     * to JIT_RET. */
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{   /* Fetch a call's return value into `r0' by applying jit_extr_us()
+     * to JIT_RET. */
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's int return value into `r0'.
+ * On __X32 and __X64_32 builds this is a plain register copy, skipped
+ * when `r0' already is JIT_RET; otherwise jit_extr_i() is applied to
+ * JIT_RET. */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+#if !(__X32 || __X64_32)
+    jit_extr_i(r0, JIT_RET);
+#else
+    if (JIT_RET != r0) {
+       jit_movr(r0, JIT_RET);
+    }
+#endif
+    jit_dec_synth();
+}
+
+#if __X64 && !__X64_32
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{   /* Fetch a call's return value into `r0' by applying jit_extr_ui()
+     * to JIT_RET. */
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/* Fetch a call's return value into `r0' with a plain register copy
+ * from JIT_RET, skipped when `r0' already is JIT_RET. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    if (JIT_RET != r0) {
+       jit_movr(r0, JIT_RET);
+    }
+    jit_dec_synth();
+}
+#endif
+
+/* Fetch a call's single precision return value into `r0'.
+ * On x86-64 this copies JIT_FRET to `r0' unless they are the same
+ * register; on other builds only the synthetic node is recorded. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+#if __X64
+    if (JIT_FRET != r0) {
+       jit_movr_f(r0, JIT_FRET);
+    }
+#endif
+    jit_dec_synth();
+}
+
+/* Fetch a call's double precision return value into `r0'.
+ * On x86-64 this copies JIT_FRET to `r0' unless they are the same
+ * register; on other builds only the synthetic node is recorded. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+#if __X64
+    if (JIT_FRET != r0) {
+       jit_movr_d(r0, JIT_FRET);
+    }
+#endif
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_rf(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->v.w))                           \
+                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break
+#define case_fr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->u.w))                           \
+                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break
+#define case_fw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_x87_reg_p(node->u.w))                           \
+                   x87_##name##i##type(rn(node->u.w), node->v.w);      \
+               else                                                    \
+                   sse_##name##i##type(rn(node->u.w), node->v.w);      \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_wf(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_x87_reg_p(node->v.w))                           \
+                   x87_##name##i##type(node->u.w, rn(node->v.w));      \
+               else                                                    \
+                   sse_##name##i##type(node->u.w, rn(node->v.w));      \
+               break
+#define case_ff(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->u.w) &&                         \
+                   jit_x87_reg_p(node->v.w))                           \
+                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break;
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_frr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->u.w))                           \
+                   x87_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_rrf(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->w.w))                           \
+                   x87_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_frw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_x87_reg_p(node->u.w))                           \
+                   x87_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               else                                                    \
+                   sse_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+/* Switch case for jit_code_<name>i<type> with operands (word, register,
+ * float register).  The x87_ emitter is chosen when the third operand
+ * (node->w.w) satisfies jit_x87_reg_p(), the sse_ emitter otherwise;
+ * both receive node->u.w, rn(node->v.w) and rn(node->w.w). */
+#define case_wrf(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_x87_reg_p(node->w.w))                           \
+                   x87_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   sse_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* Switch case for the register/register branch jit_code_<name>r<type>.
+ * The branch target is the node in node->u.n, which must be a label or
+ * an epilog.  If the target already has jit_flag_patch set, the branch
+ * is emitted straight to temp->u.w.  Otherwise it is emitted with the
+ * current _jit->pc.w as destination and the value returned by the
+ * emitter is handed to patch(word, node) so the branch can be fixed up
+ * later.  Uses the enclosing function's `temp` and `word` variables. */
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+/* Switch case for the register/immediate branch jit_code_<name>i<type>.
+ * The branch target is the node in node->u.n, which must be a label or
+ * an epilog.  If the target already has jit_flag_patch set, the branch
+ * is emitted straight to temp->u.w.  Otherwise it is emitted with the
+ * current _jit->pc.w as destination and the value returned by the
+ * emitter is handed to patch(word, node) so the branch can be fixed up
+ * later.  The compared operands are rn(node->v.w) and the raw word
+ * node->w.w.  Uses the enclosing function's `temp` and `word`. */
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_rff(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->v.w) &&                         \
+                   jit_x87_reg_p(node->w.w))                           \
+                   x87_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break;
+/* Switch case for jit_code_<name>i<type> taking a result register, a
+ * float register and a float immediate.  The node must carry
+ * jit_flag_data; the immediate is passed as a pointer obtained by
+ * casting node->w.n->u.w to jit_float<size>_t *.  The x87_ emitter is
+ * used when node->v.w satisfies jit_x87_reg_p(), the sse_ one
+ * otherwise. */
+#define case_rfw(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               if (jit_x87_reg_p(node->v.w))                           \
+                   x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else                                                    \
+                   sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               break
+/* Switch case for jit_code_<name>r<type> with three float registers.
+ * The x87_ emitter is used only when ALL three operands (node->u.w,
+ * node->v.w, node->w.w) satisfy jit_x87_reg_p(); otherwise the sse_
+ * emitter is used.  Both receive rn() of the three operands. */
+#define case_fff(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_x87_reg_p(node->u.w) &&                         \
+                   jit_x87_reg_p(node->v.w) &&                         \
+                   jit_x87_reg_p(node->w.w))                           \
+                   x87_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   sse_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* Switch case for jit_code_<name>i<type> with two float registers and a
+ * float immediate.  The node must carry jit_flag_data; the immediate is
+ * passed as a pointer obtained by casting node->w.n->u.w to
+ * jit_float<size>_t *.  The x87_ emitter is used only when both
+ * node->u.w and node->v.w satisfy jit_x87_reg_p(), the sse_ one
+ * otherwise. */
+#define case_ffw(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               if (jit_x87_reg_p(node->u.w) &&                         \
+                   jit_x87_reg_p(node->v.w))                           \
+                   x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else                                                    \
+                   sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               break
+/* Switch case for the float/float branch jit_code_b<name>r<type>.
+ * The branch target is the node in node->u.n, which must be a label or
+ * an epilog.  If the target already has jit_flag_patch set, the branch
+ * is emitted straight to temp->u.w; otherwise it is emitted with the
+ * current _jit->pc.w as destination and the emitter's return value is
+ * handed to patch(word, node) for a later fix-up.
+ * In either path the x87_b... emitter is used only when both compared
+ * operands (node->v.w, node->w.w) satisfy jit_x87_reg_p(); otherwise
+ * the sse_b... emitter is used.  Uses the enclosing function's `temp`
+ * and `word` variables. */
+#define case_bff(name, type)                                           \
+           case jit_code_b##name##r##type:                             \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_x87_reg_p(node->v.w) &&                     \
+                       jit_x87_reg_p(node->w.w))                       \
+                       x87_b##name##r##type(temp->u.w,                 \
+                               rn(node->v.w), rn(node->w.w));          \
+                   else                                                \
+                       sse_b##name##r##type(temp->u.w,                 \
+                               rn(node->v.w), rn(node->w.w));          \
+               }                                                       \
+               else {                                                  \
+                   if (jit_x87_reg_p(node->v.w) &&                     \
+                       jit_x87_reg_p(node->w.w))                       \
+                       word = x87_b##name##r##type(_jit->pc.w,         \
+                               rn(node->v.w), rn(node->w.w));          \
+                   else                                                \
+                       word = sse_b##name##r##type(_jit->pc.w,         \
+                               rn(node->v.w), rn(node->w.w));          \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+/* Switch case for the float/immediate branch jit_code_b<name>i<type>.
+ * The branch target is the node in node->u.n, which must be a label or
+ * an epilog.  If the target already has jit_flag_patch set, the branch
+ * is emitted straight to temp->u.w; otherwise it is emitted with the
+ * current _jit->pc.w as destination and the emitter's return value is
+ * handed to patch(word, node) for a later fix-up.
+ * The float immediate is passed as a pointer obtained by casting
+ * node->w.n->u.w to jit_float<size>_t *.  The x87_b... emitter is used
+ * when node->v.w satisfies jit_x87_reg_p(), the sse_b... one otherwise.
+ * Uses the enclosing function's `temp` and `word` variables. */
+#define case_bfw(name, type, size)                                     \
+           case jit_code_b##name##i##type:                             \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_x87_reg_p(node->v.w))                       \
+                       x87_b##name##i##type(temp->u.w,                 \
+                               rn(node->v.w),                          \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   else                                                \
+                       sse_b##name##i##type(temp->u.w,                 \
+                               rn(node->v.w),                          \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               }                                                       \
+               else {                                                  \
+                   if (jit_x87_reg_p(node->v.w))                       \
+                       word = x87_b##name##i##type(_jit->pc.w,         \
+                               rn(node->v.w),                          \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   else                                                \
+                       word = sse_b##name##i##type(_jit->pc.w,         \
+                               rn(node->v.w),                          \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               assert(!(node->u.w & (node->u.w - 1)) &&
+                      node->u.w <= sizeof(jit_word_t));
+               if ((word = _jit->pc.w & (node->u.w - 1)))
+                   nop(node->u.w - word);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               if ((node->link || (node->flag & jit_flag_use)) &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+#if __X64 && !__X64_32
+               case_rr(hton, _ul);
+#endif
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+#if __X64 && !__X64_32
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#endif
+               case_rf(trunc, _f_i);
+               case_rf(trunc, _d_i);
+#if __X64
+               case_rf(trunc, _f_l);
+               case_rf(trunc, _d_l);
+#endif
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+#if __X64 && !__X64_32
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __X64 && !__X64_32
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+#if __X64 && !__X64_32
+               case_rr(st, _l);
+               case_wr(st, _l);
+#endif
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __X64 && !__X64_32
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_fff(add, _f);
+               case_ffw(add, _f, 32);
+               case_fff(sub, _f);
+               case_ffw(sub, _f, 32);
+               case_ffw(rsb, _f, 32);
+               case_fff(mul, _f);
+               case_ffw(mul, _f, 32);
+               case_fff(div, _f);
+               case_ffw(div, _f, 32);
+               case_ff(abs, _f);
+               case_ff(neg, _f);
+               case_ff(sqrt, _f);
+               case_fr(ext, _f);
+               case_fr(ext, _d_f);
+               case_rff(lt, _f);
+               case_rfw(lt, _f, 32);
+               case_rff(le, _f);
+               case_rfw(le, _f, 32);
+               case_rff(eq, _f);
+               case_rfw(eq, _f, 32);
+               case_rff(ge, _f);
+               case_rfw(ge, _f, 32);
+               case_rff(gt, _f);
+               case_rfw(gt, _f, 32);
+               case_rff(ne, _f);
+               case_rfw(ne, _f, 32);
+               case_rff(unlt, _f);
+               case_rfw(unlt, _f, 32);
+               case_rff(unle, _f);
+               case_rfw(unle, _f, 32);
+               case_rff(uneq, _f);
+               case_rfw(uneq, _f, 32);
+               case_rff(unge, _f);
+               case_rfw(unge, _f, 32);
+               case_rff(ungt, _f);
+               case_rfw(ungt, _f, 32);
+               case_rff(ltgt, _f);
+               case_rfw(ltgt, _f, 32);
+               case_rff(ord, _f);
+               case_rfw(ord, _f, 32);
+               case_rff(unord, _f);
+               case_rfw(unord, _f, 32);
+           case jit_code_movr_f:
+               if (jit_x87_reg_p(node->u.w)) {
+                   if (jit_x87_reg_p(node->v.w))
+                       x87_movr_f(rn(node->u.w), rn(node->v.w));
+                   else
+                       x87_from_sse_f(rn(node->u.w), rn(node->v.w));
+               }
+               else {
+                   if (jit_sse_reg_p(node->v.w))
+                       sse_movr_f(rn(node->u.w), rn(node->v.w));
+                   else
+                       sse_from_x87_f(rn(node->u.w), rn(node->v.w));
+               }
+               break;
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               if (jit_x87_reg_p(node->u.w))
+                   x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               else
+                   sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_fr(ld, _f);
+               case_fw(ld, _f);
+               case_frr(ldx, _f);
+               case_frw(ldx, _f);
+               case_rf(st, _f);
+               case_wf(st, _f);
+               case_rrf(stx, _f);
+               case_wrf(stx, _f);
+               case_bff(lt, _f);
+               case_bfw(lt, _f, 32);
+               case_bff(le, _f);
+               case_bfw(le, _f, 32);
+               case_bff(eq, _f);
+               case_bfw(eq, _f, 32);
+               case_bff(ge, _f);
+               case_bfw(ge, _f, 32);
+               case_bff(gt, _f);
+               case_bfw(gt, _f, 32);
+               case_bff(ne, _f);
+               case_bfw(ne, _f, 32);
+               case_bff(unlt, _f);
+               case_bfw(unlt, _f, 32);
+               case_bff(unle, _f);
+               case_bfw(unle, _f, 32);
+               case_bff(uneq, _f);
+               case_bfw(uneq, _f, 32);
+               case_bff(unge, _f);
+               case_bfw(unge, _f, 32);
+               case_bff(ungt, _f);
+               case_bfw(ungt, _f, 32);
+               case_bff(ltgt, _f);
+               case_bfw(ltgt, _f, 32);
+               case_bff(ord, _f);
+               case_bfw(ord, _f, 32);
+               case_bff(unord, _f);
+               case_bfw(unord, _f, 32);
+               case_fff(add, _d);
+               case_ffw(add, _d, 64);
+               case_fff(sub, _d);
+               case_ffw(sub, _d, 64);
+               case_ffw(rsb, _d, 64);
+               case_fff(mul, _d);
+               case_ffw(mul, _d, 64);
+               case_fff(div, _d);
+               case_ffw(div, _d, 64);
+               case_ff(abs, _d);
+               case_ff(neg, _d);
+               case_ff(sqrt, _d);
+               case_fr(ext, _d);
+               case_fr(ext, _f_d);
+               case_rff(lt, _d);
+               case_rfw(lt, _d, 64);
+               case_rff(le, _d);
+               case_rfw(le, _d, 64);
+               case_rff(eq, _d);
+               case_rfw(eq, _d, 64);
+               case_rff(ge, _d);
+               case_rfw(ge, _d, 64);
+               case_rff(gt, _d);
+               case_rfw(gt, _d, 64);
+               case_rff(ne, _d);
+               case_rfw(ne, _d, 64);
+               case_rff(unlt, _d);
+               case_rfw(unlt, _d, 64);
+               case_rff(unle, _d);
+               case_rfw(unle, _d, 64);
+               case_rff(uneq, _d);
+               case_rfw(uneq, _d, 64);
+               case_rff(unge, _d);
+               case_rfw(unge, _d, 64);
+               case_rff(ungt, _d);
+               case_rfw(ungt, _d, 64);
+               case_rff(ltgt, _d);
+               case_rfw(ltgt, _d, 64);
+               case_rff(ord, _d);
+               case_rfw(ord, _d, 64);
+               case_rff(unord, _d);
+               case_rfw(unord, _d, 64);
+           case jit_code_movr_d:
+               if (jit_x87_reg_p(node->u.w)) {
+                   if (jit_x87_reg_p(node->v.w))
+                       x87_movr_d(rn(node->u.w), rn(node->v.w));
+                   else
+                       x87_from_sse_d(rn(node->u.w), rn(node->v.w));
+               }
+               else {
+                   if (jit_sse_reg_p(node->v.w))
+                       sse_movr_d(rn(node->u.w), rn(node->v.w));
+                   else
+                       sse_from_x87_d(rn(node->u.w), rn(node->v.w));
+               }
+               break;
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               if (jit_x87_reg_p(node->u.w))
+                   x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               else
+                   sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_fr(ld, _d);
+               case_fw(ld, _d);
+               case_frr(ldx, _d);
+               case_frw(ldx, _d);
+               case_rf(st, _d);
+               case_wf(st, _d);
+               case_rrf(stx, _d);
+               case_wrf(stx, _d);
+               case_bff(lt, _d);
+               case_bfw(lt, _d, 64);
+               case_bff(le, _d);
+               case_bfw(le, _d, 64);
+               case_bff(eq, _d);
+               case_bfw(eq, _d, 64);
+               case_bff(ge, _d);
+               case_bfw(ge, _d, 64);
+               case_bff(gt, _d);
+               case_bfw(gt, _d, 64);
+               case_bff(ne, _d);
+               case_bfw(ne, _d, 64);
+               case_bff(unlt, _d);
+               case_bfw(unlt, _d, 64);
+               case_bff(unle, _d);
+               case_bfw(unle, _d, 64);
+               case_bff(uneq, _d);
+               case_bfw(uneq, _d, 64);
+               case_bff(unge, _d);
+               case_bfw(unge, _d, 64);
+               case_bff(ungt, _d);
+               case_bfw(ungt, _d, 64);
+               case_bff(ltgt, _d);
+               case_bfw(ltgt, _d, 64);
+               case_bff(ord, _d);
+               case_bfw(ord, _d, 64);
+               case_bff(unord, _d);
+               case_bfw(unord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               if (node->link &&
+                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+                   nop(sizeof(jit_word_t) - word);
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:
+#if __X64 && !__X64_32
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+#if __X64 && !__X32
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           case jit_code_retval_f:
+#if __X32
+               if (jit_sse_reg_p(node->u.w)) {
+                   fstpr(_ST1_REGNO);
+                   sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
+               }
+               else
+                   fstpr(rn(node->u.w) + 1);
+#endif
+               break;
+           case jit_code_retval_d:
+#if __X32
+               if (jit_sse_reg_p(node->u.w)) {
+                   fstpr(_ST1_REGNO);
+                   sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
+               }
+               else
+                   fstpr(rn(node->u.w) + 1);
+#endif
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0 && _jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_bfw
+#undef case_bff
+#undef case_ffw
+#undef case_rfw
+#undef case_rff
+#undef case_brw
+#undef case_brr
+#undef case_wrf
+#undef case_wrr
+#undef case_frw
+#undef case_rrf
+#undef case_rrw
+#undef case_frr
+#undef case_rrr
+#undef case_wf
+#undef case_fw
+#undef case_fr
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(node, _jitc->patches.ptr[offset].inst, word);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+/* Textually include the three x86 back-end sources (integer CPU, SSE and
+ * x87) with CODE defined to 1; CODE is undefined again immediately
+ * afterwards so it does not leak into the rest of this file.
+ * NOTE(review): presumably each included file guards its function
+ * bodies with `#if CODE` -- confirm in those files. */
+#define CODE                           1
+#  include "jit_x86-cpu.c"
+#  include "jit_x86-sse.c"
+#  include "jit_x86-x87.c"
+#undef CODE
+
+/* Flush hook for the range [fptr, tptr) of generated code.  On this
+ * back end the body is intentionally empty and both arguments are
+ * ignored.
+ * NOTE(review): presumably no explicit instruction-cache maintenance is
+ * needed on x86 -- confirm. */
+void
+jit_flush(void *fptr, void *tptr)
+{
+}
+
+/* Emit an indexed word load: destination r0, base r1, displacement i0.
+ * The abstract register ids are translated with rn() before being
+ * handed to ldxi(). */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    jit_int32_t                dst_regno = rn(r0);
+    jit_int32_t                base_regno = rn(r1);
+
+    ldxi(dst_regno, base_regno, i0);
+}
+
+/* Emit an indexed word store: displacement i0, base r0, source r1.
+ * The abstract register ids are translated with rn() before being
+ * handed to stxi(). */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    jit_int32_t                base_regno = rn(r0);
+    jit_int32_t                src_regno = rn(r1);
+
+    stxi(i0, base_regno, src_regno);
+}
+
+/* Emit an indexed double load into float register r0 from base r1 plus
+ * displacement i0.  SSE registers go through sse_ldxi_d(); registers
+ * for which jit_x87_reg_p() holds go through x87_ldxi_d(). */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    if (!jit_x87_reg_p(r0)) {
+       sse_ldxi_d(rn(r0), rn(r1), i0);
+       return;
+    }
+    x87_ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/* Emit an indexed double store of float register r1 to base r0 plus
+ * displacement i0.  SSE registers go through sse_stxi_d(); registers
+ * for which jit_x87_reg_p() holds go through x87_stxi_d(). */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    if (!jit_x87_reg_p(r1)) {
+       sse_stxi_d(i0, rn(r0), rn(r1));
+       return;
+    }
+    x87_stxi_d(i0, rn(r0), rn(r1));
+}
+
+/* Queue a pending fix-up: remember that the instruction identified by
+ * `instr` belongs to `node`, whose target has not been placed yet.
+ * `node` must carry jit_flag_node, and its target (node->v.n for a
+ * movi, node->u.n otherwise) must not yet have jit_flag_patch set.
+ * The patch table grows in steps of 1024 entries when it is full. */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t         *target;
+    jit_patch_t                *slot;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps its target in v, every other patched code in u */
+    target = node->code == jit_code_movi ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+
+    /* no free slot left: extend the table before appending */
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    slot = _jitc->patches.ptr + _jitc->patches.offset;
+    slot->inst = instr;
+    slot->node = node;
+    ++_jitc->patches.offset;
+}
+
+/* Copy a float (_f) value from x87 register r1 to SSE register r0.
+ * The value is bounced through memory at CVT_OFFSET relative to
+ * _RBP_REGNO: the x87 store must be emitted before the SSE load. */
+static void
+_sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
+    sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
+}
+
+/* Copy a double (_d) value from x87 register r1 to SSE register r0.
+ * The value is bounced through memory at CVT_OFFSET relative to
+ * _RBP_REGNO: the x87 store must be emitted before the SSE load. */
+static void
+_sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
+    sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
+}
+
+/* Copy a float (_f) value from SSE register r1 to x87 register r0.
+ * The value is bounced through memory at CVT_OFFSET relative to
+ * _RBP_REGNO: the SSE store must be emitted before the x87 load. */
+static void
+_x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
+    x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
+}
+
+/* Copy a double (_d) value from SSE register r1 to x87 register r0.
+ * The value is bounced through memory at CVT_OFFSET relative to
+ * _RBP_REGNO: the SSE store must be emitted before the x87 load. */
+static void
+_x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
+    x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
+}
diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c
new file mode 100644 (file)
index 0000000..22eca0c
--- /dev/null
@@ -0,0 +1,3513 @@
+/*
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+#  include <fcntl.h>
+#endif
+
+/* Fallback for systems whose <sys/mman.h> lacks MAP_ANON: alias it to
+ * MAP_ANONYMOUS, and if that is missing too define it as 0 so that it
+ * adds nothing to the flags it is ORed into.  Defining MAP_ANON first
+ * is fine because the macro is only expanded where it is used. */
+#ifndef MAP_ANON
+#  define MAP_ANON                     MAP_ANONYMOUS
+#  ifndef MAP_ANONYMOUS
+#    define MAP_ANONYMOUS              0
+#  endif
+#endif
+
+/* Actions for a register load node */
+#define jit_regload_reload             0       /* convert to reload */
+#define jit_regload_delete             1       /* just remove node */
+#define jit_regload_isdead             2       /* delete and unset live bit */
+
+/*
+ * Prototypes
+ *
+ * Each file local helper _name(jit_state_t*, ...) is paired with a
+ * name(...) macro that passes the _jit variable of the calling scope
+ * as the first argument.
+ */
+static jit_word_t hash_data(const void*, jit_word_t);
+
+/* Node pool and node list management */
+#define new_pool()                     _new_pool(_jit)
+static void _new_pool(jit_state_t*);
+
+#define new_node(u)                    _new_node(_jit, u)
+static jit_node_t *_new_node(jit_state_t*, jit_code_t);
+
+#define link_node(u)                   _link_node(_jit, u)
+static inline jit_node_t *_link_node(jit_state_t*, jit_node_t*);
+
+#define del_node(u, v)                 _del_node(_jit, u, v)
+static inline void _del_node(jit_state_t*, jit_node_t*, jit_node_t*);
+
+#define free_node(u)                   _free_node(_jit, u)
+static inline void _free_node(jit_state_t*, jit_node_t*);
+
+#define del_label(u, v)                        _del_label(_jit, u, v)
+static void _del_label(jit_state_t*, jit_node_t*, jit_node_t*);
+
+/* Data buffer and per block register state */
+#define jit_dataset()                  _jit_dataset(_jit)
+static void
+_jit_dataset(jit_state_t *_jit);
+
+#define jit_setup(block)               _jit_setup(_jit, block)
+static void
+_jit_setup(jit_state_t *_jit, jit_block_t *block);
+
+#define jit_follow(block, todo)                _jit_follow(_jit, block, todo)
+static void
+_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo);
+
+#define jit_update(node, live, mask)   _jit_update(_jit, node, live, mask)
+static void
+_jit_update(jit_state_t *_jit, jit_node_t *node,
+           jit_regset_t *live, jit_regset_t *mask);
+
+/* Jump and label passes */
+#define thread_jumps()                 _thread_jumps(_jit)
+static void
+_thread_jumps(jit_state_t *_jit);
+
+#define sequential_labels()            _sequential_labels(_jit)
+static void
+_sequential_labels(jit_state_t *_jit);
+
+#define split_branches()               _split_branches(_jit)
+static void
+_split_branches(jit_state_t *_jit);
+
+#define shortcut_jump(prev, node)      _shortcut_jump(_jit, prev, node)
+static jit_bool_t
+_shortcut_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
+
+#define redundant_jump(prev, node)     _redundant_jump(_jit, prev, node)
+static jit_bool_t
+_redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
+
+static jit_code_t
+reverse_jump_code(jit_code_t code);
+
+#define reverse_jump(prev, node)       _reverse_jump(_jit, prev, node)
+static jit_bool_t
+_reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
+
+/* Store, move and load simplification */
+#define redundant_store(node, jump)    _redundant_store(_jit, node, jump)
+static void
+_redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump);
+
+#define simplify_movr(p, n, k, s)      _simplify_movr(_jit, p, n, k, s)
+static jit_bool_t
+_simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
+              jit_int32_t kind, jit_int32_t size);
+
+#define simplify_movi(p, n, k, s)      _simplify_movi(_jit, p, n, k, s)
+static jit_bool_t
+_simplify_movi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
+              jit_int32_t kind, jit_int32_t size);
+
+#define simplify_ldxi(prev, node)      _simplify_ldxi(_jit, prev, node)
+static jit_bool_t
+_simplify_ldxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
+
+#define simplify_stxi(prev, node)      _simplify_stxi(_jit, prev, node)
+static jit_bool_t
+_simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
+
+#define simplify_spill(node, regno)    _simplify_spill(_jit, node, regno)
+static void
+_simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno);
+
+#define simplify()                     _simplify(_jit)
+static void
+_simplify(jit_state_t *_jit);
+
+/* Possible results of register_change_p() */
+#define jit_reg_undef                  -1
+#define jit_reg_static                  0
+#define jit_reg_change                  1
+#define register_change_p(n, l, r)     _register_change_p(_jit, n, l, r)
+static jit_int32_t
+_register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
+                  jit_int32_t regno);
+
+#define spill_reglive_p(node, regno)   _spill_reglive_p(_jit, node, regno)
+static jit_bool_t
+_spill_reglive_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno);
+
+#define patch_registers()              _patch_registers(_jit)
+static void
+_patch_registers(jit_state_t *_jit);
+
+/* NOTE(review): the first parameter is spelled jit here, not _jit as
+ * in every other prototype of this list; harmless in a declaration */
+#define patch_register(n,l,r,p)                _patch_register(_jit,n,l,r,p)
+static void
+_patch_register(jit_state_t *jit, jit_node_t *node, jit_node_t *link,
+               jit_int32_t regno, jit_int32_t patch);
+
+/*
+ * Initialization
+ */
+/* Everywhere but on __sgi, mmap_fd is the constant -1.
+ * NOTE(review): the __sgi build includes <fcntl.h> above, so it
+ * presumably provides a real descriptor named mmap_fd elsewhere --
+ * confirm against the __sgi specific code. */
+#if !defined(__sgi)
+#define  mmap_fd                       -1
+#endif
+
+/*
+ * Implementation
+ */
+/* Library wide initialization: runs jit_get_cpu(), then
+ * jit_init_debug() with the program name, then jit_init_size().
+ * finish_jit() is the matching teardown. */
+void
+init_jit(const char *progname)
+{
+    jit_get_cpu();
+    jit_init_debug(progname);
+    jit_init_size();
+}
+
+/* Library wide teardown, counterpart of init_jit(): runs
+ * jit_finish_debug() and then jit_finish_size(). */
+void
+finish_jit(void)
+{
+    jit_finish_debug();
+    jit_finish_size();
+}
+
+/* Reserve a register described by regspec and mark it in regarg.
+ *
+ * regspec is a register class, or jit_class_named plus a register
+ * number, optionally ORed with:
+ *   jit_class_chk     - return JIT_NOREG on failure instead of
+ *                       tripping the final assertion;
+ *   jit_class_nospill - never spill a live register to satisfy the
+ *                       request (only accepted while emitting).
+ *
+ * While emitting code, a live register may be stored in its regoff
+ * stack slot (allocated on first use) and flagged in regsav, so that
+ * jit_unget_reg() reloads it.  Before emission, the register is
+ * flagged in regsav, a save node is created with jit_save() and the
+ * result is ORed with jit_regno_patch.
+ *
+ * Returns the register number, or JIT_NOREG. */
+jit_int32_t
+_jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
+{
+    jit_int32_t                spec;
+    jit_int32_t                regno;
+
+    /* the chk and nospill hints are not part of the class to match */
+    spec = regspec & ~(jit_class_chk|jit_class_nospill);
+    if (spec & jit_class_named) {
+       regno = jit_regno(spec);
+       if (jit_regset_tstbit(&_jitc->regsav, regno))
+           /* fail if register is spilled */
+           goto fail;
+       if (jit_regset_tstbit(&_jitc->regarg, regno))
+           /* fail if register is an argument to current instruction */
+           goto fail;
+       if (jit_regset_tstbit(&_jitc->reglive, regno)) {
+           if (regspec & jit_class_nospill)
+               /* fail if register is live and should not spill/reload */
+               goto fail;
+           /* jumps into the second search loop below, with regno
+            * already set to the requested register */
+           goto spill;
+       }
+       jit_regset_setbit(&_jitc->regarg, regno);
+       return (regno);
+    }
+    else
+       assert(jit_class(spec) != 0);
+
+    if (_jitc->emit) {
+       /* search for a free register matching spec */
+       for (regno = 0; regno < _jitc->reglen; regno++) {
+           if ((jit_class(_rvs[regno].spec) & spec) == spec &&
+               !jit_regset_tstbit(&_jitc->regarg, regno) &&
+               !jit_regset_tstbit(&_jitc->reglive, regno))
+               goto regarg;
+       }
+
+       /* search for a register matching spec that is not an argument
+        * for the current instruction */
+       for (regno = 0; regno < _jitc->reglen; regno++) {
+           if ((jit_class(_rvs[regno].spec) & spec) == spec &&
+               !jit_regset_tstbit(&_jitc->regsav, regno) &&
+               !jit_regset_tstbit(&_jitc->regarg, regno) &&
+               !(regspec & jit_class_nospill)) {
+           spill:
+               assert(_jitc->function != NULL);
+               if (spec & jit_class_gpr) {
+                   /* a zero regoff means no stack slot yet; allocating
+                    * one sets _jitc->again */
+                   if (!_jitc->function->regoff[regno]) {
+                       _jitc->function->regoff[regno] =
+                           jit_allocai(sizeof(jit_word_t));
+                       _jitc->again = 1;
+                   }
+#if DEBUG
+                   /* emit_stxi must not need temporary registers */
+                   assert(!_jitc->getreg);
+                   _jitc->getreg = 1;
+#endif
+                   emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
+#if DEBUG
+                   _jitc->getreg = 0;
+#endif
+               }
+               else {
+                   /* anything that is not a gpr is saved as a float64 */
+                   if (!_jitc->function->regoff[regno]) {
+                       _jitc->function->regoff[regno] =
+                           jit_allocai(sizeof(jit_float64_t));
+                       _jitc->again = 1;
+                   }
+#if DEBUG
+                   /* emit_stxi must not need temporary registers */
+                   assert(!_jitc->getreg);
+                   _jitc->getreg = 1;
+#endif
+                   emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
+#if DEBUG
+                   _jitc->getreg = 0;
+#endif
+               }
+               jit_regset_setbit(&_jitc->regsav, regno);
+           regarg:
+               jit_regset_setbit(&_jitc->regarg, regno);
+               if (jit_class(_rvs[regno].spec) & jit_class_sav) {
+                   /* if will modify callee save registers without a
+                    * function prolog, better patch this assertion */
+                   assert(_jitc->function != NULL);
+                   /* first use of this callee save register in the
+                    * function: record it and set _jitc->again */
+                   if (!jit_regset_tstbit(&_jitc->function->regset, regno)) {
+                       jit_regset_setbit(&_jitc->function->regset, regno);
+                       _jitc->again = 1;
+                   }
+               }
+               return (regno);
+           }
+       }
+    }
+    else {
+       /* nospill hint only valid during emit */
+       assert(!(regspec & jit_class_nospill));
+       for (regno = 0; regno < _jitc->reglen; regno++) {
+           if ((jit_class(_rvs[regno].spec) & spec) == spec &&
+               !jit_regset_tstbit(&_jitc->regsav, regno) &&
+               !jit_regset_tstbit(&_jitc->regarg, regno)) {
+               jit_regset_setbit(&_jitc->regarg, regno);
+               jit_regset_setbit(&_jitc->regsav, regno);
+               jit_save(regno);
+               return (jit_regno_patch|regno);
+           }
+       }
+    }
+
+    /* Out of hardware registers */
+fail:
+    /* only callers that passed jit_class_chk may see a failure */
+    assert(regspec & jit_class_chk);
+    return (JIT_NOREG);
+}
+
+/* Release a register obtained with jit_get_reg().
+ *
+ * regno may still carry extra bits (such as jit_regno_patch); they
+ * are stripped with jit_regno().  If the register is flagged in
+ * regsav its previous value is restored: while emitting, by loading
+ * it from its regoff stack slot, otherwise by creating a load node
+ * with jit_load().  The regsav and regarg bits are then cleared. */
+void
+_jit_unget_reg(jit_state_t *_jit, jit_int32_t regno)
+{
+    regno = jit_regno(regno);
+    if (jit_regset_tstbit(&_jitc->regsav, regno)) {
+       if (_jitc->emit) {
+#if DEBUG
+           /* emit_ldxi must not need a temporary register */
+           assert(!_jitc->getreg);
+           _jitc->getreg = 1;
+#endif
+           /* gprs reload as a word, everything else as a double */
+           if (jit_class(_rvs[regno].spec) & jit_class_gpr)
+               emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]);
+           else
+               emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]);
+#if DEBUG
+           /* reload emitted, leave the guarded section */
+           _jitc->getreg = 0;
+#endif
+       }
+       else
+           jit_load(regno);
+       jit_regset_clrbit(&_jitc->regsav, regno);
+    }
+    /* the register must have been reserved; where jit_carry is
+     * defined, that register is exempt from the check */
+#if defined(jit_carry)
+    assert((regno == jit_carry /*&& _NOREG != jit_carry*/) ||
+          jit_regset_tstbit(&_jitc->regarg, regno) != 0);
+#else
+    assert(jit_regset_tstbit(&_jitc->regarg, regno) != 0);
+#endif
+    jit_regset_clrbit(&_jitc->regarg, regno);
+}
+
+/* Return 1 if the _rvs entry of register regno has the jit_class_sav
+ * flag set, 0 otherwise.  regno must be in [0, JIT_NOREG). */
+jit_bool_t
+_jit_callee_save_p(jit_state_t *_jit, jit_int32_t regno)
+{
+    assert(regno >= 0 && regno < JIT_NOREG);
+    if (_rvs[regno].spec & jit_class_sav)
+       return (1);
+    return (0);
+}
+
+/* Return non zero if address is at or after _jit->code.ptr and
+ * before the current value of _jit->pc. */
+extern jit_bool_t
+_jit_pointer_p(jit_state_t *_jit, jit_pointer_t address)
+{
+    if ((jit_uint8_t *)address < _jit->code.ptr)
+       return (0);
+    return ((jit_word_t)address < _jit->pc.w);
+}
+
+#if __ia64__
+/* u = ~v, over the four words of the register set */
+void
+jit_regset_com(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = ~v->rl;
+    u->rh = ~v->rh;
+    u->fl = ~v->fl;
+    u->fh = ~v->fh;
+}
+
+/* u = v & w, over the four words of the register set */
+void
+jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl & w->rl;
+    u->rh = v->rh & w->rh;
+    u->fl = v->fl & w->fl;
+    u->fh = v->fh & w->fh;
+}
+
+/* u = v | w, over the four words of the register set */
+void
+jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl | w->rl;
+    u->rh = v->rh | w->rh;
+    u->fl = v->fl | w->fl;
+    u->fh = v->fh | w->fh;
+}
+
+/* u = v ^ w, over the four words of the register set */
+void
+jit_regset_xor(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl ^ w->rl;
+    u->rh = v->rh ^ w->rh;
+    u->fl = v->fl ^ w->fl;
+    u->fh = v->fh ^ w->fh;
+}
+
+/* u = v, copying the four words of the register set */
+void
+jit_regset_set(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = v->rl;
+    u->rh = v->rh;
+    u->fl = v->fl;
+    u->fh = v->fh;
+}
+
+/* Set the v lowest bits of u and clear the others; v must be in
+ * [0, 256] and v == 0 sets all 256 bits.
+ *
+ * The word holding the boundary gets its (v & 63) low bits, or all 64
+ * when v is a multiple of 64.  This replaces a power of two test on v
+ * that filled the whole word for v = 1, 2, 4, ... 32, shifted by 64
+ * for v = 192, and an fh shift of (v - 128) that was always >= 65. */
+void
+jit_regset_set_mask(jit_regset_t *u, jit_int32_t v)
+{
+    /* number of bits wanted in the partially filled word */
+    jit_int32_t                w = v & 63;
+
+    assert(v >= 0 && v <= 256);
+    if (v == 0)
+       u->rl = u->rh = u->fl = u->fh = -1LL;
+    else if (v <= 64) {
+       /* unsigned constant: 1 << 63 is not representable as signed */
+       u->rl = w ? (1ULL << w) - 1 : -1LL;
+       u->rh = u->fl = u->fh = 0;
+    }
+    else if (v <= 128) {
+       u->rl = -1LL;
+       u->rh = w ? (1ULL << w) - 1 : -1LL;
+       u->fl = u->fh = 0;
+    }
+    else if (v <= 192) {
+       u->rl = u->rh = -1LL;
+       u->fl = w ? (1ULL << w) - 1 : -1LL;
+       u->fh = 0;
+    }
+    else {
+       u->rl = u->rh = u->fl = -1LL;
+       u->fh = w ? (1ULL << w) - 1 : -1LL;
+    }
+}
+
+/* Compare the set with the single word value v: returns 0 when rl
+ * equals v and the three other words are zero, 1 otherwise. */
+jit_bool_t
+jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v)
+{
+    return (u->rl != v || u->rh != 0 || u->fl != 0 || u->fh != 0);
+}
+
+/* Load v in the lowest word of the set and zero the other three */
+void
+jit_regset_set_ui(jit_regset_t *u, jit_word_t v)
+{
+    u->fh = 0;
+    u->fl = 0;
+    u->rh = 0;
+    u->rl = v;
+}
+
+/* Return 1 if any bit of the set is set, 0 if it is empty */
+jit_bool_t
+jit_regset_set_p(jit_regset_t *u)
+{
+    if (u->rl || u->rh)
+       return (1);
+    return (u->fl || u->fh);
+}
+
+/* Clear bit number bit (0..255) of the set */
+void
+jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 255);
+    /* bit >> 6 is the index of the 64 bit word holding the bit */
+    switch (bit >> 6) {
+       case 0:
+           set->rl &= ~(1LL << bit);
+           break;
+       case 1:
+           set->rh &= ~(1LL << (bit - 64));
+           break;
+       case 2:
+           set->fl &= ~(1LL << (bit - 128));
+           break;
+       default:
+           set->fh &= ~(1LL << (bit - 192));
+           break;
+    }
+}
+
+/* Set bit number bit (0..255) of the set */
+void
+jit_regset_setbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 255);
+    /* bit >> 6 is the index of the 64 bit word holding the bit */
+    switch (bit >> 6) {
+       case 0:
+           set->rl |= 1LL << bit;
+           break;
+       case 1:
+           set->rh |= 1LL << (bit - 64);
+           break;
+       case 2:
+           set->fl |= 1LL << (bit - 128);
+           break;
+       default:
+           set->fh |= 1LL << (bit - 192);
+           break;
+    }
+}
+
+/* Return 1 if bit number bit (0..255) is set, 0 otherwise */
+jit_bool_t
+jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 255);
+    /* bit >> 6 is the index of the 64 bit word holding the bit */
+    switch (bit >> 6) {
+       case 0:
+           return ((set->rl & (1LL << bit)) != 0);
+       case 1:
+           return ((set->rh & (1LL << (bit - 64))) != 0);
+       case 2:
+           return ((set->fl & (1LL << (bit - 128))) != 0);
+       default:
+           break;
+    }
+    return ((set->fh & (1LL << (bit - 192))) != 0);
+}
+
+/* Return the index of the first bit set at or after offset, or
+ * ULONG_MAX if there is none.  offset must be in [0, 255]. */
+unsigned long
+jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
+{
+    assert(offset >= 0 && offset <= 255);
+    /* jit_regset_tstbit() picks the word matching each index */
+    for (; offset < 256; offset++) {
+       if (jit_regset_tstbit(set, offset))
+           return (offset);
+    }
+    return (ULONG_MAX);
+}
+
+#elif __sparc__ && __WORDSIZE == 64
+/* u = ~v, over the two words of the register set */
+void
+jit_regset_com(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = ~v->rl;
+    u->rh = ~v->rh;
+}
+
+/* u = v & w, over the two words of the register set */
+void
+jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl & w->rl;
+    u->rh = v->rh & w->rh;
+}
+
+/* u = v | w, over the two words of the register set */
+void
+jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl | w->rl;
+    u->rh = v->rh | w->rh;
+}
+
+/* u = v ^ w, over the two words of the register set */
+void
+jit_regset_xor(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl ^ w->rl;
+    u->rh = v->rh ^ w->rh;
+}
+
+/* u = v, copying the two words of the register set */
+void
+jit_regset_set(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = v->rl;
+    u->rh = v->rh;
+}
+
+/* Set the v lowest bits of u and clear the others; v must be in
+ * [0, 128] and v == 0 sets all 128 bits.
+ *
+ * The word holding the boundary gets its (v & 63) low bits, or all 64
+ * when v is a multiple of 64.  This replaces a power of two test on v
+ * that filled the whole low word for v = 1, 2, 4, 8, 16 and 32. */
+void
+jit_regset_set_mask(jit_regset_t *u, jit_int32_t v)
+{
+    /* number of bits wanted in the partially filled word */
+    jit_int32_t                w = v & 63;
+
+    assert(v >= 0 && v <= 128);
+    if (v == 0)
+       u->rl = u->rh = -1LL;
+    else if (v <= 64) {
+       /* unsigned constant: 1 << 63 is not representable as signed */
+       u->rl = w ? (1ULL << w) - 1 : -1LL;
+       u->rh = 0;
+    }
+    else {
+       u->rl = -1LL;
+       u->rh = w ? (1ULL << w) - 1 : -1LL;
+    }
+}
+
+/* Compare the set with the single word value v: returns 0 when rl
+ * equals v and rh is zero, 1 otherwise. */
+jit_bool_t
+jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v)
+{
+    return (u->rl != v || u->rh != 0);
+}
+
+/* Load v in the low word of the set and zero the high word */
+void
+jit_regset_set_ui(jit_regset_t *u, jit_word_t v)
+{
+    u->rh = 0;
+    u->rl = v;
+}
+
+/* Return 1 if any bit of the set is set, 0 if it is empty */
+jit_bool_t
+jit_regset_set_p(jit_regset_t *u)
+{
+    if (u->rl)
+       return (1);
+    return (u->rh != 0);
+}
+
+/* Clear bit number bit of the set.
+ *
+ * Valid bit numbers are 0..127, as in jit_regset_setbit() and
+ * jit_regset_tstbit(); the assertion used to accept 128 as well,
+ * which would shift the high word by 64 bits. */
+void
+jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 127);
+    if (bit < 64)
+       set->rl &= ~(1LL << bit);
+    else
+       set->rh &= ~(1LL << (bit - 64));
+}
+
+/* Set bit number bit (0..127) of the set */
+void
+jit_regset_setbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 127);
+    if (bit >= 64) {
+       set->rh |= 1LL << (bit - 64);
+       return;
+    }
+    set->rl |= 1LL << bit;
+}
+
+/* Return 1 if bit number bit (0..127) is set, 0 otherwise */
+jit_bool_t
+jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 127);
+    if (bit >= 64)
+       return ((set->rh & (1LL << (bit - 64))) != 0);
+    return ((set->rl & (1LL << bit)) != 0);
+}
+
+/* Return the index of the first bit set at or after offset, or
+ * ULONG_MAX if there is none.  offset must be in [0, 127]. */
+unsigned long
+jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
+{
+    assert(offset >= 0 && offset <= 127);
+    /* jit_regset_tstbit() picks the word matching each index */
+    for (; offset < 128; offset++) {
+       if (jit_regset_tstbit(set, offset))
+           return (offset);
+    }
+    return (ULONG_MAX);
+}
+
+#else
+/* Return the index of the first bit set at or after offset, or
+ * ULONG_MAX if there is none.  offset must be in [0, 63]. */
+unsigned long
+jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
+{
+    jit_regset_t       bits;
+
+    assert(offset >= 0 && offset <= 63);
+    /* drop the bits below offset, then walk up to the first one set */
+    for (bits = *set >> offset; bits; bits >>= 1, ++offset) {
+       if (bits & 1)
+           return (offset);
+    }
+    return (ULONG_MAX);
+}
+#endif
+
+/* Create a jit_code_save node for register reg and remember it in
+ * _jitc->spill[], where _jit_load() will look for it.  Must not be
+ * called once _jitc->realize is set. */
+void
+_jit_save(jit_state_t *_jit, jit_int32_t reg)
+{
+    jit_int32_t                regno = jit_regno(reg);
+
+    assert(!_jitc->realize);
+    _jitc->spill[regno] = jit_new_node_w(jit_code_save, regno);
+}
+
+/* Create the jit_code_load node matching the pending save node of
+ * register reg, cross link the two nodes and forget the pending
+ * save.  A save node must be pending for reg. */
+void
+_jit_load(jit_state_t *_jit, jit_int32_t reg)
+{
+    jit_node_t         *save;
+    jit_node_t         *load;
+    jit_int32_t                 regno = jit_regno(reg);
+
+    assert(!_jitc->realize);
+    save = _jitc->spill[regno];
+    assert(save != NULL);
+    load = jit_new_node_w(jit_code_load, regno);
+    /* create a path to flag the save/load is not required */
+    load->link = save;
+    save->link = load;
+    _jitc->spill[regno] = NULL;
+}
+
+/* Hash length bytes starting at data.
+ *
+ * For every byte the key is shifted left by one if it is odd, then
+ * xored with the byte.  The key is kept in an unsigned word: it can
+ * gain one bit per byte, and shifting a signed word past its range
+ * is undefined.  Values are the same as before whenever the signed
+ * computation was defined. */
+static jit_word_t
+hash_data(const void *data, jit_word_t length)
+{
+    const jit_uint8_t  *ptr = data;
+    jit_uword_t                 key = 0;
+    jit_word_t          i;
+
+    for (i = 0; i < length; i++)
+       key = (key << (key & 1)) ^ ptr[i];
+    return ((jit_word_t)key);
+}
+
+/* Return the address stored in node->u.w.  Only valid once
+ * _jitc->done is set, and only for note nodes, name nodes, or label
+ * nodes flagged with jit_flag_use. */
+jit_pointer_t
+_jit_address(jit_state_t *_jit, jit_node_t *node)
+{
+    assert(_jitc->done);
+    assert(node != NULL);
+    /* Either a node type that is documented to be a fixed marker, or
+     * another special fixed marker, returned by jit_indirect() */
+    assert(node->code == jit_code_note ||
+          node->code == jit_code_name ||
+          (node->code == jit_code_label &&
+           (node->flag & jit_flag_use) != 0));
+    return ((jit_pointer_t)node->u.w);
+}
+
+/* Add length bytes at data to the constant data buffer and return
+ * the jit_code_data node describing them (u.w is the offset in the
+ * buffer, v.w the length).
+ *
+ * Identical contents are shared: a hash table keyed by hash_data()
+ * is searched first and the existing node is returned on a match.
+ * align selects the alignment of a new entry; 0 means "use length".
+ * 2 aligns to 2 bytes, 3 and 4 to 4 bytes, anything above to 8. */
+jit_node_t *
+_jit_data(jit_state_t *_jit, const void *data,
+         jit_word_t length, jit_int32_t align)
+{
+    jit_word_t          key;
+    jit_node_t         *node;
+
+    assert(!_jitc->realize);
+
+    /* Ensure there is space even if asking for a duplicate */
+    if (((_jitc->data.offset + 7) & -8) + length > _jit->data.length) {
+       jit_word_t       size;
+
+       /* Grow to a multiple of 4096 bytes strictly above what is
+        * needed.  The mask was -4095, which keeps bit 0 and so could
+        * produce an odd size instead of rounding to 4096 */
+       size = (_jit->data.length + length + 4096) & -4096;
+       assert(size >= _jit->data.length);
+       if (_jitc->data.ptr == NULL)
+           jit_alloc((jit_pointer_t *)&_jitc->data.ptr, size);
+       else
+           jit_realloc((jit_pointer_t *)&_jitc->data.ptr,
+                       _jit->data.length, size);
+       _jit->data.length = size;
+    }
+    /* The hash table is created on first use, with 16 buckets */
+    if (_jitc->data.table == NULL)
+       jit_alloc((jit_pointer_t *)&_jitc->data.table,
+                 (_jitc->data.size = 16) * sizeof(jit_node_t*));
+
+    /* data.size is a power of two, so the mask selects a bucket */
+    key = hash_data(data, length) & (_jitc->data.size - 1);
+    node = _jitc->data.table[key];
+    for (; node; node = node->next) {
+       if (node->v.w == length &&
+           memcmp(_jitc->data.ptr + node->u.w, data, length) == 0)
+           break;
+    }
+
+    if (!node) {
+       node = jit_new_node_no_link(jit_code_data);
+       if (!align)
+           align = length;
+       switch (align) {
+           case 0:     case 1:
+               break;
+           case 2:
+               _jitc->data.offset = (_jitc->data.offset + 1) & -2;
+               break;
+           case 3:     case 4:
+               _jitc->data.offset = (_jitc->data.offset + 3) & -4;
+               break;
+           default:
+               _jitc->data.offset = (_jitc->data.offset + 7) & -8;
+               break;
+       }
+       node->u.w = _jitc->data.offset;
+       node->v.w = length;
+       jit_memcpy(_jitc->data.ptr + _jitc->data.offset, data, length);
+       _jitc->data.offset += length;
+
+       /* push the new node at the head of its bucket */
+       node->next = _jitc->data.table[key];
+       _jitc->data.table[key] = node;
+       ++_jitc->data.count;
+
+       /* Rehash if more than 75% used table */
+       if (_jitc->data.count >
+           (_jitc->data.size >> 1) + (_jitc->data.size >> 2) &&
+           (_jitc->data.size << 1) > _jitc->data.size) {
+           jit_word_t    i;
+           jit_node_t  **hash;
+           jit_node_t   *next;
+           jit_node_t   *temp;
+
+           /* NOTE(review): the buckets of hash are read before being
+            * written, so this presumably relies on jit_alloc()
+            * returning zeroed memory -- confirm */
+           jit_alloc((jit_pointer_t *)&hash,
+                     (_jitc->data.size << 1) * sizeof(jit_node_t*));
+           for (i = 0; i < _jitc->data.size; i++) {
+               temp = _jitc->data.table[i];
+               for (; temp; temp = next) {
+                   next = temp->next;
+                   key = hash_data(_jitc->data.ptr + temp->u.w, temp->v.w) &
+                         ((_jitc->data.size << 1) - 1);
+                   temp->next = hash[key];
+                   hash[key] = temp;
+               }
+           }
+           jit_free((jit_pointer_t *)&_jitc->data.table);
+           _jitc->data.table = hash;
+           _jitc->data.size <<= 1;
+       }
+    }
+
+    return (node);
+}
+
+/* Allocate a chunk of 1024 nodes, chain them through their next
+ * field and put the chain in front of the free list _jitc->list.
+ * The array of chunk pointers grows by 16 entries when it is full. */
+static void
+_new_pool(jit_state_t *_jit)
+{
+    jit_node_t         *chunk;
+    jit_int32_t                 index;
+
+    if (_jitc->pool.offset >= _jitc->pool.length) {
+       jit_int32_t      length;
+
+       length = _jitc->pool.length + 16;
+       jit_realloc((jit_pointer_t *)&_jitc->pool.ptr,
+                   _jitc->pool.length * sizeof(jit_node_t *),
+                   length * sizeof(jit_node_t *));
+       _jitc->pool.length = length;
+    }
+    jit_alloc((jit_pointer_t *)(_jitc->pool.ptr + _jitc->pool.offset),
+             sizeof(jit_node_t) * 1024);
+    chunk = _jitc->pool.ptr[_jitc->pool.offset];
+    /* every node but the last points to its right neighbour */
+    for (index = 0; index < 1023; index++)
+       chunk[index].next = chunk + index + 1;
+    /* the last one continues with the previous free list */
+    chunk[1023].next = _jitc->list;
+    _jitc->list = chunk;
+    ++_jitc->pool.offset;
+}
+
+/* Take a node from the free list, refilling the list with
+ * new_pool() when it is empty, and set its code.  The node is
+ * flagged jit_flag_synth while _jitc->synth is non zero.  The node
+ * is returned unlinked (next is NULL). */
+static jit_node_t *
+_new_node(jit_state_t *_jit, jit_code_t code)
+{
+    jit_node_t         *node;
+
+    if (!_jitc->list)
+       new_pool();
+    node = _jitc->list;
+    _jitc->list = node->next;
+
+    node->next = NULL;
+    node->code = code;
+    if (_jitc->synth)
+       node->flag |= jit_flag_synth;
+
+    return (node);
+}
+
+/* Append node to the node list (it becomes the head if the list is
+ * empty) and return it. */
+static inline jit_node_t *
+_link_node(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->tail == NULL)
+       _jitc->head = node;
+    else
+       _jitc->tail->next = node;
+    _jitc->tail = node;
+    return (node);
+}
+
+/* Unlink node, whose predecessor is prev, then zero it and give it
+ * back to the free list.  prev == node means "node is the head of
+ * the list". */
+static inline void
+_del_node(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    if (prev != node)
+       prev->next = node->next;
+    else {
+       assert(prev == _jitc->head);
+       _jitc->head = node->next;
+    }
+    /* zeroes the node and pushes it on _jitc->list */
+    free_node(node);
+}
+
+/* Zero node and push it on the free list _jitc->list.  The node is
+ * not unlinked from anything; see _del_node() for that. */
+static inline void
+_free_node(jit_state_t *_jit, jit_node_t *node)
+{
+    memset(node, 0, sizeof(jit_node_t));
+    node->next = _jitc->list;
+    _jitc->list = node;
+}
+
+/* Delete the label node, whose predecessor is prev, and detach it
+ * from its block: the block register sets are released and the
+ * block label pointer is reset. */
+static void
+_del_label(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_block_t                *block = &_jitc->blocks.ptr[node->v.w];
+
+    /* only allow call to del_label on linked labels */
+    assert(block->label == node);
+
+    /* del_label() should only be called when optimizing.
+     * This will leave an empty block index */
+    block->label = NULL;
+    jit_regset_del(&block->reglive);
+    jit_regset_del(&block->regmask);
+
+    /* redundant, should be already true */
+    assert(node->link == NULL);
+    del_node(prev, node);
+}
+
+/* Allocate and initialize a jit state together with its compiler
+ * state, and return it.
+ *
+ * The per register arrays (spill, gen, values) are sized from
+ * _jitc->reglen, so they are allocated after jit_init().  The other
+ * tables start with 1024 patches, 16 functions, 16 node pool slots
+ * and 16 blocks. */
+jit_state_t *
+jit_new_state(void)
+{
+    jit_state_t                *_jit;
+
+    jit_alloc((jit_pointer_t *)&_jit, sizeof(jit_state_t));
+    /* NOTE(review): _jitc is presumably a macro reaching the compiler
+     * state through the local _jit -- confirm in jit_private.h */
+    jit_alloc((jit_pointer_t *)&_jitc, sizeof(jit_compiler_t));
+    jit_regset_new(&_jitc->regarg);
+    jit_regset_new(&_jitc->regsav);
+    jit_regset_new(&_jitc->reglive);
+    jit_regset_new(&_jitc->regmask);
+
+    jit_init();
+
+    jit_alloc((jit_pointer_t *)&_jitc->spill,
+             _jitc->reglen * sizeof(jit_node_t*));
+    jit_alloc((jit_pointer_t *)&_jitc->gen,
+             _jitc->reglen * sizeof(jit_int32_t));
+    jit_alloc((jit_pointer_t *)&_jitc->values,
+             _jitc->reglen * sizeof(jit_value_t));
+
+    jit_alloc((jit_pointer_t *)&_jitc->patches.ptr,
+             (_jitc->patches.length = 1024) * sizeof(jit_patch_t));
+    jit_alloc((jit_pointer_t *)&_jitc->functions.ptr,
+             (_jitc->functions.length = 16) * sizeof(jit_function_t));
+    jit_alloc((jit_pointer_t *)&_jitc->pool.ptr,
+             (_jitc->pool.length = 16) * sizeof(jit_node_t*));
+    jit_alloc((jit_pointer_t *)&_jitc->blocks.ptr,
+             (_jitc->blocks.length = 16) * sizeof(jit_block_t));
+#if __arm__ && DISASSEMBLER
+    jit_alloc((jit_pointer_t *)&_jitc->data_info.ptr,
+             (_jitc->data_info.length = 1024) * sizeof(jit_data_info_t));
+#endif
+
+    /* allocate at most one extra note in case jit_name() is
+     * never called, or called after adding at least one note */
+    _jit->note.length = 1;
+    _jitc->note.size = sizeof(jit_note_t);
+
+    return (_jit);
+}
+
+/* Release the compiler state (_jitc) and everything it owns.
+ *
+ * With DEVEL_DISASSEMBLER the preprocessor splits this definition in
+ * two: _jit_clear_state() becomes an empty function and the body
+ * below is compiled as _jit_really_clear_state(), which
+ * _jit_destroy_state() calls. */
+void
+_jit_clear_state(jit_state_t *_jit)
+{
+#if DEVEL_DISASSEMBLER
+#  define jit_really_clear_state()     _jit_really_clear_state(_jit)
+}
+
+void _jit_really_clear_state(jit_state_t *_jit)
+{
+#endif
+    jit_word_t          offset;
+    jit_function_t     *function;
+
+    /* release memory not required at jit execution time and set
+     * pointers to NULL to explicitly know they are released */
+    _jitc->head = _jitc->tail = NULL;
+
+    jit_free((jit_pointer_t *)&_jitc->data.table);
+    _jitc->data.size = _jitc->data.count = 0;
+
+    jit_free((jit_pointer_t *)&_jitc->spill);
+    jit_free((jit_pointer_t *)&_jitc->gen);
+    jit_free((jit_pointer_t *)&_jitc->values);
+
+    jit_free((jit_pointer_t *)&_jitc->blocks.ptr);
+
+    jit_free((jit_pointer_t *)&_jitc->patches.ptr);
+    _jitc->patches.offset = _jitc->patches.length = 0;
+
+    /* every function owns a regoff array, free those first */
+    for (offset = 0; offset < _jitc->functions.offset; offset++) {
+       function = _jitc->functions.ptr + offset;
+       jit_free((jit_pointer_t *)&function->regoff);
+    }
+    jit_free((jit_pointer_t *)&_jitc->functions.ptr);
+    _jitc->functions.offset = _jitc->functions.length = 0;
+    _jitc->function = NULL;
+
+    /* free each node chunk, then the array of chunk pointers; the
+     * free list pointed into those chunks */
+    for (offset = 0; offset < _jitc->pool.offset; offset++)
+       jit_free((jit_pointer_t *)(_jitc->pool.ptr + offset));
+    jit_free((jit_pointer_t *)&_jitc->pool.ptr);
+    _jitc->pool.offset = _jitc->pool.length = 0;
+    _jitc->list = NULL;
+
+    _jitc->note.head = _jitc->note.tail =
+       _jitc->note.name = _jitc->note.note = NULL;
+    _jitc->note.base = NULL;
+
+#if __arm__ && DISASSEMBLER
+    jit_free((jit_pointer_t *)&_jitc->data_info.ptr);
+#endif
+
+#if (__powerpc__ && _CALL_AIXDESC) || __ia64__
+    jit_free((jit_pointer_t *)&_jitc->prolog.ptr);
+#endif
+
+#if __ia64__
+    jit_regset_del(&_jitc->regs);
+#endif
+
+    /* finally the compiler state itself */
+    jit_free((jit_pointer_t *)&_jitc);
+}
+
+/* Destroy a jit state: unmap the code and data buffers unless they
+ * are flagged as user provided (user_code, user_data), then free the
+ * state itself.  With DEVEL_DISASSEMBLER the compiler state is
+ * released here too, through jit_really_clear_state(). */
+void
+_jit_destroy_state(jit_state_t *_jit)
+{
+#if DEVEL_DISASSEMBLER
+    jit_really_clear_state();
+#endif
+    /* NOTE(review): munmap() results are not checked */
+    if (!_jit->user_code)
+       munmap(_jit->code.ptr, _jit->code.length);
+    if (!_jit->user_data)
+       munmap(_jit->data.ptr, _jit->data.length);
+    jit_free((jit_pointer_t *)&_jit);
+}
+
+/* Increment the _jitc->synth counter; nodes created while it is non
+ * zero get jit_flag_synth (see _new_node()).  The counter is
+ * asserted to be below 8 on entry. */
+void
+_jit_synth_inc(jit_state_t *_jit)
+{
+    assert(_jitc->synth < 8);
+    ++_jitc->synth;
+}
+
+/* Create a node with the given code, append it to the node list and
+ * return it.  Must not be called once _jitc->realize is set. */
+jit_node_t *
+_jit_new_node(jit_state_t *_jit, jit_code_t code)
+{
+    jit_node_t         *node;
+
+    assert(!_jitc->realize);
+    node = new_node(code);
+    return (link_node(node));
+}
+
+/* Create a node with the given code and return it without adding it
+ * to the node list.  Must not be called once _jitc->realize is set. */
+jit_node_t *
+_jit_new_node_no_link(jit_state_t *_jit, jit_code_t code)
+{
+    jit_node_t         *node;
+
+    assert(!_jitc->realize);
+    node = new_node(code);
+    return (node);
+}
+
+/* Append an existing node (for instance one created with
+ * jit_new_node_no_link()) to the node list.  Must not be called once
+ * _jitc->realize is set. */
+void
+_jit_link_node(jit_state_t *_jit, jit_node_t *node)
+{
+    assert(!_jitc->realize);
+    link_node(node);
+}
+
+/* Decrement the _jitc->synth counter, undoing one _jit_synth_inc().
+ * The counter is asserted to be positive on entry. */
+void
+_jit_synth_dec(jit_state_t *_jit)
+{
+    assert(_jitc->synth > 0);
+    --_jitc->synth;
+}
+
+/* Create and link a node with a word argument in u */
+jit_node_t *
+_jit_new_node_w(jit_state_t *_jit, jit_code_t code,
+               jit_word_t u)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a float32 argument in u */
+jit_node_t *
+_jit_new_node_f(jit_state_t *_jit, jit_code_t code,
+               jit_float32_t u)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.f = u;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a float64 argument in u */
+jit_node_t *
+_jit_new_node_d(jit_state_t *_jit, jit_code_t code,
+               jit_float64_t u)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.d = u;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a pointer argument in u */
+jit_node_t *
+_jit_new_node_p(jit_state_t *_jit, jit_code_t code,
+               jit_pointer_t u)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.p = u;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with word arguments in u and v */
+jit_node_t *
+_jit_new_node_ww(jit_state_t *_jit, jit_code_t code,
+                jit_word_t u, jit_word_t v)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    node->v.w = v;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a word in u and a pointer in v; the
+ * pointer is stored as a word through jit_new_node_ww() */
+jit_node_t *
+_jit_new_node_wp(jit_state_t *_jit, jit_code_t code,
+                jit_word_t u, jit_pointer_t v)
+{
+    jit_word_t          word = (jit_word_t)v;
+
+    return (jit_new_node_ww(code, u, word));
+}
+
+/* Create and link a node with a float32 in u and a pointer, stored
+ * as a word, in v */
+jit_node_t *
+_jit_new_node_fp(jit_state_t *_jit, jit_code_t code,
+                jit_float32_t u, jit_pointer_t v)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.f = u;
+    node->v.w = (jit_word_t)v;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a float64 in u and a pointer, stored
+ * as a word, in v */
+jit_node_t *
+_jit_new_node_dp(jit_state_t *_jit, jit_code_t code,
+                jit_float64_t u, jit_pointer_t v)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.d = u;
+    node->v.w = (jit_word_t)v;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a pointer in u and a word in v; the
+ * pointer is stored as a word through jit_new_node_ww() */
+jit_node_t *
+_jit_new_node_pw(jit_state_t *_jit, jit_code_t code,
+                jit_pointer_t u, jit_word_t v)
+{
+    jit_word_t          word = (jit_word_t)u;
+
+    return (jit_new_node_ww(code, word, v));
+}
+
+/* Create and link a node with a word in u and a float32 in v */
+jit_node_t *
+_jit_new_node_wf(jit_state_t *_jit, jit_code_t code,
+                jit_word_t u, jit_float32_t v)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    node->v.f = v;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with a word in u and a float64 in v */
+jit_node_t *
+_jit_new_node_wd(jit_state_t *_jit, jit_code_t code,
+                jit_word_t u, jit_float64_t v)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    node->v.d = v;
+    link_node(node);
+    return (node);
+}
+
+/* Create and link a node with word arguments in u, v and w */
+jit_node_t *
+_jit_new_node_www(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_word_t v, jit_word_t w)
+{
+    jit_node_t         *node;
+
+    node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    node->v.w = v;
+    node->w.w = w;
+    link_node(node);
+    return (node);
+}
+
+/* Build a node for `code` whose first operand is the pair (l, h)
+ * stored in u.q, followed by two word operands in v and w.  Asserts
+ * that the state has not been realized yet and that l and h differ;
+ * returns the result of link_node(). */
+jit_node_t *
+_jit_new_node_qww(jit_state_t *_jit, jit_code_t code,
+                 jit_int32_t l, jit_int32_t h,
+                 jit_word_t v, jit_word_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    fresh->u.q.l = l;
+    fresh->u.q.h = h;
+    fresh->v.w = v;
+    fresh->w.w = w;
+
+    return (link_node(fresh));
+}
+
+/* Build a node for `code` with words in u and v and a jit_float32_t
+ * value in w.  Asserts that the state has not been realized yet;
+ * returns the result of link_node(). */
+jit_node_t *
+_jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_word_t v, jit_float32_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    fresh->u.w = u;
+    fresh->v.w = v;
+    fresh->w.f = w;
+
+    return (link_node(fresh));
+}
+
+/* Build a node for `code` with words in u and v and a jit_float64_t
+ * value in w.  Asserts that the state has not been realized yet;
+ * returns the result of link_node(). */
+jit_node_t *
+_jit_new_node_wwd(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_word_t v, jit_float64_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    fresh->u.w = u;
+    fresh->v.w = v;
+    fresh->w.d = w;
+
+    return (link_node(fresh));
+}
+
+/* Build a node for `code` with a pointer in u and words in v and w.
+ * Asserts that the state has not been realized yet; returns the
+ * result of link_node(). */
+jit_node_t *
+_jit_new_node_pww(jit_state_t *_jit, jit_code_t code,
+                 jit_pointer_t u, jit_word_t v, jit_word_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    fresh->u.p = u;
+    fresh->v.w = v;
+    fresh->w.w = w;
+
+    return (link_node(fresh));
+}
+
+/* Build a node for `code` with a pointer in u, a word in v and a
+ * jit_float32_t value in w.  Asserts that the state has not been
+ * realized yet; returns the result of link_node(). */
+jit_node_t *
+_jit_new_node_pwf(jit_state_t *_jit, jit_code_t code,
+                 jit_pointer_t u, jit_word_t v, jit_float32_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    fresh->u.p = u;
+    fresh->v.w = v;
+    fresh->w.f = w;
+
+    return (link_node(fresh));
+}
+
+/* Build a node for `code` with a pointer in u, a word in v and a
+ * jit_float64_t value in w.  Asserts that the state has not been
+ * realized yet; returns the result of link_node(). */
+jit_node_t *
+_jit_new_node_pwd(jit_state_t *_jit, jit_code_t code,
+                 jit_pointer_t u, jit_word_t v, jit_float64_t w)
+{
+    jit_node_t         *fresh;
+
+    fresh = new_node(code);
+    assert(!_jitc->realize);
+    fresh->u.p = u;
+    fresh->v.w = v;
+    fresh->w.d = w;
+
+    return (link_node(fresh));
+}
+
+/* Return a label at the current position.  When the tail node is
+ * already a label it is reused; otherwise a new one is created with
+ * jit_forward() and attached with jit_link(). */
+jit_node_t *
+_jit_label(jit_state_t *_jit)
+{
+    jit_node_t         *last = _jitc->tail;
+
+    /* reuse the label that already ends the list */
+    if (last && last->code == jit_code_label)
+       return (last);
+
+    last = jit_forward();
+    jit_link(last);
+
+    return (last);
+}
+
+/* Create a label node through jit_new_node_no_link() and return it. */
+jit_node_t *
+_jit_forward(jit_state_t *_jit)
+{
+    jit_node_t         *label;
+
+    label = jit_new_node_no_link(jit_code_label);
+
+    return (label);
+}
+
+/* Obtain a label through jit_label() and set jit_flag_use on it
+ * before returning it. */
+jit_node_t *
+_jit_indirect(jit_state_t *_jit)
+{
+    jit_node_t         *label = jit_label();
+
+    label->flag |= jit_flag_use;
+    return (label);
+}
+
+/* Hand `node` (asserted to be a label, prolog or epilog with no
+ * successor) to jit_link_node() and record a new entry for it in the
+ * blocks vector, growing that vector by 16 entries when it is full. */
+void
+_jit_link(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_block_t                *entry;
+
+    assert((node->code == jit_code_label ||
+           node->code == jit_code_prolog ||
+           node->code == jit_code_epilog) && !node->next);
+    jit_link_node(node);
+
+    /* no free slot left: enlarge the vector first */
+    if (_jitc->blocks.offset >= _jitc->blocks.length) {
+       jit_word_t        grown = _jitc->blocks.length + 16;
+
+       jit_realloc((jit_pointer_t *)&_jitc->blocks.ptr,
+                   _jitc->blocks.length * sizeof(jit_block_t),
+                   grown * sizeof(jit_block_t));
+       _jitc->blocks.length = grown;
+    }
+
+    entry = &_jitc->blocks.ptr[_jitc->blocks.offset];
+    entry->label = node;
+    /* the node keeps the index of its block in v.w */
+    node->v.w = _jitc->blocks.offset;
+    jit_regset_new(&entry->reglive);
+    jit_regset_new(&entry->regmask);
+    _jitc->blocks.offset++;
+}
+
+/* True when `node` is a label that has no next node and is not the
+ * tail of the node list. */
+jit_bool_t
+_jit_forward_p(jit_state_t *_jit, jit_node_t *node)
+{
+    if (node->code != jit_code_label)
+       return (0);
+    if (node->next)
+       return (0);
+
+    return (node != _jitc->tail);
+}
+
+/* True when `node` is a label that has jit_flag_use set. */
+jit_bool_t
+_jit_indirect_p(jit_state_t *_jit, jit_node_t *node)
+{
+    if (node->code != jit_code_label)
+       return (0);
+
+    return ((node->flag & jit_flag_use) != 0);
+}
+
+/* True when `node` is a label whose link field is not null. */
+jit_bool_t
+_jit_target_p(jit_state_t *_jit, jit_node_t *node)
+{
+    if (node->code != jit_code_label)
+       return (0);
+
+    return (node->link ? 1 : 0);
+}
+
+/* Start a call sequence in the current function: reset the call
+ * kind to jit_call_default, zero the argi/argf/size counters and
+ * remember the newly created prepare node in _jitc->prepare.
+ * Asserts that a function is currently set. */
+void
+_jit_prepare(jit_state_t *_jit)
+{
+    assert(_jitc->function != NULL);
+
+    _jitc->function->call.call = jit_call_default;
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+
+    _jitc->prepare = jit_new_node(jit_code_prepare);
+}
+
+/* Patch `instr` to the current position: the tail node is used when
+ * it is a label, otherwise jit_label() supplies one, and the pair is
+ * passed to jit_patch_at(). */
+void
+_jit_patch(jit_state_t* _jit, jit_node_t *instr)
+{
+    jit_node_t         *target = _jitc->tail;
+
+    if (!target || target->code != jit_code_label) {
+       target = jit_label();
+    }
+
+    jit_patch_at(instr, target);
+}
+
+/* Return the operand-class mask (a combination of jit_cc_* bits) for
+ * the instruction `code`.
+ *
+ * Judging by how the mask is consumed elsewhere in this file, the
+ * a0/a1/a2 groups describe the u/v/w operands of a node: _reg marks a
+ * register operand and _rlh a low/high register pair kept in u.q.
+ * Presumably _chg marks a register written by the instruction, _int,
+ * _flt and _dbl an immediate of that type, _jmp a jump target and
+ * _arg an argument reference -- confirm against the jit_cc_*
+ * definitions.
+ *
+ * A code that is not listed in the table calls abort(). */
+jit_int32_t
+_jit_classify(jit_state_t *_jit, jit_code_t code)
+{
+    jit_int32_t                mask;
+
+    switch (code) {
+       /* codes classified with an empty mask */
+       case jit_code_data:     case jit_code_save:     case jit_code_load:
+       case jit_code_name:     case jit_code_label:    case jit_code_note:
+       case jit_code_prolog:   case jit_code_ellipsis: case jit_code_va_push:
+       case jit_code_epilog:   case jit_code_ret:      case jit_code_prepare:
+           mask = 0;
+           break;
+       case jit_code_live:     case jit_code_va_end:
+       case jit_code_retr:     case jit_code_retr_f:   case jit_code_retr_d:
+       case jit_code_pushargr: case jit_code_pushargr_f:
+       case jit_code_pushargr_d:
+       case jit_code_finishr:  /* synthesized will set jit_cc_a0_jmp */
+           mask = jit_cc_a0_reg;
+           break;
+       case jit_code_align:    case jit_code_reti:     case jit_code_pushargi:
+       case jit_code_finishi:  /* synthesized will set jit_cc_a0_jmp */
+           mask = jit_cc_a0_int;
+           break;
+       case jit_code_reti_f:   case jit_code_pushargi_f:
+           mask = jit_cc_a0_flt;
+           break;
+       case jit_code_reti_d:   case jit_code_pushargi_d:
+           mask = jit_cc_a0_dbl;
+           break;
+       case jit_code_allocai:
+           mask = jit_cc_a0_int|jit_cc_a1_int;
+           break;
+       case jit_code_arg:      case jit_code_arg_f:    case jit_code_arg_d:
+           mask = jit_cc_a0_int|jit_cc_a0_arg;
+           break;
+       case jit_code_calli:    case jit_code_jmpi:
+           mask = jit_cc_a0_jmp;
+           break;
+       case jit_code_callr:    case jit_code_jmpr:
+           mask = jit_cc_a0_reg|jit_cc_a0_jmp;
+           break;
+       case jit_code_retval_c: case jit_code_retval_uc:
+       case jit_code_retval_s: case jit_code_retval_us:
+       case jit_code_retval_i: case jit_code_retval_ui:
+       case jit_code_retval_l:
+       case jit_code_retval_f: case jit_code_retval_d:
+       case jit_code_va_start:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg;
+           break;
+       case jit_code_getarg_c: case jit_code_getarg_uc:
+       case jit_code_getarg_s: case jit_code_getarg_us:
+       case jit_code_getarg_i: case jit_code_getarg_ui:
+       case jit_code_getarg_l:
+       case jit_code_getarg_f: case jit_code_getarg_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
+           break;
+       case jit_code_putargr:  case jit_code_putargr_f:
+       case jit_code_putargr_d:
+           mask = jit_cc_a0_reg|jit_cc_a1_arg;
+           break;
+       case jit_code_putargi:
+           mask = jit_cc_a0_int|jit_cc_a1_arg;
+           break;
+       case jit_code_putargi_f:
+           mask = jit_cc_a0_flt|jit_cc_a1_arg;
+           break;
+       case jit_code_putargi_d:
+           mask = jit_cc_a0_dbl|jit_cc_a1_arg;
+           break;
+       case jit_code_movi:     case jit_code_ldi_c:    case jit_code_ldi_uc:
+       case jit_code_ldi_s:    case jit_code_ldi_us:   case jit_code_ldi_i:
+       case jit_code_ldi_ui:   case jit_code_ldi_l:    case jit_code_ldi_f:
+       case jit_code_ldi_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int;
+           break;
+       case jit_code_movi_f:   case jit_code_movi_f_w:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_flt;
+           break;
+       case jit_code_movi_d:   case jit_code_movi_d_w:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_dbl;
+           break;
+       case jit_code_movi_d_ww:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a1_chg|
+                  jit_cc_a2_dbl;
+           break;
+       case jit_code_negr:     case jit_code_comr:     case jit_code_movr:
+       case jit_code_extr_c:   case jit_code_extr_uc:  case jit_code_extr_s:
+       case jit_code_extr_us:  case jit_code_extr_i:   case jit_code_extr_ui:
+       case jit_code_truncr_f_i:                       case jit_code_truncr_f_l:
+       case jit_code_truncr_d_i:                       case jit_code_truncr_d_l:
+       case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul:
+       case jit_code_ldr_c:    case jit_code_ldr_uc:
+       case jit_code_ldr_s:    case jit_code_ldr_us:   case jit_code_ldr_i:
+       case jit_code_ldr_ui:   case jit_code_ldr_l:    case jit_code_negr_f:
+       case jit_code_absr_f:   case jit_code_sqrtr_f:  case jit_code_movr_f:
+       case jit_code_extr_f:   case jit_code_extr_d_f: case jit_code_ldr_f:
+       case jit_code_negr_d:   case jit_code_absr_d:   case jit_code_sqrtr_d:
+       case jit_code_movr_d:   case jit_code_extr_d:   case jit_code_extr_f_d:
+       case jit_code_ldr_d:
+       case jit_code_movr_w_f: case jit_code_movr_f_w:
+       case jit_code_movr_w_d: case jit_code_movr_d_w:
+       case jit_code_va_arg:   case jit_code_va_arg_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg;
+           break;
+       case jit_code_movr_d_ww:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a1_chg|
+                  jit_cc_a2_reg;
+           break;
+       case jit_code_addi:     case jit_code_addxi:    case jit_code_addci:
+       case jit_code_subi:     case jit_code_subxi:    case jit_code_subci:
+       case jit_code_rsbi:
+       case jit_code_muli:     case jit_code_divi:     case jit_code_divi_u:
+       case jit_code_remi:     case jit_code_remi_u:   case jit_code_andi:
+       case jit_code_ori:      case jit_code_xori:     case jit_code_lshi:
+       case jit_code_rshi:     case jit_code_rshi_u:   case jit_code_lti:
+       case jit_code_lti_u:    case jit_code_lei:      case jit_code_lei_u:
+       case jit_code_eqi:      case jit_code_gei:      case jit_code_gei_u:
+       case jit_code_gti:      case jit_code_gti_u:    case jit_code_nei:
+       case jit_code_ldxi_c:   case jit_code_ldxi_uc:  case jit_code_ldxi_s:
+       case jit_code_ldxi_us:  case jit_code_ldxi_i:   case jit_code_ldxi_ui:
+       case jit_code_ldxi_l:   case jit_code_ldxi_f:   case jit_code_ldxi_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_int;
+           break;
+       case jit_code_qmuli:    case jit_code_qmuli_u:
+       case jit_code_qdivi:    case jit_code_qdivi_u:
+           mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a2_int;
+           break;
+       case jit_code_addi_f:   case jit_code_subi_f:   case jit_code_rsbi_f:
+       case jit_code_muli_f:   case jit_code_divi_f:   case jit_code_lti_f:
+       case jit_code_lei_f:    case jit_code_eqi_f:    case jit_code_gei_f:
+       case jit_code_gti_f:    case jit_code_nei_f:    case jit_code_unlti_f:
+       case jit_code_unlei_f:  case jit_code_uneqi_f:  case jit_code_ungei_f:
+       case jit_code_ungti_f:  case jit_code_ltgti_f:  case jit_code_ordi_f:
+       case jit_code_unordi_f:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_flt;
+           break;
+       case jit_code_addi_d:   case jit_code_subi_d:   case jit_code_rsbi_d:
+       case jit_code_muli_d:   case jit_code_divi_d:   case jit_code_lti_d:
+       case jit_code_lei_d:    case jit_code_eqi_d:    case jit_code_gei_d:
+       case jit_code_gti_d:    case jit_code_nei_d:    case jit_code_unlti_d:
+       case jit_code_unlei_d:  case jit_code_uneqi_d:  case jit_code_ungei_d:
+       case jit_code_ungti_d:  case jit_code_ltgti_d:  case jit_code_ordi_d:
+       case jit_code_unordi_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl;
+           break;
+       case jit_code_addr:     case jit_code_addxr:    case jit_code_addcr:
+       case jit_code_subr:     case jit_code_subxr:    case jit_code_subcr:
+       case jit_code_mulr:     case jit_code_divr:     case jit_code_divr_u:
+       case jit_code_remr:     case jit_code_remr_u:   case jit_code_andr:
+       case jit_code_orr:      case jit_code_xorr:     case jit_code_lshr:
+       case jit_code_rshr:     case jit_code_rshr_u:   case jit_code_ltr:
+       case jit_code_ltr_u:    case jit_code_ler:      case jit_code_ler_u:
+       case jit_code_eqr:      case jit_code_ger:      case jit_code_ger_u:
+       case jit_code_gtr:      case jit_code_gtr_u:    case jit_code_ner:
+       case jit_code_ldxr_c:   case jit_code_ldxr_uc:  case jit_code_ldxr_s:
+       case jit_code_ldxr_us:  case jit_code_ldxr_i:   case jit_code_ldxr_ui:
+       case jit_code_ldxr_l:   case jit_code_addr_f:   case jit_code_subr_f:
+       case jit_code_mulr_f:   case jit_code_divr_f:   case jit_code_ltr_f:
+       case jit_code_ler_f:    case jit_code_eqr_f:    case jit_code_ger_f:
+       case jit_code_gtr_f:    case jit_code_ner_f:    case jit_code_unltr_f:
+       case jit_code_unler_f:  case jit_code_uneqr_f:  case jit_code_unger_f:
+       case jit_code_ungtr_f:  case jit_code_ltgtr_f:  case jit_code_ordr_f:
+       case jit_code_unordr_f: case jit_code_ldxr_f:   case jit_code_addr_d:
+       case jit_code_subr_d:   case jit_code_mulr_d:   case jit_code_divr_d:
+       case jit_code_ltr_d:    case jit_code_ler_d:    case jit_code_eqr_d:
+       case jit_code_ger_d:    case jit_code_gtr_d:    case jit_code_ner_d:
+       case jit_code_unltr_d:  case jit_code_unler_d:  case jit_code_uneqr_d:
+       case jit_code_unger_d:  case jit_code_ungtr_d:  case jit_code_ltgtr_d:
+       case jit_code_ordr_d:   case jit_code_unordr_d: case jit_code_ldxr_d:
+       case jit_code_movr_ww_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_qmulr:    case jit_code_qmulr_u:
+       case jit_code_qdivr:    case jit_code_qdivr_u:
+           mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_sti_c:    case jit_code_sti_s:    case jit_code_sti_i:
+       case jit_code_sti_l:    case jit_code_sti_f:    case jit_code_sti_d:
+           mask = jit_cc_a0_int|jit_cc_a1_reg;
+           break;
+       case jit_code_blti:     case jit_code_blti_u:   case jit_code_blei:
+       case jit_code_blei_u:   case jit_code_beqi:     case jit_code_bgei:
+       case jit_code_bgei_u:   case jit_code_bgti:     case jit_code_bgti_u:
+       case jit_code_bnei:     case jit_code_bmsi:     case jit_code_bmci:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_int;
+           break;
+       case jit_code_blti_f:   case jit_code_blei_f:   case jit_code_beqi_f:
+       case jit_code_bgei_f:   case jit_code_bgti_f:   case jit_code_bnei_f:
+       case jit_code_bunlti_f: case jit_code_bunlei_f: case jit_code_buneqi_f:
+       case jit_code_bungei_f: case jit_code_bungti_f: case jit_code_bltgti_f:
+       case jit_code_bordi_f:  case jit_code_bunordi_f:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_flt;
+           break;
+       case jit_code_blti_d:   case jit_code_blei_d:   case jit_code_beqi_d:
+       case jit_code_bgei_d:   case jit_code_bgti_d:   case jit_code_bnei_d:
+       case jit_code_bunlti_d: case jit_code_bunlei_d: case jit_code_buneqi_d:
+       case jit_code_bungei_d: case jit_code_bungti_d: case jit_code_bltgti_d:
+       case jit_code_bordi_d:  case jit_code_bunordi_d:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_dbl;
+           break;
+       case jit_code_allocar:  /* synthesized instructions make it
+                                * equivalent to jit_cc_a0_chg */
+       case jit_code_str_c:    case jit_code_str_s:    case jit_code_str_i:
+       case jit_code_str_l:    case jit_code_str_f:    case jit_code_str_d:
+           mask = jit_cc_a0_reg|jit_cc_a1_reg;
+           break;
+       case jit_code_stxi_c:   case jit_code_stxi_s:   case jit_code_stxi_i:
+       case jit_code_stxi_l:   case jit_code_stxi_f:   case jit_code_stxi_d:
+           mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_bltr:     case jit_code_bltr_u:   case jit_code_bler:
+       case jit_code_bler_u:   case jit_code_beqr:     case jit_code_bger:
+       case jit_code_bger_u:   case jit_code_bgtr:     case jit_code_bgtr_u:
+       case jit_code_bner:     case jit_code_bmsr:     case jit_code_bmcr:
+       case jit_code_bltr_f:   case jit_code_bler_f:   case jit_code_beqr_f:
+       case jit_code_bger_f:   case jit_code_bgtr_f:   case jit_code_bner_f:
+       case jit_code_bunltr_f: case jit_code_bunler_f: case jit_code_buneqr_f:
+       case jit_code_bunger_f: case jit_code_bungtr_f: case jit_code_bltgtr_f:
+       case jit_code_bordr_f:  case jit_code_bunordr_f:case jit_code_bltr_d:
+       case jit_code_bler_d:   case jit_code_beqr_d:   case jit_code_bger_d:
+       case jit_code_bgtr_d:   case jit_code_bner_d:   case jit_code_bunltr_d:
+       case jit_code_bunler_d: case jit_code_buneqr_d: case jit_code_bunger_d:
+       case jit_code_bungtr_d: case jit_code_bltgtr_d: case jit_code_bordr_d:
+       case jit_code_bunordr_d:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_boaddi:   case jit_code_boaddi_u: case jit_code_bxaddi:
+       case jit_code_bxaddi_u: case jit_code_bosubi:   case jit_code_bosubi_u:
+       case jit_code_bxsubi:   case jit_code_bxsubi_u:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_int;
+           break;
+       case jit_code_stxr_c:   case jit_code_stxr_s:   case jit_code_stxr_i:
+       case jit_code_stxr_l:   case jit_code_stxr_f:   case jit_code_stxr_d:
+           mask = jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_boaddr:   case jit_code_boaddr_u: case jit_code_bxaddr:
+       case jit_code_bxaddr_u: case jit_code_bosubr:   case jit_code_bosubr_u:
+       case jit_code_bxsubr:   case jit_code_bxsubr_u:
+           mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_reg;
+           break;
+       default:
+           /* unknown code: nothing sensible can be returned */
+           abort();
+    }
+
+    return (mask);
+}
+
+/* Store the absolute `address` in the operand of `instr` that holds
+ * it: v for movi and the ldi_* loads, u for the sti_* stores.  Any
+ * other code also gets it in u, after asserting that its class mask
+ * has jit_cc_a0_jmp set and jit_cc_a0_reg clear. */
+void
+_jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address)
+{
+    jit_int32_t                cc;
+
+    switch (instr->code) {
+       case jit_code_movi:
+       case jit_code_ldi_c:    case jit_code_ldi_uc:
+       case jit_code_ldi_s:    case jit_code_ldi_us:
+       case jit_code_ldi_i:    case jit_code_ldi_ui:
+       case jit_code_ldi_l:
+       case jit_code_ldi_f:    case jit_code_ldi_d:
+           instr->v.p = address;
+           return;
+       case jit_code_sti_c:    case jit_code_sti_s:
+       case jit_code_sti_i:    case jit_code_sti_l:
+       case jit_code_sti_f:    case jit_code_sti_d:
+           instr->u.p = address;
+           return;
+       default:
+           break;
+    }
+
+    /* everything else must be a jump taking an immediate target */
+    cc = jit_classify(instr->code);
+    assert((cc & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
+    instr->u.p = address;
+}
+
+/* Make `instr` refer to `label`.  The instruction must not have been
+ * patched to a node before (jit_flag_node is asserted clear, then
+ * set).  movi keeps the target in v and may point at a label or a
+ * data node, in which case jit_flag_data is set as well; jmpi keeps
+ * it in u and may point at a label or an epilog; any other code must
+ * classify as an immediate jump and point at a label.  Finally the
+ * instruction is pushed on the label's link list. */
+void
+_jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
+{
+    jit_int32_t                cc;
+
+    assert(!(instr->flag & jit_flag_node));
+    instr->flag |= jit_flag_node;
+
+    if (instr->code == jit_code_movi) {
+       assert(label->code == jit_code_label ||
+              label->code == jit_code_data);
+       instr->v.n = label;
+       if (label->code == jit_code_data)
+           instr->flag |= jit_flag_data;
+    }
+    else if (instr->code == jit_code_jmpi) {
+       assert(label->code == jit_code_label ||
+              label->code == jit_code_epilog);
+       instr->u.n = label;
+    }
+    else {
+       cc = jit_classify(instr->code);
+       assert((cc & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
+       assert(label->code == jit_code_label);
+       instr->u.n = label;
+    }
+
+    /* the link field chains every node that references this label */
+    instr->link = label->link;
+    label->link = instr;
+}
+
+/* Run the optimization passes over the recorded nodes, in order:
+ *   1. thread_jumps(), sequential_labels() and split_branches();
+ *   2. jit_setup() on every block whose label is not an epilog, then
+ *      jit_follow() over the same blocks until a full sweep leaves
+ *      `todo` at zero;
+ *   3. patch_registers() and simplify();
+ *   4. a walk that sets jit_flag_head on labels;
+ *   5. a final walk that clears the jit_regno_patch bit from register
+ *      operands, calls redundant_store() for beqi/bnei, optionally
+ *      moves float/double immediates into the data area and records,
+ *      per function, which registers are changed.
+ * _jitc->function is reset to NULL on entry and tracked again from
+ * the prolog/epilog nodes during the final walk. */
+void
+_jit_optimize(jit_state_t *_jit)
+{
+    jit_bool_t          jump;
+    jit_bool_t          todo;
+    jit_int32_t                 mask;
+    jit_node_t         *node;
+    jit_block_t                *block;
+    jit_word_t          offset;
+
+    _jitc->function = NULL;
+
+    thread_jumps();
+    sequential_labels();
+    split_branches();
+
+    /* create initial mapping of live register values
+     * at the start of a basic block */
+    for (offset = 0; offset < _jitc->blocks.offset; offset++) {
+       block = _jitc->blocks.ptr + offset;
+       /* skip entries that no longer have a label */
+       if (!block->label)
+           continue;
+       if (block->label->code != jit_code_epilog)
+           jit_setup(block);
+    }
+
+    /* set live state of registers not referenced in a block, but
+     * referenced in a jump target or normal flow; repeat until a
+     * complete sweep over the blocks leaves todo unset */
+    do {
+       todo = 0;
+       for (offset = 0; offset < _jitc->blocks.offset; offset++) {
+           block = _jitc->blocks.ptr + offset;
+           if (!block->label)
+               continue;
+           if (block->label->code != jit_code_epilog)
+               jit_follow(block, &todo);
+       }
+    } while (todo);
+
+    patch_registers();
+    simplify();
+
+    /* figure out which labels are only reached with a jump; this is
+     * required to do a simple redundant_store removal on jit_beqi
+     * below.  A label seen while `jump` is clear, i.e. one that
+     * follows a node other than jmpi, jmpr or epilog (data and note
+     * nodes are ignored), gets jit_flag_head */
+    jump = 1;
+    for (node = _jitc->head; node; node = node->next) {
+       switch (node->code) {
+           case jit_code_label:
+               if (!jump)
+                   node->flag |= jit_flag_head;
+               break;
+           case jit_code_jmpi:         case jit_code_jmpr:
+           case jit_code_epilog:
+               jump = 1;
+               break;
+           case jit_code_data:         case jit_code_note:
+               break;
+           default:
+               jump = 0;
+               break;
+       }
+    }
+
+    for (node = _jitc->head; node; node = node->next) {
+       mask = jit_classify(node->code);
+       /* drop the jit_regno_patch marker from every register operand */
+       if (mask & jit_cc_a0_reg)
+           node->u.w &= ~jit_regno_patch;
+       if (mask & jit_cc_a1_reg)
+           node->v.w &= ~jit_regno_patch;
+       if (mask & jit_cc_a2_reg)
+           node->w.w &= ~jit_regno_patch;
+       switch (node->code) {
+           case jit_code_prolog:
+               /* w.w of a prolog indexes the functions vector */
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               break;
+           case jit_code_epilog:
+               _jitc->function = NULL;
+               break;
+           case jit_code_beqi:
+               redundant_store(node, 1);
+               break;
+           case jit_code_bnei:
+               redundant_store(node, 0);
+               break;
+           default:
+#if JIT_HASH_CONSTS
+               /* replace a float/double immediate operand with the
+                * value returned by jit_data() for it and flag the
+                * node accordingly; at most one operand is handled */
+               if (mask & jit_cc_a0_flt) {
+                   node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+               else if (mask & jit_cc_a0_dbl) {
+                   node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+               else if (mask & jit_cc_a1_flt) {
+                   node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+               else if (mask & jit_cc_a1_dbl) {
+                   node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+               else if (mask & jit_cc_a2_flt) {
+                   node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+               else if (mask & jit_cc_a2_dbl) {
+                   node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
+                   node->flag |= jit_flag_node | jit_flag_data;
+               }
+#endif
+               /* inside a function, remember every register operand
+                * that is classified as changed by this node */
+               if (_jitc->function) {
+                   if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+                       (jit_cc_a0_reg|jit_cc_a0_chg)) {
+                       if (mask & jit_cc_a0_rlh) {
+                           jit_regset_setbit(&_jitc->function->regset,
+                                             jit_regno(node->u.q.l));
+                           jit_regset_setbit(&_jitc->function->regset,
+                                             jit_regno(node->u.q.h));
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->function->regset,
+                                             jit_regno(node->u.w));
+                   }
+                   if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+                       (jit_cc_a1_reg|jit_cc_a1_chg))
+                       jit_regset_setbit(&_jitc->function->regset,
+                                         jit_regno(node->v.w));
+                   if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+                       (jit_cc_a2_reg|jit_cc_a2_chg))
+                       jit_regset_setbit(&_jitc->function->regset,
+                                         jit_regno(node->w.w));
+               }
+               break;
+       }
+    }
+}
+
+/* Update the live-register state (_jitc->reglive, with _jitc->regmask
+ * as scratch) to account for `node`:
+ *   - label/prolog/epilog: reglive is replaced by the reglive set of
+ *     the block indexed by node->v.w;
+ *   - callr/calli: registers for which jit_regarg_p() holds become
+ *     live and every other register lacking jit_class_sav is cleared
+ *     (presumably those are clobbered by the call -- confirm);
+ *   - anything else: each register operand not tagged with
+ *     jit_regno_patch is either marked live or, when the operand is
+ *     classified as changed, cleared from reglive and added to
+ *     regmask so that jit_update() can resolve it from the following
+ *     nodes. */
+void
+_jit_reglive(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 spec;
+    jit_int32_t                 value;
+    jit_block_t                *block;
+
+    switch (node->code) {
+       case jit_code_label:    case jit_code_prolog:   case jit_code_epilog:
+           block = _jitc->blocks.ptr + node->v.w;
+           jit_regset_set(&_jitc->reglive, &block->reglive);
+           break;
+       case jit_code_callr:
+           /* the register holding the call target is live */
+           value = jit_regno(node->u.w);
+           if (!(node->u.w & jit_regno_patch)) {
+               jit_regset_setbit(&_jitc->reglive, value);
+           }
+           /* fallthrough: the rest is shared with calli */
+       case jit_code_calli:
+           for (value = 0; value < _jitc->reglen; value++) {
+               spec = jit_class(_rvs[value].spec);
+               if ((spec & jit_class_arg) && jit_regarg_p(node, value))
+                   jit_regset_setbit(&_jitc->reglive, value);
+               else if (!(spec & jit_class_sav))
+                   jit_regset_clrbit(&_jitc->reglive, value);
+           }
+           break;
+       default:
+           /* from here on `value` holds the class mask of the node */
+           value = jit_classify(node->code);
+           if (value & jit_cc_a0_reg) {
+               if (value & jit_cc_a0_rlh) {
+                   /* first operand is a low/high register pair */
+                   if (!(node->u.q.l & jit_regno_patch)) {
+                       if (value & jit_cc_a0_chg) {
+                           jit_regset_clrbit(&_jitc->reglive, node->u.q.l);
+                           jit_regset_setbit(&_jitc->regmask, node->u.q.l);
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->reglive, node->u.q.l);
+                   }
+                   if (!(node->u.q.h & jit_regno_patch)) {
+                       if (value & jit_cc_a0_chg) {
+                           jit_regset_clrbit(&_jitc->reglive, node->u.q.h);
+                           jit_regset_setbit(&_jitc->regmask, node->u.q.h);
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->reglive, node->u.q.h);
+                   }
+               }
+               else {
+                   if (!(node->u.w & jit_regno_patch)) {
+                       if (value & jit_cc_a0_chg) {
+                           jit_regset_clrbit(&_jitc->reglive, node->u.w);
+                           jit_regset_setbit(&_jitc->regmask, node->u.w);
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->reglive, node->u.w);
+                   }
+               }
+           }
+           if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) {
+               if (value & jit_cc_a1_chg) {
+                   jit_regset_clrbit(&_jitc->reglive, node->v.w);
+                   jit_regset_setbit(&_jitc->regmask, node->v.w);
+               }
+               else
+                   jit_regset_setbit(&_jitc->reglive, node->v.w);
+           }
+           if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) {
+               if (value & jit_cc_a2_chg) {
+                   jit_regset_clrbit(&_jitc->reglive, node->w.w);
+                   jit_regset_setbit(&_jitc->regmask, node->w.w);
+               }
+               else
+                   jit_regset_setbit(&_jitc->reglive, node->w.w);
+           }
+           /* some register state is still unknown: let jit_update()
+            * look at the nodes that follow */
+           if (jit_regset_set_p(&_jitc->regmask)) {
+               jit_update(node->next, &_jitc->reglive, &_jitc->regmask);
+               if (jit_regset_set_p(&_jitc->regmask)) {
+                   /* any unresolved live state is considered as live */
+                   jit_regset_ior(&_jitc->reglive,
+                                  &_jitc->reglive, &_jitc->regmask);
+                   jit_regset_set_ui(&_jitc->regmask, 0);
+               }
+           }
+           break;
+    }
+}
+
+/* Set, in _jitc->regarg, the bit of every register operand of `node`
+ * selected by the class mask `value` (a low/high pair contributes
+ * both registers).  When GET_JIT_SIZE is enabled jit_size_prepare()
+ * is called first. */
+void
+_jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
+{
+#if GET_JIT_SIZE
+    jit_size_prepare();
+#endif
+    if (value & jit_cc_a0_reg) {
+       if (!(value & jit_cc_a0_rlh))
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.w));
+       else {
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.q.l));
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.q.h));
+       }
+    }
+    if (value & jit_cc_a1_reg) {
+       jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
+    }
+    if (value & jit_cc_a2_reg) {
+       jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
+}
+
+/* Clear, in _jitc->regarg, the bit of every register operand of
+ * `node` selected by the class mask `value` (a low/high pair clears
+ * both registers).  When GET_JIT_SIZE is enabled
+ * jit_size_collect(node) is called first. */
+void
+_jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
+{
+#if GET_JIT_SIZE
+    jit_size_collect(node);
+#endif
+    if (value & jit_cc_a0_reg) {
+       if (!(value & jit_cc_a0_rlh))
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.w));
+       else {
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.q.l));
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.q.h));
+       }
+    }
+    if (value & jit_cc_a1_reg) {
+       jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
+    }
+    if (value & jit_cc_a2_reg) {
+       jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
+}
+
+/* Finish code construction: emit the epilog of a function that is
+ * still set, run jit_optimize(), mark the state as realized, round
+ * the data offset up to a multiple of 8 and set the length of the
+ * code buffer.  Asserts that the state was not realized before. */
+void
+_jit_realize(jit_state_t *_jit)
+{
+    assert(!_jitc->realize);
+
+    if (_jitc->function) {
+       jit_epilog();
+    }
+    jit_optimize();
+    _jitc->realize = 1;
+
+    /* keep the data area 8-byte aligned */
+    _jitc->data.offset = (_jitc->data.offset + 7) & -8;
+
+#if !GET_JIT_SIZE
+    _jit->code.length = jit_get_size();
+#else
+    /* Heuristic to guess code buffer size */
+    _jitc->mult = 4;
+    _jit->code.length = _jitc->pool.length * 1024 * _jitc->mult;
+#endif
+}
+
+/* Set up the final data buffer.  Unless the caller supplied one
+ * (_jit->user_data), a buffer large enough for the hashed data plus
+ * the annotations, rounded up to a multiple of 4096, is obtained
+ * with mmap().  The hashed data is copied into it unless no_data is
+ * set, the annotation area is placed and zeroed unless no_note is
+ * set, and every node in the data hash table has its u.w offset
+ * turned into an absolute address and is flagged jit_flag_patch.
+ * Asserts that it has not run before and sets _jitc->dataset. */
+void
+_jit_dataset(jit_state_t *_jit)
+{
+    jit_uint8_t                *ptr;
+    jit_node_t         *node;
+    jit_word_t          offset;
+#if defined(__sgi)
+    int                         mmap_fd;
+#endif
+
+    assert(!_jitc->dataset);
+    if (!_jit->user_data) {
+
+       /* create read only data buffer */
+       _jit->data.length = (_jitc->data.offset +
+                            /* reserve space for annotations */
+                            _jitc->note.size + 4095) & -4096;
+#if defined(__sgi)
+       mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+       /* NOTE(review): mmap_fd is only declared above under __sgi;
+        * presumably a macro elsewhere in the file supplies it on
+        * other platforms -- confirm */
+       _jit->data.ptr = mmap(NULL, _jit->data.length,
+                             PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+       /* NOTE(review): a failed mmap is only caught by this assert,
+        * so it goes unnoticed when assertions are compiled out */
+       assert(_jit->data.ptr != MAP_FAILED);
+#if defined(__sgi)
+       close(mmap_fd);
+#endif
+    }
+
+    if (!_jitc->no_data)
+       jit_memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset);
+
+    if (_jitc->no_note) {
+       /* Space for one note is always allocated, so revert it here
+        * if after jit_new_state was called, it is also requested to
+        * not generate annotation information */
+       _jit->note.length = 0;
+       _jitc->note.size = 0;
+    }
+    else {
+       /* annotations start right after the copied data, or at the
+        * beginning of the buffer when no data was copied */
+       _jitc->note.base = _jit->data.ptr;
+       if (!_jitc->no_data)
+           _jitc->note.base += _jitc->data.offset;
+       memset(_jitc->note.base, 0, _jitc->note.size);
+    }
+
+    if (_jit->user_data)
+       /* Need the temporary hashed data until jit_emit is finished */
+       ptr = _jitc->no_data ? _jitc->data.ptr : _jit->data.ptr;
+    else {
+       ptr = _jit->data.ptr;
+       /* Temporary hashed data no longer required */
+       jit_free((jit_pointer_t *)&_jitc->data.ptr);
+    }
+
+    /* rebase every hashed data node: u.w goes from an offset to an
+     * address inside the buffer selected above */
+    for (offset = 0; offset < _jitc->data.size; offset++) {
+       for (node = _jitc->data.table[offset]; node; node = node->next) {
+           node->flag |= jit_flag_patch;
+           node->u.w = (jit_word_t)(ptr + node->u.w);
+       }
+    }
+
+    _jitc->dataset = 1;
+}
+
+/* Return the code buffer pointer.  When `length' is not NULL it receives
+ * the exact number of bytes generated if code generation is done, or the
+ * current size of the code buffer otherwise.
+ *   jit_realize() must have been called (asserted). */
+jit_pointer_t
+_jit_get_code(jit_state_t *_jit, jit_word_t *length)
+{
+    assert(_jitc->realize);
+    if (length != NULL) {
+       if (!_jitc->done)
+           /* Nothing generated yet: report the buffer size */
+           *length = _jit->code.length;
+       else
+           /* Distance from the start of the buffer to the current pc */
+           *length = _jit->pc.uc - _jit->code.ptr;
+    }
+
+    return (_jit->code.ptr);
+}
+
+/* Use the caller provided buffer `ptr' of `length' bytes as code buffer
+ * and remember (user_code) that it was not allocated by the library.
+ *   jit_realize() must have been called (asserted). */
+void
+_jit_set_code(jit_state_t *_jit, jit_pointer_t ptr, jit_word_t length)
+{
+    assert(_jitc->realize);
+    _jit->user_code = 1;
+    _jit->code.length = length;
+    _jit->code.ptr = ptr;
+}
+
+/* Return the data buffer pointer.  `data_size' and `note_size', when not
+ * NULL, receive _jitc->data.offset and _jitc->note.size respectively.
+ *   jit_realize() must have been called (asserted). */
+jit_pointer_t
+_jit_get_data(jit_state_t *_jit, jit_word_t *data_size, jit_word_t *note_size)
+{
+    assert(_jitc->realize);
+    /* data_size is stored first, as before, should both point to the
+     * same object */
+    if (data_size != NULL)
+       *data_size = _jitc->data.offset;
+    if (note_size != NULL)
+       *note_size = _jitc->note.size;
+    return (_jit->data.ptr);
+}
+
+/* Use the caller provided buffer `ptr' of `length' bytes as data buffer.
+ *   JIT_DISABLE_DATA in `flags' sets no_data and JIT_DISABLE_NOTE sets
+ * no_note.  For whatever is not disabled, it is asserted that `length'
+ * is at least data.offset, note.size, or the sum of both.
+ *   jit_realize() must have been called (asserted). */
+void
+_jit_set_data(jit_state_t *_jit, jit_pointer_t ptr,
+             jit_word_t length, jit_word_t flags)
+{
+    jit_bool_t          skip_data = (flags & JIT_DISABLE_DATA) != 0;
+    jit_bool_t          skip_note = (flags & JIT_DISABLE_NOTE) != 0;
+
+    assert(_jitc->realize);
+    if (skip_data)
+       _jitc->no_data = 1;
+    else
+       assert(length >= _jitc->data.offset);
+    if (skip_note)
+       _jitc->no_note = 1;
+    else if (skip_data)
+       /* only the annotations go in the buffer */
+       assert(length >= _jitc->note.size);
+    else
+       /* constants followed by annotations */
+       assert(length >= _jitc->data.offset + _jitc->note.size);
+    _jit->data.ptr = ptr;
+    _jit->data.length = length;
+    _jit->user_data = 1;
+}
+
+/* Generate the machine code for everything recorded in `_jit'.
+ *   Runs jit_realize() and jit_dataset() if they were not called yet, maps
+ * a read/write/exec buffer unless one was given with jit_set_code()
+ * (user_code), and calls emit_code() until it succeeds, enlarging the
+ * buffer after every failure.
+ *   On success annotations are generated (unless no_note), the library
+ * owned data buffer is made read only and the library owned code buffer
+ * read + exec, and the start of the code is returned.
+ *   Returns NULL when emit_code() fails on a user supplied code buffer,
+ * as that buffer is not resized here. */
+jit_pointer_t
+_jit_emit(jit_state_t *_jit)
+{
+    jit_pointer_t       code;
+    jit_node_t         *node;
+    size_t              length;
+    int                         result;
+#if defined(__sgi)
+    int                         mmap_fd;
+#endif
+
+    if (!_jitc->realize)
+       jit_realize();
+
+    if (!_jitc->dataset)
+       jit_dataset();
+
+    _jitc->emit = 1;
+
+    if (!_jit->user_code) {
+#if defined(__sgi)
+       mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+       _jit->code.ptr = mmap(NULL, _jit->code.length,
+                             PROT_EXEC | PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+       assert(_jit->code.ptr != MAP_FAILED);
+    }
+    /* code.end is kept jit_get_max_instr() bytes before the real end of
+     * the buffer */
+    _jitc->code.end = _jit->code.ptr + _jit->code.length -
+       jit_get_max_instr();
+    _jit->pc.uc = _jit->code.ptr;
+
+    for (;;) {
+       if ((code = emit_code()) == NULL) {
+           /* Failed: forget the recorded patches and clear the patch
+            * flag of referenced labels and epilogs, so that a new
+            * attempt starts from a clean state */
+           _jitc->patches.offset = 0;
+           for (node = _jitc->head; node; node = node->next) {
+               if (node->link &&
+                   (node->code == jit_code_label ||
+                    node->code == jit_code_epilog))
+                   node->flag &= ~jit_flag_patch;
+           }
+           /* a user supplied buffer is never resized */
+           if (_jit->user_code)
+               goto fail;
+#if GET_JIT_SIZE
+           ++_jitc->mult;
+           length = _jitc->pool.length * 1024 * _jitc->mult;
+#else
+           /* Should only happen on very special cases */
+           length = _jit->code.length + 4096;
+#endif
+
+           /* without mremap the old mapping is dropped and a new,
+            * larger one is created below */
+#if !HAVE_MREMAP
+           munmap(_jit->code.ptr, _jit->code.length);
+#endif
+
+#if HAVE_MREMAP
+#  if __NetBSD__
+           _jit->code.ptr = mremap(_jit->code.ptr, _jit->code.length,
+                                   _jit->code.ptr, length, 0);
+#  else
+           _jit->code.ptr = mremap(_jit->code.ptr, _jit->code.length,
+                                   length, MREMAP_MAYMOVE, NULL);
+#  endif
+#else
+           _jit->code.ptr = mmap(NULL, length,
+                                 PROT_EXEC | PROT_READ | PROT_WRITE,
+                                 MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+#endif
+
+           assert(_jit->code.ptr != MAP_FAILED);
+           /* generation restarts from the beginning of the new buffer */
+           _jit->code.length = length;
+           _jitc->code.end = _jit->code.ptr + _jit->code.length -
+               jit_get_max_instr();
+           _jit->pc.uc = _jit->code.ptr;
+       }
+       else
+           break;
+    }
+
+#if defined(__sgi)
+    if (!_jit->user_code)
+       close(mmap_fd);
+#endif
+
+    _jitc->done = 1;
+    if (!_jitc->no_note)
+       jit_annotate();
+
+    if (_jit->user_data)
+       /* the temporary hashed data was kept by jit_dataset until now */
+       jit_free((jit_pointer_t *)&_jitc->data.ptr);
+    else {
+       /* data buffer owned by the library becomes read only */
+       result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ);
+       assert(result == 0);
+    }
+    if (!_jit->user_code) {
+       /* code buffer owned by the library is no longer writable */
+       result = mprotect(_jit->code.ptr, _jit->code.length,
+                         PROT_READ | PROT_EXEC);
+       assert(result == 0);
+    }
+
+    return (_jit->code.ptr);
+fail:
+    return (NULL);
+}
+
+/* jit_trampoline() with the prolog argument set to 1, which marks the
+ * current function with define_frame. */
+void
+_jit_frame(jit_state_t *_jit, jit_int32_t frame)
+{
+    jit_bool_t          prolog = 1;
+
+    jit_trampoline(frame, prolog);
+}
+
+/* jit_trampoline() with the prolog argument set to 0, which marks the
+ * current function with assume_frame. */
+void
+_jit_tramp(jit_state_t *_jit, jit_int32_t frame)
+{
+    jit_bool_t          prolog = 0;
+
+    jit_trampoline(frame, prolog);
+}
+
+/* Store the frame size of the current function and mark the function as
+ * defining (`prolog' not zero) or assuming (`prolog' zero) that frame.
+ *   Every register whose class has jit_class_sav is added to the regset
+ * of the function.  The last node must be the prolog (asserted). */
+void
+_jit_trampoline(jit_state_t *_jit, jit_int32_t frame, jit_bool_t prolog)
+{
+    jit_int32_t                regno;
+
+    /* Requiring the prolog to be the last node keeps the tests simple:
+     * there is a current function, and _jitc->function->self.aoff
+     * still has the value from before any alloca */
+    assert(_jitc->tail && _jitc->tail->code == jit_code_prolog);
+
+    /* 3 possible spilled temporaries, 8 bytes each (could be a double) */
+    frame += 3 * 8;
+#if !defined(__hppa__)
+    frame -= _jitc->function->self.aoff;
+#else
+    frame += _jitc->function->self.aoff;
+#endif
+    _jitc->function->frame = frame;
+    if (!prolog)
+       _jitc->function->assume_frame = 1;
+    else
+       _jitc->function->define_frame = 1;
+    regno = 0;
+    while (regno < _jitc->reglen) {
+       if (jit_class(_rvs[regno].spec) & jit_class_sav)
+           jit_regset_setbit(&_jitc->function->regset, regno);
+       ++regno;
+    }
+}
+
+/*   Compute initial reglive and regmask set values of a basic block.
+ * reglive is the set of known live registers
+ * regmask is the set of registers not referenced in the block
+ *   Registers in regmask might be live.
+ *   The block is scanned from the node after its label up to the next
+ * label, prolog or epilog.  The first time a register operand is seen it
+ * is removed from regmask, and it is added to reglive unless the
+ * instruction flags that operand as changed (jit_cc_aN_chg).
+ */
+static void
+_jit_setup(jit_state_t *_jit, jit_block_t *block)
+{
+    jit_node_t         *node;
+    jit_bool_t          live;
+    unsigned long       value;
+
+    /* start with all registers unknown, except the ones that are neither
+     * general purpose nor floating point */
+    jit_regset_set_mask(&block->regmask, _jitc->reglen);
+    for (value = 0; value < _jitc->reglen; ++value)
+       if (!(jit_class(_rvs[value].spec) & (jit_class_gpr|jit_class_fpr)))
+           jit_regset_clrbit(&block->regmask, value);
+
+    for (node = block->label->next; node; node = node->next) {
+       switch (node->code) {
+           case jit_code_label:        case jit_code_prolog:
+           case jit_code_epilog:
+               /* end of the block */
+               return;
+           default:
+               /* Check argument registers in reverse order to properly
+                * handle registers that are both, argument and result */
+               value = jit_classify(node->code);
+               if ((value & jit_cc_a2_reg) &&
+                   !(node->w.w & jit_regno_patch) &&
+                   jit_regset_tstbit(&block->regmask, node->w.w)) {
+                   live = !(value & jit_cc_a2_chg);
+                   jit_regset_clrbit(&block->regmask, node->w.w);
+                   if (live)
+                       jit_regset_setbit(&block->reglive, node->w.w);
+               }
+               if ((value & jit_cc_a1_reg) &&
+                   !(node->v.w & jit_regno_patch) &&
+                   jit_regset_tstbit(&block->regmask, node->v.w)) {
+                   live = !(value & jit_cc_a1_chg);
+                   jit_regset_clrbit(&block->regmask, node->v.w);
+                   if (live)
+                       jit_regset_setbit(&block->reglive, node->v.w);
+               }
+               if (value & jit_cc_a0_reg) {
+                   live = !(value & jit_cc_a0_chg);
+                   if (value & jit_cc_a0_rlh) {
+                       /* first operand is the u.q.l / u.q.h pair */
+                       if (!(node->u.q.l & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->u.q.l)) {
+                           jit_regset_clrbit(&block->regmask, node->u.q.l);
+                           if (live)
+                               jit_regset_setbit(&block->reglive, node->u.q.l);
+                       }
+                       if (!(node->u.q.h & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->u.q.h)) {
+                           jit_regset_clrbit(&block->regmask, node->u.q.h);
+                           if (live)
+                               jit_regset_setbit(&block->reglive, node->u.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->u.w & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->u.w)) {
+                           jit_regset_clrbit(&block->regmask, node->u.w);
+                           if (live)
+                               jit_regset_setbit(&block->reglive, node->u.w);
+                       }
+                   }
+               }
+               break;
+       }
+    }
+}
+
+/*  Update regmask and reglive of blocks at entry point of branch targets
+ * or normal flow that have a live register not used in this block.
+ *   The block is scanned again using local copies of its two sets.  At the
+ * label that ends the block, and at every jump to a known label, the
+ * registers still unknown here (local regmask) but live in the destination
+ * block are added to block->reglive and removed from block->regmask, and
+ * *todo is set to 1.  *todo is never cleared here.
+ *   NOTE(review): the local reglive copy is only written in this function,
+ * nothing visible here reads it back -- confirm this is intended.
+ */
+static void
+_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
+{
+    jit_node_t         *node;
+    jit_block_t                *next;
+    jit_int32_t                 spec;
+    jit_int32_t                 regno;
+    unsigned long       value;
+    jit_node_t         *label;
+    jit_regset_t        reglive;
+    jit_regset_t        regmask;
+    jit_regset_t        regtemp;
+
+    jit_regset_set(&reglive, &block->reglive);
+    jit_regset_set(&regmask, &block->regmask);
+    for (node = block->label->next; node; node = node->next) {
+       switch (node->code) {
+           case jit_code_label:
+               /*  Do not consider jmpi and jmpr cannot jump to the
+                * next instruction. */
+               next = _jitc->blocks.ptr + node->v.w;
+               /*  Set of live registers in next block that are at unknown
+                * state in this block. */
+               jit_regset_and(&regtemp, &regmask, &next->reglive);
+               if (jit_regset_set_p(&regtemp)) {
+                   /*  Add live state of next block to current block. */
+                   jit_regset_ior(&block->reglive, &block->reglive, &regtemp);
+                   /*  Remove from unknown state bitmask. */
+                   jit_regset_com(&regtemp, &regtemp);
+                   jit_regset_and(&block->regmask, &block->regmask, &regtemp);
+                   *todo = 1;
+               }
+               /* fallthrough: a label also ends the block */
+           case jit_code_prolog:
+           case jit_code_epilog:
+               return;
+           case jit_code_callr:
+               /* the register holding the call target is used */
+               value = jit_regno(node->u.w);
+               if (!(node->u.w & jit_regno_patch)) {
+                   if (jit_regset_tstbit(&regmask, value)) {
+                       jit_regset_clrbit(&regmask, value);
+                       jit_regset_setbit(&reglive, value);
+                   }
+               }
+               /* fallthrough: the rest is common to callr and calli */
+           case jit_code_calli:
+               /* visit only the registers still set in regmask: the ones
+                * without jit_class_sav leave regmask, and argument
+                * registers for which jit_regarg_p() is true become live */
+               for (value = 0; value < _jitc->reglen; ++value) {
+                   value = jit_regset_scan1(&regmask, value);
+                   if (value >= _jitc->reglen)
+                       break;
+                   spec = jit_class(_rvs[value].spec);
+                   if (!(spec & jit_class_sav))
+                       jit_regset_clrbit(&regmask, value);
+                   if ((spec & jit_class_arg) && jit_regarg_p(node, value))
+                       jit_regset_setbit(&reglive, value);
+               }
+               break;
+           default:
+               /* operands are checked from the last to the first one */
+               value = jit_classify(node->code);
+               if (value & jit_cc_a2_reg) {
+                   if (!(node->w.w & jit_regno_patch)) {
+                       if (jit_regset_tstbit(&regmask, node->w.w)) {
+                           jit_regset_clrbit(&regmask, node->w.w);
+                           if (!(value & jit_cc_a2_chg))
+                               jit_regset_setbit(&reglive, node->w.w);
+                       }
+                   }
+               }
+               if (value & jit_cc_a1_reg) {
+                   if (!(node->v.w & jit_regno_patch)) {
+                       if (jit_regset_tstbit(&regmask, node->v.w)) {
+                           jit_regset_clrbit(&regmask, node->v.w);
+                           if (!(value & jit_cc_a1_chg))
+                               jit_regset_setbit(&reglive, node->v.w);
+                       }
+                   }
+               }
+               if (value & jit_cc_a0_reg) {
+                   if (value & jit_cc_a0_rlh) {
+                       if (!(node->u.q.l & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->u.q.l)) {
+                               jit_regset_clrbit(&regmask, node->u.q.l);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(&reglive, node->u.q.l);
+                           }
+                       }
+                       if (!(node->u.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->u.q.h)) {
+                               jit_regset_clrbit(&regmask, node->u.q.h);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(&reglive, node->u.q.h);
+                           }
+                       }
+                   }
+                   else {
+                       if (!(node->u.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->u.w)) {
+                               jit_regset_clrbit(&regmask, node->u.w);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(&reglive, node->u.w);
+                           }
+                       }
+                   }
+               }
+               if (value & jit_cc_a0_jmp) {
+                   if (node->flag & jit_flag_node) {
+                       /* jump to a known label: merge the live state of
+                        * the block that starts at that label */
+                       label = node->u.n;
+                       /*  Do not consider jmpi and jmpr cannot jump to the
+                        * next instruction. */
+                       next = _jitc->blocks.ptr + label->v.w;
+                       jit_regset_and(&regtemp, &regmask, &next->reglive);
+                       if (jit_regset_set_p(&regtemp)) {
+                           /* Add live state. */
+                           jit_regset_ior(&block->reglive,
+                                          &block->reglive, &regtemp);
+                           /*  Remove from unknown state bitmask. */
+                           jit_regset_com(&regtemp, &regtemp);
+                           jit_regset_and(&block->regmask,
+                                          &block->regmask, &regtemp);
+                           *todo = 1;
+                       }
+                   }
+                   else {
+                       /*   Jump to unknown location.
+                        *   This is a pitfall of the implementation.
+                        *   Only jmpi to not a jit code should reach here,
+                        * or a jmpr of a computed address.
+                        *   Because the implementation needs jit_class_nospill
+                        * registers, must treat jmpr as a function call. This
+                        * means that only JIT_Vn registers can be trusted on
+                        * arrival of jmpr.
+                        */
+                       for (regno = 0; regno < _jitc->reglen; regno++) {
+                           spec = jit_class(_rvs[regno].spec);
+                           if (jit_regset_tstbit(&regmask, regno) &&
+                               (spec & (jit_class_gpr|jit_class_fpr)) &&
+                               !(spec & jit_class_sav))
+                               jit_regset_clrbit(&regmask, regno);
+                       }
+                       /*   Assume non callee save registers are live due
+                        * to jump to unknown location. */
+                       /* Treat all callee save as live. */
+                       jit_regset_ior(&reglive, &reglive, &regmask);
+                       /* Treat anything else as dead. */
+                       jit_regset_set_ui(&regmask, 0);
+                   }
+               }
+               break;
+       }
+    }
+}
+
+/*  Follow code generation up to finding a label or end of code.
+ *  When finding a label, update the set of live registers.
+ *  On branches, update based on taken branch or normal flow.
+ *   `live' and `mask' are updated in place.  The scan starts at `node' and
+ * also stops as soon as `mask' has no bit left.  Reaching a prolog or an
+ * epilog empties `mask'.
+ */
+static void
+_jit_update(jit_state_t *_jit, jit_node_t *node,
+           jit_regset_t *live, jit_regset_t *mask)
+{
+    jit_int32_t                 spec;
+    jit_int32_t                 regno;
+    unsigned long       value;
+    jit_block_t                *block;
+    jit_node_t         *label;
+    jit_regset_t        regtemp;
+
+    for (; node; node = node->next) {
+       /* nothing is unknown anymore */
+       if (jit_regset_set_p(mask) == 0)
+           break;
+       switch (node->code) {
+           case jit_code_label:
+               /* take over what the block starting here knows as live */
+               block = _jitc->blocks.ptr + node->v.w;
+               jit_regset_and(&regtemp, mask, &block->reglive);
+               if (jit_regset_set_p(&regtemp)) {
+                   /* Add live state. */
+                   jit_regset_ior(live, live, &regtemp);
+                   /*  Remove from unknown state bitmask. */
+                   jit_regset_com(&regtemp, &regtemp);
+                   jit_regset_and(mask, mask, &regtemp);
+               }
+               return;
+           case jit_code_prolog:
+               jit_regset_set_ui(mask, 0);
+               return;
+           case jit_code_epilog:
+               jit_regset_set_ui(mask, 0);
+               return;
+           case jit_code_callr:
+               /* the register holding the call target is used */
+               value = jit_regno(node->u.w);
+               if (!(node->u.w & jit_regno_patch)) {
+                   if (jit_regset_tstbit(mask, value)) {
+                       jit_regset_clrbit(mask, value);
+                       jit_regset_setbit(live, value);
+                   }
+               }
+               /* fallthrough: the rest is common to callr and calli */
+           case jit_code_calli:
+               /* visit only the registers still set in mask: the ones
+                * without jit_class_sav leave mask, and argument
+                * registers for which jit_regarg_p() is true become live */
+               for (value = 0; value < _jitc->reglen; ++value) {
+                   value = jit_regset_scan1(mask, value);
+                   if (value >= _jitc->reglen)
+                       break;
+                   spec = jit_class(_rvs[value].spec);
+                   if (!(spec & jit_class_sav))
+                       jit_regset_clrbit(mask, value);
+                   if ((spec & jit_class_arg) && jit_regarg_p(node, value))
+                       jit_regset_setbit(live, value);
+               }
+               break;
+           default:
+               /* operands are checked from the last to the first one */
+               value = jit_classify(node->code);
+               if (value & jit_cc_a2_reg) {
+                   if (!(node->w.w & jit_regno_patch)) {
+                       if (jit_regset_tstbit(mask, node->w.w)) {
+                           jit_regset_clrbit(mask, node->w.w);
+                           if (!(value & jit_cc_a2_chg))
+                               jit_regset_setbit(live, node->w.w);
+                       }
+                   }
+               }
+               if (value & jit_cc_a1_reg) {
+                   if (!(node->v.w & jit_regno_patch)) {
+                       if (jit_regset_tstbit(mask, node->v.w)) {
+                           jit_regset_clrbit(mask, node->v.w);
+                           if (!(value & jit_cc_a1_chg))
+                               jit_regset_setbit(live, node->v.w);
+                       }
+                   }
+               }
+               if (value & jit_cc_a0_reg) {
+                   if (value & jit_cc_a0_rlh) {
+                       if (!(node->u.q.l & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->u.q.l)) {
+                               jit_regset_clrbit(mask, node->u.q.l);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(live, node->u.q.l);
+                           }
+                       }
+                       if (!(node->u.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->u.q.h)) {
+                               jit_regset_clrbit(mask, node->u.q.h);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(live, node->u.q.h);
+                           }
+                       }
+                   }
+                   else {
+                       if (!(node->u.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->u.w)) {
+                               jit_regset_clrbit(mask, node->u.w);
+                               if (!(value & jit_cc_a0_chg))
+                                   jit_regset_setbit(live, node->u.w);
+                           }
+                       }
+                   }
+               }
+               if (value & jit_cc_a0_jmp) {
+                   if (node->flag & jit_flag_node) {
+                       /* jump to a known label: merge the live state of
+                        * the block that starts at that label */
+                       label = node->u.n;
+                       /*  Do not consider jmpi and jmpr cannot jump to the
+                        * next instruction. */
+                       block = _jitc->blocks.ptr + label->v.w;
+                       jit_regset_and(&regtemp, mask, &block->reglive);
+                       if (jit_regset_set_p(&regtemp)) {
+                           /* Add live state. */
+                           jit_regset_ior(live, live, &regtemp);
+                           /*  Remove from unknown state bitmask. */
+                           jit_regset_com(&regtemp, &regtemp);
+                           jit_regset_and(mask, mask, &regtemp);
+                       }
+                   }
+                   else {
+                       /*   Jump to unknown location.
+                        *   This is a pitfall of the implementation.
+                        *   Only jmpi to not a jit code should reach here,
+                        * or a jmpr of a computed address.
+                        *   Because the implementation needs jit_class_nospill
+                        * registers, must treat jmpr as a function call. This
+                        * means that only JIT_Vn registers can be trusted on
+                        * arrival of jmpr.
+                        */
+                       for (regno = 0; regno < _jitc->reglen; regno++) {
+                           spec = jit_class(_rvs[regno].spec);
+                           if (jit_regset_tstbit(mask, regno) &&
+                               (spec & (jit_class_gpr|jit_class_fpr)) &&
+                               !(spec & jit_class_sav))
+                               jit_regset_clrbit(mask, regno);
+                       }
+                       /*   Assume non callee save registers are live due
+                        * to jump to unknown location. */
+                       /* Treat all callee save as live. */
+                       jit_regset_ior(live, live, mask);
+                       /* Treat anything else as dead. */
+                       jit_regset_set_ui(mask, 0);
+                   }
+               }
+               break;
+       }
+    }
+}
+
+/* One pass of jump simplification over the whole node list.
+ *   A jmpi is first tried with redundant_jump() and, if it was kept,
+ * with shortcut_jump().  jmpr, callr and calli are never touched.  Any
+ * other node classified as jit_cc_a0_jmp is tried with reverse_jump() and
+ * then shortcut_jump().
+ *   The loop has no increment: after a helper reports a change `continue'
+ * looks again at the current node without moving forward. */
+static void
+_thread_jumps(jit_state_t *_jit)
+{
+    jit_node_t         *prev;
+    jit_node_t         *node;
+    jit_node_t         *next;
+    jit_int32_t                 mask;
+
+    for (prev = node = _jitc->head; node;) {
+       next = node->next;
+       switch (node->code) {
+           case jit_code_jmpi:
+               if (redundant_jump(prev, node)) {
+                   /* node was reported as removed: resume from the
+                    * node before it */
+                   node = prev;
+                   continue;
+               }
+               /* retargeted: try the same jump once more */
+               if (shortcut_jump(prev, node))
+                   continue;
+               break;
+           case jit_code_jmpr:
+           case jit_code_callr:        case jit_code_calli:
+               /* non optimizable jump like code */
+               break;
+           default:
+               mask = jit_classify(node->code);
+               if (mask & jit_cc_a0_jmp) {
+                   if (reverse_jump(prev, node) ||
+                       shortcut_jump(prev, node))
+                       continue;
+               }
+               break;
+       }
+       prev = node;
+       node = next;
+    }
+}
+
+/* Remove labels that are not needed and merge consecutive ones.
+ *   A label with no flag set and an empty jump list (link) is deleted.
+ * When two labels are adjacent and the second one has no flag set, the
+ * jumps linked to the second are retargeted (u.n) and moved to the jump
+ * list of the first, then the second is deleted with del_label(). */
+static void
+_sequential_labels(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+    jit_node_t         *link;
+    jit_node_t         *prev;
+    jit_node_t         *next;
+    jit_node_t         *node;
+
+    for (prev = node = _jitc->head; node; node = next) {
+       next = node->next;
+       if (node->code == jit_code_label) {
+           if (!node->flag) {
+               /* nothing jumps here */
+               if (!node->link) {
+                   del_label(prev, node);
+                   continue;
+               }
+               /* node follows another label: hand its jumps over to
+                * that label and drop node */
+               if (prev != node && prev->code == jit_code_label) {
+                   if ((jump = node->link)) {
+                       for (; jump; jump = link) {
+                           link = jump->link;
+                           jump->u.n = prev;
+                           jump->link = prev->link;
+                           prev->link = jump;
+                       }
+                       node->link = NULL;
+                   }
+                   del_label(prev, node);
+                   continue;
+               }
+           }
+           /* node is kept; absorb a flagless label that follows it */
+           if (next && next->code == jit_code_label && !next->flag) {
+               if ((jump = next->link)) {
+                   for (; jump; jump = link) {
+                       link = jump->link;
+                       jump->u.n = node;
+                       jump->link = node->link;
+                       node->link = jump;
+                   }
+                   next->link = NULL;
+               }
+               del_label(node, next);
+               next = node->next;
+               /* NOTE(review): the loop advances to the new `next' but
+                * `prev' is not set to `node' on this path -- confirm
+                * that a del_label(prev, ...) in the next iteration
+                * still receives the real predecessor */
+               continue;
+           }
+       }
+       prev = node;
+    }
+}
+
+/* Make sure a block starts after every branch.
+ *   When a node classified as jit_cc_a0_jmp is not already followed by a
+ * label, prolog or epilog, a new label is linked right after it and a
+ * block for that label is appended to _jitc->blocks, which grows by 16
+ * entries whenever it is full. */
+static void
+_split_branches(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *next;
+    jit_node_t         *label;
+    jit_block_t                *block;
+    jit_word_t          length;
+
+    for (node = _jitc->head; node; node = next) {
+       next = node->next;
+       /* the last node has nothing after it to split from */
+       if (next == NULL)
+           break;
+       switch (next->code) {
+           case jit_code_label:        case jit_code_prolog:
+           case jit_code_epilog:
+               /* a block boundary is already in place */
+               continue;
+           default:
+               break;
+       }
+       if (!(jit_classify(node->code) & jit_cc_a0_jmp))
+           continue;
+
+       /* split block on branches */
+       label = new_node(jit_code_label);
+       label->next = next;
+       node->next = label;
+       if (_jitc->blocks.offset >= _jitc->blocks.length) {
+           length = _jitc->blocks.length + 16;
+           jit_realloc((jit_pointer_t *)&_jitc->blocks.ptr,
+                       _jitc->blocks.length * sizeof(jit_block_t),
+                       length * sizeof(jit_block_t));
+           _jitc->blocks.length = length;
+       }
+       /* the label remembers the index of its block in v.w */
+       block = _jitc->blocks.ptr + _jitc->blocks.offset;
+       block->label = label;
+       label->v.w = _jitc->blocks.offset;
+       jit_regset_new(&block->reglive);
+       jit_regset_new(&block->regmask);
+       ++_jitc->blocks.offset;
+    }
+}
+
+/* Retarget the jump `node' when the label it points to is followed, after
+ * only notes and labels, by another jump:
+ *   - a jmpi to a known node: `node' is unlinked from the jump list of
+ *     its current label and linked to the target of that jmpi;
+ *   - a jmpr, and `node' is an unconditional jmpi: `node' itself becomes
+ *     a jmpr with the same u.w operand.
+ *   Returns 1 if `node' was changed and 0 otherwise.  `prev' is unused.
+ *   A jmpi that targets the very label `node' already points to is left
+ * alone: relinking would change nothing but still report a change, and
+ * _thread_jumps would retry the same node forever.  Longer cycles of
+ * jumps are not detected. */
+static jit_bool_t
+_shortcut_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_bool_t          cond;
+    jit_node_t         *jump;
+    jit_node_t         *next;
+    jit_node_t         *temp;
+
+    if (!(node->flag & jit_flag_node))
+       return (0);
+    assert(node->code != jit_code_jmpr);
+    cond = node->code != jit_code_jmpi;
+    jump = node->u.n;
+    for (next = jump->next; next; next = next->next) {
+       switch (next->code) {
+           case jit_code_jmpi:
+               if (!(next->flag & jit_flag_node))
+                   return (0);
+               /* same target again: nothing to shortcut */
+               if (next->u.n == jump)
+                   return (0);
+               if (jump->link == node)
+                   jump->link = node->link;
+               else {
+                   /* `node' must be in the jump list of its label;
+                    * check every element before it is dereferenced */
+                   for (temp = jump->link; ; temp = temp->link) {
+                       assert(temp != NULL);
+                       if (temp->link == node)
+                           break;
+                   }
+                   temp->link = node->link;
+               }
+               /* link `node' at the head of the new target's list */
+               jump = next->u.n;
+               node->u.n = jump;
+               node->link = jump->link;
+               jump->link = node;
+               return (1);
+           case jit_code_jmpr:
+               /* a conditional branch cannot become a jmpr */
+               if (cond)
+                   return (0);
+               node->code = jit_code_jmpr;
+               node->u.w = next->u.w;
+               node->link = NULL;
+               node->flag &= ~jit_flag_node;
+               return (1);
+           case jit_code_note:         case jit_code_label:
+               break;
+           default:
+               return (0);
+       }
+    }
+    return (0);
+}
+
+/* Delete the jump `node' when its target is reached anyway by falling
+ * through, that is, when only name, note, align, label or epilog nodes
+ * lie between the jump and the label or epilog it points to.
+ *   The jump is unlinked from the jump list of its target and removed
+ * with del_node(prev, node).  Returns 1 if it was removed, 0 otherwise. */
+static jit_bool_t
+_redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_node_t         *local_prev;
+    jit_node_t         *local_next;
+
+    if (!(node->flag & jit_flag_node))
+       return (0);
+    for (local_prev = node, local_next = node->next;
+        local_next;
+        local_prev = local_next, local_next = local_next->next) {
+
+       switch (local_next->code) {
+           case jit_code_label:        case jit_code_epilog:
+               if (node->u.n == local_next) {
+                   if (local_next->link == node)
+                       local_next->link = node->link;
+                   else {
+                       /* `node' must be in the jump list of its target;
+                        * check every element before it is dereferenced */
+                       for (local_prev = local_next->link; ;
+                            local_prev = local_prev->link) {
+                           assert(local_prev != NULL);
+                           if (local_prev->link == node)
+                               break;
+                       }
+                       local_prev->link = node->link;
+                   }
+                   del_node(prev, node);
+                   return (1);
+               }
+               /* some other label: keep looking for the target */
+               break;
+           case jit_code_name:         case jit_code_note:
+           case jit_code_align:
+               break;
+           default:
+               return (0);
+       }
+    }
+    return (0);
+}
+
+/*
+ * Return the conditional branch code that tests the opposite condition
+ * of `code'.  The result always keeps the form of the argument: a
+ * register branch maps to a register branch, an immediate branch to an
+ * immediate branch, and the _u, _f and _d suffixes are preserved.
+ * For the float and double ordering comparisons the opposite of an
+ * ordered test is the matching unordered test, and vice versa.
+ * Calls abort() if `code' is not one of the branch codes listed.
+ */
+static jit_code_t
+reverse_jump_code(jit_code_t code)
+{
+    switch (code) {
+       case jit_code_bltr:     return (jit_code_bger);
+       case jit_code_blti:     return (jit_code_bgei);
+       case jit_code_bltr_u:   return (jit_code_bger_u);
+       case jit_code_blti_u:   return (jit_code_bgei_u);
+       case jit_code_bler:     return (jit_code_bgtr);
+       case jit_code_blei:     return (jit_code_bgti);
+       case jit_code_bler_u:   return (jit_code_bgtr_u);
+       case jit_code_blei_u:   return (jit_code_bgti_u);
+       case jit_code_beqr:     return (jit_code_bner);
+       case jit_code_beqi:     return (jit_code_bnei);
+       case jit_code_bger:     return (jit_code_bltr);
+       case jit_code_bgei:     return (jit_code_blti);
+       case jit_code_bger_u:   return (jit_code_bltr_u);
+       case jit_code_bgei_u:   return (jit_code_blti_u);
+       case jit_code_bgtr:     return (jit_code_bler);
+       case jit_code_bgti:     return (jit_code_blei);
+       case jit_code_bgtr_u:   return (jit_code_bler_u);
+       case jit_code_bgti_u:   return (jit_code_blei_u);
+       case jit_code_bner:     return (jit_code_beqr);
+       case jit_code_bnei:     return (jit_code_beqi);
+       case jit_code_bmsr:     return (jit_code_bmcr);
+       case jit_code_bmsi:     return (jit_code_bmci);
+       case jit_code_bmcr:     return (jit_code_bmsr);
+       case jit_code_bmci:     return (jit_code_bmsi);
+       case jit_code_bltr_f:   return (jit_code_bunger_f);
+       case jit_code_blti_f:   return (jit_code_bungei_f);
+       case jit_code_bler_f:   return (jit_code_bungtr_f);
+       case jit_code_blei_f:   return (jit_code_bungti_f);
+
+       case jit_code_beqr_f:   return (jit_code_bner_f);
+       case jit_code_beqi_f:   return (jit_code_bnei_f);
+
+       case jit_code_bger_f:   return (jit_code_bunltr_f);
+       case jit_code_bgei_f:   return (jit_code_bunlti_f);
+       case jit_code_bgtr_f:   return (jit_code_bunler_f);
+       case jit_code_bgti_f:   return (jit_code_bunlei_f);
+
+       case jit_code_bner_f:   return (jit_code_beqr_f);
+       /* immediate form must stay immediate, as in the _d table below */
+       case jit_code_bnei_f:   return (jit_code_beqi_f);
+
+       case jit_code_bunltr_f: return (jit_code_bger_f);
+       case jit_code_bunlti_f: return (jit_code_bgei_f);
+       case jit_code_bunler_f: return (jit_code_bgtr_f);
+       case jit_code_bunlei_f: return (jit_code_bgti_f);
+
+       case jit_code_buneqr_f: return (jit_code_bltgtr_f);
+       case jit_code_buneqi_f: return (jit_code_bltgti_f);
+
+       case jit_code_bunger_f: return (jit_code_bltr_f);
+       case jit_code_bungei_f: return (jit_code_blti_f);
+       case jit_code_bungtr_f: return (jit_code_bler_f);
+       case jit_code_bungti_f: return (jit_code_blei_f);
+
+       case jit_code_bltgtr_f: return (jit_code_buneqr_f);
+       case jit_code_bltgti_f: return (jit_code_buneqi_f);
+
+       case jit_code_bordr_f:  return (jit_code_bunordr_f);
+       case jit_code_bordi_f:  return (jit_code_bunordi_f);
+       case jit_code_bunordr_f:return (jit_code_bordr_f);
+       case jit_code_bunordi_f:return (jit_code_bordi_f);
+       case jit_code_bltr_d:   return (jit_code_bunger_d);
+       case jit_code_blti_d:   return (jit_code_bungei_d);
+       case jit_code_bler_d:   return (jit_code_bungtr_d);
+       case jit_code_blei_d:   return (jit_code_bungti_d);
+
+       case jit_code_beqr_d:   return (jit_code_bner_d);
+       case jit_code_beqi_d:   return (jit_code_bnei_d);
+
+       case jit_code_bger_d:   return (jit_code_bunltr_d);
+       case jit_code_bgei_d:   return (jit_code_bunlti_d);
+       case jit_code_bgtr_d:   return (jit_code_bunler_d);
+       case jit_code_bgti_d:   return (jit_code_bunlei_d);
+
+       case jit_code_bner_d:   return (jit_code_beqr_d);
+       case jit_code_bnei_d:   return (jit_code_beqi_d);
+
+       case jit_code_bunltr_d: return (jit_code_bger_d);
+       case jit_code_bunlti_d: return (jit_code_bgei_d);
+       case jit_code_bunler_d: return (jit_code_bgtr_d);
+       case jit_code_bunlei_d: return (jit_code_bgti_d);
+
+       case jit_code_buneqr_d: return (jit_code_bltgtr_d);
+       case jit_code_buneqi_d: return (jit_code_bltgti_d);
+
+       case jit_code_bunger_d: return (jit_code_bltr_d);
+       case jit_code_bungei_d: return (jit_code_blti_d);
+       case jit_code_bungtr_d: return (jit_code_bler_d);
+       case jit_code_bungti_d: return (jit_code_blei_d);
+
+       case jit_code_bltgtr_d: return (jit_code_buneqr_d);
+       case jit_code_bltgti_d: return (jit_code_buneqi_d);
+
+       case jit_code_bordr_d:  return (jit_code_bunordr_d);
+       case jit_code_bordi_d:  return (jit_code_bunordi_d);
+       case jit_code_bunordr_d:return (jit_code_bordr_d);
+       case jit_code_bunordi_d:return (jit_code_bordi_d);
+       case jit_code_boaddr:   return (jit_code_bxaddr);
+       case jit_code_boaddi:   return (jit_code_bxaddi);
+       case jit_code_boaddr_u: return (jit_code_bxaddr_u);
+       case jit_code_boaddi_u: return (jit_code_bxaddi_u);
+       case jit_code_bxaddr:   return (jit_code_boaddr);
+       case jit_code_bxaddi:   return (jit_code_boaddi);
+       case jit_code_bxaddr_u: return (jit_code_boaddr_u);
+       case jit_code_bxaddi_u: return (jit_code_boaddi_u);
+       case jit_code_bosubr:   return (jit_code_bxsubr);
+       case jit_code_bosubi:   return (jit_code_bxsubi);
+       case jit_code_bosubr_u: return (jit_code_bxsubr_u);
+       case jit_code_bosubi_u: return (jit_code_bxsubi_u);
+       case jit_code_bxsubr:   return (jit_code_bosubr);
+       case jit_code_bxsubi:   return (jit_code_bosubi);
+       case jit_code_bxsubr_u: return (jit_code_bosubr_u);
+       case jit_code_bxsubi_u: return (jit_code_bosubi_u);
+       default:                abort();        /* invalid jump code */
+    }
+}
+
+/*
+ * change common pattern:
+ *     <cond_jump L0> <jump L1> <label L0>
+ * into
+ *     <reverse_cond_jump L1> <label L0>
+ *
+ * `node' is the conditional jump.  Only note nodes and labels other
+ * than L0 may sit between the unconditional jump and <label L0>.
+ * On success the unconditional jump is deleted, `node' gets the
+ * reversed condition and is moved from the link chain of L0 to the
+ * head of the link chain of L1.  `prev' is not used.
+ * Returns 1 if the pattern was rewritten, 0 otherwise.
+ */
+static jit_bool_t
+_reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_node_t         *uncond;        /* the <jump L1> node */
+    jit_node_t         *dest;          /* L1 */
+    jit_node_t         *scan;
+    jit_node_t         *chain;
+
+    if ((node->flag & jit_flag_node) == 0)
+       return (0);
+
+    /* the very next node must be a jump to a node */
+    uncond = node->next;
+    if (uncond->code != jit_code_jmpi ||
+       (uncond->flag & jit_flag_node) == 0)
+       return (0);
+    dest = uncond->u.n;
+
+    for (scan = uncond->next; scan != NULL; scan = scan->next) {
+       if (scan->code == jit_code_note)
+           continue;
+       if (scan->code != jit_code_label && scan->code != jit_code_epilog)
+           return (0);
+       if (node->u.n != scan)
+           continue;
+
+       /* found <label L0>: unlink node from its chain ... */
+       if (scan->link == node)
+           scan->link = node->link;
+       else {
+           chain = scan->link;
+           while (chain->link != node) {
+               assert(chain != NULL);
+               chain = chain->link;
+           }
+           chain->link = node->link;
+       }
+       /* ... drop the unconditional jump and retarget node to L1 */
+       del_node(node, node->next);
+       node->code = reverse_jump_code(node->code);
+       node->u.n = dest;
+       node->link = dest->link;
+       dest->link = node;
+       return (1);
+    }
+
+    return (0);
+}
+
+/*
+ * Delete `movi' nodes that reload into register node->v.w the same
+ * immediate that is in node->w.w.
+ * NOTE(review): presumably `node' is a branch (or similar) that already
+ * established that the register holds that immediate -- confirm with
+ * the callers, which are outside this part of the file.
+ *
+ * When `jump' is zero the scan starts right after `node'; otherwise it
+ * starts after the label `node' jumps to, and only if `node' is the
+ * single source of that label.  The scan ends at the first label,
+ * prolog, epilog or jump, at a movi of a different (or flagged) value
+ * to the register, or at any node classified as changing the register.
+ */
+static void
+_redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
+{
+    jit_node_t         *last;
+    jit_node_t         *cur;
+    jit_word_t          word;
+    jit_int32_t                 spec;
+    jit_int32_t                 regno;
+
+    if (!jump)
+       last = node;
+    else {
+       last = node->u.n;
+       if (last->code == jit_code_epilog)
+           return;
+       assert(last->code == jit_code_label);
+       /* multiple sources */
+       if ((last->flag & jit_flag_head) || node->link || last->link != node)
+           return;
+       /* if there are sequential labels it will return below */
+    }
+
+    word = node->w.w;
+    regno = jit_regno(node->v.w);
+
+    cur = last->next;
+    while (cur != NULL) {
+       if (cur->code == jit_code_label || cur->code == jit_code_prolog ||
+           cur->code == jit_code_epilog)
+           return;
+
+       if (cur->code == jit_code_movi) {
+           if (regno == jit_regno(cur->u.w)) {
+               if (cur->flag || cur->v.w != word)
+                   return;
+               /* same value again: drop it and resume from the
+                * node before the one just removed */
+               del_node(last, cur);
+               cur = last;
+           }
+       }
+       else {
+           spec = jit_classify(cur->code);
+           if (spec & jit_cc_a0_jmp)
+               return;
+           /* stop as soon as any operand slot overwrites regno */
+           if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+               (jit_cc_a0_reg|jit_cc_a0_chg)) {
+               if (spec & jit_cc_a0_rlh) {
+                   if (regno == jit_regno(cur->u.q.l))
+                       return;
+                   if (regno == jit_regno(cur->u.q.h))
+                       return;
+               }
+               else if (regno == jit_regno(cur->u.w))
+                   return;
+           }
+           if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+               (jit_cc_a1_reg|jit_cc_a1_chg) &&
+               regno == jit_regno(cur->v.w))
+               return;
+           if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+               (jit_cc_a2_reg|jit_cc_a2_chg) &&
+               regno == jit_regno(cur->w.w))
+               return;
+       }
+
+       last = cur;
+       cur = cur->next;
+    }
+}
+
+/*
+ * Handle a register to register move while tracking register values.
+ * `kind' is the constant kind matching the move (word, float32 or
+ * float64) and `size' the number of bytes compared for constants.
+ *
+ * The move is deleted, and 1 returned, when the destination is already
+ * recorded as a copy of the current generation of the source, or when
+ * both registers are recorded as holding the same constant of `kind'.
+ * Otherwise the destination's tracked value is updated, its generation
+ * is bumped, and 0 is returned.
+ */
+static jit_bool_t
+_simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
+              jit_int32_t kind, jit_int32_t size)
+{
+    jit_value_t                *target;
+    jit_value_t                *source;
+    jit_int32_t                 regno;
+    jit_int32_t                 right;
+    jit_bool_t          same;
+
+    regno = jit_regno(node->u.w);
+    right = jit_regno(node->v.w);
+    target = _jitc->values + regno;
+    source = _jitc->values + right;
+
+    /* already a copy of what the source currently holds? */
+    same = target->kind == jit_kind_register &&
+       jit_regno(target->base.q.l) == right &&
+       target->base.q.h == _jitc->gen[right];
+    /* or do both hold the same known constant? */
+    if (!same)
+       same = target->kind == kind && source->kind == kind &&
+           memcmp(&target->base.w, &source->base.w, size) == 0;
+    if (same) {
+       del_node(prev, node);
+       return (1);
+    }
+
+    if (source->kind != jit_kind_word) {
+       /* remember which register, and which generation of it */
+       target->kind = jit_kind_register;
+       target->base.q.l = right;
+       target->base.q.h = _jitc->gen[right];
+    }
+    else
+       /* a word constant is propagated as is */
+       jit_memcpy(target, source, sizeof(jit_value_t));
+    ++_jitc->gen[regno];
+
+    return (0);
+}
+
+/*
+ * Handle a load of an immediate while tracking register values.
+ * `kind' is the constant kind (word, float32 or float64) and `size'
+ * the number of bytes of the immediate that are compared/copied.
+ *
+ * Returns 1 only when the node was deleted because the register is
+ * already recorded as holding the same constant.  When the register
+ * holds a different constant of the same kind and some other register
+ * of a compatible class holds the wanted one, the node is rewritten
+ * into the matching register move.  In all non deleting cases the
+ * tracked value is updated and the generation of the register bumped.
+ */
+static jit_bool_t
+_simplify_movi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
+              jit_int32_t kind, jit_int32_t size)
+{
+    jit_value_t                *known;
+    jit_int32_t                 wanted;
+    jit_int32_t                 regno;
+    jit_int32_t                 other;
+
+    regno = jit_regno(node->u.w);
+    known = _jitc->values + regno;
+
+    if (node->flag & jit_flag_node) {
+       /* set to undefined if value will be patched */
+       known->kind = 0;
+       ++_jitc->gen[regno];
+       return (0);
+    }
+
+    if (known->kind == kind) {
+       if (memcmp(&node->v.w, &known->base.w, size) == 0) {
+           del_node(prev, node);
+           return (1);
+       }
+
+       /* class bits another register must provide to be usable */
+       wanted = jit_class(_rvs[regno].spec);
+       if (kind != jit_kind_word)
+           wanted &= (jit_class_xpr | jit_class_fpr);
+       else
+           wanted &= jit_class_gpr;
+
+       for (other = 0; other < _jitc->reglen; other++) {
+           if (_jitc->values[other].kind != kind)
+               continue;
+           if (memcmp(&node->v.w, &_jitc->values[other].base.w, size) != 0)
+               continue;
+           if ((jit_class(_rvs[other].spec) & wanted) != wanted)
+               continue;
+
+           /* turn the immediate load into a register copy */
+           if (kind == jit_kind_float32)
+               node->code = jit_code_movr_f;
+           else if (kind == jit_kind_word)
+               node->code = jit_code_movr;
+           else
+               node->code = jit_code_movr_d;
+           node->v.w = other;
+           jit_memcpy(known, _jitc->values + other, sizeof(jit_value_t));
+           ++_jitc->gen[regno];
+           return (0);
+       }
+    }
+
+    /* record the constant now held by the register */
+    known->kind = kind;
+    jit_memcpy(&known->base.w, &node->v.w, size);
+    ++_jitc->gen[regno];
+
+    return (0);
+}
+
+/* simple/safe redundancy test not checking if another register
+ * holds the same value
+ *
+ * The load is deleted, and 1 returned, when the destination register
+ * is recorded as the result of the same load code, from the same base
+ * register at its current generation, with the same displacement, and
+ * the destination is not the base register itself.  Otherwise the
+ * load is recorded as the tracked value of the destination, whose
+ * generation is bumped, and 0 is returned.
+ */
+static jit_bool_t
+_simplify_ldxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_value_t                *known;
+    jit_int32_t                 regno;
+    jit_int32_t                 base;
+
+    regno = jit_regno(node->u.w);
+    base = jit_regno(node->v.w);
+    known = &_jitc->values[regno];
+
+    if (known->kind == jit_kind_code && regno != base) {
+       if (known->code == node->code &&
+           known->disp.w == node->w.w &&
+           known->base.q.l == base &&
+           known->base.q.h == _jitc->gen[base]) {
+           del_node(prev, node);
+           return (1);
+       }
+    }
+
+    /* remember what was loaded and from where */
+    known->kind = jit_kind_code;
+    known->code = node->code;
+    known->disp.w = node->w.w;
+    known->base.q.l = base;
+    known->base.q.h = _jitc->gen[base];
+    ++_jitc->gen[regno];
+
+    return (0);
+}
+
+/*
+ * Handle a store with displacement while tracking register values.
+ * In these nodes u.w is the displacement, v.w the base register and
+ * w.w the register being stored.
+ *
+ * Returns 1 if the store was deleted as redundant, 0 otherwise.  When
+ * not deleted, every tracked load result is invalidated and, if the
+ * stored register has no tracked value left, it is recorded as if it
+ * had been loaded from the location just written.
+ *
+ * NOTE(review): the redundancy test compares the tracked code with
+ * node->code, a stxi code, while the functions visible in this part of
+ * the file only ever record ldxi codes; the test looks like it can
+ * never succeed -- confirm against the rest of the file.
+ */
+static jit_bool_t
+_simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
+{
+    jit_value_t                *known;
+    jit_code_t          load;
+    jit_int32_t                 regno;
+    jit_int32_t                 base;
+    jit_int32_t                 index;
+
+    regno = jit_regno(node->w.w);
+    base = jit_regno(node->v.w);
+    known = &_jitc->values[regno];
+
+    /* check for redundant store after load */
+    if (known->kind == jit_kind_code && regno != base) {
+       if (known->code == node->code &&
+           known->disp.w == node->u.w &&
+           known->base.q.l == base &&
+           known->base.q.h == _jitc->gen[base]) {
+           del_node(prev, node);
+           return (1);
+       }
+    }
+
+    /* assume anything can alias, and invalidate tracked values */
+    for (index = 0; index < _jitc->reglen; index++) {
+       if (_jitc->values[index].kind != jit_kind_code)
+           continue;
+       _jitc->values[index].kind = 0;
+       ++_jitc->gen[index];
+    }
+
+    /* no multiple information, so, if set to a constant,
+     * prefer to keep that information */
+    if (known->kind != 0)
+       return (0);
+
+    /* no information about signed/unsigned either */
+    switch (node->code) {
+       case jit_code_stxi_c:   load = jit_code_ldxi_c;         break;
+       case jit_code_stxi_s:   load = jit_code_ldxi_s;         break;
+       case jit_code_stxi_i:   load = jit_code_ldxi_i;         break;
+       case jit_code_stxi_l:   load = jit_code_ldxi_l;         break;
+       case jit_code_stxi_f:   load = jit_code_ldxi_f;         break;
+       case jit_code_stxi_d:   load = jit_code_ldxi_d;         break;
+       default:                abort();
+    }
+    known->kind = jit_kind_code;
+    known->code = load;
+    known->base.q.l = base;
+    known->base.q.h = _jitc->gen[base];
+    known->disp.w = node->u.w;
+
+    return (0);
+}
+
+/* usually there should be only one store in the
+ * jit_get_reg/jit_unget_reg, but properly handle
+ * multiple ones by moving the save node
+ *
+ * _jitc->spill[regno], when set, is the node that precedes the pending
+ * save node of `regno'.  Unless that save node is `node' itself, it is
+ * unlinked from its position and linked back right after `node', and
+ * `node' becomes the recorded predecessor. */
+static void
+_simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_node_t         *before;
+    jit_node_t         *save;
+
+    before = _jitc->spill[regno];
+    if (!before)
+       return;
+    save = before->next;
+    if (save == node)
+       return;
+
+    /* unlink the save node ... */
+    before->next = save->next;
+    /* ... and link it again after node */
+    save->next = node->next;
+    node->next = save;
+    _jitc->spill[regno] = node;
+}
+
+/* checks for simple cases where a register is set more than
+ * once to the same value, and is a common pattern of calls
+ * to jit_pushargi and jit_pushargr
+ *
+ * Walks the node list from _jitc->head keeping, for every register,
+ * a tracked value (_jitc->values) and a generation counter
+ * (_jitc->gen).  Moves, immediate loads and loads/stores with
+ * displacement are handed to the simplify_* helpers, which may delete
+ * the node; save/load pairs that turn out to be unnecessary are
+ * removed as well.  All tracked state is dropped at labels, prologs,
+ * calls and jumps.
+ */
+static void
+_simplify(jit_state_t *_jit)
+{
+    jit_node_t         *prev;
+    jit_node_t         *node;
+    jit_node_t         *next;
+    jit_int32_t                 info;
+    jit_int32_t                 regno;
+
+    for (prev = NULL, node = _jitc->head; node; prev = node, node = next) {
+       /* fetched before the switch because node may be deleted below */
+       next = node->next;
+       switch (node->code) {
+           case jit_code_label:        case jit_code_prolog:
+           case jit_code_callr:        case jit_code_calli:
+           reset:
+               /* forget everything known about register contents */
+               memset(_jitc->gen, 0, sizeof(jit_int32_t) * _jitc->reglen);
+               memset(_jitc->values, 0, sizeof(jit_value_t) * _jitc->reglen);
+               break;
+           case jit_code_save:
+               /* remember the node before the save node, so that the
+                * save can later be unlinked or moved */
+               _jitc->spill[jit_regno(node->u.w)] = prev;
+               break;
+           case jit_code_load:
+               regno = jit_regno(node->u.w);
+               /* node->link is used here as the matching save node */
+               if (register_change_p(node->link->next, node, regno) !=
+                   jit_reg_change) {
+                   /* spill not required due to optimizing common
+                    * redundancy case of calling jit_get_reg/jit_unget_reg
+                    * and then setting the register to the value it is
+                    * already holding */
+                   patch_register(node->link->next, node,
+                                  jit_regno_patch|regno, regno);
+                   del_node(_jitc->spill[regno], node->link);
+                   del_node(prev, node);
+                   /* so that the loop increment keeps prev unchanged */
+                   node = prev;
+               }
+               _jitc->spill[regno] = NULL;
+               break;
+           /* for the cases below, a nonzero result means the node was
+            * deleted: iteration resumes from prev, and a pending save
+            * of the register is repositioned by simplify_spill */
+           case jit_code_movr:
+               regno = jit_regno(node->u.w);
+               if (simplify_movr(prev, node,
+                                 jit_kind_word, sizeof(jit_word_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_movi:
+               regno = jit_regno(node->u.w);
+               if (simplify_movi(prev, node,
+                                 jit_kind_word, sizeof(jit_word_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_movr_f:
+               regno = jit_regno(node->u.w);
+               if (simplify_movr(prev, node,
+                                 jit_kind_float32, sizeof(jit_float32_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_movi_f:
+               regno = jit_regno(node->u.w);
+               if (simplify_movi(prev, node,
+                                 jit_kind_float32, sizeof(jit_float32_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_movr_d:
+               regno = jit_regno(node->u.w);
+               if (simplify_movr(prev, node,
+                                 jit_kind_float64, sizeof(jit_float64_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_movi_d:
+               regno = jit_regno(node->u.w);
+               if (simplify_movi(prev, node,
+                                 jit_kind_float64, sizeof(jit_float64_t)))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_ldxi_c:       case jit_code_ldxi_uc:
+           case jit_code_ldxi_s:       case jit_code_ldxi_us:
+           case jit_code_ldxi_i:       case jit_code_ldxi_ui:
+           case jit_code_ldxi_l:
+           case jit_code_ldxi_f:       case jit_code_ldxi_d:
+               regno = jit_regno(node->u.w);
+               if (simplify_ldxi(prev, node))
+                   simplify_spill(node = prev, regno);
+               break;
+           case jit_code_stxi_c:       case jit_code_stxi_s:
+           case jit_code_stxi_i:       case jit_code_stxi_l:
+           case jit_code_stxi_f:       case jit_code_stxi_d:
+               /* NOTE(review): _simplify_stxi reads the stored register
+                * from node->w.w and the displacement from node->u.w, so
+                * regno below is derived from the displacement and would
+                * be used to index _jitc->spill[] -- confirm whether
+                * node->w.w was intended */
+               regno = jit_regno(node->u.w);
+               if (simplify_stxi(prev, node))
+                   simplify_spill(node = prev, regno);
+               break;
+           default:
+               info = jit_classify(node->code);
+               if (info & jit_cc_a0_jmp)
+                   /* labels are not implicitly added when not taking
+                    * a conditional branch */
+                   goto reset;
+               /* invalidate every register the node is classified as
+                * changing, in any of its three operand slots */
+               if (info & jit_cc_a0_chg) {
+                   if (info & jit_cc_a0_rlh) {
+                       /* first operand is a low/high register pair */
+                       regno = jit_regno(node->u.q.l);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                       regno = jit_regno(node->u.q.h);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                   }
+                   else {
+                       regno = jit_regno(node->u.w);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                   }
+               }
+               if (info & jit_cc_a1_chg) {
+                   regno = jit_regno(node->v.w);
+                   _jitc->values[regno].kind = 0;
+                   ++_jitc->gen[regno];
+               }
+               if (info & jit_cc_a2_chg) {
+                   regno = jit_regno(node->w.w);
+                   _jitc->values[regno].kind = 0;
+                   ++_jitc->gen[regno];
+               }
+               break;
+       }
+    }
+}
+
+/*
+ * Scan the nodes from `node' up to, but not including, `link' and
+ * report what may happen to register `regno' in that range:
+ *   jit_reg_change    a label, prolog or jump was found, or a node
+ *                     classified as changing the register;
+ *   jit_reg_undef     a call was found and the register does not have
+ *                     jit_class_sav in its class;
+ *   jit_reg_static    none of the above.
+ */
+static jit_int32_t
+_register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
+                  jit_int32_t regno)
+{
+    jit_int32_t                spec;
+
+    while (node != link) {
+       if (node->code == jit_code_label || node->code == jit_code_prolog)
+           /* lack of extra information so cannot say it is undefined */
+           return (jit_reg_change);
+
+       if (node->code == jit_code_callr || node->code == jit_code_calli) {
+           if ((jit_class(_rvs[regno].spec) & jit_class_sav) == 0)
+               return (jit_reg_undef);
+       }
+       else {
+           spec = jit_classify(node->code);
+           /* lack of extra information */
+           if (spec & jit_cc_a0_jmp)
+               return (jit_reg_change);
+           /* first operand written: single register or low/high pair */
+           if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+               (jit_cc_a0_reg|jit_cc_a0_chg)) {
+               if (spec & jit_cc_a0_rlh) {
+                   if (node->u.q.l == regno || node->u.q.h == regno)
+                       return (jit_reg_change);
+               }
+               else if (node->u.w == regno)
+                   return (jit_reg_change);
+           }
+           if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+               (jit_cc_a1_reg|jit_cc_a1_chg) && node->v.w == regno)
+               return (jit_reg_change);
+           if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+               (jit_cc_a2_reg|jit_cc_a2_chg) && node->w.w == regno)
+               return (jit_reg_change);
+       }
+
+       node = node->next;
+    }
+
+    return (jit_reg_static);
+}
+
+/* most of this could be done at the same time as generating jit, but
+ * avoid complications on different cpu backends and patch spill/loads
+ * here, by simulating jit generation
+ *
+ * Returns 0 when `regno' is not in _jitc->reglive, is still not there
+ * after jit_update() is run from the node following `node', and
+ * register_change_p() over the nodes up to node->link does not report
+ * jit_reg_change.  Returns 1 in every other case.  As a side effect
+ * the bit of `regno' is set in _jitc->regmask before the update. */
+static jit_bool_t
+_spill_reglive_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    if (jit_regset_tstbit(&_jitc->reglive, regno))
+       return (1);
+
+    jit_regset_setbit(&_jitc->regmask, regno);
+    jit_update(node->next, &_jitc->reglive, &_jitc->regmask);
+    if (jit_regset_tstbit(&_jitc->reglive, regno))
+       return (1);
+
+    if (register_change_p(node->next, node->link, regno) == jit_reg_change)
+       return (1);
+
+    return (0);
+}
+
+/*
+ * Resolve every jit_code_save/jit_code_load pair of the node list.
+ * For each save node one of three things happens:
+ *   - spill_reglive_p() reports the register does not need to be
+ *     preserved: the save is deleted and the paired load (node->link)
+ *     is tagged with jit_regload_delete;
+ *   - another register of the same class is found for which
+ *     spill_reglive_p() also reports no need to preserve: the nodes
+ *     between the save and the load are renamed with patch_register(),
+ *     the save is deleted and the load tagged with jit_regload_isdead;
+ *   - otherwise the save node is rewritten in place as a store, at an
+ *     offset obtained from jit_allocai() relative to JIT_FP, and the
+ *     paired load is later rewritten as the matching load.
+ * Tagged load nodes are deleted when they are reached.
+ * _jitc->function tracks the function being walked: it is set at
+ * prolog nodes and cleared at epilog nodes.
+ */
+static void
+_patch_registers(jit_state_t *_jit)
+{
+    jit_node_t         *prev;
+    jit_node_t         *node;
+    jit_node_t         *next;
+    jit_int32_t                 info;
+    jit_int32_t                 spec;
+    jit_int32_t                 regno;
+    jit_int32_t                 value;
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+    /* prev is only advanced at the bottom of the loop, so it is left
+     * unchanged by the `continue' paths that delete the current node */
+    for (prev = NULL, node = _jitc->head; node; node = next) {
+       next = node->next;
+
+       info = jit_classify(node->code);
+       jit_regarg_set(node, info);
+
+       switch (node->code) {
+           case jit_code_save:
+               regno = jit_regno(node->u.w);
+               if (!spill_reglive_p(node, regno)) {
+                   /* register is not live, just remove spill/reload */
+                   jit_regarg_clr(node, info);
+                   node->link->v.w = jit_regload_delete;
+                   del_node(prev, node);
+                   continue;
+               }
+               else {
+                   /* try to find a free register of the same class */
+                   spec = jit_class(_rvs[regno].spec) & ~jit_class_arg;
+                   for (value = 0; value < _jitc->reglen; value++) {
+                       if (value != regno &&
+                           ((jit_class(_rvs[value].spec) & spec) &
+                            ~jit_class_arg) == spec &&
+                           !jit_regset_tstbit(&_jitc->regarg, value) &&
+                           !spill_reglive_p(node, value))
+                           break;
+                   }
+                   if (value < _jitc->reglen) {
+                       jit_regarg_clr(node, info);
+                       patch_register(node->next, node->link,
+                                      jit_regno_patch|node->u.w,
+                                      jit_regno_patch|value);
+                       /* mark as live just in case there are nested
+                        * register patches, so that next patch will
+                        * not want to use the same register */
+                       jit_regset_setbit(&_jitc->reglive, value);
+                       /* the range now uses the free register found
+                        * above, so the spill/reload is not needed */
+                       node->link->v.w = jit_regload_isdead;
+                       del_node(prev, node);
+                       continue;
+                   }
+                   else {
+                       /* failed to find a free register */
+                       if (spec & jit_class_gpr) {
+                           /* the slot is allocated once per register
+                            * and function, then reused */
+                           if (!_jitc->function->regoff[regno])
+                               _jitc->function->regoff[regno] =
+                                   jit_allocai(sizeof(jit_word_t));
+#if __WORDSIZE == 32
+                           node->code = jit_code_stxi_i;
+#else
+                           node->code = jit_code_stxi_l;
+#endif
+                       }
+                       else {
+                           node->code = jit_code_stxi_d;
+                           if (!_jitc->function->regoff[regno])
+                               _jitc->function->regoff[regno] =
+                                   jit_allocai(sizeof(jit_float64_t));
+                       }
+                       /* stxi operands: displacement, base, register */
+                       node->u.w = _jitc->function->regoff[regno];
+                       node->v.w = JIT_FP;
+                       node->w.w = regno;
+                       node->link = NULL;
+                   }
+               }
+               break;
+           case jit_code_load:
+               regno = jit_regno(node->u.w);
+               /* a nonzero v.w is one of the tags stored by the save
+                * case above: the load is not needed */
+               if (node->v.w) {
+                   if (node->v.w == jit_regload_isdead)
+                       jit_regset_clrbit(&_jitc->reglive, regno);
+                   del_node(prev, node);
+                   continue;
+               }
+               spec = jit_class(_rvs[regno].spec);
+               if (spec & jit_class_gpr) {
+#if __WORDSIZE == 32
+                   node->code = jit_code_ldxi_i;
+#else
+                   node->code = jit_code_ldxi_l;
+#endif
+               }
+               else
+                   node->code = jit_code_ldxi_d;
+               /* NOTE(review): the next assignment is a dead store, it
+                * is overwritten by the line after it; presumably
+                * node->u.w = regno was intended -- confirm */
+               node->v.w = regno;
+               node->v.w = JIT_FP;
+               node->w.w = _jitc->function->regoff[regno];
+               node->link = NULL;
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               break;
+           case jit_code_epilog:
+               _jitc->function = NULL;
+               break;
+           default:
+               break;
+       }
+
+       jit_regarg_clr(node, info);
+       /* update register live state */
+       jit_reglive(node);
+       prev = node;
+    }
+}
+
+/*
+ * Rename a register in the nodes from `node' up to, but not including,
+ * `link': every operand that jit_classify() reports as a register and
+ * whose value is exactly `regno' is replaced by `patch'.  The values
+ * are compared as stored, without applying jit_regno().
+ */
+static void
+_patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
+               jit_int32_t regno, jit_int32_t patch)
+{
+    jit_int32_t                spec;
+
+    while (node != link) {
+       spec = jit_classify(node->code);
+
+       /* first operand: either a low/high pair or a single register */
+       if ((spec & (jit_cc_a0_reg|jit_cc_a0_rlh)) ==
+           (jit_cc_a0_reg|jit_cc_a0_rlh)) {
+           if (node->u.q.l == regno)
+               node->u.q.l = patch;
+           if (node->u.q.h == regno)
+               node->u.q.h = patch;
+       }
+       else if ((spec & jit_cc_a0_reg) && node->u.w == regno)
+           node->u.w = patch;
+
+       /* second and third operands */
+       if ((spec & jit_cc_a1_reg) && node->v.w == regno)
+           node->v.w = patch;
+       if ((spec & jit_cc_a2_reg) && node->w.w == regno)
+           node->w.w = patch;
+
+       node = node->next;
+    }
+}
+
+/* Textually include the backend source matching the predefined
+ * architecture macro of the compiler.  There is no #else branch: when
+ * no macro matches, no backend file is included. */
+#if defined(__i386__) || defined(__x86_64__)
+#  include "jit_x86.c"
+#elif defined(__mips__)
+#  include "jit_mips.c"
+#elif defined(__arm__)
+#  include "jit_arm.c"
+#elif defined(__powerpc__)
+#  include "jit_ppc.c"
+#elif defined(__sparc__)
+#  include "jit_sparc.c"
+#elif defined(__ia64__)
+#  include "jit_ia64.c"
+#elif defined(__hppa__)
+#  include "jit_hppa.c"
+#elif defined(__aarch64__)
+#  include "jit_aarch64.c"
+#elif defined(__s390__) || defined(__s390x__)
+#  include "jit_s390.c"
+#elif defined(__alpha__)
+#  include "jit_alpha.c"
+#elif defined(__riscv)
+#  include "jit_riscv.c"
+#endif
diff --git a/deps/lightning/lightning.pc.in b/deps/lightning/lightning.pc.in
new file mode 100644 (file)
index 0000000..e13f7c0
--- /dev/null
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: GNU Lightning
+Description: JIT library
+Version: @VERSION@
+Libs: -L${libdir} -llightning
+Cflags: -I${includedir}/
+
+
diff --git a/deps/lightning/m4/.gitkeep b/deps/lightning/m4/.gitkeep
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/deps/lightning/size.c b/deps/lightning/size.c
new file mode 100644 (file)
index 0000000..4e93370
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#include <lightning.h>
+#include <lightning/jit_private.h>
+#include <stdio.h>
+#include "lib/jit_names.c"
+
+jit_int16_t    _szs[jit_code_last_code];       /* largest size seen so far for each code, indexed by code */
+
+int
+main(int argc, char *argv[])
+{
+    FILE               *fp;
+    jit_word_t          offset;
+    int                         code, size, max;
+
+    if ((fp = fopen(JIT_SIZE_PATH, "r")) == NULL)
+       exit(-1);
+    while (fscanf(fp, "%d %d\n", &code, &size) == 2) {
+       if (_szs[code] < size)
+           _szs[code] = size;
+    }
+    fclose(fp);
+
+    max = 0;
+    for (offset = 0; offset < jit_code_last_code; offset++)
+       if (max < _szs[offset])
+           max = _szs[offset];
+
+    if ((fp = fopen(JIT_SIZE_PATH, "w")) == NULL)
+       exit(-1);
+
+
+#if __X64 || __X32
+#  if __X64
+    fprintf(fp, "#if __X64\n");
+#    if __X64_32
+    fprintf(fp, "#  if __X64_32\n");
+#    else
+    fprintf(fp, "#  if !__X64_32\n");
+#    endif
+#  else
+    fprintf(fp, "#if __X32\n");
+#  endif
+#else
+    fprintf(fp, "#if __WORDSIZE == %d\n", __WORDSIZE);
+#endif
+#if defined(__arm__)
+#  if defined(__ARM_PCS_VFP)
+    fprintf(fp, "#if defined(__ARM_PCS_VFP)\n");
+#  else
+    fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n");
+#  endif
+#elif defined(__mips__)
+#  if __WORDSIZE == 32
+#    if NEW_ABI
+    fprintf(fp, "#if NEW_ABI\n");
+#    else
+    fprintf(fp, "#if !NEW_ABI\n");
+#    endif
+#  endif
+#elif defined(__powerpc__)
+    fprintf(fp, "#if defined(__powerpc__)\n");
+    fprintf(fp, "#if __BYTE_ORDER == %s\n",
+           __BYTE_ORDER == __BIG_ENDIAN ? "__BIG_ENDIAN" : "__LITTLE_ENDIAN");
+#  if __WORDSIZE == 32
+    fprintf(fp, "#if %s\n",
+#    if !_CALL_SYSV
+          "!"
+#    endif
+          "_CALL_SYSV"
+          );
+#  endif
+#endif
+    fprintf(fp, "#define JIT_INSTR_MAX %d\n", max);
+    for (offset = 0; offset < jit_code_last_code; offset++)
+       fprintf(fp, "    %d,    /* %s */\n", _szs[offset], code_name[offset]);
+#if defined(__arm__)
+    fprintf(fp, "#endif /* __ARM_PCS_VFP */\n");
+#elif defined(__mips__)
+#  if __WORDSIZE == 32
+    fprintf(fp, "#endif /* NEW_ABI */\n");
+#  endif
+#elif defined(__powerpc__)
+    fprintf(fp, "#endif /* "
+#  if !_CALL_SYSV
+           "!"
+#  endif
+           "_CALL_SYSV"
+           " */\n");
+    fprintf(fp, "#endif /* __BYTE_ORDER */\n");
+    fprintf(fp, "#endif /* __powerpc__ */\n");
+#endif
+#if __X64 || __X32
+#  if __X64
+    fprintf(fp, "#  endif /* __X64_32 */\n");
+    fprintf(fp, "#endif /* __X64 */\n");
+#  else
+    fprintf(fp, "#endif /* __X32 */\n");
+#  endif
+#else
+    fprintf(fp, "#endif /* __WORDSIZE */\n");
+#endif
+
+    fclose(fp);
+
+    return (0);
+}