Merge pull request #615 from pcercuei/update-lightrec-20220220
authorAutechre <libretro@gmail.com>
Tue, 22 Feb 2022 07:21:06 +0000 (08:21 +0100)
committerGitHub <noreply@github.com>
Tue, 22 Feb 2022 07:21:06 +0000 (08:21 +0100)
Update Lightning/Lightrec to the latest upstream version

67 files changed:
Makefile
deps/lightning/.gitmodules [new file with mode: 0644]
deps/lightning/.gitrepo
deps/lightning/ChangeLog
deps/lightning/Makefile.am
deps/lightning/README-hacking [new file with mode: 0644]
deps/lightning/bootstrap [new file with mode: 0755]
deps/lightning/bootstrap.conf [new file with mode: 0644]
deps/lightning/check/Makefile.am
deps/lightning/check/lightning.c
deps/lightning/check/live.ok [new file with mode: 0644]
deps/lightning/check/live.tst [new file with mode: 0644]
deps/lightning/configure.ac
deps/lightning/doc/.gitignore
deps/lightning/doc/Makefile.am
deps/lightning/doc/body.texi
deps/lightning/doc/version.texi [deleted file]
deps/lightning/gnulib [new submodule]
deps/lightning/gnulib-lib/.gitignore [new file with mode: 0644]
deps/lightning/include/lightning.h [deleted file]
deps/lightning/include/lightning.h.in
deps/lightning/include/lightning/jit_mips.h
deps/lightning/include/lightning/jit_private.h
deps/lightning/lib/Makefile.am
deps/lightning/lib/jit_aarch64-cpu.c
deps/lightning/lib/jit_disasm.c
deps/lightning/lib/jit_memory.c
deps/lightning/lib/jit_mips-cpu.c
deps/lightning/lib/jit_ppc-cpu.c
deps/lightning/lib/jit_print.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c
deps/lightning/m4/.gitignore [new file with mode: 0644]
deps/lightning/m4/gnulib-cache.m4 [new file with mode: 0644]
deps/lightrec/.gitrepo
deps/lightrec/CMakeLists.txt
deps/lightrec/blockcache.c
deps/lightrec/blockcache.h
deps/lightrec/config.h [deleted file]
deps/lightrec/config.h.cmakein [deleted file]
deps/lightrec/debug.h
deps/lightrec/disassembler.c
deps/lightrec/disassembler.h
deps/lightrec/emitter.c
deps/lightrec/emitter.h
deps/lightrec/interpreter.c
deps/lightrec/interpreter.h
deps/lightrec/lightning-wrapper.h [new file with mode: 0644]
deps/lightrec/lightrec-config.h.cmakein [new file with mode: 0644]
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/lightrec.h
deps/lightrec/memmanager.c
deps/lightrec/memmanager.h
deps/lightrec/optimizer.c
deps/lightrec/optimizer.h
deps/lightrec/reaper.c
deps/lightrec/reaper.h
deps/lightrec/recompiler.c
deps/lightrec/recompiler.h
deps/lightrec/regcache.c
deps/lightrec/regcache.h
deps/lightrec/slist.h
include/lightrec/lightrec-config.h [new file with mode: 0644]
libpcsxcore/gte.c
libpcsxcore/lightrec/plugin.c

index f21b823..fc04669 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -88,7 +88,7 @@ libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull
 
 # dynarec
 ifeq "$(DYNAREC)" "lightrec"
-CFLAGS += -Ideps/lightning/include -Ideps/lightrec \
+CFLAGS += -Ideps/lightning/include -Ideps/lightrec -Iinclude/lightrec \
                  -DLIGHTREC -DLIGHTREC_STATIC
 OBJS += libpcsxcore/lightrec/plugin.o
 OBJS += deps/lightning/lib/jit_disasm.o \
diff --git a/deps/lightning/.gitmodules b/deps/lightning/.gitmodules
new file mode 100644 (file)
index 0000000..acb2669
--- /dev/null
@@ -0,0 +1,3 @@
+[submodule "gnulib"]
+       path = gnulib
+       url = git://git.sv.gnu.org/gnulib.git
index bb1106e..d49a4e8 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://git.savannah.gnu.org/git/lightning.git
        branch = master
-       commit = b0b8eb5e856c0d29053dc842e1919a2eb58c8cda
-       parent = 819f3dfc11f81f58cb52bd7b1f7cc5025791af62
+       commit = 876c1043bec5bfd594482b40700c84693e40d0eb
+       parent = cef02748fe77c0d29b441447659262ce1da47c4b
        method = merge
-       cmdver = 0.4.1
+       cmdver = 0.4.3
index 76cac91..9964207 100644 (file)
@@ -1,3 +1,38 @@
+2021-04-03  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       * check/Makefile.am: Add test for the live instruction.
+       * check/live.ok: New file.
+       * check/live.tst: New file.
+       * doc/body.texi: Add documentation for the live instruction and
+       for jit_get_reg/jit_unget_reg.  Fix menu entries.
+       * include/lightning.h.in (jit_get_reg, jit_unget_reg): Expose the
+       macros in the public header file.
+       * include/lightning/jit_private.h (jit_get_reg, jit_unget_reg):
+       Remove the macros from the private header file.
+
+2021-04-03  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       * Makefile.am, check/Makefile.am, doc/Makefile.am,
+       lib/Makefile.am: Include $(top_builddir)/include in include paths
+       for the autoconf-generated header file lightning.h.
+
+2021-04-03  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       * doc/.gitignore: Add version.texi to list of ignored files.
+       * doc/version.texi: Remove file from version control.
+
+2020-18-04 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement %rip relative
+       calls and jumps on x86_64. Currently very conservative, assuming
+       a jit block can be larger than 2G, so, if a jump or call is in
+       the same jit generation, but target is unknown, use an indirect
+       branch (could have an option to assume a jit code block is
+       never larger than 2G). Also a deoptimization is that it now does
+       not always generate ip relative jmpi; previously implicitly
+       assumed jmpi would never be larger than 2G. Overall still an
+       optimization.
+
 2020-23-01 Paulo Andrade <pcpa@gnu.org>
 
        * lib/lightning.c: Add a proper fix to the condition of considering
@@ -26,7 +61,7 @@
        to the set of registers to scan for live range, what might
        consume a lot of cpu time, doing nothing.
 
-2019-09-16 Marc Nieper-WiÃkirchen <marc@nieper-wisskirchen.de>
+2019-09-16 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
 
        * include/lightning/jit_x86.h, lib/jit_x86.c: Correct x86_64
        backend, made %r12 a callee-save register as dictated by the
        https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925129
        * THANKS: update.
 
-2019-08-29 Marc Nieper-WiÃkirchen <marc@nieper-wisskirchen.de>
+2019-08-29 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
 
        * include/lightning/jit_private.h: Move definition of offsetof
        from the public header file here.
        * include/lightning/jit_arm.h: Correct wrong jit_f macro
        definition.
 
-       * include/lightning/jit_ia64.h, include/lightning/jit_ppc.h: 
+       * include/lightning/jit_ia64.h, include/lightning/jit_ppc.h:
        Correct wrong jit_r macro definition.
 
        * lib/jit_x86-x87.c, lib/jit_x86.c: Actually use the
        divr_f and divrd_d implementation.
 
        * check/lightning.c: Add __ia64__ preprocessor define
-       on Itanium. 
+       on Itanium.
 
        * check/alu.inc, check/clobber.tst, check/float.tst: Define
        several macros conditionally to __ia64__. This is required
        * include/lightning/jit_x86.h: Correct typo in macro name.
 
        * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
-       lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c, 
+       lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
        lib/jit_x86.c, lib/jit_x86-cpu.c: Correct wrong code to get
        current jit function pointer.
 
        * lightning/i386/core.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
        jit_ldxr_s): Move...
        * lightning/i386/core-32.h: ... here.
-       * lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
+       * lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s):
        Use movsbq and movswq.
 
 2010-08-10  Paulo César Pereira de Andrade <pcpa@mandriva.com.br>
 
        * lightning/ppc/funcs.h (jit_flush_code): modified the computation
        of start/end. The pointer arithmetic was done without casting. It
-       prevented compilation with recent gcc versions. 
+       prevented compilation with recent gcc versions.
        * lightning/ppc/core.h (jit_pushr_i): The offset for the store was
        incorrect. Should have been 4 bytes below SP (not above).
-       * lightning/ppc/core.h (jit_popr_i): The offset for the load was 
-       incorrect. Should have been 0 (not +8). 
+       * lightning/ppc/core.h (jit_popr_i): The offset for the load was
+       incorrect. Should have been 0 (not +8).
 
 2008-06-17  Paolo Bonzini  <bonzini@gnu.org>
 
        * lightning/i386/fp-32.h: ... here.
        * lightning/i386/fp-64.h: Write the code.
        * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
-       
+
 2006-11-22  Paolo Bonzini  <bonzini@gnu.org>
 
        * lightning/i386/asm-i386.h: Move x86-64 instructions...
 2006-01-23  Paolo Bonzini  <bonzini@gnu.org>
 
        * configure.ac: Fix comments in config.h.in.
-       
+
 2005-11-25  Paolo Bonzini  <bonzini@gnu.org>
 
        * lightning/sparc/fp.h: Fix header comment.
        * lightning/ppc/funcs.h: correctly align stack pointer
 
 No changelogs for the assemblers (lightning directory) until 1.0
-       
+
 2003-03-27  Paolo Bonzini  <bonzini@gnu.org>
 
        * tests/printf2.c: new test
@@ -4135,7 +4170,7 @@ No changelogs for the assemblers (lightning directory) until 1.0
 2001-01-19  Paolo Bonzini  <bonzini@gnu.org>
 
        * configure.in: support cross-assembling
-       
+
        * disass/bfd.h, disass/dis-asm.h, disass/dis-buf.c,
        disass/i386-dis.c, disass/i386.h, disass/ppc-dis.c,
        disass/ppc.h, disass/ppc-opc.c, disass/sparc-dis.c,
index c921901..112deae 100644 (file)
 ACLOCAL_AMFLAGS = -I m4
 
 SUBDIRS =              \
+       gnulib-lib      \
        check           \
        doc             \
        include         \
        lib
 
+EXTRA_DIST = m4/gnulib-cache.m4
+
 pkgconfiglibdir = $(libdir)/pkgconfig
 pkgconfiglib_DATA = lightning.pc
 
 if get_jit_size
 JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
 AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
-AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 noinst_PROGRAMS = size
 size_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
diff --git a/deps/lightning/README-hacking b/deps/lightning/README-hacking
new file mode 100644 (file)
index 0000000..285f3c9
--- /dev/null
@@ -0,0 +1,96 @@
+This README-hacking file describes the development environment.
+
+Everything related to the development of GNU lightning is on Savannah:
+https://savannah.gnu.org/projects/lightning/.
+
+
+* Working from the Repository
+
+** Autotools
+
+This distribution uses the latest stable versions of Automake and Autoconf.
+If you are getting the sources from git (or changing configure.ac), you'll
+need to have these tools installed to (re)build.  Both of these programs
+are available from ftp://ftp.gnu.org/gnu.
+
+If you're using a GNU/Linux distribution, the easiest way to install these
+packages depends on your system.  The following shell command should work
+for Debian-based systems such as Ubuntu:
+
+    $ sudo apt-get install autoconf automake
+
+
+** Building
+
+After getting the git sources, and installing the tools above, you can run
+
+    $ ./bootstrap
+    $ ./configure
+    $ make
+    $ make check
+
+to do a fresh build.  At this point, there should be no difference between
+your local copy and the master copy:
+
+    $ git diff
+
+should output no difference.
+
+After that first time, running make should suffice.
+
+** Gnulib
+
+This distribution also uses Gnulib (https://www.gnu.org/software/gnulib) to
+share common files, stored as a submodule in git.
+
+** Updating
+
+    $ git pull
+    $ git submodule update
+
+** Updating a submodule
+
+To update a submodule, say gnulib, do as follows:
+
+Get the most recent version of the master branch from git.
+
+    $ cd gnulib
+    $ git pull
+
+Make sure GNU lightning can live with that version of gnulib.
+
+    $ cd ..
+    $ ./bootstrap
+    $ make distcheck
+
+Register your changes.
+
+    $ git commit ...
+
+
+----
+
+
+Copyright 2021 Free Software Foundation, Inc.
+
+This file is part of GNU lightning.
+
+GNU lightning is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as
+published by the Free Software Foundation; either version 3, or (at
+your option) any later version.
+
+GNU lightning is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this program.  If not, see
+<http://www.gnu.org/licenses/>.
+
+
+Local Variables:
+mode: outline
+fill-column: 76
+End:
diff --git a/deps/lightning/bootstrap b/deps/lightning/bootstrap
new file mode 100755 (executable)
index 0000000..c17a36f
--- /dev/null
@@ -0,0 +1,1090 @@
+#! /bin/sh
+# Print a version string.
+scriptversion=2021-01-10.00; # UTC
+
+# Bootstrap this package from checked-out sources.
+
+# Copyright (C) 2003-2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# Originally written by Paul Eggert.  The canonical version of this
+# script is maintained as build-aux/bootstrap in gnulib, however, to
+# be useful to your project, you should place a copy of it under
+# version control in the top-level directory of your project.  The
+# intent is that all customization can be done with a bootstrap.conf
+# file also maintained in your version control; gnulib comes with a
+# template build-aux/bootstrap.conf to get you started.
+
+# Please report bugs or propose patches to bug-gnulib@gnu.org.
+
+nl='
+'
+
+# Ensure file names are sorted consistently across platforms.
+LC_ALL=C
+export LC_ALL
+
+# Ensure that CDPATH is not set.  Otherwise, the output from cd
+# would cause trouble in at least one use below.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+local_gl_dir=gl
+
+# Honor $PERL, but work even if there is none.
+PERL="${PERL-perl}"
+
+me=$0
+
+default_gnulib_url=https://git.savannah.gnu.org/git/gnulib.git
+
+usage() {
+  cat <<EOF
+Usage: $me [OPTION]...
+Bootstrap this package from the checked-out sources.
+
+Options:
+ --gnulib-srcdir=DIRNAME  specify the local directory where gnulib
+                          sources reside.  Use this if you already
+                          have gnulib sources on your machine, and
+                          do not want to waste your bandwidth downloading
+                          them again.  Defaults to \$GNULIB_SRCDIR
+ --bootstrap-sync         if this bootstrap script is not identical to
+                          the version in the local gnulib sources,
+                          update this script, and then restart it with
+                          /bin/sh or the shell \$CONFIG_SHELL
+ --no-bootstrap-sync      do not check whether bootstrap is out of sync
+ --copy                   copy files instead of creating symbolic links
+ --force                  attempt to bootstrap even if the sources seem
+                          not to have been checked out
+ --no-git                 do not use git to update gnulib.  Requires that
+                          --gnulib-srcdir point to a correct gnulib snapshot
+ --skip-po                do not download po files
+EOF
+  bootstrap_print_option_usage_hook
+  cat <<EOF
+If the file $me.conf exists in the same directory as this script, its
+contents are read as shell variables to configure the bootstrap.
+
+For build prerequisites, environment variables like \$AUTOCONF and \$AMTAR
+are honored.
+
+Gnulib sources can be fetched in various ways:
+
+ * If this package is in a git repository with a 'gnulib' submodule
+   configured, then that submodule is initialized and updated and sources
+   are fetched from there.  If \$GNULIB_SRCDIR is set (directly or via
+   --gnulib-srcdir) and is a git repository, then it is used as a reference.
+
+ * Otherwise, if \$GNULIB_SRCDIR is set (directly or via --gnulib-srcdir),
+   then sources are fetched from that local directory.  If it is a git
+   repository and \$GNULIB_REVISION is set, then that revision is checked
+   out.
+
+ * Otherwise, if this package is in a git repository with a 'gnulib'
+   submodule configured, then that submodule is initialized and updated and
+   sources are fetched from there.
+
+ * Otherwise, if the 'gnulib' directory does not exist, Gnulib sources are
+   cloned into that directory using git from \$GNULIB_URL, defaulting to
+   $default_gnulib_url.
+   If \$GNULIB_REVISION is set, then that revision is checked out.
+
+ * Otherwise, the existing Gnulib sources in the 'gnulib' directory are
+   used.  If it is a git repository and \$GNULIB_REVISION is set, then that
+   revision is checked out.
+
+If you maintain a package and want to pin a particular revision of the
+Gnulib sources that has been tested with your package, then there are two
+possible approaches: either configure a 'gnulib' submodule with the
+appropriate revision, or set \$GNULIB_REVISION (and if necessary
+\$GNULIB_URL) in $me.conf.
+
+Running without arguments will suffice in most cases.
+EOF
+}
+
+# warnf_ FORMAT-STRING ARG1...
+warnf_ ()
+{
+  warnf_format_=$1
+  shift
+  nl='
+'
+  case $* in
+    *$nl*) me_=$(printf "$me"|tr "$nl|" '??')
+       printf "$warnf_format_" "$@" | sed "s|^|$me_: |" ;;
+    *) printf "$me: $warnf_format_" "$@" ;;
+  esac >&2
+}
+
+# warn_ WORD1...
+warn_ ()
+{
+  # If IFS does not start with ' ', set it and emit the warning in a subshell.
+  case $IFS in
+    ' '*) warnf_ '%s\n' "$*";;
+    *)    (IFS=' '; warn_ "$@");;
+  esac
+}
+
+# die WORD1...
+die() { warn_ "$@"; exit 1; }
+
+# Configuration.
+
+# Name of the Makefile.am
+gnulib_mk=gnulib.mk
+
+# List of gnulib modules needed.
+gnulib_modules=
+
+# Any gnulib files needed that are not in modules.
+gnulib_files=
+
+: ${AUTOPOINT=autopoint}
+: ${AUTORECONF=autoreconf}
+
+# A function to be called for each unrecognized option.  Returns 0 if
+# the option in $1 has been processed by the function.  Returns 1 if
+# the option has not been processed by the function.  Override it via
+# your own definition in bootstrap.conf
+
+bootstrap_option_hook() { return 1; }
+
+# A function to be called in order to print the --help information
+# corresponding to user-defined command-line options.
+
+bootstrap_print_option_usage_hook() { :; }
+
+# A function to be called right after gnulib-tool is run.
+# Override it via your own definition in bootstrap.conf.
+bootstrap_post_import_hook() { :; }
+
+# A function to be called after everything else in this script.
+# Override it via your own definition in bootstrap.conf.
+bootstrap_epilogue() { :; }
+
+# The command to download all .po files for a specified domain into a
+# specified directory.  Fill in the first %s with the destination
+# directory and the second with the domain name.
+po_download_command_format=\
+"wget --mirror --level=1 -nd -nv -A.po -P '%s' \
+ https://translationproject.org/latest/%s/"
+
+# Prefer a non-empty tarname (4th argument of AC_INIT if given), else
+# fall back to the package name (1st argument with munging).
+extract_package_name='
+  /^AC_INIT(\[*/{
+     s///
+     /^[^,]*,[^,]*,[^,]*,[ []*\([^][ ,)]\)/{
+       s//\1/
+       s/[],)].*//
+       p
+       q
+     }
+     s/[],)].*//
+     s/^GNU //
+     y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
+     s/[^abcdefghijklmnopqrstuvwxyz0123456789_]/-/g
+     p
+  }
+'
+package=$(${AUTOCONF:-autoconf} --trace AC_INIT:\$4 configure.ac 2>/dev/null)
+if test -z "$package"; then
+  package=$(sed -n "$extract_package_name" configure.ac) \
+      || die 'cannot find package name in configure.ac'
+fi
+gnulib_name=lib$package
+
+build_aux=build-aux
+source_base=lib
+m4_base=m4
+doc_base=doc
+tests_base=tests
+gnulib_extra_files="
+        build-aux/install-sh
+        build-aux/mdate-sh
+        build-aux/texinfo.tex
+        build-aux/depcomp
+        build-aux/config.guess
+        build-aux/config.sub
+        doc/INSTALL
+"
+
+# Additional gnulib-tool options to use.  Use "\newline" to break lines.
+gnulib_tool_option_extras=
+
+# Other locale categories that need message catalogs.
+EXTRA_LOCALE_CATEGORIES=
+
+# Additional xgettext options to use.  Use "\\\newline" to break lines.
+XGETTEXT_OPTIONS='\\\
+ --flag=_:1:pass-c-format\\\
+ --flag=N_:1:pass-c-format\\\
+ --flag=error:3:c-format --flag=error_at_line:5:c-format\\\
+'
+
+# Package bug report address and copyright holder for gettext files
+COPYRIGHT_HOLDER='Free Software Foundation, Inc.'
+MSGID_BUGS_ADDRESS=bug-$package@gnu.org
+
+# Files we don't want to import.
+excluded_files=
+
+# File that should exist in the top directory of a checked out hierarchy,
+# but not in a distribution tarball.
+checkout_only_file=README-hacking
+
+# Whether to use copies instead of symlinks.
+copy=false
+
+# Set this to '.cvsignore .gitignore' in bootstrap.conf if you want
+# those files to be generated in directories like lib/, m4/, and po/.
+# Or set it to 'auto' to make this script select which to use based
+# on which version control system (if any) is used in the source directory.
+vc_ignore=auto
+
+# Set this to true in bootstrap.conf to enable --bootstrap-sync by
+# default.
+bootstrap_sync=false
+
+# Use git to update gnulib sources
+use_git=true
+
+check_exists() {
+  if test "$1" = "--verbose"; then
+    ($2 --version </dev/null) >/dev/null 2>&1
+    if test $? -ge 126; then
+      # If not found, run with diagnostics as one may be
+      # presented with env variables to set to find the right version
+      ($2 --version </dev/null)
+    fi
+  else
+    ($1 --version </dev/null) >/dev/null 2>&1
+  fi
+
+  test $? -lt 126
+}
+
+# find_tool ENVVAR NAMES...
+# -------------------------
+# Search for a required program.  Use the value of ENVVAR, if set,
+# otherwise find the first of the NAMES that can be run.
+# If found, set ENVVAR to the program name, die otherwise.
+#
+# FIXME: code duplication, see also gnu-web-doc-update.
+find_tool ()
+{
+  find_tool_envvar=$1
+  shift
+  find_tool_names=$@
+  eval "find_tool_res=\$$find_tool_envvar"
+  if test x"$find_tool_res" = x; then
+    for i; do
+      if check_exists $i; then
+        find_tool_res=$i
+        break
+      fi
+    done
+  fi
+  if test x"$find_tool_res" = x; then
+    warn_ "one of these is required: $find_tool_names;"
+    die   "alternatively set $find_tool_envvar to a compatible tool"
+  fi
+  eval "$find_tool_envvar=\$find_tool_res"
+  eval "export $find_tool_envvar"
+}
+
+# Override the default configuration, if necessary.
+# Make sure that bootstrap.conf is sourced from the current directory
+# if we were invoked as "sh bootstrap".
+case "$0" in
+  */*) test -r "$0.conf" && . "$0.conf" ;;
+  *) test -r "$0.conf" && . ./"$0.conf" ;;
+esac
+
+if test "$vc_ignore" = auto; then
+  vc_ignore=
+  test -d .git && vc_ignore=.gitignore
+  test -d CVS && vc_ignore="$vc_ignore .cvsignore"
+fi
+
+if test x"$gnulib_modules$gnulib_files$gnulib_extra_files" = x; then
+  use_gnulib=false
+else
+  use_gnulib=true
+fi
+
+# Translate configuration into internal form.
+
+# Parse options.
+
+for option
+do
+  case $option in
+  --help)
+    usage
+    exit;;
+  --gnulib-srcdir=*)
+    GNULIB_SRCDIR=${option#--gnulib-srcdir=};;
+  --skip-po)
+    SKIP_PO=t;;
+  --force)
+    checkout_only_file=;;
+  --copy)
+    copy=true;;
+  --bootstrap-sync)
+    bootstrap_sync=true;;
+  --no-bootstrap-sync)
+    bootstrap_sync=false;;
+  --no-git)
+    use_git=false;;
+  *)
+    bootstrap_option_hook $option || die "$option: unknown option";;
+  esac
+done
+
+$use_git || test -d "$GNULIB_SRCDIR" \
+  || die "Error: --no-git requires --gnulib-srcdir"
+
+if test -n "$checkout_only_file" && test ! -r "$checkout_only_file"; then
+  die "Bootstrapping from a non-checked-out distribution is risky."
+fi
+
+# Strip blank and comment lines to leave significant entries.
+gitignore_entries() {
+  sed '/^#/d; /^$/d' "$@"
+}
+
+# If $STR is not already on a line by itself in $FILE, insert it at the start.
+# Entries are inserted at the start of the ignore list to ensure existing
+# entries starting with ! are not overridden.  Such entries support
+# whitelisting exceptions after a more generic blacklist pattern.
+insert_if_absent() {
+  file=$1
+  str=$2
+  test -f $file || touch $file
+  test -r $file || die "Error: failed to read ignore file: $file"
+  duplicate_entries=$(gitignore_entries $file | sort | uniq -d)
+  if [ "$duplicate_entries" ] ; then
+    die "Error: Duplicate entries in $file: " $duplicate_entries
+  fi
+  linesold=$(gitignore_entries $file | wc -l)
+  linesnew=$( { echo "$str"; cat $file; } | gitignore_entries | sort -u | wc -l)
+  if [ $linesold != $linesnew ] ; then
+    { echo "$str" | cat - $file > $file.bak && mv $file.bak $file; } \
+      || die "insert_if_absent $file $str: failed"
+  fi
+}
+
+# Adjust $PATTERN for $VC_IGNORE_FILE and insert it with
+# insert_if_absent.
+insert_vc_ignore() {
+  vc_ignore_file="$1"
+  pattern="$2"
+  case $vc_ignore_file in
+  *.gitignore)
+    # A .gitignore entry that does not start with '/' applies
+    # recursively to subdirectories, so prepend '/' to every
+    # .gitignore entry.
+    pattern=$(echo "$pattern" | sed s,^,/,);;
+  esac
+  insert_if_absent "$vc_ignore_file" "$pattern"
+}
+
+# Die if there is no AC_CONFIG_AUX_DIR($build_aux) line in configure.ac.
+found_aux_dir=no
+grep '^[        ]*AC_CONFIG_AUX_DIR(\['"$build_aux"'\])' configure.ac \
+    >/dev/null && found_aux_dir=yes
+grep '^[        ]*AC_CONFIG_AUX_DIR('"$build_aux"')' configure.ac \
+    >/dev/null && found_aux_dir=yes
+test $found_aux_dir = yes \
+  || die "configure.ac lacks 'AC_CONFIG_AUX_DIR([$build_aux])'; add it"
+
+# If $build_aux doesn't exist, create it now, otherwise some bits
+# below will malfunction.  If creating it, also mark it as ignored.
+if test ! -d $build_aux; then
+  mkdir $build_aux
+  for dot_ig in x $vc_ignore; do
+    test $dot_ig = x && continue
+    insert_vc_ignore $dot_ig $build_aux
+  done
+fi
+
+# Note this deviates from the version comparison in automake
+# in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a
+# but this should suffice as we won't be specifying old
+# version formats or redundant trailing .0 in bootstrap.conf.
+# If we did want full compatibility then we should probably
+# use m4_version_compare from autoconf.
+sort_ver() { # sort -V is not generally available
+  ver1="$1"
+  ver2="$2"
+
+  # split on '.' and compare each component
+  i=1
+  while : ; do
+    p1=$(echo "$ver1" | cut -d. -f$i)
+    p2=$(echo "$ver2" | cut -d. -f$i)
+    if [ ! "$p1" ]; then
+      echo "$1 $2"
+      break
+    elif [ ! "$p2" ]; then
+      echo "$2 $1"
+      break
+    elif [ ! "$p1" = "$p2" ]; then
+      if [ "$p1" -gt "$p2" ] 2>/dev/null; then # numeric comparison
+        echo "$2 $1"
+      elif [ "$p2" -gt "$p1" ] 2>/dev/null; then # numeric comparison
+        echo "$1 $2"
+      else # numeric, then lexicographic comparison
+        lp=$(printf "$p1\n$p2\n" | LANG=C sort -n | tail -n1)
+        if [ "$lp" = "$p2" ]; then
+          echo "$1 $2"
+        else
+          echo "$2 $1"
+        fi
+      fi
+      break
+    fi
+    i=$(($i+1))
+  done
+}
+
+get_version_sed='
+# Move version to start of line.
+s/.*[v ]\([0-9]\)/\1/
+
+# Skip lines that do not start with version.
+/^[0-9]/!d
+
+# Remove characters after the version.
+s/[^.a-z0-9-].*//
+
+# The first component must be digits only.
+s/^\([0-9]*\)[a-z-].*/\1/
+
+#the following essentially does s/5.005/5.5/
+s/\.0*\([1-9]\)/.\1/g
+p
+q'
+
+get_version() {
+  app=$1
+
+  $app --version >/dev/null 2>&1 || { $app --version; return 1; }
+
+  $app --version 2>&1 | sed -n "$get_version_sed"
+}
+
+check_versions() {
+  ret=0
+
+  while read app req_ver; do
+    # We only need libtoolize from the libtool package.
+    if test "$app" = libtool; then
+      app=libtoolize
+    fi
+    # Exempt git if --no-git is in effect.
+    if test "$app" = git; then
+      $use_git || continue
+    fi
+    # Honor $APP variables ($TAR, $AUTOCONF, etc.)
+    appvar=$(echo $app | LC_ALL=C tr '[a-z]-' '[A-Z]_')
+    test "$appvar" = TAR && appvar=AMTAR
+    case $appvar in
+        GZIP) ;; # Do not use $GZIP:  it contains gzip options.
+        PERL::*) ;; # Keep perl modules as-is
+        *) eval "app=\${$appvar-$app}" ;;
+    esac
+
+    # Handle the still-experimental Automake-NG programs specially.
+    # They remain named as the mainstream Automake programs ("automake",
+    # and "aclocal") to avoid gratuitous incompatibilities with
+    # pre-existing usages (by, say, autoreconf, or custom autogen.sh
+    # scripts), but correctly identify themselves (as being part of
+    # "GNU automake-ng") when asked their version.
+    case $app in
+      automake-ng|aclocal-ng)
+        app=${app%-ng}
+        ($app --version | grep '(GNU automake-ng)') >/dev/null 2>&1 || {
+          warn_ "Error: '$app' not found or not from Automake-NG"
+          ret=1
+          continue
+        } ;;
+      # Another check is for perl modules.  These can be written as
+      # e.g. perl::XML::XPath in case of XML::XPath module, etc.
+      perl::*)
+        # Extract module name
+        app="${app#perl::}"
+        if ! $PERL -m"$app" -e 'exit 0' >/dev/null 2>&1; then
+          warn_ "Error: perl module '$app' not found"
+          ret=1
+        fi
+        continue
+        ;;
+    esac
+    if [ "$req_ver" = "-" ]; then
+      # Merely require app to exist; not all prereq apps are well-behaved
+      # so we have to rely on $? rather than get_version.
+      if ! check_exists --verbose $app; then
+        warn_ "Error: '$app' not found"
+        ret=1
+      fi
+    else
+      # Require app to produce a new enough version string.
+      inst_ver=$(get_version $app)
+      if [ ! "$inst_ver" ]; then
+        warn_ "Error: '$app' not found"
+        ret=1
+      else
+        latest_ver=$(sort_ver $req_ver $inst_ver | cut -d' ' -f2)
+        if [ ! "$latest_ver" = "$inst_ver" ]; then
+          warnf_ '%s\n'                                        \
+              "Error: '$app' version == $inst_ver is too old"  \
+              "       '$app' version >= $req_ver is required"
+          ret=1
+        fi
+      fi
+    fi
+  done
+
+  return $ret
+}
+
+print_versions() {
+  echo "Program    Min_version"
+  echo "----------------------"
+  printf %s "$buildreq"
+  echo "----------------------"
+  # can't depend on column -t
+}
+
+# Find sha1sum, named gsha1sum on MacPorts, shasum on Mac OS X 10.6.
+# Also find the compatible sha1 utility on the BSDs
+if test x"$SKIP_PO" = x; then
+  find_tool SHA1SUM sha1sum gsha1sum shasum sha1
+fi
+
+use_libtool=0
+# We'd like to use grep -E, to see if any of LT_INIT,
+# AC_PROG_LIBTOOL, AM_PROG_LIBTOOL is used in configure.ac,
+# but that's not portable enough (e.g., for Solaris).
+grep '^[        ]*A[CM]_PROG_LIBTOOL' configure.ac >/dev/null \
+  && use_libtool=1
+grep '^[        ]*LT_INIT' configure.ac >/dev/null \
+  && use_libtool=1
+if test $use_libtool = 1; then
+  find_tool LIBTOOLIZE glibtoolize libtoolize
+fi
+
+# gnulib-tool requires at least automake and autoconf.
+# If either is not listed, add it (with minimum version) as a prerequisite.
+case $buildreq in
+  *automake*) ;;
+  *) buildreq="automake 1.9
+$buildreq" ;;
+esac
+case $buildreq in
+  *autoconf*) ;;
+  *) buildreq="autoconf 2.59
+$buildreq" ;;
+esac
+
+# When we can deduce that gnulib-tool will require patch,
+# and when patch is not already listed as a prerequisite, add it, too.
+if test -d "$local_gl_dir" \
+    && ! find "$local_gl_dir" -name '*.diff' -exec false {} +; then
+  case $buildreq in
+    *patch*) ;;
+    *) buildreq="patch -
+$buildreq" ;;
+  esac
+fi
+
+if ! printf "$buildreq" | check_versions; then
+  echo >&2
+  if test -f README-prereq; then
+    die "See README-prereq for how to get the prerequisite programs"
+  else
+    die "Please install the prerequisite programs"
+  fi
+fi
+
+# Warn the user if autom4te appears to be broken; this causes known
+# issues with at least gettext 0.18.3.
+probe=$(echo 'm4_quote([hi])' | autom4te -l M4sugar -t 'm4_quote:$%' -)
+if test "x$probe" != xhi; then
+  warn_ "WARNING: your autom4te wrapper eats stdin;"
+  warn_ "if bootstrap fails, consider upgrading your autotools"
+fi
+
+echo "$0: Bootstrapping from checked-out $package sources..."
+
+# See if we can use gnulib's git-merge-changelog merge driver.
+if $use_git && test -d .git && check_exists git; then
+  if git config merge.merge-changelog.driver >/dev/null ; then
+    :
+  elif check_exists git-merge-changelog; then
+    echo "$0: initializing git-merge-changelog driver"
+    git config merge.merge-changelog.name 'GNU-style ChangeLog merge driver'
+    git config merge.merge-changelog.driver 'git-merge-changelog %O %A %B'
+  else
+    echo "$0: consider installing git-merge-changelog from gnulib"
+  fi
+fi
+
+
+cleanup_gnulib() {
+  status=$?
+  rm -fr "$gnulib_path"
+  exit $status
+}
+
+git_modules_config () {
+  test -f .gitmodules && git config --file .gitmodules "$@"
+}
+
+if $use_gnulib; then
+  if $use_git; then
+    gnulib_path=$(git_modules_config submodule.gnulib.path)
+    test -z "$gnulib_path" && gnulib_path=gnulib
+  fi
+
+  # Get gnulib files.  Populate $GNULIB_SRCDIR, possibly updating a
+  # submodule, for use in the rest of the script.
+
+  case ${GNULIB_SRCDIR--} in
+  -)
+    # Note that $use_git is necessarily true in this case.
+    if git_modules_config submodule.gnulib.url >/dev/null; then
+      echo "$0: getting gnulib files..."
+      git submodule init -- "$gnulib_path" || exit $?
+      git submodule update -- "$gnulib_path" || exit $?
+
+    elif [ ! -d "$gnulib_path" ]; then
+      echo "$0: getting gnulib files..."
+
+      trap cleanup_gnulib 1 2 13 15
+
+      shallow=
+      if test -z "$GNULIB_REVISION"; then
+        git clone -h 2>&1 | grep -- --depth > /dev/null && shallow='--depth 2'
+      fi
+      git clone $shallow ${GNULIB_URL:-$default_gnulib_url} "$gnulib_path" \
+        || cleanup_gnulib
+
+      trap - 1 2 13 15
+    fi
+    GNULIB_SRCDIR=$gnulib_path
+    ;;
+  *)
+    # Use GNULIB_SRCDIR directly or as a reference.
+    if $use_git && test -d "$GNULIB_SRCDIR"/.git && \
+          git_modules_config submodule.gnulib.url >/dev/null; then
+      echo "$0: getting gnulib files..."
+      if git submodule -h|grep -- --reference > /dev/null; then
+        # Prefer the one-liner available in git 1.6.4 or newer.
+        git submodule update --init --reference "$GNULIB_SRCDIR" \
+          "$gnulib_path" || exit $?
+      else
+        # This fallback allows at least git 1.5.5.
+        if test -f "$gnulib_path"/gnulib-tool; then
+          # Since file already exists, assume submodule init already complete.
+          git submodule update -- "$gnulib_path" || exit $?
+        else
+          # Older git can't clone into an empty directory.
+          rmdir "$gnulib_path" 2>/dev/null
+          git clone --reference "$GNULIB_SRCDIR" \
+            "$(git_modules_config submodule.gnulib.url)" "$gnulib_path" \
+            && git submodule init -- "$gnulib_path" \
+            && git submodule update -- "$gnulib_path" \
+            || exit $?
+        fi
+      fi
+      GNULIB_SRCDIR=$gnulib_path
+    fi
+    ;;
+  esac
+
+  if test -d "$GNULIB_SRCDIR"/.git && test -n "$GNULIB_REVISION" \
+     && ! git_modules_config submodule.gnulib.url >/dev/null; then
+    (cd "$GNULIB_SRCDIR" && git checkout "$GNULIB_REVISION") || cleanup_gnulib
+  fi
+
+  # $GNULIB_SRCDIR now points to the version of gnulib to use, and
+  # we no longer need to use git or $gnulib_path below here.
+
+  if $bootstrap_sync; then
+    cmp -s "$0" "$GNULIB_SRCDIR/build-aux/bootstrap" || {
+      echo "$0: updating bootstrap and restarting..."
+      case $(sh -c 'echo "$1"' -- a) in
+        a) ignored=--;;
+        *) ignored=ignored;;
+      esac
+      exec sh -c \
+        'cp "$1" "$2" && shift && exec "${CONFIG_SHELL-/bin/sh}" "$@"' \
+        $ignored "$GNULIB_SRCDIR/build-aux/bootstrap" \
+        "$0" "$@" --no-bootstrap-sync
+    }
+  fi
+
+  gnulib_tool=$GNULIB_SRCDIR/gnulib-tool
+  <$gnulib_tool || exit $?
+fi
+
+# Get translations.
+
+download_po_files() {
+  subdir=$1
+  domain=$2
+  echo "$me: getting translations into $subdir for $domain..."
+  cmd=$(printf "$po_download_command_format" "$subdir" "$domain")
+  eval "$cmd"
+}
+
+# Mirror .po files to $po_dir/.reference and copy only the new
+# or modified ones into $po_dir.  Also update $po_dir/LINGUAS.
+# Note po files that exist locally only are left in $po_dir but will
+# not be included in LINGUAS and hence will not be distributed.
+update_po_files() {
+  # Directory containing primary .po files.
+  # Overwrite them only when we're sure a .po file is new.
+  po_dir=$1
+  domain=$2
+
+  # Mirror *.po files into this dir.
+  # Usually contains *.s1 checksum files.
+  ref_po_dir="$po_dir/.reference"
+
+  test -d $ref_po_dir || mkdir $ref_po_dir || return
+  download_po_files $ref_po_dir $domain \
+    && ls "$ref_po_dir"/*.po 2>/dev/null |
+      sed 's|.*/||; s|\.po$||' > "$po_dir/LINGUAS" || return
+
+  langs=$(cd $ref_po_dir && echo *.po | sed 's/\.po//g')
+  test "$langs" = '*' && langs=x
+  for po in $langs; do
+    case $po in x) continue;; esac
+    new_po="$ref_po_dir/$po.po"
+    cksum_file="$ref_po_dir/$po.s1"
+    if ! test -f "$cksum_file" ||
+        ! test -f "$po_dir/$po.po" ||
+        ! $SHA1SUM -c "$cksum_file" < "$new_po" > /dev/null 2>&1; then
+      echo "$me: updated $po_dir/$po.po..."
+      cp "$new_po" "$po_dir/$po.po" \
+          && $SHA1SUM < "$new_po" > "$cksum_file" || return
+    fi
+  done
+}
+
+case $SKIP_PO in
+'')
+  if test -d po; then
+    update_po_files po $package || exit
+  fi
+
+  if test -d runtime-po; then
+    update_po_files runtime-po $package-runtime || exit
+  fi;;
+esac
+
+symlink_to_dir()
+{
+  src=$1/$2
+  dst=${3-$2}
+
+  test -f "$src" && {
+
+    # If the destination directory doesn't exist, create it.
+    # This is required at least for "lib/uniwidth/cjk.h".
+    dst_dir=$(dirname "$dst")
+    if ! test -d "$dst_dir"; then
+      mkdir -p "$dst_dir"
+
+      # If we've just created a directory like lib/uniwidth,
+      # tell version control system(s) it's ignorable.
+      # FIXME: for now, this does only one level
+      parent=$(dirname "$dst_dir")
+      for dot_ig in x $vc_ignore; do
+        test $dot_ig = x && continue
+        ig=$parent/$dot_ig
+        insert_vc_ignore $ig "${dst_dir##*/}"
+      done
+    fi
+
+    if $copy; then
+      {
+        test ! -h "$dst" || {
+          echo "$me: rm -f $dst" &&
+          rm -f "$dst"
+        }
+      } &&
+      test -f "$dst" &&
+      cmp -s "$src" "$dst" || {
+        echo "$me: cp -fp $src $dst" &&
+        cp -fp "$src" "$dst"
+      }
+    else
+      # Leave any existing symlink alone, if it already points to the source,
+      # so that broken build tools that care about symlink times
+      # aren't confused into doing unnecessary builds.  Conversely, if the
+      # existing symlink's timestamp is older than the source, make it afresh,
+      # so that broken tools aren't confused into skipping needed builds.  See
+      # <https://lists.gnu.org/r/bug-gnulib/2011-05/msg00326.html>.
+      test -h "$dst" &&
+      src_ls=$(ls -diL "$src" 2>/dev/null) && set $src_ls && src_i=$1 &&
+      dst_ls=$(ls -diL "$dst" 2>/dev/null) && set $dst_ls && dst_i=$1 &&
+      test "$src_i" = "$dst_i" &&
+      both_ls=$(ls -dt "$src" "$dst") &&
+      test "X$both_ls" = "X$dst$nl$src" || {
+        dot_dots=
+        case $src in
+        /*) ;;
+        *)
+          case /$dst/ in
+          *//* | */../* | */./* | /*/*/*/*/*/)
+             die "invalid symlink calculation: $src -> $dst";;
+          /*/*/*/*/)    dot_dots=../../../;;
+          /*/*/*/)      dot_dots=../../;;
+          /*/*/)        dot_dots=../;;
+          esac;;
+        esac
+
+        echo "$me: ln -fs $dot_dots$src $dst" &&
+        ln -fs "$dot_dots$src" "$dst"
+      }
+    fi
+  }
+}
+
+version_controlled_file() {
+  parent=$1
+  file=$2
+  if test -d .git; then
+    git rm -n "$file" > /dev/null 2>&1
+  elif test -d .svn; then
+    svn log -r HEAD "$file" > /dev/null 2>&1
+  elif test -d CVS; then
+    grep -F "/${file##*/}/" "$parent/CVS/Entries" 2>/dev/null |
+             grep '^/[^/]*/[0-9]' > /dev/null
+  else
+    warn_ "no version control for $file?"
+    false
+  fi
+}
+
+# NOTE: we have to be careful to run both autopoint and libtoolize
+# before gnulib-tool, since gnulib-tool is likely to provide newer
+# versions of files "installed" by these two programs.
+# Then, *after* gnulib-tool (see below), we have to be careful to
+# run autoreconf in such a way that it does not run either of these
+# two just-pre-run programs.
+
+# Import from gettext.
+with_gettext=yes
+grep '^[        ]*AM_GNU_GETTEXT_VERSION(' configure.ac >/dev/null || \
+    with_gettext=no
+
+if test $with_gettext = yes || test $use_libtool = 1; then
+
+  tempbase=.bootstrap$$
+  trap "rm -f $tempbase.0 $tempbase.1" 1 2 13 15
+
+  > $tempbase.0 > $tempbase.1 &&
+  find . ! -type d -print | sort > $tempbase.0 || exit
+
+  if test $with_gettext = yes; then
+    # Released autopoint has the tendency to install macros that have been
+    # obsoleted in current gnulib, so run this before gnulib-tool.
+    echo "$0: $AUTOPOINT --force"
+    $AUTOPOINT --force || exit
+  fi
+
+  # Autoreconf runs aclocal before libtoolize, which causes spurious
+  # warnings if the initial aclocal is confused by the libtoolized
+  # (or worse out-of-date) macro directory.
+  # libtoolize 1.9b added the --install option; but we support back
+  # to libtoolize 1.5.22, where the install action was default.
+  if test $use_libtool = 1; then
+    install=
+    case $($LIBTOOLIZE --help) in
+      *--install*) install=--install ;;
+    esac
+    echo "running: $LIBTOOLIZE $install --copy"
+    $LIBTOOLIZE $install --copy
+  fi
+
+  find . ! -type d -print | sort >$tempbase.1
+  old_IFS=$IFS
+  IFS=$nl
+  for file in $(comm -13 $tempbase.0 $tempbase.1); do
+    IFS=$old_IFS
+    parent=${file%/*}
+    version_controlled_file "$parent" "$file" || {
+      for dot_ig in x $vc_ignore; do
+        test $dot_ig = x && continue
+        ig=$parent/$dot_ig
+        insert_vc_ignore "$ig" "${file##*/}"
+      done
+    }
+  done
+  IFS=$old_IFS
+
+  rm -f $tempbase.0 $tempbase.1
+  trap - 1 2 13 15
+fi
+
+# Import from gnulib.
+
+if $use_gnulib; then
+  gnulib_tool_options="\
+   --no-changelog\
+   --aux-dir=$build_aux\
+   --doc-base=$doc_base\
+   --lib=$gnulib_name\
+   --m4-base=$m4_base/\
+   --source-base=$source_base/\
+   --tests-base=$tests_base\
+   --local-dir=$local_gl_dir\
+   $gnulib_tool_option_extras\
+  "
+  if test $use_libtool = 1; then
+    case "$gnulib_tool_options " in
+      *' --libtool '*) ;;
+      *) gnulib_tool_options="$gnulib_tool_options --libtool" ;;
+    esac
+  fi
+  echo "$0: $gnulib_tool $gnulib_tool_options --import ..."
+  $gnulib_tool $gnulib_tool_options --import $gnulib_modules \
+    || die "gnulib-tool failed"
+
+  for file in $gnulib_files; do
+    symlink_to_dir "$GNULIB_SRCDIR" $file \
+      || die "failed to symlink $file"
+  done
+fi
+
+bootstrap_post_import_hook \
+  || die "bootstrap_post_import_hook failed"
+
+# Don't proceed if there are uninitialized submodules.  In particular,
+# the next step will remove dangling links, which might be links into
+# uninitialized submodules.
+#
+# Uninitialized submodules are listed with an initial dash.
+if $use_git && git submodule | grep '^-' >/dev/null; then
+  die "some git submodules are not initialized. "     \
+      "Run 'git submodule update --init' and bootstrap again."
+fi
+
+# Remove any dangling symlink matching "*.m4" or "*.[ch]" in some
+# gnulib-populated directories.  Such .m4 files would cause aclocal to fail.
+# The following requires GNU find 4.2.3 or newer.  Considering the usual
+# portability constraints of this script, that may seem a very demanding
+# requirement, but it should be ok.  Ignore any failure, which is fine,
+# since this is only a convenience to help developers avoid the relatively
+# unusual case in which a symlinked-to .m4 file is git-removed from gnulib
+# between successive runs of this script.
+find "$m4_base" "$source_base" \
+  -depth \( -name '*.m4' -o -name '*.[ch]' \) \
+  -type l -xtype l -delete > /dev/null 2>&1
+
+# Invoke autoreconf with --force --install to ensure upgrades of tools
+# such as ylwrap.
+AUTORECONFFLAGS="--verbose --install --force -I $m4_base $ACLOCAL_FLAGS"
+
+# Some systems (RHEL 5) are using ancient autotools, for which the
+# --no-recursive option had not been invented.  Detect that lack and
+# omit the option when it's not supported.  FIXME in 2017: remove this
+# hack when RHEL 5 autotools are updated, or when they become irrelevant.
+case $($AUTORECONF --help) in
+  *--no-recursive*) AUTORECONFFLAGS="$AUTORECONFFLAGS --no-recursive";;
+esac
+
+# Tell autoreconf not to invoke autopoint or libtoolize; they were run above.
+echo "running: AUTOPOINT=true LIBTOOLIZE=true $AUTORECONF $AUTORECONFFLAGS"
+AUTOPOINT=true LIBTOOLIZE=true $AUTORECONF $AUTORECONFFLAGS \
+  || die "autoreconf failed"
+
+# Get some extra files from gnulib, overriding existing files.
+for file in $gnulib_extra_files; do
+  case $file in
+  */INSTALL) dst=INSTALL;;
+  build-aux/*) dst=$build_aux/${file#build-aux/};;
+  *) dst=$file;;
+  esac
+  symlink_to_dir "$GNULIB_SRCDIR" $file $dst \
+    || die "failed to symlink $file"
+done
+
+if test $with_gettext = yes; then
+  # Create gettext configuration.
+  echo "$0: Creating po/Makevars from po/Makevars.template ..."
+  rm -f po/Makevars
+  sed '
+    /^EXTRA_LOCALE_CATEGORIES *=/s/=.*/= '"$EXTRA_LOCALE_CATEGORIES"'/
+    /^COPYRIGHT_HOLDER *=/s/=.*/= '"$COPYRIGHT_HOLDER"'/
+    /^MSGID_BUGS_ADDRESS *=/s|=.*|= '"$MSGID_BUGS_ADDRESS"'|
+    /^XGETTEXT_OPTIONS *=/{
+      s/$/ \\/
+      a\
+          '"$XGETTEXT_OPTIONS"' $${end_of_xgettext_options+}
+    }
+  ' po/Makevars.template >po/Makevars \
+    || die 'cannot generate po/Makevars'
+
+  # If the 'gettext' module is in use, grab the latest Makefile.in.in.
+  # If only the 'gettext-h' module is in use, assume autopoint already
+  # put the correct version of this file into place.
+  case $gnulib_modules in
+  *gettext-h*) ;;
+  *gettext*)
+    cp $GNULIB_SRCDIR/build-aux/po/Makefile.in.in po/Makefile.in.in \
+      || die "cannot create po/Makefile.in.in"
+    ;;
+  esac
+
+  if test -d runtime-po; then
+    # Similarly for runtime-po/Makevars, but not quite the same.
+    rm -f runtime-po/Makevars
+    sed '
+      /^DOMAIN *=.*/s/=.*/= '"$package"'-runtime/
+      /^subdir *=.*/s/=.*/= runtime-po/
+      /^MSGID_BUGS_ADDRESS *=/s/=.*/= bug-'"$package"'@gnu.org/
+      /^XGETTEXT_OPTIONS *=/{
+        s/$/ \\/
+        a\
+            '"$XGETTEXT_OPTIONS_RUNTIME"' $${end_of_xgettext_options+}
+      }
+    ' po/Makevars.template >runtime-po/Makevars \
+    || die 'cannot generate runtime-po/Makevars'
+
+    # Copy identical files from po to runtime-po.
+    (cd po && cp -p Makefile.in.in *-quot *.header *.sed *.sin ../runtime-po)
+  fi
+fi
+
+bootstrap_epilogue
+
+echo "$0: done.  Now you can run './configure'."
+
+# Local variables:
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/deps/lightning/bootstrap.conf b/deps/lightning/bootstrap.conf
new file mode 100644 (file)
index 0000000..423491b
--- /dev/null
@@ -0,0 +1,76 @@
+# Bootstrap configuration.
+
+# Copyright (C) 2006-2020 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# gnulib modules used by this package.
+gnulib_modules="
+"
+
+# gnulib library name.
+gnulib_name=libgnu
+
+# directories.
+source_base=gnulib-lib
+doc_base=gnulib-doc
+
+# Additional xgettext options to use.  Use "\\\newline" to break lines.
+XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
+ --from-code=UTF-8\\\
+ --flag=asprintf:2:c-format --flag=vasprintf:2:c-format\\\
+ --flag=asnprintf:3:c-format --flag=vasnprintf:3:c-format\\\
+ --flag=wrapf:1:c-format\\\
+'
+
+# If "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]"
+# appears in configure.ac, exclude some unnecessary files.
+# Without grep's -E option (not portable enough, pre-configure),
+# the following test is ugly.  Also, this depends on the existence
+# of configure.ac, not the obsolescent-named configure.in.  But if
+# you're using this infrastructure, you should care about such things.
+
+gettext_external=0
+grep '^[        ]*AM_GNU_GETTEXT(external\>' configure.ac > /dev/null &&
+  gettext_external=1
+grep '^[        ]*AM_GNU_GETTEXT(\[external\]' configure.ac > /dev/null &&
+  gettext_external=1
+
+if test $gettext_external = 1; then
+  # Gettext supplies these files, but we don't need them since
+  # we don't have an intl subdirectory.
+  excluded_files='
+      m4/glibc2.m4
+      m4/intdiv0.m4
+      m4/lcmessage.m4
+      m4/lock.m4
+      m4/printf-posix.m4
+      m4/size_max.m4
+      m4/uintmax_t.m4
+      m4/ulonglong.m4
+      m4/visibility.m4
+      m4/xsize.m4
+  '
+fi
+
+# Build prerequisites
+buildreq="\
+autoconf   2.59
+automake   1.9.6
+git        1.5.5
+tar        -
+"
+
+bootstrap_sync=true
index e04f7ac..583bb12 100644 (file)
@@ -14,7 +14,7 @@
 # License for more details.
 #
 
-AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
 
 check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list
 
@@ -91,6 +91,7 @@ EXTRA_DIST =                          \
        call.tst        call.ok         \
        float.tst       float.ok        \
        jmpr.tst        jmpr.ok         \
+       live.tst        live.ok         \
        put.tst         put.ok          \
        qalu.inc                        \
        qalu_mul.tst    qalu_mul.ok     \
@@ -125,7 +126,7 @@ base_TESTS =                                \
        fop_abs fop_sqrt                \
        varargs stack                   \
        clobber carry call              \
-       float jmpr put                  \
+       float jmpr live put             \
        qalu_mul qalu_div               \
        range ranger ret tramp          \
        va_list
@@ -315,4 +316,3 @@ CLEANFILES = $(TESTS)
 
 debug:         lightning
        $(LIBTOOL) --mode=execute gdb lightning
-
index e60ef05..cc4a5c5 100644 (file)
@@ -3791,11 +3791,11 @@ execute(int argc, char *argv[])
     function = jit_emit();
     if (flag_verbose > 1 || flag_disasm) {
        jit_print();
-       fprintf(stdout, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
+       fprintf(stderr, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
     }
     if (flag_verbose > 0 || flag_disasm) {
        jit_disassemble();
-       fprintf(stdout, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
+       fprintf(stderr, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
     }
 
     jit_clear_state();
diff --git a/deps/lightning/check/live.ok b/deps/lightning/check/live.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/live.tst b/deps/lightning/check/live.tst
new file mode 100644 (file)
index 0000000..f082a66
--- /dev/null
@@ -0,0 +1,33 @@
+.data  16
+ok:
+.c     "ok"
+
+.code
+       jmpi main
+
+check_r0:
+       prolog
+        movi %v0 exit_r0
+       movi %r0 1
+       movi %r2 10
+       // on x86 this changes %rax; other arches could use %r0 as a temporary
+       divi %r1 %r2 3
+        live %r0
+       // %r0 must still be 1
+        jmpr %v0
+exit_r0:
+        retr %r0
+       epilog
+
+main:
+       prolog
+       calli check_r0
+       retval %r1
+       beqi r0_ok %r1 1
+       calli @abort
+r0_ok:
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
index 9261255..1f2c6b4 100644 (file)
@@ -7,26 +7,29 @@ dnl GNU lightning is free software; you can redistribute it and/or modify it
 dnl under the terms of the GNU Lesser General Public License as published
 dnl by the Free Software Foundation; either version 3, or (at your option)
 dnl any later version.
-dnl 
+dnl
 dnl GNU lightning is distributed in the hope that it will be useful, but
 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 dnl or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 dnl License for more details.
 dnl
 
-AC_PREREQ(2.57)
+AC_PREREQ(2.64)
 AC_INIT([GNU lightning], 2.1.3, pcpa@gnu.org, lightning)
+AC_CONFIG_AUX_DIR([build-aux])
 AC_CANONICAL_TARGET
 AC_CONFIG_SRCDIR([Makefile.am])
 AM_INIT_AUTOMAKE([dist-bzip2])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-AC_CONFIG_MACRO_DIR(m4)
+AC_CONFIG_MACRO_DIR([m4])
 
-AC_CONFIG_HEADERS(config.h)
+AC_CONFIG_HEADERS([config.h])
 
 AC_PROG_CC
-AC_PROG_INSTALL
-AC_PROG_LIBTOOL
+gl_EARLY
+AM_PROG_AR
+LT_INIT
+gl_INIT
 
 case "$target_cpu" in
     ia64)
@@ -291,6 +294,7 @@ fi
 
 AC_OUTPUT([Makefile
           lightning.pc
+           gnulib-lib/Makefile
           doc/Makefile
           include/Makefile
           include/lightning/Makefile
index 20d4456..c46e0ab 100644 (file)
@@ -14,7 +14,7 @@
 # License for more details.
 #
 
-AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
 
 info_TEXINFOS = lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
index 4aef7a3..c14f635 100644 (file)
@@ -30,6 +30,7 @@ dynamic code generation.
 * The instruction set::     The RISC instruction set used in GNU lightning
 * GNU lightning examples::  GNU lightning's examples
 * Reentrancy::              Re-entrant usage of GNU lightning
+* Registers::               Accessing the whole register file
 * Customizations::          Advanced code generation customizations
 * Acknowledgements::        Acknowledgements for GNU lightning
 @end menu
@@ -43,8 +44,8 @@ This document describes @value{TOPIC} the @lightning{} library for
 dynamic code generation.
 @end iftex
 
-Dynamic code generation is the generation of machine code 
-at runtime. It is typically used to strip a layer of interpretation 
+Dynamic code generation is the generation of machine code
+at runtime. It is typically used to strip a layer of interpretation
 by allowing compilation to occur at runtime.  One of the most
 well-known applications of dynamic code generation is perhaps that
 of interpreters that compile source code to an intermediate bytecode
@@ -53,7 +54,7 @@ approach effectively combines the portability of bytecode
 representations with the speed of machine code.  Another common
 application of dynamic code generation is in the field of hardware
 simulators and binary emulators, which can use the same techniques
-to translate simulated instructions to the instructions of the 
+to translate simulated instructions to the instructions of the
 underlying machine.
 
 Yet other applications come to mind: for example, windowing
@@ -68,7 +69,7 @@ retargeted for each machine; in addition, coding a run-time code
 generator is a tedious and error-prone task more than a difficult one.
 
 @lightning{} provides a portable, fast and easily retargetable dynamic
-code generation system. 
+code generation system.
 
 To be portable, @lightning{} abstracts over current architectures'
 quirks and unorthogonalities.  The interface that it exposes to is that
@@ -695,6 +696,51 @@ in = arg                     @rem{! Same as above}
      ret                     @rem{! Return to caller}
 @end example
 
+@item Register liveness
+
+During code generation, @lightning{} occasionally needs scratch registers
+or needs to use architecture-defined registers.  For that, @lightning{}
+internally maintains register liveness information.
+
+In the following example, @code{qdivr} will need special registers like
+@code{R0} on some architectures.  As @lightning{} understands that
+@code{R0} is used in the subsequent instruction, it will create
+save/restore code for @code{R0} if needed.
+
+@example
+...
+qdivr V0, V1, V2, V3
+movr  V3, R0
+...
+@end example
+
+The same is not true in the example that follows.  Here, @code{R0} is
+not alive after the division operation because @code{R0} is neither an
+argument register nor a callee-save register.  Thus, no save/restore
+code for @code{R0} will be created.
+
+@example
+...
+qdivr V0, V1, V2, V3
+jmpr  R1
+...
+@end example
+
+The @code{live} instruction can be used to mark a register as live
+after that point, as in the following example.  Here, @code{R0} will be
+preserved across the division.
+
+@example
+...
+qdivr V0, V1, V2, V3
+live R0
+jmpr R1
+...
+@end example
+
+The @code{live} instruction is useful at code entry and exit points,
+like after and before a @code{callr} instruction.
+
 @item Trampolines, continuations and tail call optimization
 
 Frequently it is required to generate jit code that must jump to
@@ -1005,9 +1051,9 @@ programmer would write):
       mov  %i0, %g2                 retl
       inc  %g2                      inc %o0
       mov  %g2, %i0
-      restore 
-      retl 
-      nop 
+      restore
+      retl
+      nop
 @end example
 In this case, @lightning{} introduces overhead to create a register
 window (not knowing that the procedure is a leaf procedure) and to
@@ -1480,7 +1526,7 @@ implementation and to avoid needing the user to keep adding an extra
 argument to every call, as multiple jit states generating code in
 paralell should be very uncommon.
 
-@section Registers
+@node Registers
 @chapter Accessing the whole register file
 
 As mentioned earlier in this chapter, all @lightning{} back-ends are
@@ -1496,6 +1542,33 @@ constant.  Of course, expressions like @code{JIT_R0} and
 @code{JIT_R(0)} denote the same register, and likewise for
 integer callee-saved, or floating-point, registers.
 
+@section Scratch registers
+
+For operations that @lightning{} does not support directly, like storing
+a literal in memory, @code{jit_get_reg} and @code{jit_unget_reg} can be used to
+acquire and release a scratch register as in the following pattern:
+
+@example
+    jit_int32_t reg = jit_get_reg (jit_class_gpr);
+    jit_movi (reg, immediate);
+    jit_stxi (offsetof (some_struct, some_field), JIT_V0, reg);
+    jit_unget_reg (reg);
+@end example
+
+As @code{jit_get_reg} and @code{jit_unget_reg} may generate spills and
+reloads but don't follow branches, the code between both must be in
+the same basic block and must not contain any branches, unlike the
+following (bad) example.
+
+@example
+    jit_int32_t reg = jit_get_reg (jit_class_gpr);
+    jit_ldxi (reg, JIT_V0, offset);
+    jump = jit_bnei (reg, V0);
+    jit_movr (JIT_V1, reg);
+    jit_patch (jump);
+    jit_unget_reg (reg);
+@end example
+
 @node Customizations
 @chapter Customizations
 
diff --git a/deps/lightning/doc/version.texi b/deps/lightning/doc/version.texi
deleted file mode 100644 (file)
index b4a0c22..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-@set UPDATED 3 October 2017
-@set UPDATED-MONTH October 2017
-@set EDITION 2.1.3
-@set VERSION 2.1.3
diff --git a/deps/lightning/gnulib b/deps/lightning/gnulib
new file mode 160000 (submodule)
index 0000000..e54b645
--- /dev/null
@@ -0,0 +1 @@
+Subproject commit e54b645fc6b8422562327443bda575c65d931fbd
diff --git a/deps/lightning/gnulib-lib/.gitignore b/deps/lightning/gnulib-lib/.gitignore
new file mode 100644 (file)
index 0000000..d9f5394
--- /dev/null
@@ -0,0 +1,2 @@
+/Makefile.am
+/dummy.c
diff --git a/deps/lightning/include/lightning.h b/deps/lightning/include/lightning.h
deleted file mode 100644 (file)
index 4c7cac0..0000000
+++ /dev/null
@@ -1,1112 +0,0 @@
-/*
- * Copyright (C) 2012-2019  Free Software Foundation, Inc.
- *
- * This file is part of GNU lightning.
- *
- * GNU lightning is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU lightning is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- * License for more details.
- *
- * Authors:
- *     Paulo Cesar Pereira de Andrade
- */
-
-#ifndef _lightning_h
-#define _lightning_h
-
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-
-#if defined(__hpux) && defined(__hppa__)
-#  include <machine/param.h>
-#endif
-#if defined(__alpha__) && defined(__osf__)
-#  include <machine/endian.h>
-#endif
-
-#ifndef __WORDSIZE
-#  if defined(WORDSIZE)                                /* ppc darwin */
-#    define __WORDSIZE         WORDSIZE
-#  elif defined(__SIZEOF_POINTER__)            /* ppc aix */
-#    define __WORDSIZE         (__SIZEOF_POINTER__ << 3)
-#  elif defined(_ILP32)                                /* hppa hp-ux */
-#    define __WORDSIZE         32
-#  elif defined(_LP64)                         /* ia64 hp-ux (with cc +DD64) */
-#    define __WORDSIZE         64
-#  elif defined(_MIPS_SZPTR)                   /* mips irix */
-#    if _MIPS_SZPTR == 32
-#      define __WORDSIZE       32
-#    else
-#      define __WORDSIZE       64
-#    endif
-#  else                                                /* From FreeBSD 9.1 stdint.h */
-#    if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \
-       (UINTPTR_MAX == UINT64_MAX)
-#      define __WORDSIZE       64
-#    else
-#      define __WORDSIZE       32
-#    endif
-#  endif
-#endif
-#ifndef __LITTLE_ENDIAN
-#  if defined(LITTLE_ENDIAN)                   /* ppc darwin */
-#    define __LITTLE_ENDIAN    LITTLE_ENDIAN
-#  elif defined(__ORDER_LITTLE_ENDIAN__)       /* ppc aix */
-#    define __LITTLE_ENDIAN    __ORDER_LITTLE_ENDIAN__
-#  else
-#    define __LITTLE_ENDIAN    1234
-#  endif
-#endif
-#ifndef __BIG_ENDIAN
-#  if defined(BIG_ENDIAN)                      /* ppc darwin */
-#    define __BIG_ENDIAN       BIG_ENDIAN
-#  elif defined(__ORDER_BIG_ENDIAN__)          /* ppc aix */
-#    define __BIG_ENDIAN       __ORDER_BIG_ENDIAN__
-#  else
-#    define __BIG_ENDIAN       4321
-#  endif
-#endif
-#ifndef __BYTE_ORDER
-#  if defined(BYTE_ORDER)                      /* ppc darwin */
-#    define __BYTE_ORDER       BYTE_ORDER
-#  elif defined(__BYTE_ORDER__)                        /* ppc aix */
-#    define __BYTE_ORDER       __BYTE_ORDER__
-#  elif defined(_BIG_ENDIAN)                   /* hppa hp-ux */
-#    define __BYTE_ORDER       __BIG_ENDIAN
-#  elif defined(__BIG_ENDIAN__)                        /* ia64 hp-ux */
-#    define __BYTE_ORDER       __BIG_ENDIAN
-#  elif defined(__i386__)                      /* 32 bit x86 solaris */
-#    define __BYTE_ORDER       __LITTLE_ENDIAN
-#  elif defined(__x86_64__)                    /* 64 bit x86 solaris */
-#    define __BYTE_ORDER       __LITTLE_ENDIAN
-#  elif defined(__MIPSEB)                      /* mips irix */
-#    define __BYTE_ORDER       __BIG_ENDIAN
-#  else
-#    error cannot figure __BYTE_ORDER
-#  endif
-#endif
-
-typedef signed char            jit_int8_t;
-typedef unsigned char          jit_uint8_t;
-typedef signed short           jit_int16_t;
-typedef unsigned short         jit_uint16_t;
-typedef signed int             jit_int32_t;
-typedef unsigned int           jit_uint32_t;
-#if __WORDSIZE == 32
-typedef signed long long       jit_int64_t;
-typedef unsigned long long     jit_uint64_t;
-typedef jit_int32_t            jit_word_t;
-typedef jit_uint32_t           jit_uword_t;
-#elif (_WIN32 && !__CYGWIN__)
-typedef signed long long       jit_int64_t;
-typedef unsigned long long     jit_uint64_t;
-typedef jit_int64_t            jit_word_t;
-typedef jit_uint64_t           jit_uword_t;
-#else
-typedef signed long            jit_int64_t;
-typedef unsigned long          jit_uint64_t;
-typedef jit_int64_t            jit_word_t;
-typedef jit_uint64_t           jit_uword_t;
-#endif
-typedef float                  jit_float32_t;
-typedef double                 jit_float64_t;
-typedef void*                  jit_pointer_t;
-typedef jit_int32_t            jit_bool_t;
-typedef jit_int32_t            jit_gpr_t;
-typedef jit_int32_t            jit_fpr_t;
-
-#if defined(__i386__) || defined(__x86_64__)
-#  include <lightning/jit_x86.h>
-#elif defined(__mips__)
-#  include <lightning/jit_mips.h>
-#elif defined(__arm__)
-#  include <lightning/jit_arm.h>
-#elif defined(__powerpc__)
-#  include <lightning/jit_ppc.h>
-#elif defined(__sparc__)
-#  include <lightning/jit_sparc.h>
-#elif defined(__ia64__)
-#  include <lightning/jit_ia64.h>
-#elif defined(__hppa__)
-#  include <lightning/jit_hppa.h>
-#elif defined(__aarch64__)
-#  include <lightning/jit_aarch64.h>
-#elif defined(__s390__) || defined(__s390x__)
-#  include <lightning/jit_s390.h>
-#elif defined(__alpha__)
-#  include <lightning/jit_alpha.h>
-#elif defined(__riscv)
-#  include <lightning/jit_riscv.h>
-#elif defined(__sh__)
-#  include <lightning/jit_sh.h>
-#endif
-
-#define jit_flag_node          0x0001  /* patch node not absolute */
-#define jit_flag_patch         0x0002  /* jump already patched */
-#define jit_flag_data          0x0004  /* data in the constant pool */
-#define jit_flag_use           0x0008  /* do not remove marker label */
-#define jit_flag_synth         0x0010  /* synthesized instruction */
-#define jit_flag_head          0x1000  /* label reached by normal flow */
-#define jit_flag_varargs       0x2000  /* call{r,i} to varargs function */
-
-#define JIT_R(index)           jit_r(index)
-#define JIT_V(index)           jit_v(index)
-#define JIT_A(index)           jit_a(index)
-#define JIT_F(index)           jit_f(index)
-#define JIT_R_NUM              jit_r_num()
-#define JIT_V_NUM              jit_v_num()
-#define JIT_A_NUM              jit_a_num()
-#define JIT_F_NUM              jit_f_num()
-
-#define JIT_DISABLE_DATA       1       /* force synthesize of constants */
-#define JIT_DISABLE_NOTE       2       /* disable debug info generation */
-
-#define jit_class_chk          0x02000000      /* just checking */
-#define jit_class_arg          0x08000000      /* argument register */
-#define jit_class_sav          0x10000000      /* callee save */
-#define jit_class_gpr          0x20000000      /* general purpose */
-#define jit_class_fpr          0x40000000      /* float */
-#define jit_class(reg)         ((reg) & 0xffff0000)
-#define jit_regno(reg)         ((reg) & 0x00007fff)
-
-typedef struct jit_node                jit_node_t;
-typedef struct jit_state       jit_state_t;
-
-typedef enum {
-    jit_code_data,
-#define jit_live(u)            jit_new_node_w(jit_code_live, u)
-#define jit_align(u)           jit_new_node_w(jit_code_align, u)
-    jit_code_live,             jit_code_align,
-    jit_code_save,             jit_code_load,
-#define jit_name(u)            _jit_name(_jit,u)
-    jit_code_name,
-#define jit_note(u, v)         _jit_note(_jit, u, v)
-#define jit_label()            _jit_label(_jit)
-#define jit_forward()          _jit_forward(_jit)
-#define jit_indirect()         _jit_indirect(_jit)
-#define jit_link(u)            _jit_link(_jit,u)
-    jit_code_note,             jit_code_label,
-
-#define jit_prolog()           _jit_prolog(_jit)
-    jit_code_prolog,
-
-#define jit_ellipsis()         _jit_ellipsis(_jit)
-    jit_code_ellipsis,
-#define jit_va_push(u)         _jit_va_push(_jit,u)
-    jit_code_va_push,
-#define jit_allocai(u)         _jit_allocai(_jit,u)
-#define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
-    jit_code_allocai,          jit_code_allocar,
-
-#define jit_arg()              _jit_arg(_jit)
-    jit_code_arg,
-#define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
-#define jit_getarg_uc(u,v)     _jit_getarg_uc(_jit,u,v)
-    jit_code_getarg_c,         jit_code_getarg_uc,
-#define jit_getarg_s(u,v)      _jit_getarg_s(_jit,u,v)
-#define jit_getarg_us(u,v)     _jit_getarg_us(_jit,u,v)
-    jit_code_getarg_s,         jit_code_getarg_us,
-#define jit_getarg_i(u,v)      _jit_getarg_i(_jit,u,v)
-#if __WORDSIZE == 32
-#  define jit_getarg(u,v)      jit_getarg_i(u,v)
-#  define jit_getarg_ui(u,v)   jit_getarg_i(u,v)
-#else
-#  define jit_getarg(u,v)      jit_getarg_l(u,v)
-#  define jit_getarg_ui(u,v)   _jit_getarg_ui(_jit,u,v)
-#  define jit_getarg_l(u,v)    _jit_getarg_l(_jit,u,v)
-#endif
-    jit_code_getarg_i,         jit_code_getarg_ui,
-    jit_code_getarg_l,
-#  define jit_putargr(u,v)     _jit_putargr(_jit,u,v)
-#  define jit_putargi(u,v)     _jit_putargi(_jit,u,v)
-    jit_code_putargr,          jit_code_putargi,
-
-#define jit_va_start(u)                jit_new_node_w(jit_code_va_start, u)
-    jit_code_va_start,
-#define jit_va_arg(u, v)       jit_new_node_ww(jit_code_va_arg, u, v)
-#define jit_va_arg_d(u, v)     jit_new_node_ww(jit_code_va_arg_d, u, v)
-    jit_code_va_arg,           jit_code_va_arg_d,
-#define jit_va_end(u)          jit_new_node_w(jit_code_va_end, u)
-    jit_code_va_end,
-
-#define jit_addr(u,v,w)                jit_new_node_www(jit_code_addr,u,v,w)
-#define jit_addi(u,v,w)                jit_new_node_www(jit_code_addi,u,v,w)
-    jit_code_addr,             jit_code_addi,
-#define jit_addcr(u,v,w)       jit_new_node_www(jit_code_addcr,u,v,w)
-#define jit_addci(u,v,w)       jit_new_node_www(jit_code_addci,u,v,w)
-    jit_code_addcr,            jit_code_addci,
-#define jit_addxr(u,v,w)       jit_new_node_www(jit_code_addxr,u,v,w)
-#define jit_addxi(u,v,w)       jit_new_node_www(jit_code_addxi,u,v,w)
-    jit_code_addxr,            jit_code_addxi,
-#define jit_subr(u,v,w)                jit_new_node_www(jit_code_subr,u,v,w)
-#define jit_subi(u,v,w)                jit_new_node_www(jit_code_subi,u,v,w)
-    jit_code_subr,             jit_code_subi,
-#define jit_subcr(u,v,w)       jit_new_node_www(jit_code_subcr,u,v,w)
-#define jit_subci(u,v,w)       jit_new_node_www(jit_code_subci,u,v,w)
-    jit_code_subcr,            jit_code_subci,
-#define jit_subxr(u,v,w)       jit_new_node_www(jit_code_subxr,u,v,w)
-#define jit_subxi(u,v,w)       jit_new_node_www(jit_code_subxi,u,v,w)
-    jit_code_subxr,            jit_code_subxi,
-#define jit_rsbr(u,v,w)                jit_subr(u,w,v)
-#define jit_rsbi(u,v,w)                jit_new_node_www(jit_code_rsbi,u,v,w)
-    jit_code_rsbi,
-#define jit_mulr(u,v,w)                jit_new_node_www(jit_code_mulr,u,v,w)
-#define jit_muli(u,v,w)                jit_new_node_www(jit_code_muli,u,v,w)
-    jit_code_mulr,             jit_code_muli,
-#define jit_qmulr(l,h,v,w)     jit_new_node_qww(jit_code_qmulr,l,h,v,w)
-#define jit_qmuli(l,h,v,w)     jit_new_node_qww(jit_code_qmuli,l,h,v,w)
-    jit_code_qmulr,            jit_code_qmuli,
-#define jit_qmulr_u(l,h,v,w)   jit_new_node_qww(jit_code_qmulr_u,l,h,v,w)
-#define jit_qmuli_u(l,h,v,w)   jit_new_node_qww(jit_code_qmuli_u,l,h,v,w)
-    jit_code_qmulr_u,          jit_code_qmuli_u,
-#define jit_divr(u,v,w)                jit_new_node_www(jit_code_divr,u,v,w)
-#define jit_divi(u,v,w)                jit_new_node_www(jit_code_divi,u,v,w)
-    jit_code_divr,             jit_code_divi,
-#define jit_divr_u(u,v,w)      jit_new_node_www(jit_code_divr_u,u,v,w)
-#define jit_divi_u(u,v,w)      jit_new_node_www(jit_code_divi_u,u,v,w)
-    jit_code_divr_u,           jit_code_divi_u,
-#define jit_qdivr(l,h,v,w)     jit_new_node_qww(jit_code_qdivr,l,h,v,w)
-#define jit_qdivi(l,h,v,w)     jit_new_node_qww(jit_code_qdivi,l,h,v,w)
-    jit_code_qdivr,            jit_code_qdivi,
-#define jit_qdivr_u(l,h,v,w)   jit_new_node_qww(jit_code_qdivr_u,l,h,v,w)
-#define jit_qdivi_u(l,h,v,w)   jit_new_node_qww(jit_code_qdivi_u,l,h,v,w)
-    jit_code_qdivr_u,          jit_code_qdivi_u,
-#define jit_remr(u,v,w)                jit_new_node_www(jit_code_remr,u,v,w)
-#define jit_remi(u,v,w)                jit_new_node_www(jit_code_remi,u,v,w)
-    jit_code_remr,             jit_code_remi,
-#define jit_remr_u(u,v,w)      jit_new_node_www(jit_code_remr_u,u,v,w)
-#define jit_remi_u(u,v,w)      jit_new_node_www(jit_code_remi_u,u,v,w)
-    jit_code_remr_u,           jit_code_remi_u,
-
-#define jit_andr(u,v,w)                jit_new_node_www(jit_code_andr,u,v,w)
-#define jit_andi(u,v,w)                jit_new_node_www(jit_code_andi,u,v,w)
-    jit_code_andr,             jit_code_andi,
-#define jit_orr(u,v,w)         jit_new_node_www(jit_code_orr,u,v,w)
-#define jit_ori(u,v,w)         jit_new_node_www(jit_code_ori,u,v,w)
-    jit_code_orr,              jit_code_ori,
-#define jit_xorr(u,v,w)                jit_new_node_www(jit_code_xorr,u,v,w)
-#define jit_xori(u,v,w)                jit_new_node_www(jit_code_xori,u,v,w)
-    jit_code_xorr,             jit_code_xori,
-
-#define jit_lshr(u,v,w)                jit_new_node_www(jit_code_lshr,u,v,w)
-#define jit_lshi(u,v,w)                jit_new_node_www(jit_code_lshi,u,v,w)
-    jit_code_lshr,             jit_code_lshi,
-#define jit_rshr(u,v,w)                jit_new_node_www(jit_code_rshr,u,v,w)
-#define jit_rshi(u,v,w)                jit_new_node_www(jit_code_rshi,u,v,w)
-    jit_code_rshr,             jit_code_rshi,
-#define jit_rshr_u(u,v,w)      jit_new_node_www(jit_code_rshr_u,u,v,w)
-#define jit_rshi_u(u,v,w)      jit_new_node_www(jit_code_rshi_u,u,v,w)
-    jit_code_rshr_u,           jit_code_rshi_u,
-
-#define jit_negr(u,v)          jit_new_node_ww(jit_code_negr,u,v)
-#define jit_comr(u,v)          jit_new_node_ww(jit_code_comr,u,v)
-    jit_code_negr,             jit_code_comr,
-
-#define jit_ltr(u,v,w)         jit_new_node_www(jit_code_ltr,u,v,w)
-#define jit_lti(u,v,w)         jit_new_node_www(jit_code_lti,u,v,w)
-    jit_code_ltr,              jit_code_lti,
-#define jit_ltr_u(u,v,w)       jit_new_node_www(jit_code_ltr_u,u,v,w)
-#define jit_lti_u(u,v,w)       jit_new_node_www(jit_code_lti_u,u,v,w)
-    jit_code_ltr_u,            jit_code_lti_u,
-#define jit_ler(u,v,w)         jit_new_node_www(jit_code_ler,u,v,w)
-#define jit_lei(u,v,w)         jit_new_node_www(jit_code_lei,u,v,w)
-    jit_code_ler,              jit_code_lei,
-#define jit_ler_u(u,v,w)       jit_new_node_www(jit_code_ler_u,u,v,w)
-#define jit_lei_u(u,v,w)       jit_new_node_www(jit_code_lei_u,u,v,w)
-    jit_code_ler_u,            jit_code_lei_u,
-#define jit_eqr(u,v,w)         jit_new_node_www(jit_code_eqr,u,v,w)
-#define jit_eqi(u,v,w)         jit_new_node_www(jit_code_eqi,u,v,w)
-    jit_code_eqr,              jit_code_eqi,
-#define jit_ger(u,v,w)         jit_new_node_www(jit_code_ger,u,v,w)
-#define jit_gei(u,v,w)         jit_new_node_www(jit_code_gei,u,v,w)
-    jit_code_ger,              jit_code_gei,
-#define jit_ger_u(u,v,w)       jit_new_node_www(jit_code_ger_u,u,v,w)
-#define jit_gei_u(u,v,w)       jit_new_node_www(jit_code_gei_u,u,v,w)
-    jit_code_ger_u,            jit_code_gei_u,
-#define jit_gtr(u,v,w)         jit_new_node_www(jit_code_gtr,u,v,w)
-#define jit_gti(u,v,w)         jit_new_node_www(jit_code_gti,u,v,w)
-    jit_code_gtr,              jit_code_gti,
-#define jit_gtr_u(u,v,w)       jit_new_node_www(jit_code_gtr_u,u,v,w)
-#define jit_gti_u(u,v,w)       jit_new_node_www(jit_code_gti_u,u,v,w)
-    jit_code_gtr_u,            jit_code_gti_u,
-#define jit_ner(u,v,w)         jit_new_node_www(jit_code_ner,u,v,w)
-#define jit_nei(u,v,w)         jit_new_node_www(jit_code_nei,u,v,w)
-    jit_code_ner,              jit_code_nei,
-
-#define jit_movr(u,v)          jit_new_node_ww(jit_code_movr,u,v)
-#define jit_movi(u,v)          jit_new_node_ww(jit_code_movi,u,v)
-    jit_code_movr,             jit_code_movi,
-#define jit_extr_c(u,v)                jit_new_node_ww(jit_code_extr_c,u,v)
-#define jit_extr_uc(u,v)       jit_new_node_ww(jit_code_extr_uc,u,v)
-    jit_code_extr_c,           jit_code_extr_uc,
-#define jit_extr_s(u,v)                jit_new_node_ww(jit_code_extr_s,u,v)
-#define jit_extr_us(u,v)       jit_new_node_ww(jit_code_extr_us,u,v)
-    jit_code_extr_s,           jit_code_extr_us,
-#  define jit_extr_i(u,v)      jit_new_node_ww(jit_code_extr_i,u,v)
-#  define jit_extr_ui(u,v)     jit_new_node_ww(jit_code_extr_ui,u,v)
-    jit_code_extr_i,           jit_code_extr_ui,
-
-#define jit_htonr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
-#define jit_ntohr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
-    jit_code_htonr_us,
-#define jit_htonr_ui(u,v)      jit_new_node_ww(jit_code_htonr_ui,u,v)
-#define jit_ntohr_ui(u,v)      jit_new_node_ww(jit_code_htonr_ui,u,v)
-#if __WORDSIZE == 32
-#  define jit_htonr(u,v)       jit_new_node_ww(jit_code_htonr_ui,u,v)
-#  define jit_ntohr(u,v)       jit_new_node_ww(jit_code_htonr_ui,u,v)
-#else
-#  define jit_htonr_ul(u,v)    jit_new_node_ww(jit_code_htonr_ul,u,v)
-#  define jit_ntohr_ul(u,v)    jit_new_node_ww(jit_code_htonr_ul,u,v)
-#  define jit_htonr(u,v)       jit_new_node_ww(jit_code_htonr_ul,u,v)
-#  define jit_ntohr(u,v)       jit_new_node_ww(jit_code_htonr_ul,u,v)
-#endif
-    jit_code_htonr_ui,         jit_code_htonr_ul,
-
-#define jit_ldr_c(u,v)         jit_new_node_ww(jit_code_ldr_c,u,v)
-#define jit_ldi_c(u,v)         jit_new_node_wp(jit_code_ldi_c,u,v)
-    jit_code_ldr_c,            jit_code_ldi_c,
-#define jit_ldr_uc(u,v)                jit_new_node_ww(jit_code_ldr_uc,u,v)
-#define jit_ldi_uc(u,v)                jit_new_node_wp(jit_code_ldi_uc,u,v)
-    jit_code_ldr_uc,           jit_code_ldi_uc,
-#define jit_ldr_s(u,v)         jit_new_node_ww(jit_code_ldr_s,u,v)
-#define jit_ldi_s(u,v)         jit_new_node_wp(jit_code_ldi_s,u,v)
-    jit_code_ldr_s,            jit_code_ldi_s,
-#define jit_ldr_us(u,v)                jit_new_node_ww(jit_code_ldr_us,u,v)
-#define jit_ldi_us(u,v)                jit_new_node_wp(jit_code_ldi_us,u,v)
-    jit_code_ldr_us,           jit_code_ldi_us,
-#define jit_ldr_i(u,v)         jit_new_node_ww(jit_code_ldr_i,u,v)
-#define jit_ldi_i(u,v)         jit_new_node_wp(jit_code_ldi_i,u,v)
-    jit_code_ldr_i,            jit_code_ldi_i,
-#if __WORDSIZE == 32
-#  define jit_ldr(u,v)         jit_ldr_i(u,v)
-#  define jit_ldr_ui(u,v)      jit_ldr_i(u,v)
-#  define jit_ldi(u,v)         jit_ldi_i(u,v)
-#  define jit_ldi_ui(u,v)      jit_ldi_i(u,v)
-#else
-#  define jit_ldr(u,v)         jit_ldr_l(u,v)
-#  define jit_ldi(u,v)         jit_ldi_l(u,v)
-#  define jit_ldr_ui(u,v)      jit_new_node_ww(jit_code_ldr_ui,u,v)
-#  define jit_ldi_ui(u,v)      jit_new_node_wp(jit_code_ldi_ui,u,v)
-#define jit_ldr_l(u,v)         jit_new_node_ww(jit_code_ldr_l,u,v)
-#define jit_ldi_l(u,v)         jit_new_node_wp(jit_code_ldi_l,u,v)
-#endif
-    jit_code_ldr_ui,           jit_code_ldi_ui,
-    jit_code_ldr_l,            jit_code_ldi_l,
-
-#define jit_ldxr_c(u,v,w)      jit_new_node_www(jit_code_ldxr_c,u,v,w)
-#define jit_ldxi_c(u,v,w)      jit_new_node_www(jit_code_ldxi_c,u,v,w)
-    jit_code_ldxr_c,           jit_code_ldxi_c,
-#define jit_ldxr_uc(u,v,w)     jit_new_node_www(jit_code_ldxr_uc,u,v,w)
-#define jit_ldxi_uc(u,v,w)     jit_new_node_www(jit_code_ldxi_uc,u,v,w)
-    jit_code_ldxr_uc,          jit_code_ldxi_uc,
-#define jit_ldxr_s(u,v,w)      jit_new_node_www(jit_code_ldxr_s,u,v,w)
-#define jit_ldxi_s(u,v,w)      jit_new_node_www(jit_code_ldxi_s,u,v,w)
-    jit_code_ldxr_s,           jit_code_ldxi_s,
-#define jit_ldxr_us(u,v,w)     jit_new_node_www(jit_code_ldxr_us,u,v,w)
-#define jit_ldxi_us(u,v,w)     jit_new_node_www(jit_code_ldxi_us,u,v,w)
-    jit_code_ldxr_us,          jit_code_ldxi_us,
-#define jit_ldxr_i(u,v,w)      jit_new_node_www(jit_code_ldxr_i,u,v,w)
-#define jit_ldxi_i(u,v,w)      jit_new_node_www(jit_code_ldxi_i,u,v,w)
-    jit_code_ldxr_i,           jit_code_ldxi_i,
-#if __WORDSIZE == 32
-#  define jit_ldxr(u,v,w)      jit_ldxr_i(u,v,w)
-#  define jit_ldxr_ui(u,v,w)   jit_ldxr_i(u,v,w)
-#  define jit_ldxi(u,v,w)      jit_ldxi_i(u,v,w)
-#  define jit_ldxi_ui(u,v,w)   jit_ldxi_i(u,v,w)
-#else
-#  define jit_ldxr_ui(u,v,w)   jit_new_node_www(jit_code_ldxr_ui,u,v,w)
-#  define jit_ldxi_ui(u,v,w)   jit_new_node_www(jit_code_ldxi_ui,u,v,w)
-#  define jit_ldxr_l(u,v,w)    jit_new_node_www(jit_code_ldxr_l,u,v,w)
-#  define jit_ldxi_l(u,v,w)    jit_new_node_www(jit_code_ldxi_l,u,v,w)
-#  define jit_ldxr(u,v,w)      jit_ldxr_l(u,v,w)
-#  define jit_ldxi(u,v,w)      jit_ldxi_l(u,v,w)
-#endif
-    jit_code_ldxr_ui,          jit_code_ldxi_ui,
-    jit_code_ldxr_l,           jit_code_ldxi_l,
-
-#define jit_str_c(u,v)         jit_new_node_ww(jit_code_str_c,u,v)
-#define jit_sti_c(u,v)         jit_new_node_pw(jit_code_sti_c,u,v)
-    jit_code_str_c,            jit_code_sti_c,
-#define jit_str_s(u,v)         jit_new_node_ww(jit_code_str_s,u,v)
-#define jit_sti_s(u,v)         jit_new_node_pw(jit_code_sti_s,u,v)
-    jit_code_str_s,            jit_code_sti_s,
-#define jit_str_i(u,v)         jit_new_node_ww(jit_code_str_i,u,v)
-#define jit_sti_i(u,v)         jit_new_node_pw(jit_code_sti_i,u,v)
-    jit_code_str_i,            jit_code_sti_i,
-#if __WORDSIZE == 32
-#  define jit_str(u,v)         jit_str_i(u,v)
-#  define jit_sti(u,v)         jit_sti_i(u,v)
-#else
-#  define jit_str(u,v)         jit_str_l(u,v)
-#  define jit_sti(u,v)         jit_sti_l(u,v)
-#  define jit_str_l(u,v)       jit_new_node_ww(jit_code_str_l,u,v)
-#  define jit_sti_l(u,v)       jit_new_node_pw(jit_code_sti_l,u,v)
-#endif
-    jit_code_str_l,            jit_code_sti_l,
-
-#define jit_stxr_c(u,v,w)      jit_new_node_www(jit_code_stxr_c,u,v,w)
-#define jit_stxi_c(u,v,w)      jit_new_node_www(jit_code_stxi_c,u,v,w)
-    jit_code_stxr_c,           jit_code_stxi_c,
-#define jit_stxr_s(u,v,w)      jit_new_node_www(jit_code_stxr_s,u,v,w)
-#define jit_stxi_s(u,v,w)      jit_new_node_www(jit_code_stxi_s,u,v,w)
-    jit_code_stxr_s,           jit_code_stxi_s,
-#define jit_stxr_i(u,v,w)      jit_new_node_www(jit_code_stxr_i,u,v,w)
-#define jit_stxi_i(u,v,w)      jit_new_node_www(jit_code_stxi_i,u,v,w)
-    jit_code_stxr_i,           jit_code_stxi_i,
-#if __WORDSIZE == 32
-#  define jit_stxr(u,v,w)      jit_stxr_i(u,v,w)
-#  define jit_stxi(u,v,w)      jit_stxi_i(u,v,w)
-#else
-#  define jit_stxr(u,v,w)      jit_stxr_l(u,v,w)
-#  define jit_stxi(u,v,w)      jit_stxi_l(u,v,w)
-#  define jit_stxr_l(u,v,w)    jit_new_node_www(jit_code_stxr_l,u,v,w)
-#  define jit_stxi_l(u,v,w)    jit_new_node_www(jit_code_stxi_l,u,v,w)
-#endif
-    jit_code_stxr_l,           jit_code_stxi_l,
-
-#define jit_bltr(v,w)          jit_new_node_pww(jit_code_bltr,NULL,v,w)
-#define jit_blti(v,w)          jit_new_node_pww(jit_code_blti,NULL,v,w)
-    jit_code_bltr,             jit_code_blti,
-#define jit_bltr_u(v,w)                jit_new_node_pww(jit_code_bltr_u,NULL,v,w)
-#define jit_blti_u(v,w)                jit_new_node_pww(jit_code_blti_u,NULL,v,w)
-    jit_code_bltr_u,           jit_code_blti_u,
-#define jit_bler(v,w)          jit_new_node_pww(jit_code_bler,NULL,v,w)
-#define jit_blei(v,w)          jit_new_node_pww(jit_code_blei,NULL,v,w)
-    jit_code_bler,             jit_code_blei,
-#define jit_bler_u(v,w)                jit_new_node_pww(jit_code_bler_u,NULL,v,w)
-#define jit_blei_u(v,w)                jit_new_node_pww(jit_code_blei_u,NULL,v,w)
-    jit_code_bler_u,           jit_code_blei_u,
-#define jit_beqr(v,w)          jit_new_node_pww(jit_code_beqr,NULL,v,w)
-#define jit_beqi(v,w)          jit_new_node_pww(jit_code_beqi,NULL,v,w)
-    jit_code_beqr,             jit_code_beqi,
-#define jit_bger(v,w)          jit_new_node_pww(jit_code_bger,NULL,v,w)
-#define jit_bgei(v,w)          jit_new_node_pww(jit_code_bgei,NULL,v,w)
-    jit_code_bger,             jit_code_bgei,
-#define jit_bger_u(v,w)                jit_new_node_pww(jit_code_bger_u,NULL,v,w)
-#define jit_bgei_u(v,w)                jit_new_node_pww(jit_code_bgei_u,NULL,v,w)
-    jit_code_bger_u,           jit_code_bgei_u,
-#define jit_bgtr(v,w)          jit_new_node_pww(jit_code_bgtr,NULL,v,w)
-#define jit_bgti(v,w)          jit_new_node_pww(jit_code_bgti,NULL,v,w)
-    jit_code_bgtr,             jit_code_bgti,
-#define jit_bgtr_u(v,w)                jit_new_node_pww(jit_code_bgtr_u,NULL,v,w)
-#define jit_bgti_u(v,w)                jit_new_node_pww(jit_code_bgti_u,NULL,v,w)
-    jit_code_bgtr_u,           jit_code_bgti_u,
-#define jit_bner(v,w)          jit_new_node_pww(jit_code_bner,NULL,v,w)
-#define jit_bnei(v,w)          jit_new_node_pww(jit_code_bnei,NULL,v,w)
-    jit_code_bner,             jit_code_bnei,
-
-#define jit_bmsr(v,w)          jit_new_node_pww(jit_code_bmsr,NULL,v,w)
-#define jit_bmsi(v,w)          jit_new_node_pww(jit_code_bmsi,NULL,v,w)
-    jit_code_bmsr,             jit_code_bmsi,
-#define jit_bmcr(v,w)          jit_new_node_pww(jit_code_bmcr,NULL,v,w)
-#define jit_bmci(v,w)          jit_new_node_pww(jit_code_bmci,NULL,v,w)
-    jit_code_bmcr,             jit_code_bmci,
-
-#define jit_boaddr(v,w)                jit_new_node_pww(jit_code_boaddr,NULL,v,w)
-#define jit_boaddi(v,w)                jit_new_node_pww(jit_code_boaddi,NULL,v,w)
-    jit_code_boaddr,           jit_code_boaddi,
-#define jit_boaddr_u(v,w)      jit_new_node_pww(jit_code_boaddr_u,NULL,v,w)
-#define jit_boaddi_u(v,w)      jit_new_node_pww(jit_code_boaddi_u,NULL,v,w)
-    jit_code_boaddr_u,         jit_code_boaddi_u,
-#define jit_bxaddr(v,w)                jit_new_node_pww(jit_code_bxaddr,NULL,v,w)
-#define jit_bxaddi(v,w)                jit_new_node_pww(jit_code_bxaddi,NULL,v,w)
-    jit_code_bxaddr,           jit_code_bxaddi,
-#define jit_bxaddr_u(v,w)      jit_new_node_pww(jit_code_bxaddr_u,NULL,v,w)
-#define jit_bxaddi_u(v,w)      jit_new_node_pww(jit_code_bxaddi_u,NULL,v,w)
-    jit_code_bxaddr_u,         jit_code_bxaddi_u,
-#define jit_bosubr(v,w)                jit_new_node_pww(jit_code_bosubr,NULL,v,w)
-#define jit_bosubi(v,w)                jit_new_node_pww(jit_code_bosubi,NULL,v,w)
-    jit_code_bosubr,           jit_code_bosubi,
-#define jit_bosubr_u(v,w)      jit_new_node_pww(jit_code_bosubr_u,NULL,v,w)
-#define jit_bosubi_u(v,w)      jit_new_node_pww(jit_code_bosubi_u,NULL,v,w)
-    jit_code_bosubr_u,         jit_code_bosubi_u,
-#define jit_bxsubr(v,w)                jit_new_node_pww(jit_code_bxsubr,NULL,v,w)
-#define jit_bxsubi(v,w)                jit_new_node_pww(jit_code_bxsubi,NULL,v,w)
-    jit_code_bxsubr,           jit_code_bxsubi,
-#define jit_bxsubr_u(v,w)      jit_new_node_pww(jit_code_bxsubr_u,NULL,v,w)
-#define jit_bxsubi_u(v,w)      jit_new_node_pww(jit_code_bxsubi_u,NULL,v,w)
-    jit_code_bxsubr_u,         jit_code_bxsubi_u,
-
-#define jit_jmpr(u)            jit_new_node_w(jit_code_jmpr,u)
-#define jit_jmpi()             jit_new_node_p(jit_code_jmpi,NULL)
-    jit_code_jmpr,             jit_code_jmpi,
-#define jit_callr(u)           jit_new_node_w(jit_code_callr,u)
-#define jit_calli(u)           jit_new_node_p(jit_code_calli,u)
-    jit_code_callr,            jit_code_calli,
-
-#define jit_prepare()          _jit_prepare(_jit)
-    jit_code_prepare,
-#define jit_pushargr(u)                _jit_pushargr(_jit,u)
-#define jit_pushargi(u)                _jit_pushargi(_jit,u)
-    jit_code_pushargr,         jit_code_pushargi,
-#define jit_finishr(u)         _jit_finishr(_jit,u)
-#define jit_finishi(u)         _jit_finishi(_jit,u)
-    jit_code_finishr,          jit_code_finishi,
-#define jit_ret()              _jit_ret(_jit)
-    jit_code_ret,
-#define jit_retr(u)            _jit_retr(_jit,u)
-#define jit_reti(u)            _jit_reti(_jit,u)
-    jit_code_retr,             jit_code_reti,
-#define jit_retval_c(u)                _jit_retval_c(_jit,u)
-#define jit_retval_uc(u)       _jit_retval_uc(_jit,u)
-    jit_code_retval_c,         jit_code_retval_uc,
-#define jit_retval_s(u)                _jit_retval_s(_jit,u)
-#define jit_retval_us(u)       _jit_retval_us(_jit,u)
-    jit_code_retval_s,         jit_code_retval_us,
-#define jit_retval_i(u)                _jit_retval_i(_jit,u)
-#if __WORDSIZE == 32
-#  define jit_retval(u)                jit_retval_i(u)
-#  define jit_retval_ui(u)     jit_retval_i(u)
-#else
-#  define jit_retval(u)                jit_retval_l(u)
-#  define jit_retval_ui(u)     _jit_retval_ui(_jit,u)
-#  define jit_retval_l(u)      _jit_retval_l(_jit,u)
-#endif
-    jit_code_retval_i,         jit_code_retval_ui,
-    jit_code_retval_l,
-
-#define jit_epilog()           _jit_epilog(_jit)
-    jit_code_epilog,
-
-#define jit_arg_f()            _jit_arg_f(_jit)
-    jit_code_arg_f,
-#define jit_getarg_f(u,v)      _jit_getarg_f(_jit,u,v)
-    jit_code_getarg_f,
-#define jit_putargr_f(u,v)     _jit_putargr_f(_jit,u,v)
-#define jit_putargi_f(u,v)     _jit_putargi_f(_jit,u,v)
-    jit_code_putargr_f,                jit_code_putargi_f,
-
-#define jit_addr_f(u,v,w)      jit_new_node_www(jit_code_addr_f,u,v,w)
-#define jit_addi_f(u,v,w)      jit_new_node_wwf(jit_code_addi_f,u,v,w)
-    jit_code_addr_f,           jit_code_addi_f,
-#define jit_subr_f(u,v,w)      jit_new_node_www(jit_code_subr_f,u,v,w)
-#define jit_subi_f(u,v,w)      jit_new_node_wwf(jit_code_subi_f,u,v,w)
-    jit_code_subr_f,           jit_code_subi_f,
-#define jit_rsbr_f(u,v,w)      jit_subr_f(u,w,v)
-#define jit_rsbi_f(u,v,w)      jit_new_node_wwf(jit_code_rsbi_f,u,v,w)
-    jit_code_rsbi_f,
-#define jit_mulr_f(u,v,w)      jit_new_node_www(jit_code_mulr_f,u,v,w)
-#define jit_muli_f(u,v,w)      jit_new_node_wwf(jit_code_muli_f,u,v,w)
-    jit_code_mulr_f,           jit_code_muli_f,
-#define jit_divr_f(u,v,w)      jit_new_node_www(jit_code_divr_f,u,v,w)
-#define jit_divi_f(u,v,w)      jit_new_node_wwf(jit_code_divi_f,u,v,w)
-    jit_code_divr_f,           jit_code_divi_f,
-#define jit_negr_f(u,v)                jit_new_node_ww(jit_code_negr_f,u,v)
-#define jit_absr_f(u,v)                jit_new_node_ww(jit_code_absr_f,u,v)
-#define jit_sqrtr_f(u,v)       jit_new_node_ww(jit_code_sqrtr_f,u,v)
-    jit_code_negr_f,           jit_code_absr_f,        jit_code_sqrtr_f,
-
-#define jit_ltr_f(u,v,w)       jit_new_node_www(jit_code_ltr_f,u,v,w)
-#define jit_lti_f(u,v,w)       jit_new_node_wwf(jit_code_lti_f,u,v,w)
-    jit_code_ltr_f,            jit_code_lti_f,
-#define jit_ler_f(u,v,w)       jit_new_node_www(jit_code_ler_f,u,v,w)
-#define jit_lei_f(u,v,w)       jit_new_node_wwf(jit_code_lei_f,u,v,w)
-    jit_code_ler_f,            jit_code_lei_f,
-#define jit_eqr_f(u,v,w)       jit_new_node_www(jit_code_eqr_f,u,v,w)
-#define jit_eqi_f(u,v,w)       jit_new_node_wwf(jit_code_eqi_f,u,v,w)
-    jit_code_eqr_f,            jit_code_eqi_f,
-#define jit_ger_f(u,v,w)       jit_new_node_www(jit_code_ger_f,u,v,w)
-#define jit_gei_f(u,v,w)       jit_new_node_wwf(jit_code_gei_f,u,v,w)
-    jit_code_ger_f,            jit_code_gei_f,
-#define jit_gtr_f(u,v,w)       jit_new_node_www(jit_code_gtr_f,u,v,w)
-#define jit_gti_f(u,v,w)       jit_new_node_wwf(jit_code_gti_f,u,v,w)
-    jit_code_gtr_f,            jit_code_gti_f,
-#define jit_ner_f(u,v,w)       jit_new_node_www(jit_code_ner_f,u,v,w)
-#define jit_nei_f(u,v,w)       jit_new_node_wwf(jit_code_nei_f,u,v,w)
-    jit_code_ner_f,            jit_code_nei_f,
-#define jit_unltr_f(u,v,w)     jit_new_node_www(jit_code_unltr_f,u,v,w)
-#define jit_unlti_f(u,v,w)     jit_new_node_wwf(jit_code_unlti_f,u,v,w)
-    jit_code_unltr_f,          jit_code_unlti_f,
-#define jit_unler_f(u,v,w)     jit_new_node_www(jit_code_unler_f,u,v,w)
-#define jit_unlei_f(u,v,w)     jit_new_node_wwf(jit_code_unlei_f,u,v,w)
-    jit_code_unler_f,          jit_code_unlei_f,
-#define jit_uneqr_f(u,v,w)     jit_new_node_www(jit_code_uneqr_f,u,v,w)
-#define jit_uneqi_f(u,v,w)     jit_new_node_wwf(jit_code_uneqi_f,u,v,w)
-    jit_code_uneqr_f,          jit_code_uneqi_f,
-#define jit_unger_f(u,v,w)     jit_new_node_www(jit_code_unger_f,u,v,w)
-#define jit_ungei_f(u,v,w)     jit_new_node_wwf(jit_code_ungei_f,u,v,w)
-    jit_code_unger_f,          jit_code_ungei_f,
-#define jit_ungtr_f(u,v,w)     jit_new_node_www(jit_code_ungtr_f,u,v,w)
-#define jit_ungti_f(u,v,w)     jit_new_node_wwf(jit_code_ungti_f,u,v,w)
-    jit_code_ungtr_f,          jit_code_ungti_f,
-#define jit_ltgtr_f(u,v,w)     jit_new_node_www(jit_code_ltgtr_f,u,v,w)
-#define jit_ltgti_f(u,v,w)     jit_new_node_wwf(jit_code_ltgti_f,u,v,w)
-    jit_code_ltgtr_f,          jit_code_ltgti_f,
-#define jit_ordr_f(u,v,w)      jit_new_node_www(jit_code_ordr_f,u,v,w)
-#define jit_ordi_f(u,v,w)      jit_new_node_wwf(jit_code_ordi_f,u,v,w)
-    jit_code_ordr_f,           jit_code_ordi_f,
-#define jit_unordr_f(u,v,w)    jit_new_node_www(jit_code_unordr_f,u,v,w)
-#define jit_unordi_f(u,v,w)    jit_new_node_wwf(jit_code_unordi_f,u,v,w)
-    jit_code_unordr_f,         jit_code_unordi_f,
-
-#define jit_truncr_f_i(u,v)    jit_new_node_ww(jit_code_truncr_f_i,u,v)
-    jit_code_truncr_f_i,
-#if __WORDSIZE == 32
-#  define jit_truncr_f(u,v)    jit_truncr_f_i(u,v)
-#else
-#  define jit_truncr_f(u,v)    jit_truncr_f_l(u,v)
-#  define jit_truncr_f_l(u,v)  jit_new_node_ww(jit_code_truncr_f_l,u,v)
-#endif
-    jit_code_truncr_f_l,
-#define jit_extr_f(u,v)                jit_new_node_ww(jit_code_extr_f,u,v)
-#define jit_extr_d_f(u,v)      jit_new_node_ww(jit_code_extr_d_f,u,v)
-    jit_code_extr_f,           jit_code_extr_d_f,
-#define jit_movr_f(u,v)                jit_new_node_ww(jit_code_movr_f,u,v)
-#define jit_movi_f(u,v)                jit_new_node_wf(jit_code_movi_f,u,v)
-    jit_code_movr_f,           jit_code_movi_f,
-
-#define jit_ldr_f(u,v)         jit_new_node_ww(jit_code_ldr_f,u,v)
-#define jit_ldi_f(u,v)         jit_new_node_wp(jit_code_ldi_f,u,v)
-    jit_code_ldr_f,            jit_code_ldi_f,
-#define jit_ldxr_f(u,v,w)      jit_new_node_www(jit_code_ldxr_f,u,v,w)
-#define jit_ldxi_f(u,v,w)      jit_new_node_www(jit_code_ldxi_f,u,v,w)
-    jit_code_ldxr_f,           jit_code_ldxi_f,
-#define jit_str_f(u,v)         jit_new_node_ww(jit_code_str_f,u,v)
-#define jit_sti_f(u,v)         jit_new_node_pw(jit_code_sti_f,u,v)
-    jit_code_str_f,            jit_code_sti_f,
-#define jit_stxr_f(u,v,w)      jit_new_node_www(jit_code_stxr_f,u,v,w)
-#define jit_stxi_f(u,v,w)      jit_new_node_www(jit_code_stxi_f,u,v,w)
-    jit_code_stxr_f,           jit_code_stxi_f,
-
-#define jit_bltr_f(v,w)                jit_new_node_pww(jit_code_bltr_f,NULL,v,w)
-#define jit_blti_f(v,w)                jit_new_node_pwf(jit_code_blti_f,NULL,v,w)
-    jit_code_bltr_f,           jit_code_blti_f,
-#define jit_bler_f(v,w)                jit_new_node_pww(jit_code_bler_f,NULL,v,w)
-#define jit_blei_f(v,w)                jit_new_node_pwf(jit_code_blei_f,NULL,v,w)
-    jit_code_bler_f,           jit_code_blei_f,
-#define jit_beqr_f(v,w)                jit_new_node_pww(jit_code_beqr_f,NULL,v,w)
-#define jit_beqi_f(v,w)                jit_new_node_pwf(jit_code_beqi_f,NULL,v,w)
-    jit_code_beqr_f,           jit_code_beqi_f,
-#define jit_bger_f(v,w)                jit_new_node_pww(jit_code_bger_f,NULL,v,w)
-#define jit_bgei_f(v,w)                jit_new_node_pwf(jit_code_bgei_f,NULL,v,w)
-    jit_code_bger_f,           jit_code_bgei_f,
-#define jit_bgtr_f(v,w)                jit_new_node_pww(jit_code_bgtr_f,NULL,v,w)
-#define jit_bgti_f(v,w)                jit_new_node_pwf(jit_code_bgti_f,NULL,v,w)
-    jit_code_bgtr_f,           jit_code_bgti_f,
-#define jit_bner_f(v,w)                jit_new_node_pww(jit_code_bner_f,NULL,v,w)
-#define jit_bnei_f(v,w)                jit_new_node_pwf(jit_code_bnei_f,NULL,v,w)
-    jit_code_bner_f,           jit_code_bnei_f,
-#define jit_bunltr_f(v,w)      jit_new_node_pww(jit_code_bunltr_f,NULL,v,w)
-#define jit_bunlti_f(v,w)      jit_new_node_pwf(jit_code_bunlti_f,NULL,v,w)
-    jit_code_bunltr_f,         jit_code_bunlti_f,
-#define jit_bunler_f(v,w)      jit_new_node_pww(jit_code_bunler_f,NULL,v,w)
-#define jit_bunlei_f(v,w)      jit_new_node_pwf(jit_code_bunlei_f,NULL,v,w)
-    jit_code_bunler_f,         jit_code_bunlei_f,
-#define jit_buneqr_f(v,w)      jit_new_node_pww(jit_code_buneqr_f,NULL,v,w)
-#define jit_buneqi_f(v,w)      jit_new_node_pwf(jit_code_buneqi_f,NULL,v,w)
-    jit_code_buneqr_f,         jit_code_buneqi_f,
-#define jit_bunger_f(v,w)      jit_new_node_pww(jit_code_bunger_f,NULL,v,w)
-#define jit_bungei_f(v,w)      jit_new_node_pwf(jit_code_bungei_f,NULL,v,w)
-    jit_code_bunger_f,         jit_code_bungei_f,
-#define jit_bungtr_f(v,w)      jit_new_node_pww(jit_code_bungtr_f,NULL,v,w)
-#define jit_bungti_f(v,w)      jit_new_node_pwf(jit_code_bungti_f,NULL,v,w)
-    jit_code_bungtr_f,         jit_code_bungti_f,
-#define jit_bltgtr_f(v,w)      jit_new_node_pww(jit_code_bltgtr_f,NULL,v,w)
-#define jit_bltgti_f(v,w)      jit_new_node_pwf(jit_code_bltgti_f,NULL,v,w)
-    jit_code_bltgtr_f,         jit_code_bltgti_f,
-#define jit_bordr_f(v,w)       jit_new_node_pww(jit_code_bordr_f,NULL,v,w)
-#define jit_bordi_f(v,w)       jit_new_node_pwf(jit_code_bordi_f,NULL,v,w)
-    jit_code_bordr_f,          jit_code_bordi_f,
-#define jit_bunordr_f(v,w)     jit_new_node_pww(jit_code_bunordr_f,NULL,v,w)
-#define jit_bunordi_f(v,w)     jit_new_node_pwf(jit_code_bunordi_f,NULL,v,w)
-    jit_code_bunordr_f,                jit_code_bunordi_f,
-
-#define jit_pushargr_f(u)      _jit_pushargr_f(_jit,u)
-#define jit_pushargi_f(u)      _jit_pushargi_f(_jit,u)
-    jit_code_pushargr_f,       jit_code_pushargi_f,
-#define jit_retr_f(u)          _jit_retr_f(_jit,u)
-#define jit_reti_f(u)          _jit_reti_f(_jit,u)
-    jit_code_retr_f,           jit_code_reti_f,
-#define jit_retval_f(u)                _jit_retval_f(_jit,u)
-    jit_code_retval_f,
-
-#define jit_arg_d()            _jit_arg_d(_jit)
-    jit_code_arg_d,
-#define jit_getarg_d(u,v)      _jit_getarg_d(_jit,u,v)
-    jit_code_getarg_d,
-#define jit_putargr_d(u,v)     _jit_putargr_d(_jit,u,v)
-#define jit_putargi_d(u,v)     _jit_putargi_d(_jit,u,v)
-    jit_code_putargr_d,                jit_code_putargi_d,
-
-#define jit_addr_d(u,v,w)      jit_new_node_www(jit_code_addr_d,u,v,w)
-#define jit_addi_d(u,v,w)      jit_new_node_wwd(jit_code_addi_d,u,v,w)
-    jit_code_addr_d,           jit_code_addi_d,
-#define jit_subr_d(u,v,w)      jit_new_node_www(jit_code_subr_d,u,v,w)
-#define jit_subi_d(u,v,w)      jit_new_node_wwd(jit_code_subi_d,u,v,w)
-    jit_code_subr_d,           jit_code_subi_d,
-#define jit_rsbr_d(u,v,w)      jit_subr_d(u,w,v)
-#define jit_rsbi_d(u,v,w)      jit_new_node_wwd(jit_code_rsbi_d,u,v,w)
-    jit_code_rsbi_d,
-#define jit_mulr_d(u,v,w)      jit_new_node_www(jit_code_mulr_d,u,v,w)
-#define jit_muli_d(u,v,w)      jit_new_node_wwd(jit_code_muli_d,u,v,w)
-    jit_code_mulr_d,           jit_code_muli_d,
-#define jit_divr_d(u,v,w)      jit_new_node_www(jit_code_divr_d,u,v,w)
-#define jit_divi_d(u,v,w)      jit_new_node_wwd(jit_code_divi_d,u,v,w)
-    jit_code_divr_d,           jit_code_divi_d,
-
-#define jit_negr_d(u,v)                jit_new_node_ww(jit_code_negr_d,u,v)
-#define jit_absr_d(u,v)                jit_new_node_ww(jit_code_absr_d,u,v)
-#define jit_sqrtr_d(u,v)       jit_new_node_ww(jit_code_sqrtr_d,u,v)
-    jit_code_negr_d,           jit_code_absr_d,        jit_code_sqrtr_d,
-
-#define jit_ltr_d(u,v,w)       jit_new_node_www(jit_code_ltr_d,u,v,w)
-#define jit_lti_d(u,v,w)       jit_new_node_wwd(jit_code_lti_d,u,v,w)
-    jit_code_ltr_d,            jit_code_lti_d,
-#define jit_ler_d(u,v,w)       jit_new_node_www(jit_code_ler_d,u,v,w)
-#define jit_lei_d(u,v,w)       jit_new_node_wwd(jit_code_lei_d,u,v,w)
-    jit_code_ler_d,            jit_code_lei_d,
-#define jit_eqr_d(u,v,w)       jit_new_node_www(jit_code_eqr_d,u,v,w)
-#define jit_eqi_d(u,v,w)       jit_new_node_wwd(jit_code_eqi_d,u,v,w)
-    jit_code_eqr_d,            jit_code_eqi_d,
-#define jit_ger_d(u,v,w)       jit_new_node_www(jit_code_ger_d,u,v,w)
-#define jit_gei_d(u,v,w)       jit_new_node_wwd(jit_code_gei_d,u,v,w)
-    jit_code_ger_d,            jit_code_gei_d,
-#define jit_gtr_d(u,v,w)       jit_new_node_www(jit_code_gtr_d,u,v,w)
-#define jit_gti_d(u,v,w)       jit_new_node_wwd(jit_code_gti_d,u,v,w)
-    jit_code_gtr_d,            jit_code_gti_d,
-#define jit_ner_d(u,v,w)       jit_new_node_www(jit_code_ner_d,u,v,w)
-#define jit_nei_d(u,v,w)       jit_new_node_wwd(jit_code_nei_d,u,v,w)
-    jit_code_ner_d,            jit_code_nei_d,
-#define jit_unltr_d(u,v,w)     jit_new_node_www(jit_code_unltr_d,u,v,w)
-#define jit_unlti_d(u,v,w)     jit_new_node_wwd(jit_code_unlti_d,u,v,w)
-    jit_code_unltr_d,          jit_code_unlti_d,
-#define jit_unler_d(u,v,w)     jit_new_node_www(jit_code_unler_d,u,v,w)
-#define jit_unlei_d(u,v,w)     jit_new_node_wwd(jit_code_unlei_d,u,v,w)
-    jit_code_unler_d,          jit_code_unlei_d,
-#define jit_uneqr_d(u,v,w)     jit_new_node_www(jit_code_uneqr_d,u,v,w)
-#define jit_uneqi_d(u,v,w)     jit_new_node_wwd(jit_code_uneqi_d,u,v,w)
-    jit_code_uneqr_d,          jit_code_uneqi_d,
-#define jit_unger_d(u,v,w)     jit_new_node_www(jit_code_unger_d,u,v,w)
-#define jit_ungei_d(u,v,w)     jit_new_node_wwd(jit_code_ungei_d,u,v,w)
-    jit_code_unger_d,          jit_code_ungei_d,
-#define jit_ungtr_d(u,v,w)     jit_new_node_www(jit_code_ungtr_d,u,v,w)
-#define jit_ungti_d(u,v,w)     jit_new_node_wwd(jit_code_ungti_d,u,v,w)
-    jit_code_ungtr_d,          jit_code_ungti_d,
-#define jit_ltgtr_d(u,v,w)     jit_new_node_www(jit_code_ltgtr_d,u,v,w)
-#define jit_ltgti_d(u,v,w)     jit_new_node_wwd(jit_code_ltgti_d,u,v,w)
-    jit_code_ltgtr_d,          jit_code_ltgti_d,
-#define jit_ordr_d(u,v,w)      jit_new_node_www(jit_code_ordr_d,u,v,w)
-#define jit_ordi_d(u,v,w)      jit_new_node_wwd(jit_code_ordi_d,u,v,w)
-    jit_code_ordr_d,           jit_code_ordi_d,
-#define jit_unordr_d(u,v,w)    jit_new_node_www(jit_code_unordr_d,u,v,w)
-#define jit_unordi_d(u,v,w)    jit_new_node_wwd(jit_code_unordi_d,u,v,w)
-    jit_code_unordr_d,         jit_code_unordi_d,
-
-#define jit_truncr_d_i(u,v)    jit_new_node_ww(jit_code_truncr_d_i,u,v)
-    jit_code_truncr_d_i,
-#if __WORDSIZE == 32
-#  define jit_truncr_d(u,v)    jit_truncr_d_i(u,v)
-#else
-#  define jit_truncr_d(u,v)    jit_truncr_d_l(u,v)
-#  define jit_truncr_d_l(u,v)  jit_new_node_ww(jit_code_truncr_d_l,u,v)
-#endif
-    jit_code_truncr_d_l,
-#define jit_extr_d(u,v)                jit_new_node_ww(jit_code_extr_d,u,v)
-#define jit_extr_f_d(u,v)      jit_new_node_ww(jit_code_extr_f_d,u,v)
-    jit_code_extr_d,           jit_code_extr_f_d,
-#define jit_movr_d(u,v)                jit_new_node_ww(jit_code_movr_d,u,v)
-#define jit_movi_d(u,v)                jit_new_node_wd(jit_code_movi_d,u,v)
-    jit_code_movr_d,           jit_code_movi_d,
-
-#define jit_ldr_d(u,v)         jit_new_node_ww(jit_code_ldr_d,u,v)
-#define jit_ldi_d(u,v)         jit_new_node_wp(jit_code_ldi_d,u,v)
-    jit_code_ldr_d,            jit_code_ldi_d,
-#define jit_ldxr_d(u,v,w)      jit_new_node_www(jit_code_ldxr_d,u,v,w)
-#define jit_ldxi_d(u,v,w)      jit_new_node_www(jit_code_ldxi_d,u,v,w)
-    jit_code_ldxr_d,           jit_code_ldxi_d,
-#define jit_str_d(u,v)         jit_new_node_ww(jit_code_str_d,u,v)
-#define jit_sti_d(u,v)         jit_new_node_pw(jit_code_sti_d,u,v)
-    jit_code_str_d,            jit_code_sti_d,
-#define jit_stxr_d(u,v,w)      jit_new_node_www(jit_code_stxr_d,u,v,w)
-#define jit_stxi_d(u,v,w)      jit_new_node_www(jit_code_stxi_d,u,v,w)
-    jit_code_stxr_d,           jit_code_stxi_d,
-
-#define jit_bltr_d(v,w)                jit_new_node_pww(jit_code_bltr_d,NULL,v,w)
-#define jit_blti_d(v,w)                jit_new_node_pwd(jit_code_blti_d,NULL,v,w)
-    jit_code_bltr_d,           jit_code_blti_d,
-#define jit_bler_d(v,w)                jit_new_node_pww(jit_code_bler_d,NULL,v,w)
-#define jit_blei_d(v,w)                jit_new_node_pwd(jit_code_blei_d,NULL,v,w)
-    jit_code_bler_d,           jit_code_blei_d,
-#define jit_beqr_d(v,w)                jit_new_node_pww(jit_code_beqr_d,NULL,v,w)
-#define jit_beqi_d(v,w)                jit_new_node_pwd(jit_code_beqi_d,NULL,v,w)
-    jit_code_beqr_d,           jit_code_beqi_d,
-#define jit_bger_d(v,w)                jit_new_node_pww(jit_code_bger_d,NULL,v,w)
-#define jit_bgei_d(v,w)                jit_new_node_pwd(jit_code_bgei_d,NULL,v,w)
-    jit_code_bger_d,           jit_code_bgei_d,
-#define jit_bgtr_d(v,w)                jit_new_node_pww(jit_code_bgtr_d,NULL,v,w)
-#define jit_bgti_d(v,w)                jit_new_node_pwd(jit_code_bgti_d,NULL,v,w)
-    jit_code_bgtr_d,           jit_code_bgti_d,
-#define jit_bner_d(v,w)                jit_new_node_pww(jit_code_bner_d,NULL,v,w)
-#define jit_bnei_d(v,w)                jit_new_node_pwd(jit_code_bnei_d,NULL,v,w)
-    jit_code_bner_d,           jit_code_bnei_d,
-#define jit_bunltr_d(v,w)      jit_new_node_pww(jit_code_bunltr_d,NULL,v,w)
-#define jit_bunlti_d(v,w)      jit_new_node_pwd(jit_code_bunlti_d,NULL,v,w)
-    jit_code_bunltr_d,         jit_code_bunlti_d,
-#define jit_bunler_d(v,w)      jit_new_node_pww(jit_code_bunler_d,NULL,v,w)
-#define jit_bunlei_d(v,w)      jit_new_node_pwd(jit_code_bunlei_d,NULL,v,w)
-    jit_code_bunler_d,         jit_code_bunlei_d,
-#define jit_buneqr_d(v,w)      jit_new_node_pww(jit_code_buneqr_d,NULL,v,w)
-#define jit_buneqi_d(v,w)      jit_new_node_pwd(jit_code_buneqi_d,NULL,v,w)
-    jit_code_buneqr_d,         jit_code_buneqi_d,
-#define jit_bunger_d(v,w)      jit_new_node_pww(jit_code_bunger_d,NULL,v,w)
-#define jit_bungei_d(v,w)      jit_new_node_pwd(jit_code_bungei_d,NULL,v,w)
-    jit_code_bunger_d,         jit_code_bungei_d,
-#define jit_bungtr_d(v,w)      jit_new_node_pww(jit_code_bungtr_d,NULL,v,w)
-#define jit_bungti_d(v,w)      jit_new_node_pwd(jit_code_bungti_d,NULL,v,w)
-    jit_code_bungtr_d,         jit_code_bungti_d,
-#define jit_bltgtr_d(v,w)      jit_new_node_pww(jit_code_bltgtr_d,NULL,v,w)
-#define jit_bltgti_d(v,w)      jit_new_node_pwd(jit_code_bltgti_d,NULL,v,w)
-    jit_code_bltgtr_d,         jit_code_bltgti_d,
-#define jit_bordr_d(v,w)       jit_new_node_pww(jit_code_bordr_d,NULL,v,w)
-#define jit_bordi_d(v,w)       jit_new_node_pwd(jit_code_bordi_d,NULL,v,w)
-    jit_code_bordr_d,          jit_code_bordi_d,
-#define jit_bunordr_d(v,w)     jit_new_node_pww(jit_code_bunordr_d,NULL,v,w)
-#define jit_bunordi_d(v,w)     jit_new_node_pwd(jit_code_bunordi_d,NULL,v,w)
-    jit_code_bunordr_d,                jit_code_bunordi_d,
-
-#define jit_pushargr_d(u)      _jit_pushargr_d(_jit,u)
-#define jit_pushargi_d(u)      _jit_pushargi_d(_jit,u)
-    jit_code_pushargr_d,       jit_code_pushargi_d,
-#define jit_retr_d(u)          _jit_retr_d(_jit,u)
-#define jit_reti_d(u)          _jit_reti_d(_jit,u)
-    jit_code_retr_d,           jit_code_reti_d,
-#define jit_retval_d(u)                _jit_retval_d(_jit,u)
-    jit_code_retval_d,
-
-#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
-    jit_code_bswapr_us,
-#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
-    jit_code_bswapr_ui,
-#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
-    jit_code_bswapr_ul,
-#if __WORDSIZE == 32
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
-#else
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
-#endif
-
-    /* Special internal backend specific codes */
-    jit_code_movr_w_f,         jit_code_movr_ww_d,     /* w* -> f|d */
-#define jit_movr_w_f(u, v)     jit_new_node_ww(jit_code_movr_w_f, u, v)
-#define jit_movr_ww_d(u, v, w) jit_new_node_www(jit_code_movr_ww_d, u, v, w)
-    jit_code_movr_w_d,                                 /* w -> d */
-#define jit_movr_w_d(u, v)     jit_new_node_ww(jit_code_movr_w_d, u, v)
-
-    jit_code_movr_f_w,         jit_code_movi_f_w,      /* f|d -> w* */
-#define jit_movr_f_w(u, v)     jit_new_node_ww(jit_code_movr_f_w, u, v)
-#define jit_movi_f_w(u, v)     jit_new_node_wf(jit_code_movi_f_w, u, v)
-    jit_code_movr_d_ww,                jit_code_movi_d_ww,
-#define jit_movr_d_ww(u, v, w) jit_new_node_www(jit_code_movr_d_ww, u, v, w)
-#define jit_movi_d_ww(u, v, w) jit_new_node_wwd(jit_code_movi_d_ww, u, v, w)
-
-    jit_code_movr_d_w,         jit_code_movi_d_w,      /* d -> w */
-#define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
-#define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
-
-    jit_code_last_code
-} jit_code_t;
-
-typedef void* (*jit_alloc_func_ptr)    (size_t);
-typedef void* (*jit_realloc_func_ptr)  (void*, size_t);
-typedef void  (*jit_free_func_ptr)     (void*);
-
-/*
- * Prototypes
- */
-extern void init_jit(const char*);
-extern void finish_jit(void);
-
-extern jit_state_t *jit_new_state(void);
-#define jit_clear_state()      _jit_clear_state(_jit)
-extern void _jit_clear_state(jit_state_t*);
-#define jit_destroy_state()    _jit_destroy_state(_jit)
-extern void _jit_destroy_state(jit_state_t*);
-
-#define jit_address(node)      _jit_address(_jit, node)
-extern jit_pointer_t _jit_address(jit_state_t*, jit_node_t*);
-extern jit_node_t *_jit_name(jit_state_t*, const char*);
-extern jit_node_t *_jit_note(jit_state_t*, const char*, int);
-extern jit_node_t *_jit_label(jit_state_t*);
-extern jit_node_t *_jit_forward(jit_state_t*);
-extern jit_node_t *_jit_indirect(jit_state_t*);
-extern void _jit_link(jit_state_t*, jit_node_t*);
-#define jit_forward_p(u)       _jit_forward_p(_jit,u)
-extern jit_bool_t _jit_forward_p(jit_state_t*,jit_node_t*);
-#define jit_indirect_p(u)      _jit_indirect_p(_jit,u)
-extern jit_bool_t _jit_indirect_p(jit_state_t*,jit_node_t*);
-#define jit_target_p(u)                _jit_target_p(_jit,u)
-extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*);
-
-extern void _jit_prolog(jit_state_t*);
-
-extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
-extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
-extern void _jit_ellipsis(jit_state_t*);
-
-extern jit_node_t *_jit_arg(jit_state_t*);
-extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_getarg_us(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*);
-#if __WORDSIZE == 64
-extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
-#endif
-extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*);
-
-extern void _jit_prepare(jit_state_t*);
-extern void _jit_ellipsis(jit_state_t*);
-extern void _jit_va_push(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargr(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargi(jit_state_t*, jit_word_t);
-extern void _jit_finishr(jit_state_t*, jit_gpr_t);
-extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
-extern void _jit_ret(jit_state_t*);
-extern void _jit_retr(jit_state_t*, jit_gpr_t);
-extern void _jit_reti(jit_state_t*, jit_word_t);
-extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
-extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
-extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
-extern void _jit_retval_us(jit_state_t*, jit_gpr_t);
-extern void _jit_retval_i(jit_state_t*, jit_gpr_t);
-#if __WORDSIZE == 64
-extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
-extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
-#endif
-extern void _jit_epilog(jit_state_t*);
-
-#define jit_patch(u)           _jit_patch(_jit,u)
-extern void _jit_patch(jit_state_t*, jit_node_t*);
-#define jit_patch_at(u,v)      _jit_patch_at(_jit,u,v)
-extern void _jit_patch_at(jit_state_t*, jit_node_t*, jit_node_t*);
-#define jit_patch_abs(u,v)     _jit_patch_abs(_jit,u,v)
-extern void _jit_patch_abs(jit_state_t*, jit_node_t*, jit_pointer_t);
-#define jit_realize()          _jit_realize(_jit)
-extern void _jit_realize(jit_state_t*);
-#define jit_get_code(u)                _jit_get_code(_jit,u)
-extern jit_pointer_t _jit_get_code(jit_state_t*, jit_word_t*);
-#define jit_set_code(u,v)      _jit_set_code(_jit,u,v)
-extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t);
-#define jit_get_data(u,v)      _jit_get_data(_jit,u,v)
-extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*);
-#define jit_set_data(u,v,w)    _jit_set_data(_jit,u,v,w)
-extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t);
-#define jit_frame(u)           _jit_frame(_jit,u)
-extern void _jit_frame(jit_state_t*, jit_int32_t);
-#define jit_tramp(u)           _jit_tramp(_jit,u)
-extern void _jit_tramp(jit_state_t*, jit_int32_t);
-#define jit_emit()             _jit_emit(_jit)
-extern jit_pointer_t _jit_emit(jit_state_t*);
-
-#define jit_print()            _jit_print(_jit)
-extern void _jit_print(jit_state_t*);
-
-extern jit_node_t *_jit_arg_f(jit_state_t*);
-extern void _jit_getarg_f(jit_state_t*, jit_fpr_t, jit_node_t*);
-extern void _jit_putargr_f(jit_state_t*, jit_fpr_t, jit_node_t*);
-extern void _jit_putargi_f(jit_state_t*, jit_float32_t, jit_node_t*);
-extern void _jit_pushargr_f(jit_state_t*, jit_fpr_t);
-extern void _jit_pushargi_f(jit_state_t*, jit_float32_t);
-extern void _jit_retr_f(jit_state_t*, jit_fpr_t);
-extern void _jit_reti_f(jit_state_t*, jit_float32_t);
-extern void _jit_retval_f(jit_state_t*, jit_fpr_t);
-
-extern jit_node_t *_jit_arg_d(jit_state_t*);
-extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*);
-extern void _jit_putargr_d(jit_state_t*, jit_fpr_t, jit_node_t*);
-extern void _jit_putargi_d(jit_state_t*, jit_float64_t, jit_node_t*);
-extern void _jit_pushargr_d(jit_state_t*, jit_fpr_t);
-extern void _jit_pushargi_d(jit_state_t*, jit_float64_t);
-extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
-extern void _jit_reti_d(jit_state_t*, jit_float64_t);
-extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
-
-#define jit_new_node(c)                _jit_new_node(_jit,c)
-extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
-#define jit_new_node_w(c,u)    _jit_new_node_w(_jit,c,u)
-extern jit_node_t *_jit_new_node_w(jit_state_t*, jit_code_t,
-                                  jit_word_t);
-#define jit_new_node_f(c,u)    _jit_new_node_f(_jit,c,u)
-extern jit_node_t *_jit_new_node_f(jit_state_t*, jit_code_t,
-                                  jit_float32_t);
-#define jit_new_node_d(c,u)    _jit_new_node_d(_jit,c,u)
-extern jit_node_t *_jit_new_node_d(jit_state_t*, jit_code_t,
-                                  jit_float64_t);
-#define jit_new_node_p(c,u)    _jit_new_node_p(_jit,c,u)
-extern jit_node_t *_jit_new_node_p(jit_state_t*, jit_code_t,
-                                  jit_pointer_t);
-#define jit_new_node_ww(c,u,v) _jit_new_node_ww(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_ww(jit_state_t*,jit_code_t,
-                                   jit_word_t, jit_word_t);
-#define jit_new_node_wp(c,u,v) _jit_new_node_wp(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_wp(jit_state_t*,jit_code_t,
-                                   jit_word_t, jit_pointer_t);
-#define jit_new_node_fp(c,u,v) _jit_new_node_fp(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_fp(jit_state_t*,jit_code_t,
-                                   jit_float32_t, jit_pointer_t);
-#define jit_new_node_dp(c,u,v) _jit_new_node_dp(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_dp(jit_state_t*,jit_code_t,
-                                   jit_float64_t, jit_pointer_t);
-#define jit_new_node_pw(c,u,v) _jit_new_node_pw(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_pw(jit_state_t*,jit_code_t,
-                                   jit_pointer_t, jit_word_t);
-#define jit_new_node_wf(c,u,v) _jit_new_node_wf(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_wf(jit_state_t*, jit_code_t,
-                                   jit_word_t, jit_float32_t);
-#define jit_new_node_wd(c,u,v) _jit_new_node_wd(_jit,c,u,v)
-extern jit_node_t *_jit_new_node_wd(jit_state_t*, jit_code_t,
-                                   jit_word_t, jit_float64_t);
-#define jit_new_node_www(c,u,v,w) _jit_new_node_www(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
-                                    jit_word_t, jit_word_t, jit_word_t);
-#define jit_new_node_qww(c,l,h,v,w) _jit_new_node_qww(_jit,c,l,h,v,w)
-extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
-                                    jit_int32_t, jit_int32_t,
-                                    jit_word_t, jit_word_t);
-#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
-                                    jit_word_t, jit_word_t, jit_float32_t);
-#define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t,
-                                    jit_word_t, jit_word_t, jit_float64_t);
-#define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t,
-                                    jit_pointer_t, jit_word_t, jit_word_t);
-#define jit_new_node_pwf(c,u,v,w) _jit_new_node_pwf(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_pwf(jit_state_t*, jit_code_t,
-                                    jit_pointer_t, jit_word_t, jit_float32_t);
-#define jit_new_node_pwd(c,u,v,w) _jit_new_node_pwd(_jit,c,u,v,w)
-extern jit_node_t *_jit_new_node_pwd(jit_state_t*, jit_code_t,
-                                    jit_pointer_t, jit_word_t, jit_float64_t);
-
-#define jit_arg_register_p(u)          _jit_arg_register_p(_jit,u)
-extern jit_bool_t _jit_arg_register_p(jit_state_t*, jit_node_t*);
-#define jit_callee_save_p(u)           _jit_callee_save_p(_jit,u)
-extern jit_bool_t _jit_callee_save_p(jit_state_t*, jit_int32_t);
-#define jit_pointer_p(u)               _jit_pointer_p(_jit,u)
-extern jit_bool_t _jit_pointer_p(jit_state_t*,jit_pointer_t);
-
-#define jit_get_note(n,u,v,w)  _jit_get_note(_jit,n,u,v,w)
-extern jit_bool_t _jit_get_note(jit_state_t*,jit_pointer_t,char**,char**,int*);
-
-#define jit_disassemble()              _jit_disassemble(_jit)
-extern void _jit_disassemble(jit_state_t*);
-
-extern void jit_set_memory_functions(jit_alloc_func_ptr,
-                                    jit_realloc_func_ptr,
-                                    jit_free_func_ptr);
-extern void jit_get_memory_functions(jit_alloc_func_ptr*,
-                                    jit_realloc_func_ptr*,
-                                    jit_free_func_ptr*);
-
-#endif /* _lightning_h */
index 70560c9..e1d8a0a 100644 (file)
@@ -1011,6 +1011,12 @@ extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
 extern void _jit_reti_d(jit_state_t*, jit_float64_t);
 extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
 
+#define jit_get_reg(s)         _jit_get_reg(_jit,s)
+extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
+
+#define jit_unget_reg(r)       _jit_unget_reg(_jit,r)
+extern void _jit_unget_reg(jit_state_t*, jit_int32_t);
+
 #define jit_new_node(c)                _jit_new_node(_jit,c)
 extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
 #define jit_new_node_w(c,u)    _jit_new_node_w(_jit,c,u)
index eb7d783..45f3851 100644 (file)
@@ -34,9 +34,9 @@
 typedef enum {
 #define jit_r(i)               (_V0 + (i))
 #if NEW_ABI
-#  define jit_r_num()          7
+#  define jit_r_num()          8
 #else
-#  define jit_r_num()          11
+#  define jit_r_num()          12
 #endif
 #define jit_v(i)               (_S0 + (i))
 #define jit_v_num()            8
@@ -55,6 +55,7 @@ typedef enum {
 #  define JIT_R4               _T6
 #  define JIT_R5               _T7
 #  define JIT_R6               _T8
+#  define JIT_R7               _T9
 #else
 #  define JIT_R2               _T0
 #  define JIT_R3               _T1
@@ -65,6 +66,7 @@ typedef enum {
 #  define JIT_R8               _T6
 #  define JIT_R9               _T7
 #  define JIT_R10              _T8
+#  define JIT_R11              _T9
 #endif
     _V0, _V1,
 #if !NEW_ABI
index 8c05853..e00e74d 100644 (file)
@@ -672,14 +672,6 @@ _jit_regarg_set(jit_state_t*, jit_node_t*, jit_int32_t);
 extern void
 _jit_regarg_clr(jit_state_t*, jit_node_t*, jit_int32_t);
 
-#define jit_get_reg(s)         _jit_get_reg(_jit,s)
-extern jit_int32_t
-_jit_get_reg(jit_state_t*, jit_int32_t);
-
-#define jit_unget_reg(r)       _jit_unget_reg(_jit,r)
-extern void
-_jit_unget_reg(jit_state_t*, jit_int32_t);
-
 #define jit_save(reg)          _jit_save(_jit, reg)
 extern void
 _jit_save(jit_state_t*, jit_int32_t);
index f2ac2ba..7e9bd89 100644 (file)
@@ -14,7 +14,8 @@
 # License for more details.
 #
 
-AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 liblightning_LTLIBRARIES = liblightning.la
 liblightning_la_LDFLAGS = -version-info 1:0:0
 
index 8e8a9a0..5829464 100644 (file)
@@ -1610,8 +1610,7 @@ static void
 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
+    if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
        LDRSHI(r0, r1, i0 >> 1);
     else if (i0 > -256 && i0 < 0)
        LDURSH(r0, r1, i0 & 0x1ff);
@@ -1636,8 +1635,7 @@ static void
 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
+    if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
        LDRHI(r0, r1, i0 >> 1);
     else if (i0 > -256 && i0 < 0)
        LDURH(r0, r1, i0 & 0x1ff);
@@ -1656,8 +1654,7 @@ static void
 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
+    if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
        LDRSWI(r0, r1, i0 >> 2);
     else if (i0 > -256 && i0 < 0)
        LDURSW(r0, r1, i0 & 0x1ff);
@@ -1682,8 +1679,7 @@ static void
 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
+    if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
        LDRWI(r0, r1, i0 >> 2);
     else if (i0 > -256 && i0 < 0)
        LDURW(r0, r1, i0 & 0x1ff);
@@ -1702,8 +1698,7 @@ static void
 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 7));
-    if (i0 >= 0 && i0 <= 32767)
+    if (i0 >= 0 && i0 <= 32767 && !(i0 & 7))
        LDRI(r0, r1, i0 >> 3);
     else if (i0 > -256 && i0 < 0)
        LDUR(r0, r1, i0 & 0x1ff);
@@ -1775,8 +1770,7 @@ static void
 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
+    if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
        STRHI(r1, r0, i0 >> 1);
     else if (i0 > -256 && i0 < 0)
        STURH(r1, r0, i0 & 0x1ff);
@@ -1792,8 +1786,7 @@ static void
 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
+    if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
        STRWI(r1, r0, i0 >> 2);
     else if (i0 > -256 && i0 < 0)
        STURW(r1, r0, i0 & 0x1ff);
@@ -1809,8 +1802,7 @@ static void
 _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    assert(!(i0 & 7));
-    if (i0 >= 0 && i0 <= 32767)
+    if (i0 >= 0 && i0 <= 32767 && !(i0 & 7))
        STRI(r1, r0, i0 >> 3);
     else if (i0 > -256 && i0 < 0)
        STUR(r1, r0, i0 & 0x1ff);
index 15b91b9..27a3ed1 100644 (file)
@@ -50,7 +50,7 @@ static asymbol                         *disasm_synthetic;
 static long                      disasm_num_symbols;
 static long                      disasm_num_synthetic;
 static jit_state_t              *disasm_jit;
-#define disasm_stream            stdout
+static FILE                     *disasm_stream;
 #endif
 
 /*
@@ -73,6 +73,8 @@ jit_init_debug(const char *progname)
     }
     bfd_check_format(disasm_bfd, bfd_object);
     bfd_check_format(disasm_bfd, bfd_archive);
+    if (!disasm_stream)
+       disasm_stream = stderr;
     INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
 #  if defined(__i386__) || defined(__x86_64__)
     disasm_info.arch = bfd_arch_i386;
index 33b1c35..4d7f92d 100644 (file)
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
-#ifdef _WIN32
-#  include <mman.h>
-#else
-#  include <sys/mman.h>
-#endif
+#include <sys/mman.h>
 
 /*
  * Prototypes
index 8fb7fa1..b73f4b1 100644 (file)
@@ -107,6 +107,10 @@ typedef union {
 #  endif
 #  define can_sign_extend_short_p(im)  ((im) >= -32678 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+/*
+ * is_low_mask:  im is a run of 1 bits starting at bit 0 (im + 1 is a power
+ *               of two); the all-ones word is not matched.
+ * is_high_mask: im is a run of 1 bits reaching the top bit, i.e. adding its
+ *               lowest set bit wraps to zero.  The shifted constant is
+ *               unsigned long so shift counts of 31 or more are well defined
+ *               and the addition wraps instead of overflowing a signed type.
+ */
+#  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((im) + 1) == 1) : 0)
+#  define is_high_mask(im)             ((im) ? (__builtin_popcountl((im) + (1UL << __builtin_ctzl(im))) == 0) : 0)
+#  define masked_bits_count(im)                __builtin_popcountl(im)
+#  define unmasked_bits_count(im)      (__WORDSIZE - masked_bits_count(im))
 #  if __WORDSIZE == 32
 #    define can_sign_extend_int_p(im)  1
 #    define can_zero_extend_int_p(im)  1
@@ -340,8 +344,10 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define DSRLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSRLV)
 #  define DSRL(rd,rt,sa)               rrit(rt,rd,sa,MIPS_DSRL)
 #  define DSRL32(rd,rt,sa)             rrit(rt,rd,sa,MIPS_DSRL32)
-#  define INS(rt,rs,pos,size)          hrrrit(MIPS_SPECIAL3,rs,rt,pos,pos+size-1,MIPS_INS)
-#  define DINS(rt,rs,pos,size)         hrrrit(MIPS_SPECIAL3,rs,rt,pos,pos+size-1,MIPS_DINS)
+#  define INS(rt,rs,pos,size)          hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_INS)
+#  define DINS(rt,rs,pos,size)         hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_DINS)
+#  define EXT(rt,rs,pos,size)          hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_EXT)
+#  define DEXT(rt,rs,pos,size)         hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_DEXT)
 #  define ROTR(rd,rt,sa)               hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
 #  define DROTR(rd,rt,sa)              hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
 #  define MFHI(rd)                     rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
@@ -494,7 +500,8 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define xorr(r0,r1,r2)               XOR(r0,r1,r2)
 #  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define movr(r0,r1)                  orr(r0,r1,_ZERO_REGNO)
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movi(r0,i0)                  _movi(_jit,r0,i0)
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
@@ -1160,7 +1167,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_int32_t                reg;
     if (can_zero_extend_short_p(i0))
        ANDI(r0, r1, i0);
-    else {
+    else if (is_low_mask(i0)) {
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       if (masked_bits_count(i0) <= 32)
+           EXT(r0, r1, 0, masked_bits_count(i0));
+       else
+#endif
+       {
+               lshi(r0, r1, unmasked_bits_count(i0));
+               rshi_u(r0, r0, unmasked_bits_count(i0));
+       }
+    } else if (is_high_mask(i0)) {
+       rshi(r0, r1, unmasked_bits_count(i0));
+       lshi(r0, r0, unmasked_bits_count(i0));
+    } else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        AND(r0, r1, rn(reg));
@@ -1196,6 +1216,13 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+/*
+ * Copy register r1 into r0 by OR-ing r1 with _ZERO_REGNO.
+ * Nothing is emitted when source and destination are the same register.
+ */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1)
+       orr(r0, r1, _ZERO_REGNO);
+}
+
 static void
 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -2869,10 +2896,11 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
+    /* JALR is emitted first; the instruction that follows it is either the
+     * copy of r0 into _T9_REGNO or, when r0 already is _T9_REGNO, a NOP.
+     * NOTE(review): presumably that following instruction fills the branch
+     * delay slot, which saves one instruction over the old copy/JALR/NOP
+     * sequence -- confirm against the MIPS ISA manual. */
+    JALR(r0);
     if (r0 != _T9_REGNO)
        movr(_T9_REGNO, r0);
-    JALR(r0);
-    NOP(1);
+    else
+       NOP(1);
 }
 
 static void
index c4397ad..9e99771 100644 (file)
@@ -1160,7 +1160,11 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     ROTLWI(rn(reg), r1, 8);
     RLWIMI(rn(reg), r1, 24, 0, 7);
     RLWIMI(rn(reg), r1, 24, 16, 23);
+#  if __WORDSIZE == 64
     CLRLDI(r0, rn(reg), 32);
+#  else
+    MR(r0,rn(reg));
+#  endif
     jit_unget_reg(reg);
 }
 
index fc4ab87..c44623a 100644 (file)
 #include <lightning.h>
 #include <lightning/jit_private.h>
 
-#define print_chr(value)               fputc(value, stdout)
-#define print_hex(value)               fprintf(stdout, "0x%lx", value)
-#define print_dec(value)               fprintf(stdout, "%ld", value)
-#define print_flt(value)               fprintf(stdout, "%g", value)
-#define print_str(value)               fprintf(stdout, "%s", value)
-#define print_ptr(value)               fprintf(stdout, "%p", value)
+#define print_chr(value)               fputc(value, print_stream)
+#define print_hex(value)               fprintf(print_stream, "0x%lx", value)
+#define print_dec(value)               fprintf(print_stream, "%ld", value)
+#define print_flt(value)               fprintf(print_stream, "%g", value)
+#define print_str(value)               fprintf(print_stream, "%s", value)
+#define print_ptr(value)               fprintf(print_stream, "%p", value)
 #define print_reg(value)                                               \
     do {                                                               \
        if ((value) & jit_regno_patch)                                  \
  * Initialization
  */
 #include "jit_names.c"
+/*
+ * Initialization
+ */
+static FILE    *print_stream;
+
 
 /*
  * Implementation
@@ -54,6 +59,9 @@ _jit_print(jit_state_t *_jit)
 {
     jit_node_t         *node;
 
+    if (!print_stream)
+       print_stream = stderr;
+
     if ((node = _jitc->head)) {
        jit_print_node(node);
        for (node = node->next; node; node = node->next) {
@@ -280,12 +288,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
        case jit_code_name:
            print_chr(' ');
            if (node->v.p && _jitc->emit)
-               print_ptr(node->v.n->u.p);
+               print_str(node->v.n->u.p);
            break;
        case jit_code_note:
            print_chr(' ');
            if (node->v.p && _jitc->emit)
-               print_ptr(node->v.n->u.p);
+               print_str(node->v.n->u.p);
            if (node->v.p && _jitc->emit && node->w.w)
                print_chr(':');
            if (node->w.w)
index 4627783..547f36c 100644 (file)
@@ -661,10 +661,22 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 static void _callr(jit_state_t*, jit_int32_t);
 #  define calli(i0)                    _calli(_jit, i0)
 static jit_word_t _calli(jit_state_t*, jit_word_t);
+#  if __X64
+#    define calli_p(i0)                        _calli_p(_jit, i0)
+static jit_word_t _calli_p(jit_state_t*, jit_word_t);
+#  else
+#    define calli_p(i0)                        calli(i0)
+#  endif
 #  define jmpr(r0)                     _jmpr(_jit, r0)
 static void _jmpr(jit_state_t*, jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit, i0)
 static jit_word_t _jmpi(jit_state_t*, jit_word_t);
+#  if __X64
+#    define jmpi_p(i0)                 _jmpi_p(_jit, i0)
+static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
+#  else
+#    define jmpi_p(i0)                 jmpi(i0)
+#  endif
 #  define jmpsi(i0)                    _jmpsi(_jit, i0)
 static void _jmpsi(jit_state_t*, jit_uint8_t);
 #  define prolog(node)                 _prolog(_jit, node)
@@ -3411,27 +3423,41 @@ static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
+    /* Emit a call to address i0.  Under __X64 the relative form (opcode 0xe8
+     * followed by a 32-bit displacement) is used only when the displacement
+     * measured from the end of that 5-byte instruction fits in a signed
+     * 32-bit value; otherwise calli_p() is used.  Other builds always emit
+     * the relative form.  Returns the pc after the relative call, or the
+     * value returned by calli_p(). */
     jit_word_t         word;
+    jit_word_t         w;
 #if __X64
-    jit_int32_t                reg;
+    w = i0 - (_jit->pc.w + 5);
+    if ((jit_int32_t)w == w) {
+#endif
+       ic(0xe8);
+       w = i0 - (_jit->pc.w + 4);
+       ii(w);
+       word = _jit->pc.w;
+#if __X64
+    }
+    else
+       word = calli_p(i0);
+#endif
+    return (word);
+}
 
+#if __X64
+/*
+ * Call address i0 through a temporary register: the address is loaded with
+ * movi_p(), the call is made with callr(), and the register is released.
+ * Returns the value produced by movi_p().
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         word;
+    jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     word = movi_p(rn(reg), i0);
     callr(rn(reg));
     jit_unget_reg(reg);
-#else
-    jit_word_t         w;
-    ic(0xe8);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
-    word = _jit->pc.w;
-#endif
     return (word);
 }
+#endif
 
+/* Indirect jump through register r0: opcode 0xff with /4 in the ModRM reg
+ * field.  The rex prefix is now requested without the WIDE flag. */
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
+    rex(0, 0, _NOREG, _NOREG, r0);
     ic(0xff);
     mrm(0x03, 0x04, r7(r0));
 }
@@ -3439,13 +3465,38 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
+/*
+ * Emit a jump to address i0.  Under __X64 the relative form (opcode 0xe9
+ * followed by a 32-bit displacement) is used only when the displacement
+ * measured from the end of that 5-byte instruction fits in a signed 32-bit
+ * value; otherwise jmpi_p() is used.  Other builds always emit the relative
+ * form.  Returns the pc after the relative jump, or the value returned by
+ * jmpi_p().
+ */
 static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
+    jit_word_t         word;
     jit_word_t         w;
-    ic(0xe9);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
-    return (_jit->pc.w);
+#if __X64
+    w = i0 - (_jit->pc.w + 5);
+    if ((jit_int32_t)w == w) {
+#endif
+       ic(0xe9);
+       w = i0 - (_jit->pc.w + 4);
+       ii(w);
+       word = _jit->pc.w;
+#if __X64
+    }
+    else
+       word = jmpi_p(i0);
+#endif
+    return (word);
 }
 
+#if __X64
+/*
+ * Jump to address i0 through a temporary register: the address is loaded
+ * with movi_p(), the jump is made with jmpr(), and the register is released.
+ * The register is requested with jit_class_nospill in addition to
+ * jit_class_gpr.  Returns the value produced by movi_p().
+ */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         word;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    word = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+    return (word);
+}
+#endif
+
 static void
 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
 {
@@ -3830,6 +3881,7 @@ _patch_at(jit_state_t *_jit, jit_node_t *node,
     switch (node->code) {
 #  if __X64
        case jit_code_calli:
+       case jit_code_jmpi:
 #  endif
        case jit_code_movi:
            patch_abs(instr, label);
index c34a117..7dd900e 100644 (file)
@@ -2012,7 +2012,7 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi(_jit->pc.w);
+                       word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -2027,9 +2027,12 @@ _emit_code(jit_state_t *_jit)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
                        patch(word, node);
+                   }
                }
                else
                    calli(node->u.w);
index 507abb6..22eca0c 100644 (file)
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
-#ifdef _WIN32
-#  include <mman.h>
-#else
-#  include <sys/mman.h>
-#endif
+#include <sys/mman.h>
 #if defined(__sgi)
 #  include <fcntl.h>
 #endif
diff --git a/deps/lightning/m4/.gitignore b/deps/lightning/m4/.gitignore
new file mode 100644 (file)
index 0000000..24e2f3f
--- /dev/null
@@ -0,0 +1,10 @@
+/lt~obsolete.m4
+/ltversion.m4
+/ltsugar.m4
+/ltoptions.m4
+/libtool.m4
+/00gnulib.m4
+/gnulib-common.m4
+/gnulib-comp.m4
+/gnulib-tool.m4
+/zzgnulib.m4
diff --git a/deps/lightning/m4/gnulib-cache.m4 b/deps/lightning/m4/gnulib-cache.m4
new file mode 100644 (file)
index 0000000..45be7ba
--- /dev/null
@@ -0,0 +1,57 @@
+# Copyright (C) 2002-2021 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this file.  If not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License,
+# this file may be distributed as part of a program that
+# contains a configuration script generated by Autoconf, under
+# the same distribution terms as the rest of that program.
+#
+# Generated by gnulib-tool.
+#
+# This file represents the specification of how gnulib-tool is used.
+# It acts as a cache: It is written and read by gnulib-tool.
+# In projects that use version control, this file is meant to be put under
+# version control, like the configure.ac and various Makefile.am files.
+
+
+# Specification in the form of a command-line invocation:
+# gnulib-tool --import --local-dir=gl \
+#  --lib=libgnu \
+#  --source-base=gnulib-lib \
+#  --m4-base=m4 \
+#  --doc-base=gnulib-doc \
+#  --tests-base=tests \
+#  --aux-dir=build-aux \
+#  --no-conditional-dependencies \
+#  --libtool \
+#  --macro-prefix=gl
+
+# Specification in the form of a few gnulib-tool.m4 macro invocations:
+gl_LOCAL_DIR([gl])
+gl_MODULES([
+  
+])
+gl_AVOID([])
+gl_SOURCE_BASE([gnulib-lib])
+gl_M4_BASE([m4])
+gl_PO_BASE([])
+gl_DOC_BASE([gnulib-doc])
+gl_TESTS_BASE([tests])
+gl_LIB([libgnu])
+gl_MAKEFILE_NAME([])
+gl_LIBTOOL
+gl_MACRO_PREFIX([gl])
+gl_PO_DOMAIN([])
+gl_WITNESS_C_MACRO([])
index 4ebb7d2..770ee66 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = 2cca097e538876d219b8af9663abe0ca74f68bb2
-       parent = 5c00ea32a0eab812299b08acd14c25bf6ba4ca7a
+       commit = d90de68429bf9c2d67c5f5051d495d1e3131e636
+       parent = a9725dc07f40b39a5533d546b59e45377d1f9b66
        method = merge
-       cmdver = 0.4.1
+       cmdver = 0.4.3
index c58dac5..6a139f4 100644 (file)
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.3)
+project(lightrec LANGUAGES C VERSION 0.4)
 
 set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
 if (NOT BUILD_SHARED_LIBS)
@@ -25,9 +25,10 @@ if (CMAKE_COMPILER_IS_GNUCC)
        add_compile_options(-fvisibility=hidden)
 endif()
 
+set(HAS_DEFAULT_ELM ${CMAKE_COMPILER_IS_GNUCC})
+
 list(APPEND LIGHTREC_SOURCES
        blockcache.c
-       disassembler.c
        emitter.c
        interpreter.c
        lightrec.c
@@ -60,6 +61,17 @@ if (ENABLE_THREADED_COMPILER)
        endif (NOT ENABLE_FIRST_PASS)
 endif (ENABLE_THREADED_COMPILER)
 
+option(OPT_REMOVE_DIV_BY_ZERO_SEQ "(optimization) Remove div-by-zero check sequence" ON)
+option(OPT_REPLACE_MEMSET "(optimization) Detect and replace memset with host variant" ON)
+option(OPT_DETECT_IMPOSSIBLE_BRANCHES "(optimization) Detect impossible branches" ON)
+option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON)
+option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON)
+option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON)
+option(OPT_FLAG_STORES "(optimization) Flag stores that don't require invalidation" ON)
+option(OPT_FLAG_IO "(optimization) Flag I/O opcodes whose target is known" ON)
+option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON)
+option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
+
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
 add_library(${PROJECT_NAME} ${LIGHTREC_SOURCES} ${LIGHTREC_HEADERS})
@@ -72,6 +84,13 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
        C_EXTENSIONS OFF
 )
 
+if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
+       target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wno-parentheses)
+endif()
+if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
+       target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides)
+endif()
+
 option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF)
 if (ENABLE_TINYMM)
        find_library(TINYMM_LIBRARIES tinymm REQUIRED)
@@ -96,19 +115,11 @@ include_directories(${LIBLIGHTNING_INCLUDE_DIR})
 target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBLIGHTNING})
 
 if (LOG_LEVEL STREQUAL Debug)
-       find_library(LIBOPCODES NAMES opcodes-multiarch opcodes)
-       find_path(LIBOPCODES_INCLUDE_DIR dis-asm.h)
-
-       if (NOT LIBOPCODES OR NOT LIBOPCODES_INCLUDE_DIR)
-               message(SEND_ERROR "Debug log level requires libopcodes (from binutils) to be installed.")
-       endif ()
-
        set(ENABLE_DISASSEMBLER ON)
-       include_directories(${LIBOPCODES_INCLUDE_DIR})
-       target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBOPCODES})
+       target_sources(${PROJECT_NAME} PRIVATE disassembler.c)
 endif()
 
-configure_file(config.h.cmakein config.h @ONLY)
+configure_file(lightrec-config.h.cmakein lightrec-config.h @ONLY)
 
 include(GNUInstallDirs)
 install(TARGETS ${PROJECT_NAME}
index 4263431..4512392 100644 (file)
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2015-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2015-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "blockcache.h"
@@ -19,6 +10,7 @@
 
 #include <stdbool.h>
 #include <stdlib.h>
+#include <string.h>
 
 /* Must be power of two */
 #define LUT_SIZE 0x4000
@@ -28,6 +20,11 @@ struct blockcache {
        struct block * lut[LUT_SIZE];
 };
 
+/* Return the index into the block cache's lut[] array for this block:
+ * kunseg(block->pc) divided by four, reduced modulo LUT_SIZE (which must be
+ * a power of two for the mask to work). */
+u16 lightrec_get_lut_entry(const struct block *block)
+{
+       return (kunseg(block->pc) >> 2) & (LUT_SIZE - 1);
+}
+
 struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
 {
        struct block *block;
@@ -42,22 +39,33 @@ struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
        return NULL;
 }
 
-void remove_from_code_lut(struct blockcache *cache, struct block *block)
+/* Walk the chain stored at cache->lut[lut_entry] and return the first block
+ * whose range [pc, pc + nb_ops * 4) contains addr_in_block; both addresses
+ * are passed through kunseg() before comparing.  Returns NULL when no block
+ * in that chain covers the address. */
+struct block * lightrec_find_block_from_lut(struct blockcache *cache,
+                                           u16 lut_entry, u32 addr_in_block)
 {
-       struct lightrec_state *state = block->state;
-       const struct opcode *op;
-       u32 offset = lut_offset(block->pc);
+       struct block *block;
+       u32 pc;
 
-       /* Use state->get_next_block in the code LUT, which basically
-        * calls back get_next_block_func(), until the compiler
-        * overrides this. This is required, as a NULL value in the code
-        * LUT means an outdated block. */
-       state->code_lut[offset] = state->get_next_block;
+       addr_in_block = kunseg(addr_in_block);
 
-       for (op = block->opcode_list; op; op = op->next)
-               if (op->c.i.op == OP_META_SYNC)
-                       state->code_lut[offset + op->offset] = NULL;
+       for (block = cache->lut[lut_entry]; block; block = block->next) {
+               pc = kunseg(block->pc);
+               if (addr_in_block >= pc &&
+                   addr_in_block < pc + (block->nb_ops << 2))
+                       return block;
+       }
 
+       return NULL;
+}
+
+/* Zero the code LUT entries covered by the block: nb_ops consecutive entries
+ * starting at lut_offset(block->pc).  Nothing is done when block->function
+ * is not set.  The state is taken from the cache rather than the block. */
+void remove_from_code_lut(struct blockcache *cache, struct block *block)
+{
+       struct lightrec_state *state = cache->state;
+       u32 offset = lut_offset(block->pc);
+
+       if (block->function) {
+               memset(&state->code_lut[offset], 0,
+                      block->nb_ops * sizeof(*state->code_lut));
+       }
 }
 
 void lightrec_register_block(struct blockcache *cache, struct block *block)
@@ -102,7 +110,7 @@ void lightrec_free_block_cache(struct blockcache *cache)
        for (i = 0; i < LUT_SIZE; i++) {
                for (block = cache->lut[i]; block; block = next) {
                        next = block->next;
-                       lightrec_free_block(block);
+                       lightrec_free_block(cache->state, block);
                }
        }
 
@@ -124,18 +132,10 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state)
 
 u32 lightrec_calculate_block_hash(const struct block *block)
 {
-       const struct lightrec_mem_map *map = block->map;
-       u32 pc, hash = 0xffffffff;
-       const u32 *code;
+       const u32 *code = block->code;
+       u32 hash = 0xffffffff;
        unsigned int i;
 
-       pc = kunseg(block->pc) - map->pc;
-
-       while (map->mirror_of)
-               map = map->mirror_of;
-
-       code = map->address + pc;
-
        /* Jenkins one-at-a-time hash algorithm */
        for (i = 0; i < block->nb_ops; i++) {
                hash += *code++;
@@ -150,9 +150,9 @@ u32 lightrec_calculate_block_hash(const struct block *block)
        return hash;
 }
 
-bool lightrec_block_is_outdated(struct block *block)
+bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
 {
-       void **lut_entry = &block->state->code_lut[lut_offset(block->pc)];
+       void **lut_entry = &state->code_lut[lut_offset(block->pc)];
        bool outdated;
 
        if (*lut_entry)
@@ -165,7 +165,7 @@ bool lightrec_block_is_outdated(struct block *block)
                if (block->function)
                        *lut_entry = block->function;
                else
-                       *lut_entry = block->state->get_next_block;
+                       *lut_entry = state->get_next_block;
        }
 
        return outdated;
index ff63651..3b782f4 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __BLOCKCACHE_H__
 struct blockcache;
 
 struct block * lightrec_find_block(struct blockcache *cache, u32 pc);
+struct block * lightrec_find_block_from_lut(struct blockcache *cache,
+                                           u16 lut_entry, u32 addr_in_block);
+u16 lightrec_get_lut_entry(const struct block *block);
+
 void lightrec_register_block(struct blockcache *cache, struct block *block);
 void lightrec_unregister_block(struct blockcache *cache, struct block *block);
 
@@ -27,6 +22,6 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state);
 void lightrec_free_block_cache(struct blockcache *cache);
 
 u32 lightrec_calculate_block_hash(const struct block *block);
-_Bool lightrec_block_is_outdated(struct block *block);
+_Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block);
 
 #endif /* __BLOCKCACHE_H__ */
diff --git a/deps/lightrec/config.h b/deps/lightrec/config.h
deleted file mode 100644 (file)
index b72ae10..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- */
-
-#ifndef __LIGHTREC_CONFIG_H__
-#define __LIGHTREC_CONFIG_H__
-
-#define ENABLE_THREADED_COMPILER 1
-#define ENABLE_FIRST_PASS 1
-#define ENABLE_DISASSEMBLER 0
-#define ENABLE_TINYMM 0
-
-#endif /* __LIGHTREC_CONFIG_H__ */
diff --git a/deps/lightrec/config.h.cmakein b/deps/lightrec/config.h.cmakein
deleted file mode 100644 (file)
index 1eac007..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- */
-
-#ifndef __LIGHTREC_CONFIG_H__
-#define __LIGHTREC_CONFIG_H__
-
-#cmakedefine01 ENABLE_THREADED_COMPILER
-#cmakedefine01 ENABLE_FIRST_PASS
-#cmakedefine01 ENABLE_DISASSEMBLER
-#cmakedefine01 ENABLE_TINYMM
-
-#endif /* __LIGHTREC_CONFIG_H__ */
-
index 4facc22..273f1e5 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef DEBUG_H
index 06fcec9..c357a30 100644 (file)
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
-#include "config.h"
-
-#if ENABLE_DISASSEMBLER
-#include <dis-asm.h>
-#endif
 #include <stdbool.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "debug.h"
-#include "disassembler.h"
 #include "lightrec-private.h"
-#include "memmanager.h"
+#include "regcache.h"
 
-static bool is_unconditional_jump(const struct opcode *op)
-{
-       switch (op->i.op) {
-       case OP_SPECIAL:
-               return op->r.op == OP_SPECIAL_JR || op->r.op == OP_SPECIAL_JALR;
-       case OP_J:
-       case OP_JAL:
-               return true;
-       case OP_BEQ:
-       case OP_BLEZ:
-               return op->i.rs == op->i.rt;
-       case OP_REGIMM:
-               return (op->r.rt == OP_REGIMM_BGEZ ||
-                       op->r.rt == OP_REGIMM_BGEZAL) && op->i.rs == 0;
-       default:
-               return false;
-       }
-}
+static const char *std_opcodes[] = { /* indexed by c.i.op; mnemonics are padded to 8 columns */
+       [OP_J]                  = "j       ",
+       [OP_JAL]                = "jal     ",
+       [OP_BEQ]                = "beq     ",
+       [OP_BNE]                = "bne     ",
+       [OP_BLEZ]               = "blez    ",
+       [OP_BGTZ]               = "bgtz    ",
+       [OP_ADDI]               = "addi    ",
+       [OP_ADDIU]              = "addiu   ",
+       [OP_SLTI]               = "slti    ",
+       [OP_SLTIU]              = "sltiu   ",
+       [OP_ANDI]               = "andi    ",
+       [OP_ORI]                = "ori     ",
+       [OP_XORI]               = "xori    ",
+       [OP_LUI]                = "lui     ",
+       [OP_LB]                 = "lb      ",
+       [OP_LH]                 = "lh      ",
+       [OP_LWL]                = "lwl     ",
+       [OP_LW]                 = "lw      ",
+       [OP_LBU]                = "lbu     ",
+       [OP_LHU]                = "lhu     ",
+       [OP_LWR]                = "lwr     ",
+       [OP_SB]                 = "sb      ",
+       [OP_SH]                 = "sh      ",
+       [OP_SWL]                = "swl     ",
+       [OP_SW]                 = "sw      ",
+       [OP_SWR]                = "swr     ",
+       [OP_LWC2]               = "lwc2    ",
+       [OP_SWC2]               = "swc2    ",
+};
 
-static bool is_syscall(const struct opcode *op)
-{
-       return (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_SYSCALL ||
-                                          op->r.op == OP_SPECIAL_BREAK)) ||
-               (op->i.op == OP_CP0 && (op->r.rs == OP_CP0_MTC0 ||
-                                       op->r.rs == OP_CP0_CTC0) &&
-                (op->r.rd == 12 || op->r.rd == 13));
-}
+static const char *special_opcodes[] = { /* indexed by c.r.op when c.i.op is OP_SPECIAL */
+       [OP_SPECIAL_SLL]        = "sll     ",
+       [OP_SPECIAL_SRL]        = "srl     ",
+       [OP_SPECIAL_SRA]        = "sra     ",
+       [OP_SPECIAL_SLLV]       = "sllv    ",
+       [OP_SPECIAL_SRLV]       = "srlv    ",
+       [OP_SPECIAL_SRAV]       = "srav    ",
+       [OP_SPECIAL_JR]         = "jr      ",
+       [OP_SPECIAL_JALR]       = "jalr    ",
+       [OP_SPECIAL_SYSCALL]    = "syscall ",
+       [OP_SPECIAL_BREAK]      = "break   ",
+       [OP_SPECIAL_MFHI]       = "mfhi    ",
+       [OP_SPECIAL_MTHI]       = "mthi    ",
+       [OP_SPECIAL_MFLO]       = "mflo    ",
+       [OP_SPECIAL_MTLO]       = "mtlo    ",
+       [OP_SPECIAL_MULT]       = "mult    ",
+       [OP_SPECIAL_MULTU]      = "multu   ",
+       [OP_SPECIAL_DIV]        = "div     ",
+       [OP_SPECIAL_DIVU]       = "divu    ",
+       [OP_SPECIAL_ADD]        = "add     ",
+       [OP_SPECIAL_ADDU]       = "addu    ",
+       [OP_SPECIAL_SUB]        = "sub     ",
+       [OP_SPECIAL_SUBU]       = "subu    ",
+       [OP_SPECIAL_AND]        = "and     ",
+       [OP_SPECIAL_OR]         = "or      ",
+       [OP_SPECIAL_XOR]        = "xor     ",
+       [OP_SPECIAL_NOR]        = "nor     ",
+       [OP_SPECIAL_SLT]        = "slt     ",
+       [OP_SPECIAL_SLTU]       = "sltu    ",
+};
 
-void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list)
-{
-       struct opcode *next;
+static const char *regimm_opcodes[] = { /* indexed by c.i.rt for OP_REGIMM; any slot not listed here is NULL */
+       [OP_REGIMM_BLTZ]        = "bltz    ",
+       [OP_REGIMM_BGEZ]        = "bgez    ",
+       [OP_REGIMM_BLTZAL]      = "bltzal  ",
+       [OP_REGIMM_BGEZAL]      = "bgezal  ",
+};
 
-       while (list) {
-               next = list->next;
-               lightrec_free(state, MEM_FOR_IR, sizeof(*list), list);
-               list = next;
-       }
-}
+/* Mnemonics of the coprocessor 0 operations, indexed by c.i.rs.
+ * Every entry is padded to 8 columns, like the other mnemonic tables, so
+ * that operands printed after it line up. */
+static const char *cp0_opcodes[] = {
+       [OP_CP0_MFC0]           = "mfc0    ",
+       [OP_CP0_CFC0]           = "cfc0    ",
+       [OP_CP0_MTC0]           = "mtc0    ",
+       [OP_CP0_CTC0]           = "ctc0    ",
+       [OP_CP0_RFE]            = "rfe     ",
+};
+
+static const char *cp2_opcodes[] = { /* indexed by c.i.rs when c.i.op is OP_CP2 */
+       [OP_CP2_BASIC_MFC2]     = "mfc2    ",
+       [OP_CP2_BASIC_CFC2]     = "cfc2    ",
+       [OP_CP2_BASIC_MTC2]     = "mtc2    ",
+       [OP_CP2_BASIC_CTC2]     = "ctc2    ",
+};
+
+static const char *opcode_flags[] = { /* entry N names flag bit N; print_flags applies these to every opcode */
+       "switched branch/DS",
+       "unload Rs",
+       "unload Rt",
+       "unload Rd",
+       "sync point",
+};
+
+static const char *opcode_io_flags[] = { /* load/store flags: entry N names bit ARRAY_SIZE(opcode_flags) + N */
+       "memory I/O",
+       "hardware I/O",
+       "self-modifying code",
+       "no invalidation",
+};
 
-struct opcode * lightrec_disassemble(struct lightrec_state *state,
-                                    const u32 *src, unsigned int *len)
+static const char *opcode_branch_flags[] = { /* branch flags: entry N names bit ARRAY_SIZE(opcode_flags) + N */
+       "emulate branch",
+       "local branch",
+};
+
+static const char *opcode_multdiv_flags[] = { /* mult/div flags: entry N names bit ARRAY_SIZE(opcode_flags) + N */
+       "No LO",
+       "No HI",
+       "No div check",
+};
+
+/*
+ * Write the names of the bits set in 'flags' into 'buf' as "(a, b, ...)",
+ * or an empty string when no known bit is set.
+ *
+ * Bits below ARRAY_SIZE(opcode_flags) are named by opcode_flags[]; the
+ * following 'array_size' bits are named by 'array', which may be NULL when
+ * 'array_size' is 0. Bits beyond that are ignored.
+ *
+ * At most 'len' bytes are stored, terminator included. Returns the length
+ * the complete text would have had, as snprintf does, so a return value
+ * >= len means the output was truncated.
+ */
+static int print_flags(char *buf, size_t len, u16 flags,
+                      const char **array, size_t array_size)
 {
-       struct opcode *head = NULL;
-       bool stop_next = false;
-       struct opcode *curr, *last;
+       const char *flag_name;
        unsigned int i;
+       size_t count = 0, bytes;
+       bool first = true;
+       int ret;
 
-       for (i = 0, last = NULL; ; i++, last = curr) {
-               curr = lightrec_calloc(state, MEM_FOR_IR, sizeof(*curr));
-               if (!curr) {
-                       pr_err("Unable to allocate memory\n");
-                       lightrec_free_opcode_list(state, head);
-                       return NULL;
-               }
+       for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) {
+               if (!(flags & BIT(i)))
+                       continue;
 
-               if (!last)
-                       head = curr;
+               if (i < ARRAY_SIZE(opcode_flags))
+                       flag_name = opcode_flags[i];
                else
-                       last->next = curr;
-
-               /* TODO: Take care of endianness */
-               curr->opcode = LE32TOH(*src++);
-               curr->offset = i;
-
-               /* NOTE: The block disassembly ends after the opcode that
-                * follows an unconditional jump (delay slot) */
-               if (stop_next || is_syscall(curr))
-                       break;
-               else if (is_unconditional_jump(curr))
-                       stop_next = true;
+                       flag_name = array[i - ARRAY_SIZE(opcode_flags)];
+
+               if (first)
+                       ret = snprintf(buf, len, "(%s", flag_name);
+               else
+                       ret = snprintf(buf, len, ", %s", flag_name);
+
+               /* An output error contributes nothing to the text */
+               bytes = ret < 0 ? 0 : (size_t)ret;
+               count += bytes;
+
+               /* snprintf reports the untruncated length: never step past
+                * the terminator it actually stored, or 'len' would wrap
+                * around and the next write would overflow the buffer. */
+               if (bytes >= len)
+                       bytes = len ? len - 1 : 0;
+
+               first = false;
+               buf += bytes;
+               len -= bytes;
        }
 
-       if (len)
-               *len = (i + 1) * sizeof(u32);
+       if (!first) {
+               ret = snprintf(buf, len, ")");
+               if (ret > 0)
+                       count += (size_t)ret;
+       } else if (len) {
+               /* Nothing printed: still hand back a valid empty string */
+               *buf = '\0';
+       }
 
-       return head;
+       return count;
 }
 
-unsigned int lightrec_cycles_of_opcode(union code code)
+/*
+ * Format an OP_SPECIAL opcode (selected by c.r.op) into 'buf'.
+ *
+ * For mult/div opcodes, *flags_ptr and *nb_flags are set to the mult/div
+ * flag-name table; they are left untouched for every other opcode, so the
+ * caller must initialize them beforehand.
+ *
+ * Returns what snprintf returns: the length of the complete text.
+ *
+ * NOTE(review): the three-register ALU opcodes are printed as rd,rt,rs,
+ * which is the MIPS assembler order for the variable shifts only; confirm
+ * whether add/sub/slt & co. should be printed as rd,rs,rt instead.
+ */
+static int print_op_special(union code c, char *buf, size_t len,
+                           const char ***flags_ptr, size_t *nb_flags)
 {
-       switch (code.i.op) {
-       case OP_META_REG_UNLOAD:
-       case OP_META_SYNC:
-               return 0;
+       switch (c.r.op) {
+       case OP_SPECIAL_SLL:
+       case OP_SPECIAL_SRL:
+       case OP_SPECIAL_SRA:
+               return snprintf(buf, len, "%s%s,%s,%u",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(c.r.rd),
+                               lightrec_reg_name(c.r.rt),
+                               c.r.imm);
+       case OP_SPECIAL_SLLV:
+       case OP_SPECIAL_SRLV:
+       case OP_SPECIAL_SRAV:
+       case OP_SPECIAL_ADD:
+       case OP_SPECIAL_ADDU:
+       case OP_SPECIAL_SUB:
+       case OP_SPECIAL_SUBU:
+       case OP_SPECIAL_AND:
+       case OP_SPECIAL_OR:
+       case OP_SPECIAL_XOR:
+       case OP_SPECIAL_NOR:
+       case OP_SPECIAL_SLT:
+       case OP_SPECIAL_SLTU:
+               return snprintf(buf, len, "%s%s,%s,%s",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(c.r.rd),
+                               lightrec_reg_name(c.r.rt),
+                               lightrec_reg_name(c.r.rs));
+       case OP_SPECIAL_JR:
+       case OP_SPECIAL_MTHI:
+       case OP_SPECIAL_MTLO:
+               return snprintf(buf, len, "%s%s",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(c.r.rs));
+       case OP_SPECIAL_JALR:
+               /* The link register is rd and the jump target is read from
+                * rs, as for JR; rt is not an operand of this opcode. */
+               return snprintf(buf, len, "%s%s,%s",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(c.r.rd),
+                               lightrec_reg_name(c.r.rs));
+       case OP_SPECIAL_SYSCALL:
+       case OP_SPECIAL_BREAK:
+               return snprintf(buf, len, "%s", special_opcodes[c.r.op]);
+       case OP_SPECIAL_MFHI:
+       case OP_SPECIAL_MFLO:
+               return snprintf(buf, len, "%s%s",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(c.r.rd));
+       case OP_SPECIAL_MULT:
+       case OP_SPECIAL_MULTU:
+       case OP_SPECIAL_DIV:
+       case OP_SPECIAL_DIVU:
+               *flags_ptr = opcode_multdiv_flags;
+               *nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
+               return snprintf(buf, len, "%s%s,%s,%s,%s",
+                               special_opcodes[c.r.op],
+                               lightrec_reg_name(get_mult_div_hi(c)),
+                               lightrec_reg_name(get_mult_div_lo(c)),
+                               lightrec_reg_name(c.r.rs),
+                               lightrec_reg_name(c.r.rt));
        default:
-               return 2;
+               return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
        }
 }
 
-#if ENABLE_DISASSEMBLER
-void lightrec_print_disassembly(const struct block *block,
-                               const u32 *code, unsigned int length)
+/*
+ * Format a coprocessor opcode into 'buf'. 'cp' selects the coprocessor:
+ * 2 uses the cp2 mnemonic table, any other value is handled as
+ * coprocessor 0. The operation is selected by c.i.rs.
+ *
+ * Returns what snprintf returns: the length of the complete text.
+ */
+static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
 {
-       struct disassemble_info info;
+       if (cp == 2) {
+               /* Match on the same constants that index cp2_opcodes[],
+                * rather than relying on the CP0 ones sharing their values */
+               switch (c.i.rs) {
+               case OP_CP2_BASIC_MFC2:
+               case OP_CP2_BASIC_CFC2:
+               case OP_CP2_BASIC_MTC2:
+               case OP_CP2_BASIC_CTC2:
+                       return snprintf(buf, len, "%s%s,%u",
+                                       cp2_opcodes[c.i.rs],
+                                       lightrec_reg_name(c.i.rt),
+                                       c.r.rd);
+               default:
+                       return snprintf(buf, len, "cp2     (0x%08x)", c.opcode);
+               }
+       } else {
+               switch (c.i.rs) {
+               case OP_CP0_MFC0:
+               case OP_CP0_CFC0:
+               case OP_CP0_MTC0:
+               case OP_CP0_CTC0:
+                       return snprintf(buf, len, "%s%s,%u",
+                                       cp0_opcodes[c.i.rs],
+                                       lightrec_reg_name(c.i.rt),
+                                       c.r.rd);
+               case OP_CP0_RFE:
+                       return snprintf(buf, len, "rfe     ");
+               default:
+                       return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
+               }
+       }
+}
+
+/*
+ * Format the opcode 'c' into 'buf' (at most 'len' bytes, terminator
+ * included). 'pc' is the address used to compute the targets printed for
+ * branches and jumps.
+ *
+ * For branches, loads/stores and mult/div opcodes, *flags_ptr and
+ * *nb_flags are set to the matching flag-name table; they are left
+ * untouched for every other opcode, so the caller must initialize them
+ * beforehand.
+ *
+ * Returns what snprintf returns: the length of the complete text.
+ */
+static int print_op(union code c, u32 pc, char *buf, size_t len,
+                   const char ***flags_ptr, size_t *nb_flags)
+{
+       if (c.opcode == 0)
+               return snprintf(buf, len, "nop     ");
+
+       switch (c.i.op) {
+       case OP_SPECIAL:
+               return print_op_special(c, buf, len, flags_ptr, nb_flags);
+       case OP_REGIMM:
+               /* rt comes straight from the instruction word: it may be
+                * past the end of the table, or name a slot that has no
+                * mnemonic (NULL), neither of which may reach "%s". */
+               if (c.i.rt >= ARRAY_SIZE(regimm_opcodes) ||
+                   !regimm_opcodes[c.i.rt])
+                       return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
+
+               *flags_ptr = opcode_branch_flags;
+               *nb_flags = ARRAY_SIZE(opcode_branch_flags);
+               return snprintf(buf, len, "%s%s,0x%x",
+                               regimm_opcodes[c.i.rt],
+                               lightrec_reg_name(c.i.rs),
+                               pc + 4 + ((s16)c.i.imm << 2));
+       case OP_J:
+       case OP_JAL:
+               return snprintf(buf, len, "%s0x%x",
+                               std_opcodes[c.i.op],
+                               (pc & 0xf0000000) | (c.j.imm << 2));
+       case OP_BEQ:
+       case OP_BNE:
+       case OP_BLEZ:
+       case OP_BGTZ:
+               *flags_ptr = opcode_branch_flags;
+               *nb_flags = ARRAY_SIZE(opcode_branch_flags);
+               return snprintf(buf, len, "%s%s,%s,0x%x",
+                               std_opcodes[c.i.op],
+                               lightrec_reg_name(c.i.rs),
+                               lightrec_reg_name(c.i.rt),
+                               pc + 4 + ((s16)c.i.imm << 2));
+       case OP_ADDI:
+       case OP_ADDIU:
+       case OP_SLTI:
+       case OP_SLTIU:
+       case OP_ANDI:
+       case OP_ORI:
+       case OP_XORI:
+               return snprintf(buf, len, "%s%s,%s,0x%04hx",
+                               std_opcodes[c.i.op],
+                               lightrec_reg_name(c.i.rt),
+                               lightrec_reg_name(c.i.rs),
+                               (u16)c.i.imm);
+
+       case OP_LUI:
+               return snprintf(buf, len, "%s%s,0x%04hx",
+                               std_opcodes[c.i.op],
+                               lightrec_reg_name(c.i.rt),
+                               (u16)c.i.imm);
+       case OP_CP0:
+               return print_op_cp(c, buf, len, 0);
+       case OP_CP2:
+               return print_op_cp(c, buf, len, 2);
+       case OP_LB:
+       case OP_LH:
+       case OP_LWL:
+       case OP_LW:
+       case OP_LBU:
+       case OP_LHU:
+       case OP_LWR:
+       case OP_SB:
+       case OP_SH:
+       case OP_SWL:
+       case OP_SW:
+       case OP_SWR:
+       case OP_LWC2:
+       case OP_SWC2:
+               /* All loads and stores share the "rt,imm(rs)" layout */
+               *flags_ptr = opcode_io_flags;
+               *nb_flags = ARRAY_SIZE(opcode_io_flags);
+               return snprintf(buf, len, "%s%s,%hd(%s)",
+                               std_opcodes[c.i.op],
+                               lightrec_reg_name(c.i.rt),
+                               (s16)c.i.imm,
+                               lightrec_reg_name(c.i.rs));
+       case OP_META_MOV:
+               return snprintf(buf, len, "move    %s,%s",
+                               lightrec_reg_name(c.r.rd),
+                               lightrec_reg_name(c.r.rs));
+       case OP_META_EXTC:
+               return snprintf(buf, len, "extc    %s,%s",
+                               lightrec_reg_name(c.i.rt),
+                               lightrec_reg_name(c.i.rs));
+       case OP_META_EXTS:
+               return snprintf(buf, len, "exts    %s,%s",
+                               lightrec_reg_name(c.i.rt),
+                               lightrec_reg_name(c.i.rs));
+       default:
+               return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
+       }
+}
+
+void lightrec_print_disassembly(const struct block *block, const u32 *code)
+{ /* Prints one line per opcode of the block: address, byte offset, the word
+   * read from 'code', the opcode from block->opcode_list when it differs,
+   * and the names of the flags set on that opcode. */
+       const struct opcode *op;
+       const char **flags_ptr;
+       size_t nb_flags, count, count2;
+       char buf[256], buf2[256], buf3[256];
        unsigned int i;
+       u32 pc, branch_pc;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               op = &block->opcode_list[i];
+               branch_pc = get_branch_pc(block, i, 0);
+               pc = block->pc + (i << 2);
+
+               count = print_op((union code)code[i], pc, buf, sizeof(buf),
+                                &flags_ptr, &nb_flags); /* flag table chosen here is discarded just below */
+
+               flags_ptr = NULL;
+               nb_flags = 0;
+               count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
+                                 &flags_ptr, &nb_flags); /* this call decides which flag names apply */
+
+               if (code[i] == op->c.opcode) { /* opcode unchanged: leave the second column empty */
+                       *buf2 = '\0';
+                       count2 = 0;
+               }
+
+               print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags);
 
-       memset(&info, 0, sizeof(info));
-       init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
-
-       info.buffer = (bfd_byte *) code;
-       info.buffer_vma = (bfd_vma)(uintptr_t) code;
-       info.buffer_length = length;
-       info.flavour = bfd_target_unknown_flavour;
-       info.arch = bfd_arch_mips;
-       info.mach = bfd_mach_mips3000;
-       disassemble_init_for_target(&info);
-
-       for (i = 0; i < length; i += 4) {
-               void print_insn_little_mips(bfd_vma, struct disassemble_info *);
-               putc('\t', stdout);
-               print_insn_little_mips((bfd_vma)(uintptr_t) code++, &info);
-               putc('\n', stdout);
+               printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2,
+                      buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3); /* "%*c" pads each column to 30 characters */
        }
 }
-#endif
index 249d094..ae2af7e 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __DISASSEMBLER_H__
 #define __packed __attribute__((packed))
 #endif
 
-#define LIGHTREC_DIRECT_IO     (1 << 0)
-#define LIGHTREC_NO_INVALIDATE (1 << 1)
-#define LIGHTREC_NO_DS         (1 << 2)
-#define LIGHTREC_SMC           (1 << 3)
-#define LIGHTREC_EMULATE_BRANCH        (1 << 4)
-#define LIGHTREC_LOCAL_BRANCH  (1 << 5)
-#define LIGHTREC_HW_IO         (1 << 6)
-#define LIGHTREC_MULT32                (1 << 7)
+#define BIT(x) (1ULL << (x))
+
+/* Flags for all opcodes */
+#define LIGHTREC_NO_DS         BIT(0)
+#define LIGHTREC_UNLOAD_RS     BIT(1)
+#define LIGHTREC_UNLOAD_RT     BIT(2)
+#define LIGHTREC_UNLOAD_RD     BIT(3)
+#define LIGHTREC_SYNC          BIT(4)
+
+/* Flags for load/store opcodes */
+#define LIGHTREC_DIRECT_IO     BIT(5)
+#define LIGHTREC_HW_IO         BIT(6)
+#define LIGHTREC_SMC           BIT(7)
+#define LIGHTREC_NO_INVALIDATE BIT(8)
+
+/* Flags for branches */
+#define LIGHTREC_EMULATE_BRANCH        BIT(5)
+#define LIGHTREC_LOCAL_BRANCH  BIT(6)
+
+/* Flags for div/mult opcodes */
+#define LIGHTREC_NO_LO         BIT(5)
+#define LIGHTREC_NO_HI         BIT(6)
+#define LIGHTREC_NO_DIV_CHECK  BIT(7)
 
 struct block;
 
@@ -67,13 +73,10 @@ enum standard_opcodes {
        OP_LWC2                 = 0x32,
        OP_SWC2                 = 0x3a,
 
-       OP_META_REG_UNLOAD      = 0x11,
-
-       OP_META_BEQZ            = 0x14,
-       OP_META_BNEZ            = 0x15,
-
        OP_META_MOV             = 0x16,
-       OP_META_SYNC            = 0x17,
+
+       OP_META_EXTC            = 0x17,
+       OP_META_EXTS            = 0x18,
 };
 
 enum special_opcodes {
@@ -195,18 +198,8 @@ struct opcode {
                struct opcode_j j;
        };
        u16 flags;
-       u16 offset;
-       struct opcode *next;
 };
 
-struct opcode * lightrec_disassemble(struct lightrec_state *state,
-                                    const u32 *src, unsigned int *len);
-void lightrec_free_opcode_list(struct lightrec_state *state,
-                              struct opcode *list);
-
-unsigned int lightrec_cycles_of_opcode(union code code);
-
-void lightrec_print_disassembly(const struct block *block,
-                               const u32 *code, unsigned int length);
+void lightrec_print_disassembly(const struct block *block, const u32 *code);
 
 #endif /* __DISASSEMBLER_H__ */
index 0cf75c3..99f6756 100644 (file)
@@ -1,61 +1,50 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "blockcache.h"
 #include "debug.h"
 #include "disassembler.h"
 #include "emitter.h"
+#include "lightning-wrapper.h"
 #include "optimizer.h"
 #include "regcache.h"
 
-#include <lightning.h>
 #include <stdbool.h>
 #include <stddef.h>
 
-typedef void (*lightrec_rec_func_t)(const struct block *,
-                                   const struct opcode *, u32);
+typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
 
 /* Forward declarations */
-static void rec_SPECIAL(const struct block *block,
-                      const struct opcode *op, u32 pc);
-static void rec_REGIMM(const struct block *block,
-                     const struct opcode *op, u32 pc);
-static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc);
-static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc);
+static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
 
-
-static void unknown_opcode(const struct block *block,
-                          const struct opcode *op, u32 pc)
+static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
-       pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n", op->opcode, pc);
+       pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
+               block->opcode_list[offset].c.opcode,
+               block->pc + (offset << 2));
 }
 
-static void lightrec_emit_end_of_block(const struct block *block,
-                                      const struct opcode *op, u32 pc,
+static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
+                                      const struct block *block, u16 offset,
                                       s8 reg_new_pc, u32 imm, u8 ra_reg,
                                       u32 link, bool update_cycles)
 {
-       struct lightrec_state *state = block->state;
        struct regcache *reg_cache = state->reg_cache;
        u32 cycles = state->cycles;
        jit_state_t *_jit = block->_jit;
+       const struct opcode *op = &block->opcode_list[offset],
+                           *next = &block->opcode_list[offset + 1];
 
        jit_note(__FILE__, __LINE__);
 
        if (link) {
                /* Update the $ra register */
-               u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg);
+               u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
                jit_movi(link_reg, link);
                lightrec_free_reg(reg_cache, link_reg);
        }
@@ -69,11 +58,11 @@ static void lightrec_emit_end_of_block(const struct block *block,
 
        if (has_delay_slot(op->c) &&
            !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
-               cycles += lightrec_cycles_of_opcode(op->next->c);
+               cycles += lightrec_cycles_of_opcode(next->c);
 
                /* Recompile the delay slot */
-               if (op->next->c.opcode)
-                       lightrec_rec_opcode(block, op->next, pc + 4);
+               if (next->c.opcode)
+                       lightrec_rec_opcode(state, block, offset + 1);
        }
 
        /* Store back remaining registers */
@@ -86,91 +75,122 @@ static void lightrec_emit_end_of_block(const struct block *block,
                pr_debug("EOB: %u cycles\n", cycles);
        }
 
-       if (op->next && ((op->flags & LIGHTREC_NO_DS) || op->next->next))
+       if (offset + !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1)
                state->branches[state->nb_branches++] = jit_jmpi();
 }
 
-void lightrec_emit_eob(const struct block *block,
-                      const struct opcode *op, u32 pc)
+void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
+                      u16 offset, bool after_op)
 {
-       struct lightrec_state *state = block->state;
        struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
+       union code c = block->opcode_list[offset].c;
+       u32 cycles = state->cycles;
+
+       if (!after_op)
+               cycles -= lightrec_cycles_of_opcode(c);
 
        lightrec_storeback_regs(reg_cache, _jit);
 
-       jit_movi(JIT_V0, pc);
-       jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE,
-                state->cycles - lightrec_cycles_of_opcode(op->c));
+       jit_movi(JIT_V0, block->pc + (offset << 2));
+       jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
 
        state->branches[state->nb_branches++] = jit_jmpi();
 }
 
-static void rec_special_JR(const struct block *block,
-                          const struct opcode *op, u32 pc)
+static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
+       const struct opcode *op = &block->opcode_list[offset],
+                           *next = &block->opcode_list[offset + 1];
        u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
 
-       _jit_name(block->_jit, __func__);
+       /* If the source register is already mapped to JIT_R0 or JIT_R1, and the
+        * delay slot is an I/O operation, unload the register, since JIT_R0 and
+        * JIT_R1 are explicitly used by the I/O opcode generators. */
+       if ((rs == JIT_R0 || rs == JIT_R1) &&
+           !(op->flags & LIGHTREC_NO_DS) &&
+           opcode_is_io(next->c) &&
+           !(next->flags & (LIGHTREC_NO_INVALIDATE | LIGHTREC_DIRECT_IO))) {
+               lightrec_unload_reg(reg_cache, _jit, rs);
+               lightrec_free_reg(reg_cache, rs);
+
+               rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+       }
+
        lightrec_lock_reg(reg_cache, _jit, rs);
-       lightrec_emit_end_of_block(block, op, pc, rs, 0, 31, 0, true);
+
+       return rs;
 }
 
-static void rec_special_JALR(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
-       jit_state_t *_jit = block->_jit;
-       u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+       u8 rs = get_jr_jalr_reg(state, block, offset);
 
        _jit_name(block->_jit, __func__);
-       lightrec_lock_reg(reg_cache, _jit, rs);
-       lightrec_emit_end_of_block(block, op, pc, rs, 0, op->r.rd, pc + 8, true);
+       lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true);
+}
+
+static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
+{
+       u8 rs = get_jr_jalr_reg(state, block, offset);
+       union code c = block->opcode_list[offset].c;
+
+       _jit_name(block->_jit, __func__);
+       lightrec_emit_end_of_block(state, block, offset, rs, 0, c.r.rd,
+                                  get_branch_pc(block, offset, 2), true);
 }
 
-static void rec_J(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       lightrec_emit_end_of_block(block, op, pc, -1,
-                                  (pc & 0xf0000000) | (op->j.imm << 2), 31, 0, true);
+       lightrec_emit_end_of_block(state, block, offset, -1,
+                                  (block->pc & 0xf0000000) | (c.j.imm << 2),
+                                  31, 0, true);
 }
 
-static void rec_JAL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       lightrec_emit_end_of_block(block, op, pc, -1,
-                                  (pc & 0xf0000000) | (op->j.imm << 2),
-                                  31, pc + 8, true);
+       lightrec_emit_end_of_block(state, block, offset, -1,
+                                  (block->pc & 0xf0000000) | (c.j.imm << 2),
+                                  31, get_branch_pc(block, offset, 2), true);
 }
 
-static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
+static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
                  jit_code_t code, u32 link, bool unconditional, bool bz)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
        struct native_register *regs_backup;
        jit_state_t *_jit = block->_jit;
        struct lightrec_branch *branch;
+       const struct opcode *op = &block->opcode_list[offset],
+                           *next = &block->opcode_list[offset + 1];
        jit_node_t *addr;
        u8 link_reg;
-       u32 offset, cycles = block->state->cycles;
+       u32 target_offset, cycles = state->cycles;
        bool is_forward = (s16)op->i.imm >= -1;
+       u32 next_pc;
 
        jit_note(__FILE__, __LINE__);
 
        if (!(op->flags & LIGHTREC_NO_DS))
-               cycles += lightrec_cycles_of_opcode(op->next->c);
+               cycles += lightrec_cycles_of_opcode(next->c);
 
-       block->state->cycles = 0;
+       state->cycles = 0;
 
        if (cycles)
                jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
 
        if (!unconditional) {
-               u8 rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs),
-                  rt = bz ? 0 : lightrec_alloc_reg_in_ext(reg_cache,
-                                                          _jit, op->i.rt);
+               u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT),
+                  rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
+                                                      _jit, op->i.rt, REG_EXT);
 
                /* Generate the branch opcode */
                addr = jit_new_node_pww(code, NULL, rs, rt);
@@ -180,15 +200,15 @@ static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
        }
 
        if (op->flags & LIGHTREC_LOCAL_BRANCH) {
-               if (op->next && !(op->flags & LIGHTREC_NO_DS)) {
+               if (next && !(op->flags & LIGHTREC_NO_DS)) {
                        /* Recompile the delay slot */
-                       if (op->next->opcode)
-                               lightrec_rec_opcode(block, op->next, pc + 4);
+                       if (next->opcode)
+                               lightrec_rec_opcode(state, block, offset + 1);
                }
 
                if (link) {
                        /* Update the $ra register */
-                       link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31);
+                       link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31, 0);
                        jit_movi(link_reg, link);
                        lightrec_free_reg(reg_cache, link_reg);
                }
@@ -196,12 +216,14 @@ static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
                /* Store back remaining registers */
                lightrec_storeback_regs(reg_cache, _jit);
 
-               offset = op->offset + 1 + (s16)op->i.imm;
-               pr_debug("Adding local branch to offset 0x%x\n", offset << 2);
-               branch = &block->state->local_branches[
-                       block->state->nb_local_branches++];
+               target_offset = offset + 1 + (s16)op->i.imm
+                       - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+               pr_debug("Adding local branch to offset 0x%x\n",
+                        target_offset << 2);
+               branch = &state->local_branches[
+                       state->nb_local_branches++];
 
-               branch->target = offset;
+               branch->target = target_offset;
                if (is_forward)
                        branch->branch = jit_jmpi();
                else
@@ -209,8 +231,8 @@ static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
        }
 
        if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
-               lightrec_emit_end_of_block(block, op, pc, -1,
-                                          pc + 4 + ((s16)op->i.imm << 2),
+               next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
+               lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
                                           31, link, false);
        }
 
@@ -220,105 +242,127 @@ static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
 
                if (bz && link) {
                        /* Update the $ra register */
-                       link_reg = lightrec_alloc_reg_out_ext(reg_cache,
-                                                             _jit, 31);
+                       link_reg = lightrec_alloc_reg_out(reg_cache, _jit,
+                                                         31, REG_EXT);
                        jit_movi(link_reg, (s32)link);
                        lightrec_free_reg(reg_cache, link_reg);
                }
 
-               if (!(op->flags & LIGHTREC_NO_DS) && op->next->opcode)
-                       lightrec_rec_opcode(block, op->next, pc + 4);
+               if (!(op->flags & LIGHTREC_NO_DS) && next->opcode)
+                       lightrec_rec_opcode(state, block, offset + 1);
        }
 }
 
-static void rec_BNE(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BNE(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_beqr, 0, false, false);
+
+       if (c.i.rt == 0)
+               rec_b(state, block, offset, jit_code_beqi, 0, false, true);
+       else
+               rec_b(state, block, offset, jit_code_beqr, 0, false, false);
 }
 
-static void rec_BEQ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BEQ(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_bner, 0,
-                       op->i.rs == op->i.rt, false);
+
+       if (c.i.rt == 0)
+               rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true);
+       else
+               rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false);
 }
 
-static void rec_BLEZ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BLEZ(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_bgti, 0, op->i.rs == 0, true);
+       rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
 }
 
-static void rec_BGTZ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BGTZ(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_blei, 0, false, true);
+       rec_b(state, block, offset, jit_code_blei, 0, false, true);
 }
 
-static void rec_regimm_BLTZ(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_regimm_BLTZ(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_bgei, 0, false, true);
+       rec_b(state, block, offset, jit_code_bgei, 0, false, true);
 }
 
-static void rec_regimm_BLTZAL(const struct block *block,
-                             const struct opcode *op, u32 pc)
+static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
+                             const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_bgei, pc + 8, false, true);
+       rec_b(state, block, offset, jit_code_bgei,
+             get_branch_pc(block, offset, 2), false, true);
 }
 
-static void rec_regimm_BGEZ(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_regimm_BGEZ(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_blti, 0, !op->i.rs, true);
+       rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
 }
 
-static void rec_regimm_BGEZAL(const struct block *block,
-                             const struct opcode *op, u32 pc)
+static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
+                             const struct block *block, u16 offset)
 {
+       const struct opcode *op = &block->opcode_list[offset];
        _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_blti, pc + 8, !op->i.rs, true);
+       rec_b(state, block, offset, jit_code_blti,
+             get_branch_pc(block, offset, 2),
+             !op->i.rs, true);
 }
 
-static void rec_alu_imm(const struct block *block, const struct opcode *op,
-                       jit_code_t code, bool sign_extend)
+static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
+                       u16 offset, jit_code_t code, bool slti)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rs, rt;
+       u8 rs, rt, out_flags = REG_EXT;
+
+       if (slti)
+               out_flags |= REG_ZEXT;
 
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs);
-       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT);
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags);
 
-       if (sign_extend)
-               jit_new_node_www(code, rt, rs, (s32)(s16) op->i.imm);
-       else
-               jit_new_node_www(code, rt, rs, (u32)(u16) op->i.imm);
+       jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
 
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_alu_special(const struct block *block, const struct opcode *op,
-                           jit_code_t code, bool out_ext)
+static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
+                           u16 offset, jit_code_t code, bool out_ext)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        u8 rd, rt, rs;
 
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
-       rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-
-       if (out_ext)
-          rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
-       else
-          rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd,
+                                   out_ext ? REG_EXT | REG_ZEXT : 0);
 
        jit_new_node_www(code, rd, rs, rt);
 
@@ -327,539 +371,698 @@ static void rec_alu_special(const struct block *block, const struct opcode *op,
        lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_alu_shiftv(const struct block *block,
-                          const struct opcode *op, jit_code_t code)
+static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
+                          u16 offset, jit_code_t code)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rd, rt, rs, temp;
+       u8 rd, rt, rs, temp, flags = 0;
 
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
-       temp = lightrec_alloc_reg_temp(reg_cache, _jit);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
 
-       if (code == jit_code_rshr) {
-               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-               rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
-       } else {
-               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
-               rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
-       }
+       if (code == jit_code_rshr)
+               flags = REG_EXT;
+       else if (code == jit_code_rshr_u)
+               flags = REG_ZEXT;
 
-       jit_andi(temp, rs, 0x1f);
-
-#if __WORDSIZE == 64
-       if (code == jit_code_rshr_u) {
-               jit_extr_ui(rd, rt);
-               jit_new_node_www(code, rd, rd, temp);
-       }
-#endif
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
 
-       if (__WORDSIZE == 32 || code != jit_code_rshr_u)
+       if (rs != rd && rt != rd) {
+               jit_andi(rd, rs, 0x1f);
+               jit_new_node_www(code, rd, rt, rd);
+       } else {
+               temp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_andi(temp, rs, 0x1f);
                jit_new_node_www(code, rd, rt, temp);
+               lightrec_free_reg(reg_cache, temp);
+       }
 
        lightrec_free_reg(reg_cache, rs);
-       lightrec_free_reg(reg_cache, temp);
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_ADDIU(const struct block *block,
-                     const struct opcode *op, u32 pc)
+static void rec_ADDIU(struct lightrec_cstate *state,
+                     const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_addi, true);
+       rec_alu_imm(state, block, offset, jit_code_addi, false);
 }
 
-static void rec_ADDI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_ADDI(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
        /* TODO: Handle the exception? */
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_addi, true);
+       rec_alu_imm(state, block, offset, jit_code_addi, false);
 }
 
-static void rec_SLTIU(const struct block *block,
-                     const struct opcode *op, u32 pc)
+static void rec_SLTIU(struct lightrec_cstate *state,
+                     const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_lti_u, true);
+       rec_alu_imm(state, block, offset, jit_code_lti_u, true);
 }
 
-static void rec_SLTI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SLTI(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_lti, true);
+       rec_alu_imm(state, block, offset, jit_code_lti, true);
 }
 
-static void rec_ANDI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_ANDI(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        u8 rs, rt;
 
        _jit_name(block->_jit, __func__);
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
-       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt,
+                                   REG_EXT | REG_ZEXT);
 
        /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
         * casts to uint8_t / uint16_t. */
-       if (op->i.imm == 0xff)
+       if (c.i.imm == 0xff)
                jit_extr_uc(rt, rs);
-       else if (op->i.imm == 0xffff)
+       else if (c.i.imm == 0xffff)
                jit_extr_us(rt, rs);
        else
-               jit_andi(rt, rs, (u32)(u16) op->i.imm);
+               jit_andi(rt, rs, (u32)(u16) c.i.imm);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+}
+
+static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
+                          u16 offset, jit_code_t code)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rs, rt, flags;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0);
+
+       flags = lightrec_get_reg_in_flags(reg_cache, rs);
+       lightrec_set_reg_out_flags(reg_cache, rt, flags);
+
+       jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
 
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_ORI(const struct block *block, const struct opcode *op, u32 pc)
+
+static void rec_ORI(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_ori, false);
+       rec_alu_or_xor(state, block, offset, jit_code_ori);
 }
 
-static void rec_XORI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_XORI(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(block, op, jit_code_xori, false);
+       rec_alu_or_xor(state, block, offset, jit_code_xori);
 }
 
-static void rec_LUI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LUI(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rt;
+       u8 rt, flags = REG_EXT;
 
        jit_name(__func__);
        jit_note(__FILE__, __LINE__);
-       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
 
-       jit_movi(rt, (s32)(op->i.imm << 16));
+       if (!(c.i.imm & BIT(15)))
+               flags |= REG_ZEXT;
+
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+
+       jit_movi(rt, (s32)(c.i.imm << 16));
 
        lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_special_ADDU(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_ADDU(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_addr, false);
+       rec_alu_special(state, block, offset, jit_code_addr, false);
 }
 
-static void rec_special_ADD(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_ADD(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        /* TODO: Handle the exception? */
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_addr, false);
+       rec_alu_special(state, block, offset, jit_code_addr, false);
 }
 
-static void rec_special_SUBU(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_SUBU(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_subr, false);
+       rec_alu_special(state, block, offset, jit_code_subr, false);
 }
 
-static void rec_special_SUB(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_SUB(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        /* TODO: Handle the exception? */
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_subr, false);
+       rec_alu_special(state, block, offset, jit_code_subr, false);
 }
 
-static void rec_special_AND(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_AND(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
+
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_andr, false);
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
+
+       flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+       flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
+
+       /* Z(rd) = Z(rs) | Z(rt) */
+       flags_rd = REG_ZEXT & (flags_rs | flags_rt);
+
+       /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
+       if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
+           ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
+           (REG_EXT & flags_rs & flags_rt))
+               flags_rd |= REG_EXT;
+
+       lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+       jit_andr(rd, rs, rt);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_OR(const struct block *block,
-                          const struct opcode *op, u32 pc)
+/*
+ * Emit native code for the SPECIAL OR / NOR opcode stored at
+ * block->opcode_list[offset].
+ *
+ * state:  compiler state; provides the register cache
+ * block:  block being recompiled; provides the opcode and the JIT state
+ * offset: index of the opcode inside block->opcode_list
+ * nor:    true to emit NOR (an OR followed by a bitwise complement),
+ *         false to emit a plain OR
+ *
+ * The extension flags set on the output register are derived from the
+ * flags currently attached to the two input registers.
+ */
+static void rec_special_or_nor(struct lightrec_cstate *state,
+                              const struct block *block, u16 offset, bool nor)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
+
+       jit_note(__FILE__, __LINE__);
+
+       /* No extension is requested on the operands (flags == 0); the flags
+        * they already carry are queried right below. */
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
+
+       flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+       flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
+
+       /* or: Z(rd) = Z(rs) & Z(rt)
+        * nor: Z(rd) = 0 */
+       if (!nor)
+               flags_rd = REG_ZEXT & flags_rs & flags_rt;
+
+       /* E(rd) = E(rs) & E(rt)
+        *
+        * Two further terms used to be written here as
+        * "flags & (REG_EXT | REG_ZEXT) == REG_EXT". Since "==" binds tighter
+        * than "&", each one was "flags & <constant comparison>", which is
+        * always zero as long as REG_ZEXT is a bit distinct from REG_EXT.
+        * They are removed instead of re-parenthesized: a register without
+        * REG_ZEXT is not known to be negative, so OR-ing it with a register
+        * that is not sign-extended does not guarantee a sign-extended
+        * result. */
+       if (REG_EXT & flags_rs & flags_rt)
+               flags_rd |= REG_EXT;
+
+       lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+       jit_orr(rd, rs, rt);
+
+       /* NOR is emitted as OR followed by a complement of the result. */
+       if (nor)
+               jit_comr(rd, rd);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, rd);
+}
+
+static void rec_special_OR(struct lightrec_cstate *state,
+                          const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_orr, false);
+       rec_special_or_nor(state, block, offset, false);
 }
 
-static void rec_special_XOR(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_NOR(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_xorr, false);
+       rec_special_or_nor(state, block, offset, true);
 }
 
-static void rec_special_NOR(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_XOR(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rd;
+       u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
 
-       jit_name(__func__);
-       rec_alu_special(block, op, jit_code_orr, false);
-       rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+       _jit_name(block->_jit, __func__);
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
 
-       jit_comr(rd, rd);
+       flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+       flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
 
+       /* Z(rd) = Z(rs) & Z(rt) */
+       flags_rd = REG_ZEXT & flags_rs & flags_rt;
+
+       /* E(rd) = E(rs) & E(rt) */
+       flags_rd |= REG_EXT & flags_rs & flags_rt;
+
+       lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+       jit_xorr(rd, rs, rt);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_SLTU(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_SLTU(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_ltr_u, true);
+       rec_alu_special(state, block, offset, jit_code_ltr_u, true);
 }
 
-static void rec_special_SLT(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_SLT(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_special(block, op, jit_code_ltr, true);
+       rec_alu_special(state, block, offset, jit_code_ltr, true);
 }
 
-static void rec_special_SLLV(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_SLLV(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shiftv(block, op, jit_code_lshr);
+       rec_alu_shiftv(state, block, offset, jit_code_lshr);
 }
 
-static void rec_special_SRLV(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_SRLV(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shiftv(block, op, jit_code_rshr_u);
+       rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
 }
 
-static void rec_special_SRAV(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_SRAV(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shiftv(block, op, jit_code_rshr);
+       rec_alu_shiftv(state, block, offset, jit_code_rshr);
 }
 
-static void rec_alu_shift(const struct block *block,
-                         const struct opcode *op, jit_code_t code)
+static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
+                         u16 offset, jit_code_t code)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rd, rt;
+       u8 rd, rt, flags = 0;
 
        jit_note(__FILE__, __LINE__);
 
-       if (code == jit_code_rshi) {
-               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-               rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
-       } else {
-               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
-               rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
-       }
+       if (code == jit_code_rshi)
+               flags = REG_EXT;
+       else if (code == jit_code_rshi_u)
+               flags = REG_ZEXT;
 
-#if __WORDSIZE == 64
-       if (code == jit_code_rshi_u) {
-               jit_extr_ui(rd, rt);
-               jit_new_node_www(code, rd, rd, op->r.imm);
-       }
-#endif
-       if (__WORDSIZE == 32 || code != jit_code_rshi_u)
-               jit_new_node_www(code, rd, rt, op->r.imm);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+
+       /* Input reg is zero-extended, if we SRL at least by one bit, we know
+        * the output reg will be both zero-extended and sign-extended. */
+       if (code == jit_code_rshi_u && c.r.imm)
+               flags |= REG_EXT;
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
+
+       jit_new_node_www(code, rd, rt, c.r.imm);
 
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_SLL(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_SLL(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shift(block, op, jit_code_lshi);
+       rec_alu_shift(state, block, offset, jit_code_lshi);
 }
 
-static void rec_special_SRL(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_SRL(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shift(block, op, jit_code_rshi_u);
+       rec_alu_shift(state, block, offset, jit_code_rshi_u);
 }
 
-static void rec_special_SRA(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_SRA(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_shift(block, op, jit_code_rshi);
+       rec_alu_shift(state, block, offset, jit_code_rshi);
 }
 
-static void rec_alu_mult(const struct block *block,
-                        const struct opcode *op, bool is_signed)
+static void rec_alu_mult(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset, bool is_signed)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       u16 flags = block->opcode_list[offset].flags;
+       u8 reg_lo = get_mult_div_lo(c);
+       u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
-       u8 lo, hi, rs, rt;
+       u8 lo, hi, rs, rt, rflags = 0;
 
        jit_note(__FILE__, __LINE__);
 
-       lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
-       if (!(op->flags & LIGHTREC_MULT32))
-               hi = lightrec_alloc_reg_out_ext(reg_cache, _jit, REG_HI);
-       else if (__WORDSIZE == 64)
-               hi = lightrec_alloc_reg_temp(reg_cache, _jit);
-
-       if (__WORDSIZE == 32 || !is_signed) {
-               rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
-               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+       if (is_signed)
+               rflags = REG_EXT;
+       else
+               rflags = REG_ZEXT;
+
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
+
+       if (!(flags & LIGHTREC_NO_LO))
+               lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+       else if (__WORDSIZE == 32)
+               lo = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       if (!(flags & LIGHTREC_NO_HI))
+               hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
+
+       if (__WORDSIZE == 32) {
+               /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
+                * operation if the MULT was detected as 32-bit only. */
+               if (!(flags & LIGHTREC_NO_HI)) {
+                       if (is_signed)
+                               jit_qmulr(lo, hi, rs, rt);
+                       else
+                               jit_qmulr_u(lo, hi, rs, rt);
+               } else {
+                       jit_mulr(lo, rs, rt);
+               }
        } else {
-               rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
-               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-       }
+               /* On 64-bit systems, do a 64*64->64 bit operation. */
+               if (flags & LIGHTREC_NO_LO) {
+                       jit_mulr(hi, rs, rt);
+                       jit_rshi(hi, hi, 32);
+               } else {
+                       jit_mulr(lo, rs, rt);
 
-#if __WORDSIZE == 32
-       /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
-        * operation if the MULT was detected a 32-bit only. */
-       if (!(op->flags & LIGHTREC_MULT32)) {
-               if (is_signed)
-                       jit_qmulr(lo, hi, rs, rt);
-               else
-                       jit_qmulr_u(lo, hi, rs, rt);
-       } else {
-               jit_mulr(lo, rs, rt);
-       }
-#else
-       /* On 64-bit systems, do a 64*64->64 bit operation.
-        * The input registers must be 32 bits, so we first sign-extend (if
-        * mult) or clear (if multu) the input registers. */
-       if (is_signed) {
-               jit_mulr(lo, rs, rt);
-       } else {
-               jit_extr_ui(lo, rt);
-               jit_extr_ui(hi, rs);
-               jit_mulr(lo, hi, lo);
+                       /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
+                       if (!(flags & LIGHTREC_NO_HI))
+                               jit_rshi(hi, lo, 32);
+               }
        }
 
-       /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
-       if (!(op->flags & LIGHTREC_MULT32))
-               jit_rshi(hi, lo, 32);
-#endif
-
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
-       lightrec_free_reg(reg_cache, lo);
-       if (__WORDSIZE == 64 || !(op->flags & LIGHTREC_MULT32))
+       if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32)
+               lightrec_free_reg(reg_cache, lo);
+       if (!(flags & LIGHTREC_NO_HI))
                lightrec_free_reg(reg_cache, hi);
 }
 
-static void rec_alu_div(const struct block *block,
-                       const struct opcode *op, bool is_signed)
+static void rec_alu_div(struct lightrec_cstate *state,
+                       const struct block *block, u16 offset, bool is_signed)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       u16 flags = block->opcode_list[offset].flags;
+       bool no_check = flags & LIGHTREC_NO_DIV_CHECK;
+       u8 reg_lo = get_mult_div_lo(c);
+       u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        jit_node_t *branch, *to_end;
-       u8 lo, hi, rs, rt;
+       u8 lo, hi, rs, rt, rflags = 0;
 
        jit_note(__FILE__, __LINE__);
-       lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
-       hi = lightrec_alloc_reg_out(reg_cache, _jit, REG_HI);
 
-       if (__WORDSIZE == 32 || !is_signed) {
-               rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
-               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
-       } else {
-               rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
-               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-       }
+       if (is_signed)
+               rflags = REG_EXT;
+       else
+               rflags = REG_ZEXT;
+
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
+
+       if (!(flags & LIGHTREC_NO_LO))
+               lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+
+       if (!(flags & LIGHTREC_NO_HI))
+               hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
 
        /* Jump to special handler if dividing by zero  */
-       branch = jit_beqi(rt, 0);
+       if (!no_check)
+               branch = jit_beqi(rt, 0);
 
-#if __WORDSIZE == 32
-       if (is_signed)
-               jit_qdivr(lo, hi, rs, rt);
-       else
-               jit_qdivr_u(lo, hi, rs, rt);
-#else
-       /* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend
-        * (if div) or clear (if divu) the input registers. */
-       if (is_signed) {
-               jit_qdivr(lo, hi, rs, rt);
+       if (flags & LIGHTREC_NO_LO) {
+               if (is_signed)
+                       jit_remr(hi, rs, rt);
+               else
+                       jit_remr_u(hi, rs, rt);
+       } else if (flags & LIGHTREC_NO_HI) {
+               if (is_signed)
+                       jit_divr(lo, rs, rt);
+               else
+                       jit_divr_u(lo, rs, rt);
        } else {
-               jit_extr_ui(lo, rt);
-               jit_extr_ui(hi, rs);
-               jit_qdivr_u(lo, hi, hi, lo);
+               if (is_signed)
+                       jit_qdivr(lo, hi, rs, rt);
+               else
+                       jit_qdivr_u(lo, hi, rs, rt);
        }
-#endif
 
-       /* Jump above the div-by-zero handler */
-       to_end = jit_jmpi();
+       if (!no_check) {
+               lightrec_regcache_mark_live(reg_cache, _jit);
 
-       jit_patch(branch);
+               /* Jump above the div-by-zero handler */
+               to_end = jit_jmpi();
 
-       if (is_signed) {
-               jit_lti(lo, rs, 0);
-               jit_lshi(lo, lo, 1);
-               jit_subi(lo, lo, 1);
-       } else {
-               jit_movi(lo, 0xffffffff);
-       }
+               jit_patch(branch);
+
+               if (!(flags & LIGHTREC_NO_LO)) {
+                       if (is_signed) {
+                               jit_lti(lo, rs, 0);
+                               jit_lshi(lo, lo, 1);
+                               jit_subi(lo, lo, 1);
+                       } else {
+                               jit_movi(lo, 0xffffffff);
+                       }
+               }
 
-       jit_movr(hi, rs);
+               if (!(flags & LIGHTREC_NO_HI))
+                       jit_movr(hi, rs);
 
-       jit_patch(to_end);
+               jit_patch(to_end);
+       }
 
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
-       lightrec_free_reg(reg_cache, lo);
-       lightrec_free_reg(reg_cache, hi);
+
+       if (!(flags & LIGHTREC_NO_LO))
+               lightrec_free_reg(reg_cache, lo);
+
+       if (!(flags & LIGHTREC_NO_HI))
+               lightrec_free_reg(reg_cache, hi);
 }
 
-static void rec_special_MULT(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_MULT(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_mult(block, op, true);
+       rec_alu_mult(state, block, offset, true);
 }
 
-static void rec_special_MULTU(const struct block *block,
-                             const struct opcode *op, u32 pc)
+static void rec_special_MULTU(struct lightrec_cstate *state,
+                             const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_mult(block, op, false);
+       rec_alu_mult(state, block, offset, false);
 }
 
-static void rec_special_DIV(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_special_DIV(struct lightrec_cstate *state,
+                           const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_div(block, op, true);
+       rec_alu_div(state, block, offset, true);
 }
 
-static void rec_special_DIVU(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_DIVU(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_div(block, op, false);
+       rec_alu_div(state, block, offset, false);
 }
 
-static void rec_alu_mv_lo_hi(const struct block *block, u8 dst, u8 src)
+static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
+                            const struct block *block, u8 dst, u8 src)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
 
        jit_note(__FILE__, __LINE__);
-       src = lightrec_alloc_reg_in(reg_cache, _jit, src);
-       dst = lightrec_alloc_reg_out_ext(reg_cache, _jit, dst);
+       src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0);
+       dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT);
 
-#if __WORDSIZE == 32
-       jit_movr(dst, src);
-#else
        jit_extr_i(dst, src);
-#endif
 
        lightrec_free_reg(reg_cache, src);
        lightrec_free_reg(reg_cache, dst);
 }
 
-static void rec_special_MFHI(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_MFHI(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_alu_mv_lo_hi(block, op->r.rd, REG_HI);
+       rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI);
 }
 
-static void rec_special_MTHI(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_MTHI(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_alu_mv_lo_hi(block, REG_HI, op->r.rs);
+       rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs);
 }
 
-static void rec_special_MFLO(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_MFLO(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_alu_mv_lo_hi(block, op->r.rd, REG_LO);
+       rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO);
 }
 
-static void rec_special_MTLO(const struct block *block,
-                            const struct opcode *op, u32 pc)
+static void rec_special_MTLO(struct lightrec_cstate *state,
+                            const struct block *block, u16 offset)
 {
+       union code c = block->opcode_list[offset].c;
+
        _jit_name(block->_jit, __func__);
-       rec_alu_mv_lo_hi(block, REG_LO, op->r.rs);
+       rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
 }
 
-static void rec_io(const struct block *block, const struct opcode *op,
-                  bool load_rt, bool read_rt)
+static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block,
+                             u32 arg, bool with_arg, enum c_wrappers wrapper)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
-       bool is_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
-       u32 offset;
        u8 tmp, tmp2, tmp3;
 
-       jit_note(__FILE__, __LINE__);
+       if (with_arg)
+               tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
+       tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
-       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+       jit_ldxi(tmp, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, c_wrapper));
+       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, c_wrappers[wrapper]));
+       if (with_arg)
+               jit_movi(tmp3, arg);
 
-       if (is_tagged) {
-               offset = offsetof(struct lightrec_state, rw_func);
-       } else {
-               tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
-               offset = offsetof(struct lightrec_state, rw_generic_func);
-       }
+       jit_callr(tmp);
 
-       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE, offset);
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp2);
+       if (with_arg)
+               lightrec_free_reg(reg_cache, tmp3);
+       lightrec_regcache_mark_live(reg_cache, _jit);
+}
+
+static void rec_io(struct lightrec_cstate *state,
+                  const struct block *block, u16 offset,
+                  bool load_rt, bool read_rt)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       union code c = block->opcode_list[offset].c;
+       u16 flags = block->opcode_list[offset].flags;
+       bool is_tagged = flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+       u32 lut_entry;
 
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
+       jit_note(__FILE__, __LINE__);
 
-       if (read_rt && likely(op->i.rt))
-               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
+
+       if (read_rt && likely(c.i.rt))
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
        else if (load_rt)
-               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
 
        if (is_tagged) {
-               jit_movi(tmp, op->opcode);
+               call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW);
        } else {
-               jit_movi(tmp, (uintptr_t)op);
-               jit_movi(tmp3, (uintptr_t)block);
+               lut_entry = lightrec_get_lut_entry(block);
+               call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
+                                 true, C_WRAPPER_RW_GENERIC);
        }
-
-       jit_callr(tmp2);
-
-       lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
-       if (!is_tagged)
-               lightrec_free_reg(reg_cache, tmp3);
-       lightrec_regcache_mark_live(reg_cache, _jit);
 }
 
-static void rec_store_direct_no_invalidate(const struct block *block,
-                                          const struct opcode *op,
-                                          jit_code_t code)
+static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
+                                          const struct block *block,
+                                          u16 offset, jit_code_t code)
 {
-       struct lightrec_state *state = block->state;
-       struct regcache *reg_cache = state->reg_cache;
+       struct lightrec_state *state = cstate->state;
+       struct regcache *reg_cache = cstate->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        jit_node_t *to_not_ram, *to_end;
        u8 tmp, tmp2, rs, rt;
        s16 imm;
 
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
-       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       if (state->offset_ram || state->offset_scratch)
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
 
        /* Convert to KUNSEG and avoid RAM mirrors */
        if (state->mirrors_mapped) {
-               imm = (s16)op->i.imm;
+               imm = (s16)c.i.imm;
                jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
-       } else if (op->i.imm) {
+       } else if (c.i.imm) {
                imm = 0;
-               jit_addi(tmp, rs, (s16)op->i.imm);
+               jit_addi(tmp, rs, (s16)c.i.imm);
                jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
        } else {
                imm = 0;
@@ -871,6 +1074,8 @@ static void rec_store_direct_no_invalidate(const struct block *block,
        if (state->offset_ram != state->offset_scratch) {
                to_not_ram = jit_bmsi(tmp, BIT(28));
 
+               lightrec_regcache_mark_live(reg_cache, _jit);
+
                jit_movi(tmp2, state->offset_ram);
 
                to_end = jit_jmpi();
@@ -882,51 +1087,54 @@ static void rec_store_direct_no_invalidate(const struct block *block,
                jit_movi(tmp2, state->offset_ram);
        }
 
-       if (state->offset_ram || state->offset_scratch)
+       if (state->offset_ram || state->offset_scratch) {
                jit_addr(tmp, tmp, tmp2);
+               lightrec_free_reg(reg_cache, tmp2);
+       }
 
-       lightrec_free_reg(reg_cache, tmp2);
-
-       rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
        jit_new_node_www(code, imm, tmp, rt);
 
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp);
 }
 
-static void rec_store_direct(const struct block *block, const struct opcode *op,
-                            jit_code_t code)
+static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
+                            u16 offset, jit_code_t code)
 {
-       struct lightrec_state *state = block->state;
-       struct regcache *reg_cache = state->reg_cache;
+       struct lightrec_state *state = cstate->state;
+       u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
+       struct regcache *reg_cache = cstate->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       jit_node_t *to_not_ram, *to_end = 0;
+       jit_node_t *to_not_ram, *to_end;
        u8 tmp, tmp2, tmp3, rs, rt;
 
        jit_note(__FILE__, __LINE__);
 
-       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-       tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0);
+       tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
 
        /* Convert to KUNSEG and avoid RAM mirrors */
-       if (op->i.imm) {
-               jit_addi(tmp2, rs, (s16)op->i.imm);
-               jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1));
+       if (c.i.imm) {
+               jit_addi(tmp2, rs, (s16)c.i.imm);
+               jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1));
        } else {
-               jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1));
+               jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1));
        }
 
        lightrec_free_reg(reg_cache, rs);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
-       to_not_ram = jit_bgti(tmp2, RAM_SIZE);
+       to_not_ram = jit_bgti(tmp2, ram_size);
+
+       lightrec_regcache_mark_live(reg_cache, _jit);
 
        /* Compute the offset to the code LUT */
        jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
-#if __WORDSIZE == 64
-       jit_lshi(tmp, tmp, 1);
-#endif
+       if (__WORDSIZE == 64)
+               jit_lshi(tmp, tmp, 1);
        jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
 
        /* Write NULL to the code LUT to invalidate any block that's there */
@@ -952,92 +1160,105 @@ static void rec_store_direct(const struct block *block, const struct opcode *op,
        lightrec_free_reg(reg_cache, tmp);
        lightrec_free_reg(reg_cache, tmp3);
 
-       rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
        jit_new_node_www(code, 0, tmp2, rt);
 
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp2);
 }
 
-static void rec_store(const struct block *block, const struct opcode *op,
-                    jit_code_t code)
+static void rec_store(struct lightrec_cstate *state,
+                     const struct block *block, u16 offset, jit_code_t code)
 {
-       if (op->flags & LIGHTREC_NO_INVALIDATE) {
-               rec_store_direct_no_invalidate(block, op, code);
-       } else if (op->flags & LIGHTREC_DIRECT_IO) {
-               if (block->state->invalidate_from_dma_only)
-                       rec_store_direct_no_invalidate(block, op, code);
+       u16 flags = block->opcode_list[offset].flags;
+
+       if (flags & LIGHTREC_NO_INVALIDATE) {
+               rec_store_direct_no_invalidate(state, block, offset, code);
+       } else if (flags & LIGHTREC_DIRECT_IO) {
+               if (state->state->invalidate_from_dma_only)
+                       rec_store_direct_no_invalidate(state, block, offset, code);
                else
-                       rec_store_direct(block, op, code);
+                       rec_store_direct(state, block, offset, code);
        } else {
-               rec_io(block, op, true, false);
+               rec_io(state, block, offset, true, false);
        }
 }
 
-static void rec_SB(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SB(struct lightrec_cstate *state,
+                  const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_store(block, op, jit_code_stxi_c);
+       rec_store(state, block, offset, jit_code_stxi_c);
 }
 
-static void rec_SH(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SH(struct lightrec_cstate *state,
+                  const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_store(block, op, jit_code_stxi_s);
+       rec_store(state, block, offset, jit_code_stxi_s);
 }
 
-static void rec_SW(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SW(struct lightrec_cstate *state,
+                  const struct block *block, u16 offset)
+
 {
        _jit_name(block->_jit, __func__);
-       rec_store(block, op, jit_code_stxi_i);
+       rec_store(state, block, offset, jit_code_stxi_i);
 }
 
-static void rec_SWL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWL(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, true, false);
+       rec_io(state, block, offset, true, false);
 }
 
-static void rec_SWR(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWR(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, true, false);
+       rec_io(state, block, offset, true, false);
 }
 
-static void rec_SWC2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWC2(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, false, false);
+       rec_io(state, block, offset, false, false);
 }
 
-static void rec_load_direct(const struct block *block, const struct opcode *op,
-                           jit_code_t code)
+static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block,
+                           u16 offset, jit_code_t code, bool is_unsigned)
 {
-       struct lightrec_state *state = block->state;
-       struct regcache *reg_cache = state->reg_cache;
+       struct lightrec_state *state = cstate->state;
+       struct regcache *reg_cache = cstate->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       jit_node_t *to_not_ram, *to_not_bios = 0, *to_end, *to_end2;
-       u8 tmp, rs, rt, addr_reg;
+       jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
+       u8 tmp, rs, rt, addr_reg, flags = REG_EXT;
        s16 imm;
 
-       if (!op->i.rt)
+       if (!c.i.rt)
                return;
 
+       if (is_unsigned)
+               flags |= REG_ZEXT;
+
        jit_note(__FILE__, __LINE__);
-       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
-       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
 
        if ((state->offset_ram == state->offset_bios &&
            state->offset_ram == state->offset_scratch &&
-           state->mirrors_mapped) || !op->i.imm) {
+           state->mirrors_mapped) || !c.i.imm) {
                addr_reg = rs;
-               imm = (s16)op->i.imm;
+               imm = (s16)c.i.imm;
        } else {
-               jit_addi(rt, rs, (s16)op->i.imm);
+               jit_addi(rt, rs, (s16)c.i.imm);
                addr_reg = rt;
                imm = 0;
 
-               if (op->i.rs != op->i.rt)
+               if (c.i.rs != c.i.rt)
                        lightrec_free_reg(reg_cache, rs);
        }
 
@@ -1059,6 +1280,8 @@ static void rec_load_direct(const struct block *block, const struct opcode *op,
        } else {
                to_not_ram = jit_bmsi(addr_reg, BIT(28));
 
+               lightrec_regcache_mark_live(reg_cache, _jit);
+
                /* Convert to KUNSEG and avoid RAM mirrors */
                jit_andi(rt, addr_reg, RAM_SIZE - 1);
 
@@ -1104,334 +1327,397 @@ static void rec_load_direct(const struct block *block, const struct opcode *op,
        lightrec_free_reg(reg_cache, tmp);
 }
 
-static void rec_load(const struct block *block, const struct opcode *op,
-                   jit_code_t code)
+static void rec_load(struct lightrec_cstate *state, const struct block *block,
+                    u16 offset, jit_code_t code, bool is_unsigned)
 {
-       if (op->flags & LIGHTREC_DIRECT_IO)
-               rec_load_direct(block, op, code);
+       u16 flags = block->opcode_list[offset].flags;
+
+       if (flags & LIGHTREC_DIRECT_IO)
+               rec_load_direct(state, block, offset, code, is_unsigned);
        else
-               rec_io(block, op, false, true);
+               rec_io(state, block, offset, false, true);
 }
 
-static void rec_LB(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(block, op, jit_code_ldxi_c);
+       rec_load(state, block, offset, jit_code_ldxi_c, false);
 }
 
-static void rec_LBU(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(block, op, jit_code_ldxi_uc);
+       rec_load(state, block, offset, jit_code_ldxi_uc, true);
 }
 
-static void rec_LH(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(block, op, jit_code_ldxi_s);
+       rec_load(state, block, offset, jit_code_ldxi_s, false);
 }
 
-static void rec_LHU(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(block, op, jit_code_ldxi_us);
+       rec_load(state, block, offset, jit_code_ldxi_us, true);
 }
 
-static void rec_LWL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, true, true);
+       rec_io(state, block, offset, true, true);
 }
 
-static void rec_LWR(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, true, true);
+       rec_io(state, block, offset, true, true);
 }
 
-static void rec_LW(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(block, op, jit_code_ldxi_i);
+       rec_load(state, block, offset, jit_code_ldxi_i, false);
 }
 
-static void rec_LWC2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_io(block, op, false, false);
+       rec_io(state, block, offset, false, false);
 }
 
-static void rec_break_syscall(const struct block *block,
-                             const struct opcode *op, u32 pc, bool is_break)
+static void rec_break_syscall(struct lightrec_cstate *state,
+                             const struct block *block, u16 offset, bool is_break)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
-       jit_state_t *_jit = block->_jit;
-       u32 offset;
-       u8 tmp;
-
-       jit_note(__FILE__, __LINE__);
+       _jit_note(block->_jit, __FILE__, __LINE__);
 
        if (is_break)
-               offset = offsetof(struct lightrec_state, break_func);
+               call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK);
        else
-               offset = offsetof(struct lightrec_state, syscall_func);
-
-       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
-       jit_ldxi(tmp, LIGHTREC_REG_STATE, offset);
-       jit_callr(tmp);
-       lightrec_free_reg(reg_cache, tmp);
-
-       lightrec_regcache_mark_live(reg_cache, _jit);
+               call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL);
 
        /* TODO: the return address should be "pc - 4" if we're a delay slot */
-       lightrec_emit_end_of_block(block, op, pc, -1, pc, 31, 0, true);
+       lightrec_emit_end_of_block(state, block, offset, -1,
+                                  get_ds_pc(block, offset, 0),
+                                  31, 0, true);
 }
 
-static void rec_special_SYSCALL(const struct block *block,
-                               const struct opcode *op, u32 pc)
+static void rec_special_SYSCALL(struct lightrec_cstate *state,
+                               const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_break_syscall(block, op, pc, false);
+       rec_break_syscall(state, block, offset, false);
 }
 
-static void rec_special_BREAK(const struct block *block,
-                             const struct opcode *op, u32 pc)
+static void rec_special_BREAK(struct lightrec_cstate *state,
+                             const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_break_syscall(block, op, pc, true);
+       rec_break_syscall(state, block, offset, true);
 }
 
-static void rec_mfc(const struct block *block, const struct opcode *op)
+static void rec_mfc(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
-       u8 tmp, tmp2;
-       struct lightrec_state *state = block->state;
        struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
 
        jit_note(__FILE__, __LINE__);
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
 
-       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
-       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC);
+}
 
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, mfc_func));
+static void rec_mtc(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
 
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+       jit_note(__FILE__, __LINE__);
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
 
-       jit_movi(tmp, op->opcode);
-       jit_callr(tmp2);
-       lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
+       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
 
-       lightrec_regcache_mark_live(reg_cache, _jit);
+       if (c.i.op == OP_CP0 &&
+           !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+           (c.r.rd == 12 || c.r.rd == 13))
+               lightrec_emit_end_of_block(state, block, offset, -1,
+                                          get_ds_pc(block, offset, 1),
+                                          0, 0, true);
 }
 
-static void rec_mtc(const struct block *block, const struct opcode *op, u32 pc)
+static void
+rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
-       struct lightrec_state *state = block->state;
        struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 tmp, tmp2;
+       u8 rt;
 
        jit_note(__FILE__, __LINE__);
 
-       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
-       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, mtc_func));
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
 
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+       jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+                  offsetof(struct lightrec_state, regs.cp0[c.r.rd]));
 
-       jit_movi(tmp, op->opcode);
-       jit_callr(tmp2);
-       lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
+       lightrec_free_reg(reg_cache, rt);
+}
 
-       lightrec_regcache_mark_live(reg_cache, _jit);
+static bool block_in_bios(const struct lightrec_cstate *state,
+                         const struct block *block)
+{
+       const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS];
+       u32 pc = kunseg(block->pc);
 
-       if (op->i.op == OP_CP0 && !(op->flags & LIGHTREC_NO_DS) &&
-           (op->r.rd == 12 || op->r.rd == 13))
-               lightrec_emit_end_of_block(block, op, pc, -1, pc + 4, 0, 0, true);
+       return pc >= bios->pc && pc < bios->pc + bios->length;
 }
 
-static void rec_cp0_MFC0(const struct block *block,
-                        const struct opcode *op, u32 pc)
+/* Emit inline code for a write to coprocessor-0 register c.r.rd.
+ * Writes to the read-only registers emit nothing, and a Status (12) write
+ * coming from a block located in the BIOS map is handed to rec_mtc(). */
+static void
+rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
-       _jit_name(block->_jit, __func__);
-       rec_mfc(block, op);
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rt, tmp, tmp2, status;
+
+       jit_note(__FILE__, __LINE__);
+
+       switch(c.r.rd) {
+       case 1:
+       case 4:
+       case 8:
+       case 14:
+       case 15:
+               /* Those registers are read-only */
+               return;
+       default:
+               break;
+       }
+
+       if (block_in_bios(state, block) && c.r.rd == 12) {
+               /* If we are running code from the BIOS, handle writes to the
+                * Status register in C. BIOS code may toggle bit 16 which will
+                * map/unmap the RAM, while game code cannot do that. */
+               rec_mtc(state, block, offset);
+               return;
+       }
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+
+       /* Every register except Cause (13) is stored as-is; Cause only has
+        * its 0x0300 bits replaced, which is done further down. */
+       if (c.r.rd != 13) {
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+       }
+
+       /* tmp, tmp2 and status are only assigned (and only read) on the
+        * Status/Cause paths below. Start by loading the current Cause. */
+       if (c.r.rd == 12 || c.r.rd == 13) {
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp0[13]));
+       }
+
+       if (c.r.rd == 12) {
+               status = rt;
+       } else if (c.r.rd == 13) {
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
+               jit_andi(tmp2, rt, 0x0300);
+               /* ori followed by xori with the same mask clears those bits */
+               jit_ori(tmp, tmp, 0x0300);
+               jit_xori(tmp, tmp, 0x0300);
+               jit_orr(tmp, tmp, tmp2);
+               /* tmp2 is reused to hold the current Status value */
+               jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp0[12]));
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]),
+                          LIGHTREC_REG_STATE, tmp);
+               status = tmp2;
+       }
+
+       if (c.r.rd == 12 || c.r.rd == 13) {
+               /* Exit dynarec in case there's a software interrupt.
+                * exit_flags = !!(status & tmp & 0x0300) & status; */
+               /* At this point tmp holds the (possibly updated) Cause value */
+               jit_andr(tmp, tmp, status);
+               jit_andi(tmp, tmp, 0x0300);
+               jit_nei(tmp, tmp, 0);
+               jit_andr(tmp, tmp, status);
+               jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+                          LIGHTREC_REG_STATE, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+       }
+
+       if (c.r.rd == 13)
+               lightrec_free_reg(reg_cache, tmp2);
+
+       lightrec_free_reg(reg_cache, rt);
+
+       /* A Status/Cause write ends the block after this opcode, unless the
+        * opcode carries the LIGHTREC_NO_DS flag. */
+       if (!(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+           (c.r.rd == 12 || c.r.rd == 13))
+               lightrec_emit_eob(state, block, offset + 1, true);
 }
 
-static void rec_cp0_CFC0(const struct block *block,
-                        const struct opcode *op, u32 pc)
+static void rec_cp0_MFC0(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mfc(block, op);
+       rec_mfc0(state, block, offset);
 }
 
-static void rec_cp0_MTC0(const struct block *block,
-                        const struct opcode *op, u32 pc)
+static void rec_cp0_CFC0(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mtc(block, op, pc);
+       rec_mfc0(state, block, offset);
 }
 
-static void rec_cp0_CTC0(const struct block *block,
-                        const struct opcode *op, u32 pc)
+static void rec_cp0_MTC0(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mtc(block, op, pc);
+       rec_mtc0(state, block, offset);
 }
 
-static void rec_cp2_basic_MFC2(const struct block *block,
-                              const struct opcode *op, u32 pc)
+static void rec_cp0_CTC0(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mfc(block, op);
+       rec_mtc0(state, block, offset);
 }
 
-static void rec_cp2_basic_CFC2(const struct block *block,
-                              const struct opcode *op, u32 pc)
+static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
+                              const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mfc(block, op);
+       rec_mfc(state, block, offset);
 }
 
-static void rec_cp2_basic_MTC2(const struct block *block,
-                              const struct opcode *op, u32 pc)
+static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
+                              const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mtc(block, op, pc);
+       rec_mfc(state, block, offset);
 }
 
-static void rec_cp2_basic_CTC2(const struct block *block,
-                              const struct opcode *op, u32 pc)
+static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
+                              const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_mtc(block, op, pc);
+       rec_mtc(state, block, offset);
 }
 
-static void rec_cp0_RFE(const struct block *block,
-                       const struct opcode *op, u32 pc)
+static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
+                              const struct block *block, u16 offset)
 {
-       struct lightrec_state *state = block->state;
-       jit_state_t *_jit = block->_jit;
-       u8 tmp;
-
-       jit_name(__func__);
-       jit_note(__FILE__, __LINE__);
-
-       tmp = lightrec_alloc_reg_temp(state->reg_cache, _jit);
-       jit_ldxi(tmp, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, rfe_func));
-       jit_callr(tmp);
-       lightrec_free_reg(state->reg_cache, tmp);
-
-       lightrec_regcache_mark_live(state->reg_cache, _jit);
+       _jit_name(block->_jit, __func__);
+       rec_mtc(state, block, offset);
 }
 
-static void rec_CP(const struct block *block, const struct opcode *op, u32 pc)
+/* Emit inline code for RFE: the low four bits of Status (cp0[12]) are
+ * replaced by bits 2..5 of its previous value, Status is stored back, and
+ * exit_flags is recomputed from the new Status and Cause (cp0[13]). */
+static void rec_cp0_RFE(struct lightrec_cstate *state,
+                       const struct block *block, u16 offset)
 {
-       struct regcache *reg_cache = block->state->reg_cache;
+       struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
-       u8 tmp, tmp2;
+       u8 status, tmp;
 
        jit_name(__func__);
        jit_note(__FILE__, __LINE__);
 
-       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
-       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+       status = lightrec_alloc_reg_temp(reg_cache, _jit);
+       jit_ldxi_i(status, LIGHTREC_REG_STATE,
+                  offsetof(struct lightrec_state, regs.cp0[12]));
 
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, cp_func));
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
-       jit_movi(tmp, op->opcode);
-       jit_callr(tmp2);
+       /* status = ((status >> 2) & 0xf) | (status & ~0xf); */
+       jit_rshi(tmp, status, 2);
+       jit_andi(tmp, tmp, 0xf);
+       jit_andi(status, status, ~0xful);
+       jit_orr(status, status, tmp);
+
+       /* tmp is reused from here on to hold the Cause value */
+       jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+                  offsetof(struct lightrec_state, regs.cp0[13]));
+       jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]),
+                  LIGHTREC_REG_STATE, status);
+
+       /* Exit dynarec in case there's a software interrupt.
+        * exit_flags = !!(status & cause & 0x0300) & status; */
+       jit_andr(tmp, tmp, status);
+       jit_andi(tmp, tmp, 0x0300);
+       jit_nei(tmp, tmp, 0);
+       jit_andr(tmp, tmp, status);
+       jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+                  LIGHTREC_REG_STATE, tmp);
+
+       lightrec_free_reg(reg_cache, status);
        lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
-
-       lightrec_regcache_mark_live(reg_cache, _jit);
 }
 
-static void rec_meta_unload(const struct block *block,
-                           const struct opcode *op, u32 pc)
+static void rec_CP(struct lightrec_cstate *state,
+                  const struct block *block, u16 offset)
 {
-       struct lightrec_state *state = block->state;
-       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
 
        jit_name(__func__);
        jit_note(__FILE__, __LINE__);
 
-       pr_debug("Unloading reg %s\n", lightrec_reg_name(op->i.rs));
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
+       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP);
 }
 
-static void rec_meta_BEQZ(const struct block *block,
-                         const struct opcode *op, u32 pc)
+/* Emit code for the MOV meta-opcode: rd receives rs through jit_extr_i(),
+ * or the constant 0 when the source register index is 0. */
+static void rec_meta_MOV(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
-       _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_bnei, 0, false, true);
-}
-
-static void rec_meta_BNEZ(const struct block *block,
-                         const struct opcode *op, u32 pc)
-{
-       _jit_name(block->_jit, __func__);
-       rec_b(block, op, pc, jit_code_beqi, 0, false, true);
-}
-
-static void rec_meta_MOV(const struct block *block,
-                        const struct opcode *op, u32 pc)
-{
-       struct lightrec_state *state = block->state;
        struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        u8 rs, rd;
 
        _jit_name(block->_jit, __func__);
        jit_note(__FILE__, __LINE__);
-       rs = op->r.rs ? lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs) : 0;
-       rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+       /* rs is only allocated, read and freed when c.r.rs is non-zero */
+       if (c.r.rs)
+               rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT);
 
-       if (op->r.rs == 0) {
+       if (c.r.rs == 0)
                jit_movi(rd, 0);
-       } else {
-#if __WORDSIZE == 32
-               jit_movr(rd, rs);
-#else
+       else
                jit_extr_i(rd, rs);
-#endif
-       }
 
-       lightrec_free_reg(state->reg_cache, rs);
-       lightrec_free_reg(state->reg_cache, rd);
+       if (c.r.rs)
+               lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_meta_sync(const struct block *block,
-                         const struct opcode *op, u32 pc)
+static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
+                              const struct block *block,
+                              u16 offset)
 {
-       struct lightrec_state *state = block->state;
-       struct lightrec_branch_target *target;
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
+       u8 rs, rt;
 
-       jit_name(__func__);
+       _jit_name(block->_jit, __func__);
        jit_note(__FILE__, __LINE__);
 
-       jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
-       state->cycles = 0;
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
 
-       lightrec_storeback_regs(state->reg_cache, _jit);
-       lightrec_regcache_reset(state->reg_cache);
+       if (c.i.op == OP_META_EXTC)
+               jit_extr_c(rt, rs);
+       else
+               jit_extr_s(rt, rs);
 
-       pr_debug("Adding branch target at offset 0x%x\n",
-                op->offset << 2);
-       target = &state->targets[state->nb_targets++];
-       target->offset = op->offset;
-       target->label = jit_indirect();
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static const lightrec_rec_func_t rec_standard[64] = {
+       SET_DEFAULT_ELM(rec_standard, unknown_opcode),
        [OP_SPECIAL]            = rec_SPECIAL,
        [OP_REGIMM]             = rec_REGIMM,
        [OP_J]                  = rec_J,
@@ -1465,14 +1751,13 @@ static const lightrec_rec_func_t rec_standard[64] = {
        [OP_LWC2]               = rec_LWC2,
        [OP_SWC2]               = rec_SWC2,
 
-       [OP_META_REG_UNLOAD]    = rec_meta_unload,
-       [OP_META_BEQZ]          = rec_meta_BEQZ,
-       [OP_META_BNEZ]          = rec_meta_BNEZ,
        [OP_META_MOV]           = rec_meta_MOV,
-       [OP_META_SYNC]          = rec_meta_sync,
+       [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
+       [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
 };
 
 static const lightrec_rec_func_t rec_special[64] = {
+       SET_DEFAULT_ELM(rec_special, unknown_opcode),
        [OP_SPECIAL_SLL]        = rec_special_SLL,
        [OP_SPECIAL_SRL]        = rec_special_SRL,
        [OP_SPECIAL_SRA]        = rec_special_SRA,
@@ -1504,6 +1789,7 @@ static const lightrec_rec_func_t rec_special[64] = {
 };
 
 static const lightrec_rec_func_t rec_regimm[64] = {
+       SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
        [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
        [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
        [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
@@ -1511,6 +1797,7 @@ static const lightrec_rec_func_t rec_regimm[64] = {
 };
 
 static const lightrec_rec_func_t rec_cp0[64] = {
+       SET_DEFAULT_ELM(rec_cp0, rec_CP),
        [OP_CP0_MFC0]           = rec_cp0_MFC0,
        [OP_CP0_CFC0]           = rec_cp0_CFC0,
        [OP_CP0_MTC0]           = rec_cp0_MTC0,
@@ -1519,60 +1806,107 @@ static const lightrec_rec_func_t rec_cp0[64] = {
 };
 
 static const lightrec_rec_func_t rec_cp2_basic[64] = {
+       SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
        [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
        [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
        [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
        [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
 };
 
-static void rec_SPECIAL(const struct block *block,
-                       const struct opcode *op, u32 pc)
+static void rec_SPECIAL(struct lightrec_cstate *state,
+                       const struct block *block, u16 offset)
 {
-       lightrec_rec_func_t f = rec_special[op->r.op];
-       if (likely(f))
-               (*f)(block, op, pc);
+       union code c = block->opcode_list[offset].c;
+       lightrec_rec_func_t f = rec_special[c.r.op];
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
+               unknown_opcode(state, block, offset);
        else
-               unknown_opcode(block, op, pc);
+               (*f)(state, block, offset);
 }
 
-static void rec_REGIMM(const struct block *block,
-                      const struct opcode *op, u32 pc)
+static void rec_REGIMM(struct lightrec_cstate *state,
+                      const struct block *block, u16 offset)
 {
-       lightrec_rec_func_t f = rec_regimm[op->r.rt];
-       if (likely(f))
-               (*f)(block, op, pc);
+       union code c = block->opcode_list[offset].c;
+       lightrec_rec_func_t f = rec_regimm[c.r.rt];
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
+               unknown_opcode(state, block, offset);
        else
-               unknown_opcode(block, op, pc);
+               (*f)(state, block, offset);
 }
 
-static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_CP0(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
-       lightrec_rec_func_t f = rec_cp0[op->r.rs];
-       if (likely(f))
-               (*f)(block, op, pc);
+       union code c = block->opcode_list[offset].c;
+       lightrec_rec_func_t f = rec_cp0[c.r.rs];
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
+               rec_CP(state, block, offset);
        else
-               rec_CP(block, op, pc);
+               (*f)(state, block, offset);
 }
 
-static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_CP2(struct lightrec_cstate *state,
+                   const struct block *block, u16 offset)
 {
-       if (op->r.op == OP_CP2_BASIC) {
-               lightrec_rec_func_t f = rec_cp2_basic[op->r.rs];
-               if (likely(f)) {
-                       (*f)(block, op, pc);
+       union code c = block->opcode_list[offset].c;
+
+       if (c.r.op == OP_CP2_BASIC) {
+               lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
+
+               if (HAS_DEFAULT_ELM || likely(f)) {
+                       (*f)(state, block, offset);
                        return;
                }
        }
 
-       rec_CP(block, op, pc);
+       rec_CP(state, block, offset);
 }
 
-void lightrec_rec_opcode(const struct block *block,
-                        const struct opcode *op, u32 pc)
+/* Emit code for the opcode at index "offset" of the block's opcode list.
+ * The LIGHTREC_SYNC flag is handled first, then the opcode is dispatched
+ * on its primary opcode field, then the LIGHTREC_UNLOAD_* flags are
+ * honoured. */
+void lightrec_rec_opcode(struct lightrec_cstate *state,
+                        const struct block *block, u16 offset)
 {
-       lightrec_rec_func_t f = rec_standard[op->i.op];
-       if (likely(f))
-               (*f)(block, op, pc);
-       else
-               unknown_opcode(block, op, pc);
+       struct regcache *reg_cache = state->reg_cache;
+       struct lightrec_branch_target *target;
+       const struct opcode *op = &block->opcode_list[offset];
+       jit_state_t *_jit = block->_jit;
+       lightrec_rec_func_t f;
+
+       /* Flush the pending cycle count, write back and reset the register
+        * cache, then record a branch target label for this offset. */
+       if (op->flags & LIGHTREC_SYNC) {
+               jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
+               state->cycles = 0;
+
+               lightrec_storeback_regs(reg_cache, _jit);
+               lightrec_regcache_reset(reg_cache);
+
+               pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
+               target = &state->targets[state->nb_targets++];
+               target->offset = offset;
+               target->label = jit_indirect();
+       }
+
+       /* An all-zero opcode word is not dispatched at all */
+       if (likely(op->opcode)) {
+               f = rec_standard[op->i.op];
+
+               if (!HAS_DEFAULT_ELM && unlikely(!f))
+                       unknown_opcode(state, block, offset);
+               else
+                       (*f)(state, block, offset);
+       }
+
+       /* Clean the registers flagged for unloading after this opcode */
+       if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) {
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true);
+               pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd));
+       }
+       if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) {
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
+               pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rs));
+       }
+       if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) {
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+               pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt));
+       }
 }
index ec3fc78..b7f54fd 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __EMITTER_H__
 #include "lightrec.h"
 
 struct block;
+struct lightrec_cstate;
 struct opcode;
 
-void lightrec_rec_opcode(const struct block *block,
-                        const struct opcode *op, u32 pc);
-void lightrec_emit_eob(const struct block *block,
-                      const struct opcode *op, u32 pc);
+void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset);
+void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
+                      u16 offset, _Bool after_op);
 
 #endif /* __EMITTER_H__ */
index ff609a4..922f081 100644 (file)
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "disassembler.h"
@@ -39,18 +30,45 @@ struct interpreter {
        struct opcode *op;
        u32 cycles;
        bool delay_slot;
+       u16 offset;
 };
 
+/* Returns get_branch_pc() for the interpreter's current block and opcode
+ * offset, passing 0 as the last argument. */
+static u32 int_get_branch_pc(const struct interpreter *inter)
+{
+       return get_branch_pc(inter->block, inter->offset, 0);
+}
+
+/* Returns get_ds_pc() for the interpreter's current block and opcode
+ * offset; imm is forwarded unchanged. */
+static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm)
+{
+       return get_ds_pc(inter->block, inter->offset, imm);
+}
+
+/* Returns a pointer to the opcode_list entry that follows the interpreter's
+ * current offset. No bounds check is performed here. */
+static inline struct opcode *next_op(const struct interpreter *inter)
+{
+       return &inter->block->opcode_list[inter->offset + 1];
+}
+
 static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
 {
        return (*func)(inter);
 }
 
+/* Runs the handler from int_standard selected by the primary opcode field
+ * (i.op) of the interpreter's current opcode, and returns its result. */
+static inline u32 lightrec_int_op(struct interpreter *inter)
+{
+       return execute(int_standard[inter->op->i.op], inter);
+}
+
 static inline u32 jump_skip(struct interpreter *inter)
 {
-       inter->op = inter->op->next;
+       /* Step to the following opcode; op and offset advance together */
+       inter->op = next_op(inter);
+       inter->offset++;
 
-       return execute(int_standard[inter->op->i.op], inter);
+       /* When the new opcode carries LIGHTREC_SYNC, move the locally
+        * counted cycles into the state's current_cycle and restart at 0 */
+       if (inter->op->flags & LIGHTREC_SYNC) {
+               inter->state->current_cycle += inter->cycles;
+               inter->cycles = 0;
+       }
+
+       return lightrec_int_op(inter);
 }
 
 static inline u32 jump_next(struct interpreter *inter)
@@ -70,7 +88,8 @@ static inline u32 jump_after_branch(struct interpreter *inter)
        if (unlikely(inter->delay_slot))
                return 0;
 
-       inter->op = inter->op->next;
+       inter->op = next_op(inter);
+       inter->offset++;
 
        return jump_skip(inter);
 }
@@ -84,7 +103,7 @@ static void update_cycles_before_branch(struct interpreter *inter)
 
                if (has_delay_slot(inter->op->c) &&
                    !(inter->op->flags & LIGHTREC_NO_DS))
-                       cycles += lightrec_cycles_of_opcode(inter->op->next->c);
+                       cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
 
                inter->cycles += cycles;
                inter->state->current_cycle += inter->cycles;
@@ -101,10 +120,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op)
        case OP_JAL:
                return true;
        case OP_BEQ:
-       case OP_META_BEQZ:
                return reg_cache[op.r.rs] == reg_cache[op.r.rt];
        case OP_BNE:
-       case OP_META_BNEZ:
                return reg_cache[op.r.rs] != reg_cache[op.r.rt];
        case OP_REGIMM:
                switch (op.r.rt) {
@@ -125,8 +142,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op)
 static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 {
        struct lightrec_state *state = inter->state;
-       u32 *reg_cache = state->native_reg_cache;
-       struct opcode new_op, *op = inter->op->next;
+       u32 *reg_cache = state->regs.gpr;
+       struct opcode new_op, *op = next_op(inter);
        union code op_next;
        struct interpreter inter2 = {
                .state = state,
@@ -150,8 +167,8 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                 * but on branch boundaries, we need to adjust the return
                 * address so that the GTE opcode is effectively executed.
                 */
-               cause = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 13);
-               epc = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 14);
+               cause = state->regs.cp0[13];
+               epc = state->regs.cp0[14];
 
                if (!(cause & 0x7c) && epc == pc - 4)
                        pc -= 4;
@@ -223,12 +240,10 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                } else {
                        new_op.c = op_next;
                        new_op.flags = 0;
-                       new_op.offset = 0;
-                       new_op.next = NULL;
                        inter2.op = &new_op;
 
                        /* Execute the first opcode of the next block */
-                       (*int_standard[inter2.op->i.op])(&inter2);
+                       lightrec_int_op(&inter2);
 
                        if (save_rs) {
                                new_rs = reg_cache[op->r.rs];
@@ -238,8 +253,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                        inter->cycles += lightrec_cycles_of_opcode(op_next);
                }
        } else {
-               next_pc = inter->block->pc
-                       + (inter->op->offset + 2) * sizeof(u32);
+               next_pc = int_get_ds_pc(inter, 2);
        }
 
        inter2.block = inter->block;
@@ -250,7 +264,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                new_rt = reg_cache[op->r.rt];
 
        /* Execute delay slot opcode */
-       ds_next_pc = (*int_standard[inter2.op->i.op])(&inter2);
+       ds_next_pc = lightrec_int_op(&inter2);
 
        if (branch_at_addr) {
                if (op_next.i.op == OP_SPECIAL)
@@ -286,8 +300,6 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 
                new_op.c = op_next;
                new_op.flags = 0;
-               new_op.offset = sizeof(u32);
-               new_op.next = NULL;
                inter2.op = &new_op;
                inter2.block = NULL;
 
@@ -295,7 +307,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 
                pr_debug("Running delay slot of branch at target of impossible "
                         "branch\n");
-               (*int_standard[inter2.op->i.op])(&inter2);
+               lightrec_int_op(&inter2);
        }
 
        return next_pc;
@@ -311,11 +323,11 @@ static u32 int_unimplemented(struct interpreter *inter)
 static u32 int_jump(struct interpreter *inter, bool link)
 {
        struct lightrec_state *state = inter->state;
-       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 old_pc = int_get_branch_pc(inter);
        u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);
 
        if (link)
-               state->native_reg_cache[31] = old_pc + 8;
+               state->regs.gpr[31] = old_pc + 8;
 
        if (inter->op->flags & LIGHTREC_NO_DS)
                return pc;
@@ -336,11 +348,11 @@ static u32 int_JAL(struct interpreter *inter)
 static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
 {
        struct lightrec_state *state = inter->state;
-       u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];
+       u32 old_pc, next_pc = state->regs.gpr[inter->op->r.rs];
 
        if (link_reg) {
-               old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
-               state->native_reg_cache[link_reg] = old_pc + 8;
+               old_pc = int_get_branch_pc(inter);
+               state->regs.gpr[link_reg] = old_pc + 8;
        }
 
        if (inter->op->flags & LIGHTREC_NO_DS)
@@ -365,7 +377,7 @@ static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
            (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
            (s16)inter->op->c.i.imm >= 0) {
                next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
-               next_pc = lightrec_emulate_block(inter->block, next_pc);
+               next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
        }
 
        return next_pc;
@@ -399,10 +411,10 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
 
 static u32 int_beq(struct interpreter *inter, bool bne)
 {
-       u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 rs, rt, old_pc = int_get_branch_pc(inter);
 
-       rs = inter->state->native_reg_cache[inter->op->i.rs];
-       rt = inter->state->native_reg_cache[inter->op->i.rt];
+       rs = inter->state->regs.gpr[inter->op->i.rs];
+       rt = inter->state->regs.gpr[inter->op->i.rt];
 
        return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
 }
@@ -419,13 +431,13 @@ static u32 int_BNE(struct interpreter *inter)
 
 static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
 {
-       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 old_pc = int_get_branch_pc(inter);
        s32 rs;
 
        if (link)
-               inter->state->native_reg_cache[31] = old_pc + 8;
+               inter->state->regs.gpr[31] = old_pc + 8;
 
-       rs = (s32)inter->state->native_reg_cache[inter->op->i.rs];
+       rs = (s32)inter->state->regs.gpr[inter->op->i.rs];
 
        return int_branch(inter, old_pc, inter->op->c,
                          ((regimm && !rs) || rs > 0) ^ lt);
@@ -470,7 +482,7 @@ static u32 int_cfc(struct interpreter *inter)
        val = lightrec_mfc(state, op->c);
 
        if (likely(op->r.rt))
-               state->native_reg_cache[op->r.rt] = val;
+               state->regs.gpr[op->r.rt] = val;
 
        return jump_next(inter);
 }
@@ -480,54 +492,35 @@ static u32 int_ctc(struct interpreter *inter)
        struct lightrec_state *state = inter->state;
        const struct opcode *op = inter->op;
 
-       lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]);
+       lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]);
 
        /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
         * return early so that the emulator will be able to check software
         * interrupt status. */
        if (!(inter->op->flags & LIGHTREC_NO_DS) &&
            op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
-               return inter->block->pc + (op->offset + 1) * sizeof(u32);
+               return int_get_ds_pc(inter, 1);
        else
                return jump_next(inter);
 }
 
 static u32 int_cp0_RFE(struct interpreter *inter)
 {
-       struct lightrec_state *state = inter->state;
-       u32 status;
-
-       /* Read CP0 Status register (r12) */
-       status = state->ops.cop0_ops.mfc(state, inter->op->c.opcode, 12);
-
-       /* Switch the bits */
-       status = ((status & 0x3c) >> 2) | (status & ~0xf);
-
-       /* Write it back */
-       state->ops.cop0_ops.ctc(state, inter->op->c.opcode, 12, status);
+       lightrec_rfe(inter->state);
 
        return jump_next(inter);
 }
 
 static u32 int_CP(struct interpreter *inter)
 {
-       struct lightrec_state *state = inter->state;
-       const struct lightrec_cop_ops *ops;
-       const struct opcode *op = inter->op;
-
-       if ((op->j.imm >> 25) & 1)
-               ops = &state->ops.cop2_ops;
-       else
-               ops = &state->ops.cop0_ops;
-
-       (*ops->op)(state, (op->j.imm) & ~(1 << 25));
+       lightrec_cp(inter->state, inter->op->c);
 
        return jump_next(inter);
 }
 
 static u32 int_ADDI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -538,7 +531,7 @@ static u32 int_ADDI(struct interpreter *inter)
 
 static u32 int_SLTI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -549,7 +542,7 @@ static u32 int_SLTI(struct interpreter *inter)
 
 static u32 int_SLTIU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -560,7 +553,7 @@ static u32 int_SLTIU(struct interpreter *inter)
 
 static u32 int_ANDI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -571,7 +564,7 @@ static u32 int_ANDI(struct interpreter *inter)
 
 static u32 int_ORI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -582,7 +575,7 @@ static u32 int_ORI(struct interpreter *inter)
 
 static u32 int_XORI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -595,7 +588,7 @@ static u32 int_LUI(struct interpreter *inter)
 {
        struct opcode_i *op = &inter->op->i;
 
-       inter->state->native_reg_cache[op->rt] = op->imm << 16;
+       inter->state->regs.gpr[op->rt] = op->imm << 16;
 
        return jump_next(inter);
 }
@@ -603,12 +596,12 @@ static u32 int_LUI(struct interpreter *inter)
 static u32 int_io(struct interpreter *inter, bool is_load)
 {
        struct opcode_i *op = &inter->op->i;
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        u32 val;
 
        val = lightrec_rw(inter->state, inter->op->c,
                          reg_cache[op->rs], reg_cache[op->rt],
-                         &inter->op->flags);
+                         &inter->op->flags, inter->block);
 
        if (is_load && op->rt)
                reg_cache[op->rt] = val;
@@ -629,11 +622,11 @@ static u32 int_store(struct interpreter *inter)
                return int_io(inter, false);
 
        lightrec_rw(inter->state, inter->op->c,
-                   inter->state->native_reg_cache[inter->op->i.rs],
-                   inter->state->native_reg_cache[inter->op->i.rt],
-                   &inter->op->flags);
+                   inter->state->regs.gpr[inter->op->i.rs],
+                   inter->state->regs.gpr[inter->op->i.rt],
+                   &inter->op->flags, inter->block);
 
-       next_pc = inter->block->pc + (inter->op->offset + 1) * 4;
+       next_pc = int_get_ds_pc(inter, 1);
 
        /* Invalidate next PC, to force the rest of the block to be rebuilt */
        lightrec_invalidate(inter->state, next_pc, 4);
@@ -652,8 +645,8 @@ static u32 int_special_SLL(struct interpreter *inter)
        u32 rt;
 
        if (op->opcode) { /* Handle NOPs */
-               rt = inter->state->native_reg_cache[op->r.rt];
-               inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm;
+               rt = inter->state->regs.gpr[op->r.rt];
+               inter->state->regs.gpr[op->r.rd] = rt << op->r.imm;
        }
 
        return jump_next(inter);
@@ -662,9 +655,9 @@ static u32 int_special_SLL(struct interpreter *inter)
 static u32 int_special_SRL(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rt = inter->state->regs.gpr[op->r.rt];
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+       inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;
 
        return jump_next(inter);
 }
@@ -672,9 +665,9 @@ static u32 int_special_SRL(struct interpreter *inter)
 static u32 int_special_SRA(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       s32 rt = inter->state->native_reg_cache[op->r.rt];
+       s32 rt = inter->state->regs.gpr[op->r.rt];
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+       inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;
 
        return jump_next(inter);
 }
@@ -682,10 +675,10 @@ static u32 int_special_SRA(struct interpreter *inter)
 static u32 int_special_SLLV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs];
+       u32 rt = inter->state->regs.gpr[op->r.rt];
 
-       inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f);
 
        return jump_next(inter);
 }
@@ -693,10 +686,10 @@ static u32 int_special_SLLV(struct interpreter *inter)
 static u32 int_special_SRLV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs];
+       u32 rt = inter->state->regs.gpr[op->r.rt];
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);
 
        return jump_next(inter);
 }
@@ -704,10 +697,10 @@ static u32 int_special_SRLV(struct interpreter *inter)
 static u32 int_special_SRAV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       s32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs];
+       s32 rt = inter->state->regs.gpr[op->r.rt];
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);
 
        return jump_next(inter);
 }
@@ -720,12 +713,12 @@ static u32 int_syscall_break(struct interpreter *inter)
        else
                inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
 
-       return inter->block->pc + inter->op->offset * sizeof(u32);
+       return int_get_ds_pc(inter, 0);
 }
 
 static u32 int_special_MFHI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -736,7 +729,7 @@ static u32 int_special_MFHI(struct interpreter *inter)
 
 static u32 int_special_MTHI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
 
        reg_cache[REG_HI] = reg_cache[inter->op->r.rs];
 
@@ -745,7 +738,7 @@ static u32 int_special_MTHI(struct interpreter *inter)
 
 static u32 int_special_MFLO(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -756,7 +749,7 @@ static u32 int_special_MFLO(struct interpreter *inter)
 
 static u32 int_special_MTLO(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
 
        reg_cache[REG_LO] = reg_cache[inter->op->r.rs];
 
@@ -765,61 +758,70 @@ static u32 int_special_MTLO(struct interpreter *inter)
 
 static u32 int_special_MULT(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        s32 rs = reg_cache[inter->op->r.rs];
        s32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u64 res = (s64)rs * (s64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_MULT32))
-               reg_cache[REG_HI] = res >> 32;
-       reg_cache[REG_LO] = res;
+       if (!(inter->op->flags & LIGHTREC_NO_HI))
+               reg_cache[reg_hi] = res >> 32;
+       if (!(inter->op->flags & LIGHTREC_NO_LO))
+               reg_cache[reg_lo] = res;
 
        return jump_next(inter);
 }
 
 static u32 int_special_MULTU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        u32 rs = reg_cache[inter->op->r.rs];
        u32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u64 res = (u64)rs * (u64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_MULT32))
-               reg_cache[REG_HI] = res >> 32;
-       reg_cache[REG_LO] = res;
+       if (!(inter->op->flags & LIGHTREC_NO_HI))
+               reg_cache[reg_hi] = res >> 32;
+       if (!(inter->op->flags & LIGHTREC_NO_LO))
+               reg_cache[reg_lo] = res;
 
        return jump_next(inter);
 }
 
 static u32 int_special_DIV(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        s32 rs = reg_cache[inter->op->r.rs];
        s32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u32 lo, hi;
 
        if (rt == 0) {
                hi = rs;
                lo = (rs < 0) * 2 - 1;
-       } else if ((rs == 0x80000000) && (rt == 0xFFFFFFFF)) {
-               lo = rs;
-               hi = 0;
        } else {
                lo = rs / rt;
                hi = rs % rt;
        }
 
-       reg_cache[REG_HI] = hi;
-       reg_cache[REG_LO] = lo;
+       if (!(inter->op->flags & LIGHTREC_NO_HI))
+               reg_cache[reg_hi] = hi;
+       if (!(inter->op->flags & LIGHTREC_NO_LO))
+               reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
 }
 
 static u32 int_special_DIVU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        u32 rs = reg_cache[inter->op->r.rs];
        u32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u32 lo, hi;
 
        if (rt == 0) {
@@ -830,15 +832,17 @@ static u32 int_special_DIVU(struct interpreter *inter)
                hi = rs % rt;
        }
 
-       reg_cache[REG_HI] = hi;
-       reg_cache[REG_LO] = lo;
+       if (!(inter->op->flags & LIGHTREC_NO_HI))
+               reg_cache[reg_hi] = hi;
+       if (!(inter->op->flags & LIGHTREC_NO_LO))
+               reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
 }
 
 static u32 int_special_ADD(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        s32 rs = reg_cache[op->rs];
        s32 rt = reg_cache[op->rt];
@@ -851,7 +855,7 @@ static u32 int_special_ADD(struct interpreter *inter)
 
 static u32 int_special_SUB(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -864,7 +868,7 @@ static u32 int_special_SUB(struct interpreter *inter)
 
 static u32 int_special_AND(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -877,7 +881,7 @@ static u32 int_special_AND(struct interpreter *inter)
 
 static u32 int_special_OR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -890,7 +894,7 @@ static u32 int_special_OR(struct interpreter *inter)
 
 static u32 int_special_XOR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -903,7 +907,7 @@ static u32 int_special_XOR(struct interpreter *inter)
 
 static u32 int_special_NOR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -916,7 +920,7 @@ static u32 int_special_NOR(struct interpreter *inter)
 
 static u32 int_special_SLT(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        s32 rs = reg_cache[op->rs];
        s32 rt = reg_cache[op->rt];
@@ -929,7 +933,7 @@ static u32 int_special_SLT(struct interpreter *inter)
 
 static u32 int_special_SLTU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -940,14 +944,9 @@ static u32 int_special_SLTU(struct interpreter *inter)
        return jump_next(inter);
 }
 
-static u32 int_META_SKIP(struct interpreter *inter)
-{
-       return jump_skip(inter);
-}
-
 static u32 int_META_MOV(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -956,15 +955,30 @@ static u32 int_META_MOV(struct interpreter *inter)
        return jump_next(inter);
 }
 
-static u32 int_META_SYNC(struct interpreter *inter)
+static u32 int_META_EXTC(struct interpreter *inter)
 {
-       inter->state->current_cycle += inter->cycles;
-       inter->cycles = 0;
+       u32 *reg_cache = inter->state->regs.gpr;
+       struct opcode_i *op = &inter->op->i;
 
-       return jump_skip(inter);
+       if (likely(op->rt))
+               reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs];
+
+       return jump_next(inter);
+}
+
+static u32 int_META_EXTS(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->regs.gpr;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs];
+
+       return jump_next(inter);
 }
 
 static const lightrec_int_func_t int_standard[64] = {
+       SET_DEFAULT_ELM(int_standard, int_unimplemented),
        [OP_SPECIAL]            = int_SPECIAL,
        [OP_REGIMM]             = int_REGIMM,
        [OP_J]                  = int_J,
@@ -998,14 +1012,13 @@ static const lightrec_int_func_t int_standard[64] = {
        [OP_LWC2]               = int_LWC2,
        [OP_SWC2]               = int_store,
 
-       [OP_META_REG_UNLOAD]    = int_META_SKIP,
-       [OP_META_BEQZ]          = int_BEQ,
-       [OP_META_BNEZ]          = int_BNE,
        [OP_META_MOV]           = int_META_MOV,
-       [OP_META_SYNC]          = int_META_SYNC,
+       [OP_META_EXTC]          = int_META_EXTC,
+       [OP_META_EXTS]          = int_META_EXTS,
 };
 
 static const lightrec_int_func_t int_special[64] = {
+       SET_DEFAULT_ELM(int_special, int_unimplemented),
        [OP_SPECIAL_SLL]        = int_special_SLL,
        [OP_SPECIAL_SRL]        = int_special_SRL,
        [OP_SPECIAL_SRA]        = int_special_SRA,
@@ -1037,6 +1050,7 @@ static const lightrec_int_func_t int_special[64] = {
 };
 
 static const lightrec_int_func_t int_regimm[64] = {
+       SET_DEFAULT_ELM(int_regimm, int_unimplemented),
        [OP_REGIMM_BLTZ]        = int_regimm_BLTZ,
        [OP_REGIMM_BGEZ]        = int_regimm_BGEZ,
        [OP_REGIMM_BLTZAL]      = int_regimm_BLTZAL,
@@ -1044,6 +1058,7 @@ static const lightrec_int_func_t int_regimm[64] = {
 };
 
 static const lightrec_int_func_t int_cp0[64] = {
+       SET_DEFAULT_ELM(int_cp0, int_CP),
        [OP_CP0_MFC0]           = int_cfc,
        [OP_CP0_CFC0]           = int_cfc,
        [OP_CP0_MTC0]           = int_ctc,
@@ -1052,6 +1067,7 @@ static const lightrec_int_func_t int_cp0[64] = {
 };
 
 static const lightrec_int_func_t int_cp2_basic[64] = {
+       SET_DEFAULT_ELM(int_cp2_basic, int_CP),
        [OP_CP2_BASIC_MFC2]     = int_cfc,
        [OP_CP2_BASIC_CFC2]     = int_cfc,
        [OP_CP2_BASIC_MTC2]     = int_ctc,
@@ -1061,54 +1077,54 @@ static const lightrec_int_func_t int_cp2_basic[64] = {
 static u32 int_SPECIAL(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_special[inter->op->r.op];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
                return int_unimplemented(inter);
+
+       return execute(f, inter);
 }
 
 static u32 int_REGIMM(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_regimm[inter->op->r.rt];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
                return int_unimplemented(inter);
+
+       return execute(f, inter);
 }
 
 static u32 int_CP0(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_cp0[inter->op->r.rs];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
                return int_CP(inter);
+
+       return execute(f, inter);
 }
 
 static u32 int_CP2(struct interpreter *inter)
 {
        if (inter->op->r.op == OP_CP2_BASIC) {
                lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];
-               if (likely(f))
+               if (HAS_DEFAULT_ELM || likely(f))
                        return execute(f, inter);
        }
 
        return int_CP(inter);
 }
 
-static u32 lightrec_int_op(struct interpreter *inter)
-{
-       return execute(int_standard[inter->op->i.op], inter);
-}
-
-static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
+static u32 lightrec_emulate_block_list(struct lightrec_state *state,
+                                      struct block *block, u32 offset)
 {
        struct interpreter inter;
        u32 pc;
 
        inter.block = block;
-       inter.state = block->state;
-       inter.op = op;
+       inter.state = state;
+       inter.offset = offset;
+       inter.op = &block->opcode_list[offset];
        inter.cycles = 0;
        inter.delay_slot = false;
 
@@ -1117,20 +1133,17 @@ static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
        /* Add the cycles of the last branch */
        inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
 
-       block->state->current_cycle += inter.cycles;
+       state->current_cycle += inter.cycles;
 
        return pc;
 }
 
-u32 lightrec_emulate_block(struct block *block, u32 pc)
+u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc)
 {
        u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
-       struct opcode *op;
 
-       for (op = block->opcode_list;
-            op && (op->offset < offset); op = op->next);
-       if (op)
-               return lightrec_emulate_block_list(block, op);
+       if (offset < block->nb_ops)
+               return lightrec_emulate_block_list(state, block, offset);
 
        pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
 
diff --git a/deps/lightrec/interpreter.h b/deps/lightrec/interpreter.h
index 2113779..96600bf 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_INTERPRETER_H__
@@ -19,6 +10,6 @@
 
 struct block;
 
-u32 lightrec_emulate_block(struct block *block, u32 pc);
+u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc);
 
 #endif /* __LIGHTREC_INTERPRETER_H__ */
diff --git a/deps/lightrec/lightning-wrapper.h b/deps/lightrec/lightning-wrapper.h
new file mode 100644 (file)
index 0000000..7eeb15f
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTNING_WRAPPER_H__
+#define __LIGHTNING_WRAPPER_H__
+
+#include <lightning.h>
+
+#if __WORDSIZE == 32
+
+#define jit_ldxi_ui(u,v,w)     jit_ldxi_i(u,v,w)
+#define jit_stxi_ui(u,v,w)     jit_stxi_i(u,v,w)
+#define jit_extr_i(u,v)                jit_movr(u,v)
+#define jit_extr_ui(u,v)       jit_movr(u,v)
+#define jit_retval_ui(u)       jit_retval(u)
+#define jit_getarg_ui(u,v)     jit_getarg_i(u,v)
+
+#endif
+
+#endif /* __LIGHTNING_WRAPPER_H__ */
diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein
new file mode 100644 (file)
index 0000000..3cef2b8
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_CONFIG_H__
+#define __LIGHTREC_CONFIG_H__
+
+#cmakedefine01 ENABLE_THREADED_COMPILER
+#cmakedefine01 ENABLE_FIRST_PASS
+#cmakedefine01 ENABLE_DISASSEMBLER
+#cmakedefine01 ENABLE_TINYMM
+
+#cmakedefine01 HAS_DEFAULT_ELM
+
+#cmakedefine01 OPT_REMOVE_DIV_BY_ZERO_SEQ
+#cmakedefine01 OPT_REPLACE_MEMSET
+#cmakedefine01 OPT_DETECT_IMPOSSIBLE_BRANCHES
+#cmakedefine01 OPT_TRANSFORM_OPS
+#cmakedefine01 OPT_LOCAL_BRANCHES
+#cmakedefine01 OPT_SWITCH_DELAY_SLOTS
+#cmakedefine01 OPT_FLAG_STORES
+#cmakedefine01 OPT_FLAG_IO
+#cmakedefine01 OPT_FLAG_MULT_DIV
+#cmakedefine01 OPT_EARLY_UNLOAD
+
+#endif /* __LIGHTREC_CONFIG_H__ */
+
diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h
index 6304515..e9efcb5 100644 (file)
@@ -1,21 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_PRIVATE_H__
 #define __LIGHTREC_PRIVATE_H__
 
-#include "config.h"
+#include "lightrec-config.h"
 #include "disassembler.h"
 #include "lightrec.h"
 
@@ -24,7 +15,6 @@
 #endif
 
 #define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0)
-#define BIT(x) (1 << (x))
 
 #ifdef __GNUC__
 #      define likely(x)       __builtin_expect(!!(x),1)
 #      define HTOLE16(x)       (x)
 #endif
 
+#if HAS_DEFAULT_ELM
+#define SET_DEFAULT_ELM(table, value) [0 ... ARRAY_SIZE(table) - 1] = value
+#else
+#define SET_DEFAULT_ELM(table, value) [0] = NULL
+#endif
+
 /* Flags for (struct block *)->flags */
 #define BLOCK_NEVER_COMPILE    BIT(0)
 #define BLOCK_SHOULD_RECOMPILE BIT(1)
 #define BLOCK_FULLY_TAGGED     BIT(2)
 #define BLOCK_IS_DEAD          BIT(3)
+#define BLOCK_IS_MEMSET                BIT(4)
 
 #define RAM_SIZE       0x200000
 #define BIOS_SIZE      0x80000
 
 #define CODE_LUT_SIZE  ((RAM_SIZE + BIOS_SIZE) >> 2)
 
+#define REG_LO 32
+#define REG_HI 33
+
 /* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
 struct jit_node;
 struct jit_state;
@@ -71,19 +71,18 @@ struct reaper;
 
 struct block {
        jit_state_t *_jit;
-       struct lightrec_state *state;
        struct opcode *opcode_list;
        void (*function)(void);
+       const u32 *code;
+       struct block *next;
        u32 pc;
        u32 hash;
+       unsigned int code_size;
+       u16 nb_ops;
+       u8 flags;
 #if ENABLE_THREADED_COMPILER
        atomic_flag op_list_freed;
 #endif
-       unsigned int code_size;
-       u16 flags;
-       u16 nb_ops;
-       const struct lightrec_mem_map *map;
-       struct block *next;
 };
 
 struct lightrec_branch {
@@ -96,33 +95,50 @@ struct lightrec_branch_target {
        u32 offset;
 };
 
-struct lightrec_state {
-       u32 native_reg_cache[34];
-       u32 next_pc;
-       u32 current_cycle;
-       u32 target_cycle;
-       u32 exit_flags;
-       struct block *dispatcher, *rw_wrapper, *rw_generic_wrapper,
-                    *mfc_wrapper, *mtc_wrapper, *rfe_wrapper, *cp_wrapper,
-                    *syscall_wrapper, *break_wrapper;
-       void *rw_func, *rw_generic_func, *mfc_func, *mtc_func, *rfe_func,
-            *cp_func, *syscall_func, *break_func;
+/*
+ * Identifiers of the C callback wrappers. The values are used to size the
+ * c_wrappers[] array of struct lightrec_state; C_WRAPPERS_COUNT must stay the
+ * last enumerator since it is the number of entries in that array.
+ */
+enum c_wrappers {
+       C_WRAPPER_RW,
+       C_WRAPPER_RW_GENERIC,
+       C_WRAPPER_MFC,
+       C_WRAPPER_MTC,
+       C_WRAPPER_CP,
+       C_WRAPPER_SYSCALL,
+       C_WRAPPER_BREAK,
+       C_WRAPPERS_COUNT,
+};
+
+struct lightrec_cstate {
+       struct lightrec_state *state;
+
        struct jit_node *branches[512];
        struct lightrec_branch local_branches[512];
        struct lightrec_branch_target targets[512];
        unsigned int nb_branches;
        unsigned int nb_local_branches;
        unsigned int nb_targets;
+       unsigned int cycles;
+
+       struct regcache *reg_cache;
+};
+
+struct lightrec_state {
+       struct lightrec_registers regs;
+       u32 next_pc;
+       u32 current_cycle;
+       u32 target_cycle;
+       u32 exit_flags;
+       u32 old_cycle_counter;
+       struct block *dispatcher, *c_wrapper_block;
+       void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT];
        struct tinymm *tinymm;
        struct blockcache *block_cache;
-       struct regcache *reg_cache;
        struct recompiler *rec;
+       struct lightrec_cstate *cstate;
        struct reaper *reaper;
        void (*eob_wrapper_func)(void);
+       void (*memset_func)(void);
        void (*get_next_block)(void);
        struct lightrec_ops ops;
        unsigned int nb_precompile;
-       unsigned int cycles;
        unsigned int nb_maps;
        const struct lightrec_mem_map *maps;
        uintptr_t offset_ram, offset_bios, offset_scratch;
@@ -132,12 +148,16 @@ struct lightrec_state {
 };
 
 u32 lightrec_rw(struct lightrec_state *state, union code op,
-               u32 addr, u32 data, u16 *flags);
+               u32 addr, u32 data, u16 *flags,
+               struct block *block);
 
-void lightrec_free_block(struct block *block);
+void lightrec_free_block(struct lightrec_state *state, struct block *block);
 
 void remove_from_code_lut(struct blockcache *cache, struct block *block);
 
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
+
 static inline u32 kunseg(u32 addr)
 {
        if (unlikely(addr >= 0xa0000000))
@@ -154,12 +174,48 @@ static inline u32 lut_offset(u32 pc)
                return (pc & (RAM_SIZE - 1)) >> 2; // RAM
 }
 
+/*
+ * Return block->pc plus the byte offset of the opcode located 'imm' words
+ * after the opcode at index 'offset' of the block's opcode list.
+ *
+ * When OPT_SWITCH_DELAY_SLOTS is enabled and the opcode at 'offset' carries
+ * the LIGHTREC_NO_DS flag, the index is advanced by one before the
+ * computation.
+ */
+static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
+{
+       u16 flags = block->opcode_list[offset].flags;
+
+       offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+
+       /* 'offset + imm' may be negative; shift it as an unsigned value, as
+        * left-shifting a negative int is undefined behaviour. The result is
+        * the same modulo 2^32. */
+       return block->pc + ((u32)(offset + imm) << 2);
+}
+
+/*
+ * Return block->pc plus the byte offset of the opcode located 'imm' words
+ * after the opcode at index 'offset' of the block's opcode list.
+ *
+ * When OPT_SWITCH_DELAY_SLOTS is enabled and the opcode at 'offset' carries
+ * the LIGHTREC_NO_DS flag, the index is moved back by one before the
+ * computation.
+ */
+static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm)
+{
+       u16 flags = block->opcode_list[offset].flags;
+
+       offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+
+       /* 'offset + imm' may be negative; shift it as an unsigned value, as
+        * left-shifting a negative int is undefined behaviour. The result is
+        * the same modulo 2^32. */
+       return block->pc + ((u32)(offset + imm) << 2);
+}
+
 void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
 u32 lightrec_mfc(struct lightrec_state *state, union code op);
+void lightrec_rfe(struct lightrec_state *state);
+void lightrec_cp(struct lightrec_state *state, union code op);
+
+struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state);
+void lightrec_free_cstate(struct lightrec_cstate *cstate);
 
 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
 
 struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
-int lightrec_compile_block(struct block *block);
+int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block);
+void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
+
+unsigned int lightrec_cycles_of_opcode(union code code);
+
+/*
+ * Return the 'rd' field of the opcode when OPT_FLAG_MULT_DIV is enabled and
+ * that field is non-zero; return REG_LO otherwise.
+ * NOTE(review): judging by the name, this selects the register receiving the
+ * LO half of a multiply/divide result - confirm against the callers.
+ */
+static inline u8 get_mult_div_lo(union code c)
+{
+       return (OPT_FLAG_MULT_DIV && c.r.rd) ? c.r.rd : REG_LO;
+}
+
+/*
+ * Return the 'imm' field of the opcode when OPT_FLAG_MULT_DIV is enabled and
+ * that field is non-zero; return REG_HI otherwise.
+ * NOTE(review): presumably the HI counterpart of get_mult_div_lo() - confirm
+ * against the callers.
+ */
+static inline u8 get_mult_div_hi(union code c)
+{
+       return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI;
+}
 
 #endif /* __LIGHTREC_PRIVATE_H__ */
index 7fdf74a..3d4e1a2 100644 (file)
@@ -1,23 +1,15 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "blockcache.h"
-#include "config.h"
 #include "debug.h"
 #include "disassembler.h"
 #include "emitter.h"
 #include "interpreter.h"
+#include "lightrec-config.h"
+#include "lightning-wrapper.h"
 #include "lightrec.h"
 #include "memmanager.h"
 #include "reaper.h"
@@ -26,7 +18,7 @@
 #include "optimizer.h"
 
 #include <errno.h>
-#include <lightning.h>
+#include <inttypes.h>
 #include <limits.h>
 #if ENABLE_THREADED_COMPILER
 #include <stdatomic.h>
 
 static struct block * lightrec_precompile_block(struct lightrec_state *state,
                                                u32 pc);
+static bool lightrec_block_is_fully_tagged(const struct block *block);
+
+static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
+static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
 
 static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
                                void *host, u32 addr, u8 data)
@@ -98,11 +94,14 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = {
        .lw = lightrec_default_lw,
 };
 
-static void __segfault_cb(struct lightrec_state *state, u32 addr)
+static void __segfault_cb(struct lightrec_state *state, u32 addr,
+                         const struct block *block)
 {
        lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
        pr_err("Segmentation fault in recompiled code: invalid "
               "load/store at address 0x%08x\n", addr);
+       if (block)
+               pr_err("Was executing block PC 0x%08x\n", block->pc);
 }
 
 static void lightrec_swl(struct lightrec_state *state,
@@ -147,7 +146,7 @@ static void lightrec_swc2(struct lightrec_state *state, union code op,
                          const struct lightrec_mem_map_ops *ops,
                          void *host, u32 addr)
 {
-       u32 data = state->ops.cop2_ops.mfc(state, op.opcode, op.i.rt);
+       u32 data = lightrec_mfc2(state, op.i.rt);
 
        ops->sw(state, op.opcode, host, addr, data);
 }
@@ -192,55 +191,64 @@ static void lightrec_lwc2(struct lightrec_state *state, union code op,
 {
        u32 data = ops->lw(state, op.opcode, host, addr);
 
-       state->ops.cop2_ops.mtc(state, op.opcode, op.i.rt, data);
+       lightrec_mtc2(state, op.i.rt, data);
 }
 
 static void lightrec_invalidate_map(struct lightrec_state *state,
-               const struct lightrec_mem_map *map, u32 addr)
+               const struct lightrec_mem_map *map, u32 addr, u32 len)
 {
-       if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
-               state->code_lut[lut_offset(addr)] = NULL;
+       if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
+               memset(&state->code_lut[lut_offset(addr)], 0,
+                      ((len + 3) / 4) * sizeof(void *));
+       }
 }
 
-static const struct lightrec_mem_map *
-lightrec_get_map(struct lightrec_state *state, u32 kaddr)
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
 {
+       const struct lightrec_mem_map *map;
        unsigned int i;
+       u32 addr;
 
        for (i = 0; i < state->nb_maps; i++) {
-               const struct lightrec_mem_map *map = &state->maps[i];
+               const struct lightrec_mem_map *mapi = &state->maps[i];
 
-               if (kaddr >= map->pc && kaddr < map->pc + map->length)
-                       return map;
+               if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
+                       map = mapi;
+                       break;
+               }
        }
 
-       return NULL;
+       if (i == state->nb_maps)
+               return NULL;
+
+       addr = kaddr - map->pc;
+
+       while (map->mirror_of)
+               map = map->mirror_of;
+
+       if (host)
+               *host = map->address + addr;
+
+       return map;
 }
 
 u32 lightrec_rw(struct lightrec_state *state, union code op,
-               u32 addr, u32 data, u16 *flags)
+               u32 addr, u32 data, u16 *flags, struct block *block)
 {
        const struct lightrec_mem_map *map;
        const struct lightrec_mem_map_ops *ops;
-       u32 kaddr, pc, opcode = op.opcode;
+       u32 opcode = op.opcode;
        void *host;
 
        addr += (s16) op.i.imm;
-       kaddr = kunseg(addr);
 
-       map = lightrec_get_map(state, kaddr);
+       map = lightrec_get_map(state, &host, kunseg(addr));
        if (!map) {
-               __segfault_cb(state, addr);
+               __segfault_cb(state, addr, block);
                return 0;
        }
 
-       pc = map->pc;
-
-       while (map->mirror_of)
-               map = map->mirror_of;
-
-       host = (void *)((uintptr_t)map->address + kaddr - pc);
-
        if (unlikely(map->ops)) {
                if (flags)
                        *flags |= LIGHTREC_HW_IO;
@@ -294,11 +302,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
 }
 
 static void lightrec_rw_helper(struct lightrec_state *state,
-                              union code op, u16 *flags)
+                              union code op, u16 *flags,
+                              struct block *block)
 {
-       u32 ret = lightrec_rw(state, op,
-                         state->native_reg_cache[op.i.rs],
-                         state->native_reg_cache[op.i.rt], flags);
+       u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
+                             state->regs.gpr[op.i.rt], flags, block);
 
        switch (op.i.op) {
        case OP_LB:
@@ -309,7 +317,7 @@ static void lightrec_rw_helper(struct lightrec_state *state,
        case OP_LWR:
        case OP_LW:
                if (op.i.rt)
-                       state->native_reg_cache[op.i.rt] = ret;
+                       state->regs.gpr[op.i.rt] = ret;
        default: /* fall-through */
                break;
        }
@@ -317,43 +325,85 @@ static void lightrec_rw_helper(struct lightrec_state *state,
 
 static void lightrec_rw_cb(struct lightrec_state *state, union code op)
 {
-       lightrec_rw_helper(state, op, NULL);
+       lightrec_rw_helper(state, op, NULL, NULL);
 }
 
-static void lightrec_rw_generic_cb(struct lightrec_state *state,
-                                  struct opcode *op, struct block *block)
+static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
 {
-       bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+       struct block *block;
+       struct opcode *op;
+       bool was_tagged;
+       u16 offset = (u16)arg;
+
+       block = lightrec_find_block_from_lut(state->block_cache,
+                                            arg >> 16, state->next_pc);
+       if (unlikely(!block)) {
+               pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
+                        state->next_pc, offset);
+               return;
+       }
+
+       op = &block->opcode_list[offset];
+       was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
 
-       lightrec_rw_helper(state, op->c, &op->flags);
+       lightrec_rw_helper(state, op->c, &op->flags, block);
 
        if (!was_tagged) {
-               pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
-                        "tagged - flag for recompilation\n",
-                        block->pc, op->offset << 2);
+               pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
+                        "for recompilation\n", block->pc);
 
                block->flags |= BLOCK_SHOULD_RECOMPILE;
        }
 }
 
-u32 lightrec_mfc(struct lightrec_state *state, union code op)
+static u32 clamp_s32(s32 val, s32 min, s32 max)
 {
-       bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
-                     (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
-       u32 (*func)(struct lightrec_state *, u32, u8);
-       const struct lightrec_cop_ops *ops;
+       return val < min ? min : val > max ? max : val;
+}
 
-       if (op.i.op == OP_CP0)
-               ops = &state->ops.cop0_ops;
-       else
-               ops = &state->ops.cop2_ops;
+/*
+ * Read coprocessor 2 data register 'reg' from state->regs.cp2d[] and return
+ * the value to hand back to the guest:
+ *  - registers 1, 3, 5, 8, 9, 10, 11: low 16 bits, sign-extended;
+ *  - registers 7, 16, 17, 18, 19: low 16 bits, zero-extended;
+ *  - registers 28, 29: value rebuilt from registers 9, 10 and 11;
+ *  - register 15: reads back register 14;
+ *  - any other register: the stored 32-bit value.
+ */
+static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg)
+{
+       s16 gteir1, gteir2, gteir3;
+
+       switch (reg) {
+       case 1:
+       case 3:
+       case 5:
+       case 8:
+       case 9:
+       case 10:
+       case 11:
+               return (s32)(s16) state->regs.cp2d[reg];
+       case 7:
+       case 16:
+       case 17:
+       case 18:
+       case 19:
+               return (u16) state->regs.cp2d[reg];
+       case 28:
+       case 29:
+               gteir1 = (s16) state->regs.cp2d[9];
+               gteir2 = (s16) state->regs.cp2d[10];
+               gteir3 = (s16) state->regs.cp2d[11];
+
+               /* Each 16-bit value is shifted right by 7, clamped to
+                * [0, 0x1f] and packed into bits 0-4, 5-9 and 10-14. */
+               return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 |
+                       clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 |
+                       clamp_s32(gteir3 >> 7, 0, 0x1f) << 10;
+       case 15:
+               /* Redirect to register 14, then use the default read below */
+               reg = 14;
+       default: /* fall-through */
+               return state->regs.cp2d[reg];
+       }
+}
 
-       if (is_cfc)
-               func = ops->cfc;
+u32 lightrec_mfc(struct lightrec_state *state, union code op)
+{
+       if (op.i.op == OP_CP0)
+               return state->regs.cp0[op.r.rd];
+       else if (op.r.rs == OP_CP2_BASIC_MFC2)
+               return lightrec_mfc2(state, op.r.rd);
        else
-               func = ops->mfc;
-
-       return (*func)(state, op.opcode, op.r.rd);
+               return state->regs.cp2c[op.r.rd];
 }
 
 static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
@@ -361,58 +411,146 @@ static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
        u32 rt = lightrec_mfc(state, op);
 
        if (op.r.rt)
-               state->native_reg_cache[op.r.rt] = rt;
+               state->regs.gpr[op.r.rt] = rt;
 }
 
-void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
 {
-       bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
-                     (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
-       void (*func)(struct lightrec_state *, u32, u8, u32);
-       const struct lightrec_cop_ops *ops;
+       u32 status, cause;
+
+       switch (reg) {
+       case 1:
+       case 4:
+       case 8:
+       case 14:
+       case 15:
+               /* Those registers are read-only */
+               return;
+       default: /* fall-through */
+               break;
+       }
 
-       if (op.i.op == OP_CP0)
-               ops = &state->ops.cop0_ops;
-       else
-               ops = &state->ops.cop2_ops;
+       if (reg == 12) {
+               status = state->regs.cp0[12];
 
-       if (is_ctc)
-               func = ops->ctc;
-       else
-               func = ops->mtc;
+               if (status & ~data & BIT(16)) {
+                       state->ops.enable_ram(state, true);
+                       lightrec_invalidate_all(state);
+               } else if (~status & data & BIT(16)) {
+                       state->ops.enable_ram(state, false);
+               }
+       }
+
+       state->regs.cp0[reg] = data;
+
+       if (reg == 12 || reg == 13) {
+               cause = state->regs.cp0[13];
+               status = state->regs.cp0[12];
+
+               if (!!(status & cause & 0x300) & status)
+                       lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
+       }
+}
+
+/*
+ * Return the number of leading bits of 'data' that are equal to its sign
+ * bit, the sign bit included. The result is in the range [1, 32]; both 0 and
+ * -1 yield 32.
+ */
+static u32 count_leading_bits(s32 data)
+{
+       u32 cnt = 33, bits;
+
+       /* __has_builtin() must be tested in its own #if: a preprocessor that
+        * does not provide it cannot parse "defined(x) && x(...)", even when
+        * the defined() half is false. */
+#ifdef __has_builtin
+#if __has_builtin(__builtin_clrsb)
+       return 1 + __builtin_clrsb(data);
+#endif
+#endif
+
+       /* Portable fallback. Work on an unsigned copy: shifting a signed
+        * value into the sign bit is undefined, and an arithmetic right shift
+        * of the resulting negative value would never reach zero. */
+       bits = (u32) data;
+       if (data < 0)
+               bits = ~bits;
+       bits <<= 1;
+
+       do {
+               cnt -= 1;
+               bits >>= 1;
+       } while (bits);
+
+       return cnt;
+}
+
+/*
+ * Write 'data' to coprocessor 2 data register 'reg' (state->regs.cp2d[]).
+ * Most registers are stored as-is; registers 15, 28, 30 and 31 are handled
+ * specially, as described in the cases below.
+ */
+static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data)
+{
+       switch (reg) {
+       case 15:
+               /* Registers 12..14 behave as a queue: 13 moves to 12, 14 moves
+                * to 13, and the new value lands in 14. cp2d[15] itself is not
+                * written. */
+               state->regs.cp2d[12] = state->regs.cp2d[13];
+               state->regs.cp2d[13] = state->regs.cp2d[14];
+               state->regs.cp2d[14] = data;
+               break;
+       case 28:
+               /* Spread bits 0-4, 5-9 and 10-14 of 'data' into bits 7-11 of
+                * registers 9, 10 and 11. cp2d[28] itself is not written. */
+               state->regs.cp2d[9] = (data << 7) & 0xf80;
+               state->regs.cp2d[10] = (data << 2) & 0xf80;
+               state->regs.cp2d[11] = (data >> 3) & 0xf80;
+               break;
+       case 31:
+               /* Writes to register 31 are ignored */
+               return;
+       case 30:
+               /* Register 31 is updated with the leading bit count of the
+                * value, then the value itself is stored in register 30. */
+               state->regs.cp2d[31] = count_leading_bits((s32) data);
+       default: /* fall-through */
+               state->regs.cp2d[reg] = data;
+               break;
+       }
+}
 
-       (*func)(state, op.opcode, op.r.rd, data);
+/*
+ * Write 'data' to coprocessor 2 control register 'reg' (state->regs.cp2c[]).
+ *  - registers 4, 12, 20, 26, 27, 29, 30: the low 16 bits of 'data' are
+ *    sign-extended to 32 bits before being stored;
+ *  - register 31: only the bits of mask 0x7ffff000 are kept, and bit 31 is
+ *    set when any bit of mask 0x7f87e000 is set in 'data';
+ *  - any other register: 'data' is stored unchanged.
+ */
+static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data)
+{
+       switch (reg) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               data = (s32)(s16) data;
+               break;
+       case 31:
+               /* Cast before shifting: shifting the int value 1 into the sign
+                * bit is undefined behaviour. */
+               data = (data & 0x7ffff000) |
+                       ((u32)!!(data & 0x7f87e000) << 31);
+       default: /* fall-through */
+               break;
+       }
+
+       state->regs.cp2c[reg] = data;
+}
+
+/*
+ * Write 'data' to the coprocessor register designated by the opcode 'op'.
+ * The destination register index is the 'rd' field of the opcode. Opcodes
+ * targeting coprocessor 0 go to lightrec_mtc0(); any other opcode is treated
+ * as a coprocessor 2 access, written with lightrec_ctc2() when the 'rs' field
+ * is OP_CP2_BASIC_CTC2 and with lightrec_mtc2() otherwise.
+ */
+void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+{
+       if (op.i.op == OP_CP0)
+               lightrec_mtc0(state, op.r.rd, data);
+       else if (op.r.rs == OP_CP2_BASIC_CTC2)
+               lightrec_ctc2(state, op.r.rd, data);
+       else
+               lightrec_mtc2(state, op.r.rd, data);
+}
 
 static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
 {
-       lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
+       lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
 }
 
-static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
+void lightrec_rfe(struct lightrec_state *state)
 {
        u32 status;
 
        /* Read CP0 Status register (r12) */
-       status = state->ops.cop0_ops.mfc(state, op.opcode, 12);
+       status = state->regs.cp0[12];
 
        /* Switch the bits */
        status = ((status & 0x3c) >> 2) | (status & ~0xf);
 
        /* Write it back */
-       state->ops.cop0_ops.ctc(state, op.opcode, 12, status);
+       lightrec_mtc0(state, 12, status);
 }
 
-static void lightrec_cp_cb(struct lightrec_state *state, union code op)
+void lightrec_cp(struct lightrec_state *state, union code op)
 {
-       void (*func)(struct lightrec_state *, u32);
-
-       if ((op.opcode >> 25) & 1)
-               func = state->ops.cop2_ops.op;
-       else
-               func = state->ops.cop0_ops.op;
+       if (op.i.op == OP_CP0) {
+               pr_err("Invalid CP opcode to coprocessor #0\n");
+               return;
+       }
 
-       (*func)(state, op.opcode);
+       (*state->ops.cop2_op)(state, op.opcode);
 }
 
 static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
@@ -429,7 +567,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
 {
        struct block *block = lightrec_find_block(state->block_cache, pc);
 
-       if (block && lightrec_block_is_outdated(block)) {
+       if (block && lightrec_block_is_outdated(state, block)) {
                pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
 
                /* Make sure the recompiler isn't processing the block we'll
@@ -439,7 +577,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
 
                lightrec_unregister_block(state->block_cache, block);
                remove_from_code_lut(state->block_cache, block);
-               lightrec_free_block(block);
+               lightrec_free_block(state, block);
                block = NULL;
        }
 
@@ -466,12 +604,17 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
        for (;;) {
                func = state->code_lut[lut_offset(pc)];
                if (func && func != state->get_next_block)
-                       return func;
+                       break;
 
                block = lightrec_get_block(state, pc);
 
                if (unlikely(!block))
-                       return NULL;
+                       break;
+
+               if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
+                       func = state->memset_func;
+                       break;
+               }
 
                should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
                        !(block->flags & BLOCK_IS_DEAD);
@@ -484,66 +627,54 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
                        if (ENABLE_THREADED_COMPILER)
                                lightrec_recompiler_add(state->rec, block);
                        else
-                               lightrec_compile_block(block);
+                               lightrec_compile_block(state->cstate, block);
                }
 
                if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
-                       func = lightrec_recompiler_run_first_pass(block, &pc);
+                       func = lightrec_recompiler_run_first_pass(state, block, &pc);
                else
                        func = block->function;
 
                if (likely(func))
-                       return func;
+                       break;
 
-               /* Block wasn't compiled yet - run the interpreter */
-               if (!ENABLE_THREADED_COMPILER &&
-                   ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
-                    unlikely(block->flags & BLOCK_NEVER_COMPILE)))
-                       pc = lightrec_emulate_block(block, pc);
+               if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
+                       pc = lightrec_emulate_block(state, block, pc);
+
+               } else if (!ENABLE_THREADED_COMPILER) {
+                       /* Block wasn't compiled yet - run the interpreter */
+                       if (block->flags & BLOCK_FULLY_TAGGED)
+                               pr_debug("Block fully tagged, skipping first pass\n");
+                       else if (ENABLE_FIRST_PASS && likely(!should_recompile))
+                               pc = lightrec_emulate_block(state, block, pc);
 
-               if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
                        /* Then compile it using the profiled data */
-                       if (ENABLE_THREADED_COMPILER)
-                               lightrec_recompiler_add(state->rec, block);
-                       else
-                               lightrec_compile_block(block);
+                       lightrec_compile_block(state->cstate, block);
+               } else {
+                       lightrec_recompiler_add(state->rec, block);
                }
 
                if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
-                   state->current_cycle >= state->target_cycle) {
-                       state->next_pc = pc;
-                       return NULL;
-               }
+                   state->current_cycle >= state->target_cycle)
+                       break;
        }
-}
 
-static s32 c_generic_function_wrapper(struct lightrec_state *state,
-                                     s32 cycles_delta,
-                                     void (*f)(struct lightrec_state *,
-                                               struct opcode *,
-                                               struct block *),
-                                     struct opcode *op, struct block *block)
-{
-       state->current_cycle = state->target_cycle - cycles_delta;
-
-       (*f)(state, op, block);
-
-       return state->target_cycle - state->current_cycle;
+       state->next_pc = pc;
+       return func;
 }
 
 static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
-                             void (*f)(struct lightrec_state *, union code),
-                             union code op)
+                             void (*f)(struct lightrec_state *, u32 d),
+                             u32 d)
 {
        state->current_cycle = state->target_cycle - cycles_delta;
 
-       (*f)(state, op);
+       (*f)(state, d);
 
        return state->target_cycle - state->current_cycle;
 }
 
-static struct block * generate_wrapper(struct lightrec_state *state,
-                                      void *f, bool generic)
+static struct block * generate_wrapper(struct lightrec_state *state)
 {
        struct block *block;
        jit_state_t *_jit;
@@ -594,25 +725,14 @@ static struct block * generate_wrapper(struct lightrec_state *state,
        jit_prepare();
        jit_pushargr(LIGHTREC_REG_STATE);
        jit_pushargr(LIGHTREC_REG_CYCLE);
-       jit_pushargi((uintptr_t)f);
        jit_pushargr(JIT_R0);
-       if (generic) {
-               jit_pushargr(JIT_R1);
-               jit_finishi(c_generic_function_wrapper);
-       } else {
-               jit_finishi(c_function_wrapper);
-       }
-
-#if __WORDSIZE == 64
+       jit_pushargr(JIT_R1);
+       jit_finishi(c_function_wrapper);
        jit_retval_i(LIGHTREC_REG_CYCLE);
-#else
-       jit_retval(LIGHTREC_REG_CYCLE);
-#endif
 
        jit_patch_at(jit_jmpi(), to_fn_epilog);
        jit_epilog();
 
-       block->state = state;
        block->_jit = _jit;
        block->function = jit_emit();
        block->opcode_list = NULL;
@@ -639,11 +759,35 @@ err_no_mem:
        return NULL;
 }
 
+/*
+ * Zero-fill guest memory using the host memset().
+ *
+ * The guest target address is read from state->regs.gpr[4] and the length,
+ * expressed in 32-bit words, from state->regs.gpr[5]. Unless
+ * state->invalidate_from_dma_only is set, the written area is then passed to
+ * lightrec_invalidate_map().
+ *
+ * Returns a rough estimate of the number of cycles consumed, or 0 if the
+ * target address is not covered by any memory map (in which case nothing is
+ * written).
+ */
+static u32 lightrec_memset(struct lightrec_state *state)
+{
+       u32 kunseg_pc = kunseg(state->regs.gpr[4]);
+       void *host;
+       const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
+       u32 length = state->regs.gpr[5] * 4;
+
+       if (!map) {
+               pr_err("Unable to find memory map for memset target address "
+                      "0x%x\n", kunseg_pc);
+               return 0;
+       }
+
+       pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n",
+                kunseg_pc, (uintptr_t)host, length);
+       memset(host, 0, length);
+
+       if (!state->invalidate_from_dma_only)
+               lightrec_invalidate_map(state, map, kunseg_pc, length);
+
+       /* Rough estimation of the number of cycles consumed: a fixed cost
+        * plus 5 cycles per 32-bit word. The division must apply to the whole
+        * 'length + 3' sum, hence the parentheses. */
+       return 8 + 5 * ((length + 3) / 4);
+}
+
 static struct block * generate_dispatcher(struct lightrec_state *state)
 {
        struct block *block;
        jit_state_t *_jit;
-       jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
+       jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3;
        unsigned int i;
        u32 offset, ram_len;
        jit_word_t code_size;
@@ -663,11 +807,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        jit_frame(256);
 
        jit_getarg(JIT_R0, jit_arg());
-#if __WORDSIZE == 64
        jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
-#else
-       jit_getarg(LIGHTREC_REG_CYCLE, jit_arg());
-#endif
 
        /* Force all callee-saved registers to be pushed on the stack */
        for (i = 0; i < NUM_REGS; i++)
@@ -682,10 +822,30 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        /* Call the block's code */
        jit_jmpr(JIT_R0);
 
+       if (OPT_REPLACE_MEMSET) {
+               /* Blocks will jump here when they need to call
+                * lightrec_memset() */
+               addr3 = jit_indirect();
+
+               jit_prepare();
+               jit_pushargr(LIGHTREC_REG_STATE);
+               jit_finishi(lightrec_memset);
+
+               jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
+                           offsetof(struct lightrec_state, regs.gpr[31]));
+
+               jit_retval(JIT_R0);
+               jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
+       }
+
        /* The block will jump here, with the number of cycles remaining in
         * LIGHTREC_REG_CYCLE */
        addr2 = jit_indirect();
 
+       /* Store back the next_pc to the lightrec_state structure */
+       offset = offsetof(struct lightrec_state, next_pc);
+       jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
+
        /* Jump to end if state->target_cycle < state->current_cycle */
        to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
 
@@ -695,9 +855,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        to_c = jit_bgei(JIT_R0, ram_len);
 
        /* Fast path: code is running from RAM, use the code LUT */
-#if __WORDSIZE == 64
-       jit_lshi(JIT_R0, JIT_R0, 1);
-#endif
+       if (__WORDSIZE == 64)
+               jit_lshi(JIT_R0, JIT_R0, 1);
        jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
        jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
 
@@ -707,7 +866,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        /* Slow path: call C function get_next_block_func() */
        jit_patch(to_c);
 
-       if (ENABLE_FIRST_PASS) {
+       if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
                /* We may call the interpreter - update state->current_cycle */
                jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
                           offsetof(struct lightrec_state, target_cycle));
@@ -728,7 +887,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        jit_finishi(&get_next_block_func);
        jit_retval(JIT_R0);
 
-       if (ENABLE_FIRST_PASS) {
+       if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
                /* The interpreter may have updated state->current_cycle and
                 * state->target_cycle - recalc the delta */
                jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
@@ -741,22 +900,13 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        /* If we get non-NULL, loop */
        jit_patch_at(jit_bnei(JIT_R0, 0), loop);
 
-       to_end2 = jit_jmpi();
-
        /* When exiting, the recompiled code will jump to that address */
        jit_note(__FILE__, __LINE__);
        jit_patch(to_end);
 
-       /* Store back the next_pc to the lightrec_state structure */
-       offset = offsetof(struct lightrec_state, next_pc);
-       jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
-
-       jit_patch(to_end2);
-
        jit_retr(LIGHTREC_REG_CYCLE);
        jit_epilog();
 
-       block->state = state;
        block->_jit = _jit;
        block->function = jit_emit();
        block->opcode_list = NULL;
@@ -769,6 +919,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        block->code_size = code_size;
 
        state->eob_wrapper_func = jit_address(addr2);
+       if (OPT_REPLACE_MEMSET)
+               state->memset_func = jit_address(addr3);
        state->get_next_block = jit_address(addr);
 
        if (ENABLE_DISASSEMBLER) {
@@ -789,18 +941,64 @@ err_no_mem:
 
 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
 {
-       u32 addr, kunseg_pc = kunseg(pc);
-       const u32 *code;
-       const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+       void *host;
 
-       addr = kunseg_pc - map->pc;
+       lightrec_get_map(state, &host, kunseg(pc));
 
-       while (map->mirror_of)
-               map = map->mirror_of;
+       const u32 *code = (u32 *)host;
+       return (union code) *code;
+}
 
-       code = map->address + addr;
+unsigned int lightrec_cycles_of_opcode(union code code)
+{
+       return 2;
+}
 
-       return (union code) *code;
+void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
+{
+       lightrec_free(state, MEM_FOR_IR,
+                     sizeof(*block->opcode_list) * block->nb_ops,
+                     block->opcode_list);
+}
+
+/* Return the number of opcodes in the MIPS block that starts at src. */
+static unsigned int lightrec_get_mips_block_len(const u32 *src)
+{
+       unsigned int nb_ops = 0;
+
+       for (;;) {
+               union code op = { .opcode = LE32TOH(src[nb_ops++]) };
+
+               if (is_syscall(op))
+                       return nb_ops;
+               /* The opcode following the jump belongs to the block too */
+               if (is_unconditional_jump(op))
+                       return nb_ops + 1;
+       }
+}
+
+/* Copy the block at src into a new opcode list; *len gets its byte size. */
+static struct opcode * lightrec_disassemble(struct lightrec_state *state,
+                                           const u32 *src, unsigned int *len)
+{
+       const unsigned int nb_ops = lightrec_get_mips_block_len(src);
+       struct opcode *ops, *op;
+
+       ops = lightrec_malloc(state, MEM_FOR_IR, sizeof(*ops) * nb_ops);
+       if (ops == NULL) {
+               pr_err("Unable to allocate memory\n");
+               return NULL;
+       }
+
+       /* Each opcode goes through LE32TOH(); flags start out cleared */
+       for (op = ops; op < ops + nb_ops; op++) {
+               op->opcode = LE32TOH(*src++);
+               op->flags = 0;
+       }
+
+       *len = nb_ops * sizeof(u32);
+
+       return ops;
 }
 
 static struct block * lightrec_precompile_block(struct lightrec_state *state,
@@ -808,21 +1006,15 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
 {
        struct opcode *list;
        struct block *block;
-       const u32 *code;
-       u32 addr, kunseg_pc = kunseg(pc);
-       const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+       void *host;
+       const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
+       const u32 *code = (u32 *) host;
        unsigned int length;
+       bool fully_tagged;
 
        if (!map)
                return NULL;
 
-       addr = kunseg_pc - map->pc;
-
-       while (map->mirror_of)
-               map = map->mirror_of;
-
-       code = map->address + addr;
-
        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block) {
                pr_err("Unable to recompile block: Out of memory\n");
@@ -836,11 +1028,10 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
        }
 
        block->pc = pc;
-       block->state = state;
        block->_jit = NULL;
        block->function = NULL;
        block->opcode_list = list;
-       block->map = map;
+       block->code = code;
        block->next = NULL;
        block->flags = 0;
        block->code_size = 0;
@@ -849,24 +1040,31 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
 #endif
        block->nb_ops = length / sizeof(u32);
 
-       lightrec_optimize(block);
+       lightrec_optimize(state, block);
 
        length = block->nb_ops * sizeof(u32);
 
        lightrec_register(MEM_FOR_MIPS_CODE, length);
 
        if (ENABLE_DISASSEMBLER) {
-               pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
-               lightrec_print_disassembly(block, code, length);
+               pr_debug("Disassembled block at PC: 0x%08x\n", block->pc);
+               lightrec_print_disassembly(block, code);
        }
 
-       pr_debug("Block size: %lu opcodes\n", block->nb_ops);
+       pr_debug("Block size: %hu opcodes\n", block->nb_ops);
 
        /* If the first opcode is an 'impossible' branch, never compile the
         * block */
-       if (list->flags & LIGHTREC_EMULATE_BRANCH)
+       if (should_emulate(block->opcode_list))
                block->flags |= BLOCK_NEVER_COMPILE;
 
+       fully_tagged = lightrec_block_is_fully_tagged(block);
+       if (fully_tagged)
+               block->flags |= BLOCK_FULLY_TAGGED;
+
+       if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
+               state->code_lut[lut_offset(pc)] = state->memset_func;
+
        block->hash = lightrec_calculate_block_hash(block);
 
        pr_debug("Recompile count: %u\n", state->nb_precompile++);
@@ -874,11 +1072,14 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
        return block;
 }
 
-static bool lightrec_block_is_fully_tagged(struct block *block)
+static bool lightrec_block_is_fully_tagged(const struct block *block)
 {
-       struct opcode *op;
+       const struct opcode *op;
+       unsigned int i;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               op = &block->opcode_list[i];
 
-       for (op = block->opcode_list; op; op = op->next) {
                /* Verify that all load/stores of the opcode list
                 * Check all loads/stores of the opcode list and mark the
                 * block as fully compiled if they all have been tagged. */
@@ -908,22 +1109,24 @@ static bool lightrec_block_is_fully_tagged(struct block *block)
        return true;
 }
 
-static void lightrec_reap_block(void *data)
+static void lightrec_reap_block(struct lightrec_state *state, void *data)
 {
        struct block *block = data;
 
        pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
-       lightrec_free_block(block);
+       lightrec_unregister_block(state->block_cache, block);
+       lightrec_free_block(state, block);
 }
 
-static void lightrec_reap_jit(void *data)
+static void lightrec_reap_jit(struct lightrec_state *state, void *data)
 {
        _jit_destroy_state(data);
 }
 
-int lightrec_compile_block(struct block *block)
+int lightrec_compile_block(struct lightrec_cstate *cstate,
+                          struct block *block)
 {
-       struct lightrec_state *state = block->state;
+       struct lightrec_state *state = cstate->state;
        struct lightrec_branch_target *target;
        bool op_list_freed = false, fully_tagged = false;
        struct block *block2;
@@ -933,7 +1136,7 @@ int lightrec_compile_block(struct block *block)
        bool skip_next = false;
        jit_word_t code_size;
        unsigned int i, j;
-       u32 next_pc, offset;
+       u32 offset;
 
        fully_tagged = lightrec_block_is_fully_tagged(block);
        if (fully_tagged)
@@ -946,34 +1149,35 @@ int lightrec_compile_block(struct block *block)
        oldjit = block->_jit;
        block->_jit = _jit;
 
-       lightrec_regcache_reset(state->reg_cache);
-       state->cycles = 0;
-       state->nb_branches = 0;
-       state->nb_local_branches = 0;
-       state->nb_targets = 0;
+       lightrec_regcache_reset(cstate->reg_cache);
+       cstate->cycles = 0;
+       cstate->nb_branches = 0;
+       cstate->nb_local_branches = 0;
+       cstate->nb_targets = 0;
 
        jit_prolog();
        jit_tramp(256);
 
        start_of_block = jit_label();
 
-       for (elm = block->opcode_list; elm; elm = elm->next) {
-               next_pc = block->pc + elm->offset * sizeof(u32);
+       for (i = 0; i < block->nb_ops; i++) {
+               elm = &block->opcode_list[i];
 
                if (skip_next) {
                        skip_next = false;
                        continue;
                }
 
-               state->cycles += lightrec_cycles_of_opcode(elm->c);
+               cstate->cycles += lightrec_cycles_of_opcode(elm->c);
 
-               if (elm->flags & LIGHTREC_EMULATE_BRANCH) {
+               if (should_emulate(elm)) {
                        pr_debug("Branch at offset 0x%x will be emulated\n",
-                                elm->offset << 2);
-                       lightrec_emit_eob(block, elm, next_pc);
+                                i << 2);
+
+                       lightrec_emit_eob(cstate, block, i, false);
                        skip_next = !(elm->flags & LIGHTREC_NO_DS);
-               } else if (elm->opcode) {
-                       lightrec_rec_opcode(block, elm, next_pc);
+               } else {
+                       lightrec_rec_opcode(cstate, block, i);
                        skip_next = has_delay_slot(elm->c) &&
                                !(elm->flags & LIGHTREC_NO_DS);
 #if _WIN32
@@ -981,16 +1185,16 @@ int lightrec_compile_block(struct block *block)
                         * mapped registers as temporaries. Until the actual bug
                         * is found and fixed, unconditionally mark our
                         * registers as live here. */
-                       lightrec_regcache_mark_live(state->reg_cache, _jit);
+                       lightrec_regcache_mark_live(cstate->reg_cache, _jit);
 #endif
                }
        }
 
-       for (i = 0; i < state->nb_branches; i++)
-               jit_patch(state->branches[i]);
+       for (i = 0; i < cstate->nb_branches; i++)
+               jit_patch(cstate->branches[i]);
 
-       for (i = 0; i < state->nb_local_branches; i++) {
-               struct lightrec_branch *branch = &state->local_branches[i];
+       for (i = 0; i < cstate->nb_local_branches; i++) {
+               struct lightrec_branch *branch = &cstate->local_branches[i];
 
                pr_debug("Patch local branch to offset 0x%x\n",
                         branch->target << 2);
@@ -1000,15 +1204,15 @@ int lightrec_compile_block(struct block *block)
                        continue;
                }
 
-               for (j = 0; j < state->nb_targets; j++) {
-                       if (state->targets[j].offset == branch->target) {
+               for (j = 0; j < cstate->nb_targets; j++) {
+                       if (cstate->targets[j].offset == branch->target) {
                                jit_patch_at(branch->branch,
-                                            state->targets[j].label);
+                                            cstate->targets[j].label);
                                break;
                        }
                }
 
-               if (j == state->nb_targets)
+               if (j == cstate->nb_targets)
                        pr_err("Unable to find branch target\n");
        }
 
@@ -1026,19 +1230,16 @@ int lightrec_compile_block(struct block *block)
        /* Add compiled function to the LUT */
        state->code_lut[lut_offset(block->pc)] = block->function;
 
-       /* Fill code LUT with the block's entry points */
-       for (i = 0; i < state->nb_targets; i++) {
-               target = &state->targets[i];
-
-               if (target->offset) {
-                       offset = lut_offset(block->pc) + target->offset;
-                       state->code_lut[offset] = jit_address(target->label);
-               }
+       if (ENABLE_THREADED_COMPILER) {
+               /* Since we might try to reap the same block multiple times,
+                * we need the reaper to wait until everything has been
+                * submitted, so that the duplicate entries can be dropped. */
+               lightrec_reaper_pause(state->reaper);
        }
 
        /* Detect old blocks that have been covered by the new one */
-       for (i = 0; i < state->nb_targets; i++) {
-               target = &state->targets[i];
+       for (i = 0; i < cstate->nb_targets; i++) {
+               target = &cstate->targets[i];
 
                if (!target->offset)
                        continue;
@@ -1049,31 +1250,47 @@ int lightrec_compile_block(struct block *block)
                        /* No need to check if block2 is compilable - it must
                         * be, otherwise block wouldn't be compilable either */
 
+                       /* Set the "block dead" flag to prevent the dynarec from
+                        * recompiling this block */
                        block2->flags |= BLOCK_IS_DEAD;
 
+                       /* If block2 was pending for compilation, cancel it.
+                        * If it's being compiled right now, wait until it
+                        * finishes. */
+                       if (ENABLE_THREADED_COMPILER)
+                               lightrec_recompiler_remove(state->rec, block2);
+
+                       /* We know from now on that block2 isn't going to be
+                        * compiled. We can override the LUT entry with our
+                        * new block's entry point. */
+                       offset = lut_offset(block->pc) + target->offset;
+                       state->code_lut[offset] = jit_address(target->label);
+
                        pr_debug("Reap block 0x%08x as it's covered by block "
                                 "0x%08x\n", block2->pc, block->pc);
 
-                       lightrec_unregister_block(state->block_cache, block2);
-
+                       /* Finally, reap the block. */
                        if (ENABLE_THREADED_COMPILER) {
-                               lightrec_recompiler_remove(state->rec, block2);
                                lightrec_reaper_add(state->reaper,
                                                    lightrec_reap_block,
                                                    block2);
                        } else {
-                               lightrec_free_block(block2);
+                               lightrec_unregister_block(state->block_cache, block2);
+                               lightrec_free_block(state, block2);
                        }
                }
        }
 
+       if (ENABLE_THREADED_COMPILER) /* Resume the reaper paused above */
+               lightrec_reaper_continue(state->reaper);
+
        jit_get_code(&code_size);
        lightrec_register(MEM_FOR_CODE, code_size);
 
        block->code_size = code_size;
 
        if (ENABLE_DISASSEMBLER) {
-               pr_debug("Compiling block at PC: 0x%x\n", block->pc);
+               pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
                jit_disassemble();
        }
 
@@ -1086,7 +1303,7 @@ int lightrec_compile_block(struct block *block)
        if (fully_tagged && !op_list_freed) {
                pr_debug("Block PC 0x%08x is fully tagged"
                         " - free opcode list\n", block->pc);
-               lightrec_free_opcode_list(state, block->opcode_list);
+               lightrec_free_opcode_list(state, block);
                block->opcode_list = NULL;
        }
 
@@ -1104,6 +1321,20 @@ int lightrec_compile_block(struct block *block)
        return 0;
 }
 
+static void lightrec_print_info(struct lightrec_state *state)
+{
+       if ((state->current_cycle & ~0xfffffff) != state->old_cycle_counter) {
+               pr_info("Lightrec RAM usage: IR %u KiB, CODE %u KiB, "
+                       "MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
+                       lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
+                       lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
+                       lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
+                       lightrec_get_total_mem_usage() / 1024,
+                      lightrec_get_average_ipi());
+               state->old_cycle_counter = state->current_cycle & ~0xfffffff;
+       }
+}
+
 u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
 {
        s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
@@ -1117,6 +1348,7 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
                target_cycle = UINT_MAX;
 
        state->target_cycle = target_cycle;
+       state->next_pc = pc;
 
        block_trace = get_next_block_func(state, pc);
        if (block_trace) {
@@ -1130,6 +1362,9 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
        if (ENABLE_THREADED_COMPILER)
                lightrec_reaper_reap(state->reaper);
 
+       if (LOG_LEVEL >= INFO_L)
+               lightrec_print_info(state);
+
        return state->next_pc;
 }
 
@@ -1146,18 +1381,48 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
 
        state->exit_flags = LIGHTREC_EXIT_NORMAL;
 
-       return lightrec_emulate_block(block, pc);
+       pc = lightrec_emulate_block(state, block, pc);
+
+       if (LOG_LEVEL >= INFO_L)
+               lightrec_print_info(state);
+
+       return pc;
 }
 
-void lightrec_free_block(struct block *block)
+void lightrec_free_block(struct lightrec_state *state, struct block *block)
 {
        lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
        if (block->opcode_list)
-               lightrec_free_opcode_list(block->state, block->opcode_list);
+               lightrec_free_opcode_list(state, block);
        if (block->_jit)
                _jit_destroy_state(block->_jit);
        lightrec_unregister(MEM_FOR_CODE, block->code_size);
-       lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
+       lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+}
+
+/* Create a compiler state with its own register cache; NULL on failure. */
+struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state)
+{
+       struct lightrec_cstate *cs;
+
+       cs = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cs));
+       if (cs == NULL)
+               return NULL;
+
+       cs->state = state;
+       cs->reg_cache = lightrec_regcache_init(state);
+       if (cs->reg_cache != NULL)
+               return cs;
+
+       /* No register cache: release the half-built compiler state */
+       lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cs), cs);
+       return NULL;
+}
+
+void lightrec_free_cstate(struct lightrec_cstate *cstate)
+{
+       lightrec_free_regcache(cstate->reg_cache);
+       lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
 }
 
 struct lightrec_state * lightrec_init(char *argv0,
@@ -1168,11 +1433,7 @@ struct lightrec_state * lightrec_init(char *argv0,
        struct lightrec_state *state;
 
        /* Sanity-check ops */
-       if (!ops ||
-           !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc ||
-           !ops->cop0_ops.ctc || !ops->cop0_ops.op ||
-           !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc ||
-           !ops->cop2_ops.ctc || !ops->cop2_ops.op) {
+       if (!ops || !ops->cop2_op || !ops->enable_ram) {
                pr_err("Missing callbacks in lightrec_ops structure\n");
                return NULL;
        }
@@ -1197,18 +1458,18 @@ struct lightrec_state * lightrec_init(char *argv0,
        if (!state->block_cache)
                goto err_free_tinymm;
 
-       state->reg_cache = lightrec_regcache_init(state);
-       if (!state->reg_cache)
-               goto err_free_block_cache;
-
        if (ENABLE_THREADED_COMPILER) {
                state->rec = lightrec_recompiler_init(state);
                if (!state->rec)
-                       goto err_free_reg_cache;
+                       goto err_free_block_cache;
 
                state->reaper = lightrec_reaper_init(state);
                if (!state->reaper)
                        goto err_free_recompiler;
+       } else {
+               state->cstate = lightrec_create_cstate(state);
+               if (!state->cstate)
+                       goto err_free_block_cache;
        }
 
        state->nb_maps = nb;
@@ -1220,50 +1481,19 @@ struct lightrec_state * lightrec_init(char *argv0,
        if (!state->dispatcher)
                goto err_free_reaper;
 
-       state->rw_generic_wrapper = generate_wrapper(state,
-                                                    lightrec_rw_generic_cb,
-                                                    true);
-       if (!state->rw_generic_wrapper)
+       state->c_wrapper_block = generate_wrapper(state);
+       if (!state->c_wrapper_block)
                goto err_free_dispatcher;
 
-       state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
-       if (!state->rw_wrapper)
-               goto err_free_generic_rw_wrapper;
-
-       state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
-       if (!state->mfc_wrapper)
-               goto err_free_rw_wrapper;
-
-       state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
-       if (!state->mtc_wrapper)
-               goto err_free_mfc_wrapper;
-
-       state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
-       if (!state->rfe_wrapper)
-               goto err_free_mtc_wrapper;
-
-       state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
-       if (!state->cp_wrapper)
-               goto err_free_rfe_wrapper;
-
-       state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
-                                                 false);
-       if (!state->syscall_wrapper)
-               goto err_free_cp_wrapper;
-
-       state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
-                                               false);
-       if (!state->break_wrapper)
-               goto err_free_syscall_wrapper;
-
-       state->rw_generic_func = state->rw_generic_wrapper->function;
-       state->rw_func = state->rw_wrapper->function;
-       state->mfc_func = state->mfc_wrapper->function;
-       state->mtc_func = state->mtc_wrapper->function;
-       state->rfe_func = state->rfe_wrapper->function;
-       state->cp_func = state->cp_wrapper->function;
-       state->syscall_func = state->syscall_wrapper->function;
-       state->break_func = state->break_wrapper->function;
+       state->c_wrapper = state->c_wrapper_block->function;
+
+       state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
+       state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
+       state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
+       state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
+       state->c_wrappers[C_WRAPPER_CP] = lightrec_cp;
+       state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
+       state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
 
        map = &state->maps[PSX_MAP_BIOS];
        state->offset_bios = (uintptr_t)map->address - map->pc;
@@ -1279,32 +1509,27 @@ struct lightrec_state * lightrec_init(char *argv0,
            state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
                state->mirrors_mapped = true;
 
+       if (state->offset_bios == 0 &&
+           state->offset_scratch == 0 &&
+           state->offset_ram == 0 &&
+           state->mirrors_mapped) {
+               pr_info("Memory map is perfect. Emitted code will be best.\n");
+       } else {
+               pr_info("Memory map is sub-par. Emitted code will be slow.\n");
+       }
+
        return state;
 
-err_free_syscall_wrapper:
-       lightrec_free_block(state->syscall_wrapper);
-err_free_cp_wrapper:
-       lightrec_free_block(state->cp_wrapper);
-err_free_rfe_wrapper:
-       lightrec_free_block(state->rfe_wrapper);
-err_free_mtc_wrapper:
-       lightrec_free_block(state->mtc_wrapper);
-err_free_mfc_wrapper:
-       lightrec_free_block(state->mfc_wrapper);
-err_free_rw_wrapper:
-       lightrec_free_block(state->rw_wrapper);
-err_free_generic_rw_wrapper:
-       lightrec_free_block(state->rw_generic_wrapper);
 err_free_dispatcher:
-       lightrec_free_block(state->dispatcher);
+       lightrec_free_block(state, state->dispatcher);
 err_free_reaper:
        if (ENABLE_THREADED_COMPILER)
                lightrec_reaper_destroy(state->reaper);
 err_free_recompiler:
        if (ENABLE_THREADED_COMPILER)
                lightrec_free_recompiler(state->rec);
-err_free_reg_cache:
-       lightrec_free_regcache(state->reg_cache);
+       else
+               lightrec_free_cstate(state->cstate);
 err_free_block_cache:
        lightrec_free_block_cache(state->block_cache);
 err_free_tinymm:
@@ -1322,22 +1547,20 @@ err_finish_jit:
 
 void lightrec_destroy(struct lightrec_state *state)
 {
+       /* Force a print info on destroy*/
+       state->current_cycle = ~state->current_cycle;
+       lightrec_print_info(state);
+
        if (ENABLE_THREADED_COMPILER) {
                lightrec_free_recompiler(state->rec);
                lightrec_reaper_destroy(state->reaper);
+       } else {
+               lightrec_free_cstate(state->cstate);
        }
 
-       lightrec_free_regcache(state->reg_cache);
        lightrec_free_block_cache(state->block_cache);
-       lightrec_free_block(state->dispatcher);
-       lightrec_free_block(state->rw_generic_wrapper);
-       lightrec_free_block(state->rw_wrapper);
-       lightrec_free_block(state->mfc_wrapper);
-       lightrec_free_block(state->mtc_wrapper);
-       lightrec_free_block(state->rfe_wrapper);
-       lightrec_free_block(state->cp_wrapper);
-       lightrec_free_block(state->syscall_wrapper);
-       lightrec_free_block(state->break_wrapper);
+       lightrec_free_block(state, state->dispatcher);
+       lightrec_free_block(state, state->c_wrapper_block);
        finish_jit();
 
 #if ENABLE_TINYMM
@@ -1351,22 +1574,16 @@ void lightrec_destroy(struct lightrec_state *state)
 void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
 {
        u32 kaddr = kunseg(addr & ~0x3);
-       const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
+       const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr);
 
        if (map) {
-               while (map->mirror_of)
-                       map = map->mirror_of;
-
                if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
                        return;
 
                /* Handle mirrors */
                kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
 
-               for (; len > 4; len -= 4, kaddr += 4)
-                       lightrec_invalidate_map(state, map, kaddr);
-
-               lightrec_invalidate_map(state, map, kaddr);
+               lightrec_invalidate_map(state, map, kaddr, len);
        }
 }
 
@@ -1396,16 +1613,6 @@ u32 lightrec_exit_flags(struct lightrec_state *state)
        return state->exit_flags;
 }
 
-void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34])
-{
-       memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache));
-}
-
-void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34])
-{
-       memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache));
-}
-
 u32 lightrec_current_cycle_count(const struct lightrec_state *state)
 {
        return state->current_cycle;
@@ -1428,3 +1635,8 @@ void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
                state->target_cycle = cycles;
        }
 }
+
+struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state)
+{
+       return &state->regs;
+}
index d0793c0..e418c70 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_H__
@@ -52,9 +43,9 @@ struct lightrec_mem_map;
 
 /* Exit flags */
 #define LIGHTREC_EXIT_NORMAL   (0)
-#define LIGHTREC_EXIT_SYSCALL  (1 << 0)
+#define LIGHTREC_EXIT_CHECK_INTERRUPT  (1 << 0)
 #define LIGHTREC_EXIT_BREAK    (1 << 1)
-#define LIGHTREC_EXIT_CHECK_INTERRUPT  (1 << 2)
+#define LIGHTREC_EXIT_SYSCALL  (1 << 2)
 #define LIGHTREC_EXIT_SEGFAULT (1 << 3)
 
 enum psx_map {
@@ -69,14 +60,6 @@ enum psx_map {
        PSX_MAP_MIRROR3,
 };
 
-enum mem_type {
-       MEM_FOR_CODE,
-       MEM_FOR_MIPS_CODE,
-       MEM_FOR_IR,
-       MEM_FOR_LIGHTREC,
-       MEM_TYPE_END,
-};
-
 struct lightrec_mem_map_ops {
        void (*sb)(struct lightrec_state *, u32 opcode,
                   void *host, u32 addr, u8 data);
@@ -97,17 +80,16 @@ struct lightrec_mem_map {
        const struct lightrec_mem_map *mirror_of;
 };
 
-struct lightrec_cop_ops {
-       u32 (*mfc)(struct lightrec_state *state, u32 op, u8 reg);
-       u32 (*cfc)(struct lightrec_state *state, u32 op, u8 reg);
-       void (*mtc)(struct lightrec_state *state, u32 op, u8 reg, u32 value);
-       void (*ctc)(struct lightrec_state *state, u32 op, u8 reg, u32 value);
-       void (*op)(struct lightrec_state *state, u32 op);
+struct lightrec_ops {
+       void (*cop2_op)(struct lightrec_state *state, u32 op);
+       void (*enable_ram)(struct lightrec_state *state, _Bool enable);
 };
 
-struct lightrec_ops {
-       struct lightrec_cop_ops cop0_ops;
-       struct lightrec_cop_ops cop2_ops;
+struct lightrec_registers {
+       u32 gpr[34];
+       u32 cp0[32];
+       u32 cp2d[32];
+       u32 cp2c[32];
 };
 
 __api struct lightrec_state *lightrec_init(char *argv0,
@@ -130,19 +112,13 @@ __api void lightrec_set_invalidate_mode(struct lightrec_state *state,
 __api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags);
 __api u32 lightrec_exit_flags(struct lightrec_state *state);
 
-__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]);
-__api void lightrec_restore_registers(struct lightrec_state *state,
-                                     u32 regs[34]);
+__api struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state);
 
 __api u32 lightrec_current_cycle_count(const struct lightrec_state *state);
 __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles);
 __api void lightrec_set_target_cycle_count(struct lightrec_state *state,
                                           u32 cycles);
 
-__api unsigned int lightrec_get_mem_usage(enum mem_type type);
-__api unsigned int lightrec_get_total_mem_usage(void);
-__api float lightrec_get_average_ipi(void);
-
 #ifdef __cplusplus
 };
 #endif
index 2e6b99b..d39b669 100644 (file)
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
-#include "config.h"
+#include "lightrec-config.h"
 #include "lightrec-private.h"
 #include "memmanager.h"
 
index bd5028d..b14749f 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __MEMMANAGER_H__
@@ -17,6 +8,14 @@
 
 #include "lightrec.h"
 
+enum mem_type { /* memory category passed as 'type' to lightrec_malloc(), lightrec_calloc(), lightrec_register() and lightrec_unregister() */
+       MEM_FOR_CODE,
+       MEM_FOR_MIPS_CODE,
+       MEM_FOR_IR,
+       MEM_FOR_LIGHTREC,
+       MEM_TYPE_END, /* NOTE(review): presumably an end marker / number of categories rather than a real category - confirm */
+};
+
 void * lightrec_malloc(struct lightrec_state *state,
                       enum mem_type type, unsigned int len);
 void * lightrec_calloc(struct lightrec_state *state,
@@ -27,4 +26,8 @@ void lightrec_free(struct lightrec_state *state,
 void lightrec_register(enum mem_type type, unsigned int len);
 void lightrec_unregister(enum mem_type type, unsigned int len);
 
+unsigned int lightrec_get_mem_usage(enum mem_type type);
+unsigned int lightrec_get_total_mem_usage(void);
+float lightrec_get_average_ipi(void);
+
 #endif /* __MEMMANAGER_H__ */
index cf431f2..98a26f6 100644 (file)
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
+#include "lightrec-config.h"
 #include "disassembler.h"
 #include "lightrec.h"
 #include "memmanager.h"
 #include <errno.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#include <string.h>
+/* IF_OPT(opt, ptr): evaluates to ptr when opt is non-zero, to NULL otherwise. */
+#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
 
 struct optimizer_list { /* NOTE(review): not referenced anywhere in the visible hunks - confirm it is still used */
        void (**optimizers)(struct opcode *); /* pointer to function pointers, each taking a struct opcode * and returning nothing */
        unsigned int nb_optimizers; /* presumably the number of entries reachable through optimizers - confirm */
 };
 
-bool opcode_reads_register(union code op, u8 reg)
+static bool is_nop(union code op);
+/* Tell whether opcode c is a jump or branch that is always taken:
+ * JR or JALR, J or JAL, a BEQ or BLEZ whose rs and rt fields are equal, or a
+ * BGEZ or BGEZAL that tests register 0. Every other opcode yields false. */
+bool is_unconditional_jump(union code c)
+{
+       switch (c.i.op) {
+       case OP_SPECIAL:
+               return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
+       case OP_J:
+       case OP_JAL:
+               return true;
+       case OP_BEQ:
+       case OP_BLEZ:
+               return c.i.rs == c.i.rt; /* BEQ: a register always equals itself. NOTE(review): for BLEZ this presumably relies on the rt field being 0 - confirm */
+       case OP_REGIMM:
+               return (c.r.rt == OP_REGIMM_BGEZ ||
+                       c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0; /* rs is register 0 ($zero), so the ">= 0" test always holds */
+       default:
+               return false;
+       }
+}
+/* Tell whether opcode c is a SYSCALL, or an MTC0 or CTC0 whose rd field is
+ * 12 or 13. NOTE(review): 12 and 13 are presumably the CP0 Status and Cause
+ * registers, which would explain why writing them is reported here - confirm. */
+bool is_syscall(union code c)
+{
+       return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
+               (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
+                                       c.r.rs == OP_CP0_CTC0) &&
+                (c.r.rd == 12 || c.r.rd == 13));
+}
+
+static u64 opcode_read_mask(union code op)
 {
        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
-                       return false;
+                       return 0;
                case OP_SPECIAL_JR:
                case OP_SPECIAL_JALR:
                case OP_SPECIAL_MTHI:
                case OP_SPECIAL_MTLO:
-                       return op.r.rs == reg;
+                       return BIT(op.r.rs);
                case OP_SPECIAL_MFHI:
-                       return reg == REG_HI;
+                       return BIT(REG_HI);
                case OP_SPECIAL_MFLO:
-                       return reg == REG_LO;
+                       return BIT(REG_LO);
                case OP_SPECIAL_SLL:
                case OP_SPECIAL_SRL:
                case OP_SPECIAL_SRA:
-                       return op.r.rt == reg;
+                       return BIT(op.r.rt);
                default:
-                       return op.r.rs == reg || op.r.rt == reg;
+                       return BIT(op.r.rs) | BIT(op.r.rt);
                }
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MTC0:
                case OP_CP0_CTC0:
-                       return op.r.rt == reg;
+                       return BIT(op.r.rt);
                default:
-                       return false;
+                       return 0;
                }
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MTC2:
                        case OP_CP2_BASIC_CTC2:
-                               return op.r.rt == reg;
+                               return BIT(op.r.rt);
                        default:
-                               return false;
+                               break;
                        }
-               } else {
-                       return false;
                }
+               return 0;
        case OP_J:
        case OP_JAL:
        case OP_LUI:
-               return false;
+               return 0;
        case OP_BEQ:
        case OP_BNE:
        case OP_LWL:
@@ -84,33 +107,45 @@ bool opcode_reads_register(union code op, u8 reg)
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
-               return op.i.rs == reg || op.i.rt == reg;
+               return BIT(op.i.rs) | BIT(op.i.rt);
        default:
-               return op.i.rs == reg;
+               return BIT(op.i.rs);
        }
 }
 
-bool opcode_writes_register(union code op, u8 reg)
+static u64 opcode_write_mask(union code op)
 {
+       u64 flags;
+
        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JR:
-               case OP_SPECIAL_JALR:
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
-                       return false;
+                       return 0;
                case OP_SPECIAL_MULT:
                case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
-                       return reg == REG_LO || reg == REG_HI;
+                       if (!OPT_FLAG_MULT_DIV)
+                               return BIT(REG_LO) | BIT(REG_HI);
+
+                       if (op.r.rd)
+                               flags = BIT(op.r.rd);
+                       else
+                               flags = BIT(REG_LO);
+                       if (op.r.imm)
+                               flags |= BIT(op.r.imm);
+                       else
+                               flags |= BIT(REG_HI);
+                       return flags;
                case OP_SPECIAL_MTHI:
-                       return reg == REG_HI;
+                       return BIT(REG_HI);
                case OP_SPECIAL_MTLO:
-                       return reg == REG_LO;
+                       return BIT(REG_LO);
                default:
-                       return op.r.rd == reg;
+                       return BIT(op.r.rd);
                }
        case OP_ADDI:
        case OP_ADDIU:
@@ -127,34 +162,199 @@ bool opcode_writes_register(union code op, u8 reg)
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
-               return op.i.rt == reg;
+               return BIT(op.i.rt);
+       case OP_JAL:
+               return BIT(31);
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MFC0:
                case OP_CP0_CFC0:
-                       return op.i.rt == reg;
+                       return BIT(op.i.rt);
                default:
-                       return false;
+                       return 0;
                }
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MFC2:
                        case OP_CP2_BASIC_CFC2:
-                               return op.i.rt == reg;
+                               return BIT(op.i.rt);
                        default:
-                               return false;
+                               break;
                        }
-               } else {
-                       return false;
+               }
+               return 0;
+       case OP_REGIMM:
+               switch (op.r.rt) {
+               case OP_REGIMM_BLTZAL:
+               case OP_REGIMM_BGEZAL:
+                       return BIT(31);
+               default:
+                       return 0;
                }
        case OP_META_MOV:
-               return op.r.rd == reg;
+               return BIT(op.r.rd);
        default:
+               return 0;
+       }
+}
+/* Tell whether opcode op reads register reg, by testing bit number reg of
+ * the mask returned by opcode_read_mask(op). */
+bool opcode_reads_register(union code op, u8 reg)
+{
+       return opcode_read_mask(op) & BIT(reg);
+}
+/* Tell whether opcode op writes register reg, by testing bit number reg of
+ * the mask returned by opcode_write_mask(op). */
+bool opcode_writes_register(union code op, u8 reg)
+{
+       return opcode_write_mask(op) & BIT(reg);
+}
+/* Search backwards, starting just before list[offset], for the nearest opcode
+ * that writes reg.
+ * Returns the index of that opcode, or -1 when list[offset] is flagged
+ * LIGHTREC_SYNC, when the writer directly follows an opcode that has a delay
+ * slot, when an opcode flagged LIGHTREC_SYNC, an opcode with a delay slot or
+ * a reader of reg is met before any writer, or when index 0 is passed
+ * without finding one. */
+static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
+{
+       union code c;
+       unsigned int i;
+
+       if (list[offset].flags & LIGHTREC_SYNC)
+               return -1;
+
+       for (i = offset; i > 0; i--) { /* the opcode examined in each pass is list[i - 1] */
+               c = list[i - 1].c;
+
+               if (opcode_writes_register(c, reg)) { /* tested first: a writer is accepted even if it also reads reg or is itself flagged LIGHTREC_SYNC */
+                       if (i > 1 && has_delay_slot(list[i - 2].c)) /* the writer comes right after an opcode with a delay slot: give up */
+                               break;
+
+                       return i - 1;
+               }
+
+               if ((list[i - 1].flags & LIGHTREC_SYNC) ||
+                   has_delay_slot(c) ||
+                   opcode_reads_register(c, reg))
+                       break;
+       }
+
+       return -1;
+}
+/* Search forwards, starting at list[offset] itself, for the first opcode that
+ * reads reg.
+ * Returns the index of that opcode, or -1 when list[offset] is flagged
+ * LIGHTREC_SYNC, when the reader directly follows an opcode that has a delay
+ * slot, or when an opcode flagged LIGHTREC_SYNC, an opcode with a delay slot
+ * or a writer of reg is met before any reader.
+ * The loop has no upper bound on the index; it only stops through one of the
+ * conditions above. NOTE(review): presumably every opcode list is guaranteed
+ * to contain such a terminating opcode - confirm. */
+static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
+{
+       unsigned int i;
+       union code c;
+
+       if (list[offset].flags & LIGHTREC_SYNC)
+               return -1;
+
+       for (i = offset; ; i++) {
+               c = list[i].c;
+
+               if (opcode_reads_register(c, reg)) { /* tested first: a reader is accepted even if it also writes reg or is itself flagged LIGHTREC_SYNC */
+                       if (i > 0 && has_delay_slot(list[i - 1].c)) /* the reader comes right after an opcode with a delay slot: give up */
+                               break;
+
+                       return i;
+               }
+
+               if ((list[i].flags & LIGHTREC_SYNC) ||
+                   has_delay_slot(c) || opcode_writes_register(c, reg))
+                       break;
+       }
+
+       return -1;
+}
+
+/* Tell whether the value held in reg after list[offset] is dead, i.e. it is
+ * overwritten before anything reads it.
+ * The opcodes following list[offset] are scanned in order: a read of reg
+ * means "not dead", a write means "dead". When an opcode with a delay slot
+ * is reached the scan stops there and only the opcode that follows it is
+ * inspected.
+ * Returns false when list[offset] is flagged LIGHTREC_SYNC.
+ * The loop has no upper bound on the index; it relies on meeting a reader, a
+ * writer or an opcode with a delay slot. */
+static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
+{
+       unsigned int i;
+
+       if (list[offset].flags & LIGHTREC_SYNC)
                return false;
+
+       for (i = offset + 1; ; i++) {
+               if (opcode_reads_register(list[i].c, reg))
+                       return false;
+
+               if (opcode_writes_register(list[i].c, reg))
+                       return true;
+
+               if (has_delay_slot(list[i].c)) {
+                       /* With LIGHTREC_NO_DS the next opcode cannot be
+                        * relied upon, so answer conservatively. The next
+                        * opcode must also not read reg: an opcode that both
+                        * reads and writes it (e.g. "addiu r, r, 1") still
+                        * consumes the current value, so reg is not dead. */
+                       if ((list[i].flags & LIGHTREC_NO_DS) ||
+                           opcode_reads_register(list[i + 1].c, reg))
+                               return false;
+
+                       return opcode_writes_register(list[i + 1].c, reg);
+               }
        }
 }
 
+static bool reg_is_read(const struct opcode *list,
+                       unsigned int a, unsigned int b, u8 reg)
+{
+       /* Return true if reg is read in one of the opcodes of the interval
+        * [a, b[ */
+       for (; a < b; a++) {
+               if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
+                       return true;
+       }
+
+       return false;
+}
+/* Tell whether any opcode with index in [a, b[ writes reg. Opcodes for which
+ * is_nop() is true are skipped. An empty interval yields false. */
+static bool reg_is_written(const struct opcode *list,
+                          unsigned int a, unsigned int b, u8 reg)
+{
+       /* Return true if reg is written in one of the opcodes of the interval
+        * [a, b[ */
+
+       for (; a < b; a++) {
+               if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
+                       return true;
+       }
+
+       return false;
+}
+/* Tell whether any opcode with index in [a, b[ reads or writes reg; simply
+ * combines reg_is_read() and reg_is_written() over the same interval. */
+static bool reg_is_read_or_written(const struct opcode *list,
+                                  unsigned int a, unsigned int b, u8 reg)
+{
+       return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
+}
+/* Tell whether the primary opcode field of op is one of the load opcodes
+ * LB, LH, LWL, LW, LBU, LHU, LWR or LWC2. */
+static bool opcode_is_load(union code op)
+{
+       switch (op.i.op) {
+       case OP_LB:
+       case OP_LH:
+       case OP_LWL:
+       case OP_LW:
+       case OP_LBU:
+       case OP_LHU:
+       case OP_LWR:
+       case OP_LWC2:
+               return true;
+       default:
+               return false;
+       }
+}
+/* Tell whether the primary opcode field of op is one of the store opcodes
+ * SB, SH, SW, SWL, SWR or SWC2. */
+static bool opcode_is_store(union code op)
+{
+       switch (op.i.op) {
+       case OP_SB:
+       case OP_SH:
+       case OP_SW:
+       case OP_SWL:
+       case OP_SWR:
+       case OP_SWC2:
+               return true;
+       default:
+               return false;
+       }
+}
+/* Tell whether op is a memory access, i.e. a load according to
+ * opcode_is_load() or a store according to opcode_is_store(). */
+bool opcode_is_io(union code op)
+{
+       return opcode_is_load(op) || opcode_is_store(op);
+}
+
 /* TODO: Complete */
 static bool is_nop(union code op)
 {
@@ -196,6 +396,9 @@ static bool is_nop(union code op)
                case OP_SPECIAL_SRA:
                case OP_SPECIAL_SRL:
                        return op.r.rd == op.r.rt && op.r.imm == 0;
+               case OP_SPECIAL_MFHI:
+               case OP_SPECIAL_MFLO:
+                       return op.r.rd == 0;
                default:
                        return false;
                }
@@ -256,8 +459,13 @@ bool load_in_delay_slot(union code op)
        return false;
 }
 
-static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
+static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v)
 {
+       union code c = op->c;
+
+       if (op->flags & LIGHTREC_SYNC)
+               return 0;
+
        switch (c.i.op) {
        case OP_SPECIAL:
                switch (c.r.op) {
@@ -478,77 +686,199 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
        return known;
 }
 
-static int lightrec_add_meta(struct block *block,
-                            struct opcode *op, union code code)
+static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
 {
-       struct opcode *meta;
+       struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
+       struct opcode *to_change, *to_nop;
+       int idx, idx2;
 
-       meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
-       if (!meta)
-               return -ENOMEM;
+       if (curr->r.imm != 24 && curr->r.imm != 16)
+               return;
+
+       idx = find_prev_writer(list, offset, curr->r.rt);
+       if (idx < 0)
+               return;
+
+       prev = &list[idx];
+
+       if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
+           prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
+               return;
 
-       meta->c = code;
-       meta->flags = 0;
+       if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
+               /* sll rY, rX, 16
+                * ...
+                * sra rZ, rY, 16 */
 
-       if (op) {
-               meta->offset = op->offset;
-               meta->next = op->next;
-               op->next = meta;
+               if (!reg_is_dead(list, offset, curr->r.rt) ||
+                   reg_is_read_or_written(list, idx, offset, curr->r.rd))
+                       return;
+
+               /* If rY is dead after the SRA, and rZ is neither read nor written
+                * between the SLL and the SRA, we can change rY to rZ */
+
+               pr_debug("Detected SLL/SRA with middle temp register\n");
+               prev->r.rd = curr->r.rd;
+               curr->r.rt = prev->r.rd;
+       }
+
+       /* We got a SLL/SRA combo, i.e. a sign extension. If imm #16, that's
+        * a cast to s16. If imm #24 that's a cast to s8.
+        *
+        * First of all, make sure that the target register of the SLL is not
+        * read before the SRA. */
+
+       if (prev->r.rd == prev->r.rt) {
+               /* sll rX, rX, 16
+                * ...
+                * sra rY, rX, 16 */
+               to_change = curr;
+               to_nop = prev;
+
+               /* rX is used after the SRA - we cannot convert it. */
+               if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
+                       return;
        } else {
-               meta->offset = 0;
-               meta->next = block->opcode_list;
-               block->opcode_list = meta;
+               /* sll rY, rX, 16
+                * ...
+                * sra rY, rY, 16 */
+               to_change = prev;
+               to_nop = curr;
        }
 
-       return 0;
-}
+       idx2 = find_prev_writer(list, idx, prev->r.rt);
+       if (idx2 >= 0) {
+               /* Note that PSX games sometimes do casts after
+                * a LHU or LBU; in this case we can change the
+                * load opcode to a LH or LB, and the cast can
+                * be changed to a MOV or a simple NOP. */
+
+               prev2 = &list[idx2];
+
+               if (curr->r.rd != prev2->i.rt &&
+                   !reg_is_dead(list, offset, prev2->i.rt))
+                       prev2 = NULL;
+               else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
+                       prev2->i.op = OP_LH;
+               else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
+                       prev2->i.op = OP_LB;
+               else
+                       prev2 = NULL;
+
+               if (prev2) {
+                       if (curr->r.rd == prev2->i.rt) {
+                               to_change->opcode = 0;
+                       } else if (reg_is_dead(list, offset, prev2->i.rt) &&
+                                  !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
+                               /* The target register of the SRA is dead after the
+                                * LBU/LHU; we can change the target register of the
+                                * LBU/LHU to the one of the SRA. */
+                               prev2->i.rt = curr->r.rd;
+                               to_change->opcode = 0;
+                       } else {
+                               to_change->i.op = OP_META_MOV;
+                               to_change->r.rd = curr->r.rd;
+                               to_change->r.rs = prev2->i.rt;
+                       }
 
-static int lightrec_add_sync(struct block *block, struct opcode *prev)
-{
-       return lightrec_add_meta(block, prev, (union code){
-                                .j.op = OP_META_SYNC,
-                                });
+                       if (to_nop->r.imm == 24)
+                               pr_debug("Convert LBU+SLL+SRA to LB\n");
+                       else
+                               pr_debug("Convert LHU+SLL+SRA to LH\n");
+               }
+       }
+
+       if (!prev2) {
+               pr_debug("Convert SLL/SRA #%u to EXT%c\n",
+                        prev->r.imm,
+                        prev->r.imm == 24 ? 'C' : 'S');
+
+               if (to_change == prev) {
+                       to_change->i.rs = prev->r.rt;
+                       to_change->i.rt = curr->r.rd;
+               } else {
+                       to_change->i.rt = curr->r.rd;
+                       to_change->i.rs = prev->r.rt;
+               }
+
+               if (to_nop->r.imm == 24)
+                       to_change->i.op = OP_META_EXTC;
+               else
+                       to_change->i.op = OP_META_EXTS;
+       }
+
+       to_nop->opcode = 0;
 }
 
-static int lightrec_transform_ops(struct block *block)
+static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list = block->opcode_list;
+       struct opcode *op;
+       u32 known = BIT(0);
+       u32 values[32] = { 0 };
+       unsigned int i;
+       int reader;
 
-       for (; list; list = list->next) {
+       for (i = 0; i < block->nb_ops; i++) {
+               op = &list[i];
 
                /* Transform all opcodes detected as useless to real NOPs
                 * (0x0: SLL r0, r0, #0) */
-               if (list->opcode != 0 && is_nop(list->c)) {
+               if (op->opcode != 0 && is_nop(op->c)) {
                        pr_debug("Converting useless opcode 0x%08x to NOP\n",
-                                       list->opcode);
-                       list->opcode = 0x0;
+                                       op->opcode);
+                       op->opcode = 0x0;
                }
 
-               if (!list->opcode)
+               if (!op->opcode)
                        continue;
 
-               switch (list->i.op) {
-               /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
-                * two registers is zero. */
+               /* Register $zero is always, well, zero */
+               known |= BIT(0);
+               values[0] = 0;
+
+               switch (op->i.op) {
                case OP_BEQ:
-                       if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
-                               list->i.op = OP_META_BEQZ;
-                               if (list->i.rs == 0) {
-                                       list->i.rs = list->i.rt;
-                                       list->i.rt = 0;
-                               }
-                       } else if (list->i.rs == list->i.rt) {
-                               list->i.rs = 0;
-                               list->i.rt = 0;
+                       if (op->i.rs == op->i.rt) {
+                               op->i.rs = 0;
+                               op->i.rt = 0;
+                       } else if (op->i.rs == 0) {
+                               op->i.rs = op->i.rt;
+                               op->i.rt = 0;
                        }
                        break;
+
                case OP_BNE:
-                       if (list->i.rs == 0) {
-                               list->i.op = OP_META_BNEZ;
-                               list->i.rs = list->i.rt;
-                               list->i.rt = 0;
-                       } else if (list->i.rt == 0) {
-                               list->i.op = OP_META_BNEZ;
+                       if (op->i.rs == 0) {
+                               op->i.rs = op->i.rt;
+                               op->i.rt = 0;
+                       }
+                       break;
+
+               case OP_LUI:
+                       if (!(op->flags & LIGHTREC_SYNC) &&
+                           (known & BIT(op->i.rt)) &&
+                           values[op->i.rt] == op->i.imm << 16) {
+                               pr_debug("Converting duplicated LUI to NOP\n");
+                               op->opcode = 0x0;
+                       }
+
+                       if (op->i.imm != 0 || op->i.rt == 0)
+                               break;
+
+                       reader = find_next_reader(list, i + 1, op->i.rt);
+                       if (reader > 0 &&
+                           (opcode_writes_register(list[reader].c, op->i.rt) ||
+                            reg_is_dead(list, reader, op->i.rt))) {
+
+                               pr_debug("Removing useless LUI 0x0\n");
+
+                               if (list[reader].i.rs == op->i.rt)
+                                       list[reader].i.rs = 0;
+                               if (list[reader].i.op == OP_SPECIAL &&
+                                   list[reader].i.rt == op->i.rt)
+                                       list[reader].i.rt = 0;
+                               op->opcode = 0x0;
                        }
                        break;
 
@@ -557,36 +887,45 @@ static int lightrec_transform_ops(struct block *block)
                case OP_ORI:
                case OP_ADDI:
                case OP_ADDIU:
-                       if (list->i.imm == 0) {
+                       if (op->i.imm == 0) {
                                pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
-                               list->i.op = OP_META_MOV;
-                               list->r.rd = list->i.rt;
+                               op->i.op = OP_META_MOV;
+                               op->r.rd = op->i.rt;
                        }
                        break;
                case OP_SPECIAL:
-                       switch (list->r.op) {
-                       case OP_SPECIAL_SLL:
+                       switch (op->r.op) {
                        case OP_SPECIAL_SRA:
+                               if (op->r.imm == 0) {
+                                       pr_debug("Convert SRA #0 to MOV\n");
+                                       op->i.op = OP_META_MOV;
+                                       op->r.rs = op->r.rt;
+                                       break;
+                               }
+
+                               lightrec_optimize_sll_sra(block->opcode_list, i);
+                               break;
+                       case OP_SPECIAL_SLL:
                        case OP_SPECIAL_SRL:
-                               if (list->r.imm == 0) {
-                                       pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
-                                       list->i.op = OP_META_MOV;
-                                       list->r.rs = list->r.rt;
+                               if (op->r.imm == 0) {
+                                       pr_debug("Convert SLL/SRL #0 to MOV\n");
+                                       op->i.op = OP_META_MOV;
+                                       op->r.rs = op->r.rt;
                                }
                                break;
                        case OP_SPECIAL_OR:
                        case OP_SPECIAL_ADD:
                        case OP_SPECIAL_ADDU:
-                               if (list->r.rs == 0) {
+                               if (op->r.rs == 0) {
                                        pr_debug("Convert OR/ADD $zero to MOV\n");
-                                       list->i.op = OP_META_MOV;
-                                       list->r.rs = list->r.rt;
+                                       op->i.op = OP_META_MOV;
+                                       op->r.rs = op->r.rt;
                                }
                        case OP_SPECIAL_SUB: /* fall-through */
                        case OP_SPECIAL_SUBU:
-                               if (list->r.rt == 0) {
+                               if (op->r.rt == 0) {
                                        pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
-                                       list->i.op = OP_META_MOV;
+                                       op->i.op = OP_META_MOV;
                                }
                        default: /* fall-through */
                                break;
@@ -594,27 +933,37 @@ static int lightrec_transform_ops(struct block *block)
                default: /* fall-through */
                        break;
                }
+
+               known = lightrec_propagate_consts(op, known, values);
        }
 
        return 0;
 }
 
-static int lightrec_switch_delay_slots(struct block *block)
+static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
 {
-       struct opcode *list, *prev;
+       struct opcode *list, *next = &block->opcode_list[0];
+       unsigned int i;
+       union code op, next_op;
        u8 flags;
 
-       for (list = block->opcode_list, prev = NULL; list->next;
-            prev = list, list = list->next) {
-               union code op = list->c;
-               union code next_op = list->next->c;
+       for (i = 0; i < block->nb_ops - 1; i++) {
+               list = next;
+               next = &block->opcode_list[i + 1];
+               next_op = next->c;
+               op = list->c;
 
                if (!has_delay_slot(op) ||
                    list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
-                   op.opcode == 0)
+                   op.opcode == 0 || next_op.opcode == 0)
+                       continue;
+
+               if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
+                   !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS))
                        continue;
 
-               if (prev && has_delay_slot(prev->c))
+               if ((list->flags & LIGHTREC_SYNC) ||
+                   (next->flags & LIGHTREC_SYNC))
                        continue;
 
                switch (list->i.op) {
@@ -644,8 +993,6 @@ static int lightrec_switch_delay_slots(struct block *block)
                                continue;
                case OP_BLEZ: /* fall-through */
                case OP_BGTZ:
-               case OP_META_BEQZ:
-               case OP_META_BNEZ:
                        if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
                                continue;
                        break;
@@ -668,27 +1015,60 @@ static int lightrec_switch_delay_slots(struct block *block)
                }
 
                pr_debug("Swap branch and delay slot opcodes "
-                        "at offsets 0x%x / 0x%x\n", list->offset << 2,
-                        list->next->offset << 2);
+                        "at offsets 0x%x / 0x%x\n",
+                        i << 2, (i + 1) << 2);
 
-               flags = list->next->flags;
+               flags = next->flags;
                list->c = next_op;
-               list->next->c = op;
-               list->next->flags = list->flags | LIGHTREC_NO_DS;
+               next->c = op;
+               next->flags = list->flags | LIGHTREC_NO_DS;
                list->flags = flags | LIGHTREC_NO_DS;
-               list->offset++;
-               list->next->offset--;
        }
 
        return 0;
 }
 
-static int lightrec_detect_impossible_branches(struct block *block)
+static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
+{
+       struct opcode *list;
+
+       if (new_size >= block->nb_ops) {
+               pr_err("Invalid shrink size (%u vs %u)\n",
+                      new_size, block->nb_ops);
+               return -EINVAL;
+       }
+
+
+       list = lightrec_malloc(state, MEM_FOR_IR,
+                              sizeof(*list) * new_size);
+       if (!list) {
+               pr_err("Unable to allocate memory\n");
+               return -ENOMEM;
+       }
+
+       memcpy(list, block->opcode_list, sizeof(*list) * new_size);
+
+       lightrec_free_opcode_list(state, block);
+       block->opcode_list = list;
+       block->nb_ops = new_size;
+
+       pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
+                block->pc, new_size);
+
+       return 0;
+}
+
+static int lightrec_detect_impossible_branches(struct lightrec_state *state,
+                                              struct block *block)
 {
-       struct opcode *op, *next;
+       struct opcode *op, *next = &block->opcode_list[0];
+       unsigned int i;
+       int ret = 0;
+
+       for (i = 0; i < block->nb_ops - 1; i++) {
+               op = next;
+               next = &block->opcode_list[i + 1];
 
-       for (op = block->opcode_list, next = op->next; next;
-            op = next, next = op->next) {
                if (!has_delay_slot(op->c) ||
                    (!load_in_delay_slot(next->c) &&
                     !has_delay_slot(next->c) &&
@@ -702,29 +1082,34 @@ static int lightrec_detect_impossible_branches(struct block *block)
                        continue;
                }
 
+               op->flags |= LIGHTREC_EMULATE_BRANCH;
+
                if (op == block->opcode_list) {
+                       pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
+                                block->pc);
+
                        /* If the first opcode is an 'impossible' branch, we
                         * only keep the first two opcodes of the block (the
                         * branch itself + its delay slot) */
-                       lightrec_free_opcode_list(block->state, next->next);
-                       next->next = NULL;
-                       block->nb_ops = 2;
+                       if (block->nb_ops > 2)
+                               ret = shrink_opcode_list(state, block, 2);
+                       break;
                }
-
-               op->flags |= LIGHTREC_EMULATE_BRANCH;
        }
 
-       return 0;
+       return ret;
 }
 
-static int lightrec_local_branches(struct block *block)
+static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
 {
-       struct opcode *list, *target, *prev;
+       struct opcode *list;
+       unsigned int i;
        s32 offset;
-       int ret;
 
-       for (list = block->opcode_list; list; list = list->next) {
-               if (list->flags & LIGHTREC_EMULATE_BRANCH)
+       for (i = 0; i < block->nb_ops; i++) {
+               list = &block->opcode_list[i];
+
+               if (should_emulate(list))
                        continue;
 
                switch (list->i.op) {
@@ -733,9 +1118,7 @@ static int lightrec_local_branches(struct block *block)
                case OP_BLEZ:
                case OP_BGTZ:
                case OP_REGIMM:
-               case OP_META_BEQZ:
-               case OP_META_BNEZ:
-                       offset = list->offset + 1 + (s16)list->i.imm;
+                       offset = i + 1 + (s16)list->i.imm;
                        if (offset >= 0 && offset < block->nb_ops)
                                break;
                default: /* fall-through */
@@ -744,37 +1127,20 @@ static int lightrec_local_branches(struct block *block)
 
                pr_debug("Found local branch to offset 0x%x\n", offset << 2);
 
-               for (target = block->opcode_list, prev = NULL;
-                    target; prev = target, target = target->next) {
-                       if (target->offset != offset ||
-                           target->j.op == OP_META_SYNC)
-                               continue;
-
-                       if (target->flags & LIGHTREC_EMULATE_BRANCH) {
-                               pr_debug("Branch target must be emulated"
-                                        " - skip\n");
-                               break;
-                       }
-
-                       if (prev && has_delay_slot(prev->c)) {
-                               pr_debug("Branch target is a delay slot"
-                                        " - skip\n");
-                               break;
-                       }
+               if (should_emulate(&block->opcode_list[offset])) {
+                       pr_debug("Branch target must be emulated - skip\n");
+                       continue;
+               }
 
-                       if (prev && prev->j.op != OP_META_SYNC) {
-                               pr_debug("Adding sync before offset "
-                                        "0x%x\n", offset << 2);
-                               ret = lightrec_add_sync(block, prev);
-                               if (ret)
-                                       return ret;
+               if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
+                       pr_debug("Branch target is a delay slot - skip\n");
+                       continue;
+               }
 
-                               prev->next->offset = target->offset;
-                       }
+               pr_debug("Adding sync at offset 0x%x\n", offset << 2);
 
-                       list->flags |= LIGHTREC_LOCAL_BRANCH;
-                       break;
-               }
+               block->opcode_list[offset].flags |= LIGHTREC_SYNC;
+               list->flags |= LIGHTREC_LOCAL_BRANCH;
        }
 
        return 0;
@@ -798,77 +1164,80 @@ bool has_delay_slot(union code op)
        case OP_BLEZ:
        case OP_BGTZ:
        case OP_REGIMM:
-       case OP_META_BEQZ:
-       case OP_META_BNEZ:
                return true;
        default:
                return false;
        }
 }
 
-static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
+bool should_emulate(const struct opcode *list)
 {
-       return lightrec_add_meta(block, op, (union code){
-                                .i.op = OP_META_REG_UNLOAD,
-                                .i.rs = reg,
-                                });
+       return has_delay_slot(list->c) &&
+               (list->flags & LIGHTREC_EMULATE_BRANCH);
 }
 
-static int lightrec_early_unload(struct block *block)
+static void lightrec_add_unload(struct opcode *op, u8 reg)
 {
-       struct opcode *list = block->opcode_list;
-       u8 i;
+       if (op->i.op == OP_SPECIAL && reg == op->r.rd)
+               op->flags |= LIGHTREC_UNLOAD_RD;
 
-       for (i = 1; i < 34; i++) {
-               struct opcode *op, *last_r = NULL, *last_w = NULL;
-               unsigned int last_r_id = 0, last_w_id = 0, id = 0;
-               int ret;
+       if (op->i.rs == reg)
+               op->flags |= LIGHTREC_UNLOAD_RS;
+       if (op->i.rt == reg)
+               op->flags |= LIGHTREC_UNLOAD_RT;
+}
 
-               for (op = list; op->next; op = op->next, id++) {
-                       if (opcode_reads_register(op->c, i)) {
-                               last_r = op;
-                               last_r_id = id;
-                       }
+static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
+{
+       unsigned int i, offset;
+       struct opcode *op;
+       u8 reg;
 
-                       if (opcode_writes_register(op->c, i)) {
-                               last_w = op;
-                               last_w_id = id;
-                       }
+       for (reg = 1; reg < 34; reg++) {
+               int last_r_id = -1, last_w_id = -1;
+
+               for (i = 0; i < block->nb_ops; i++) {
+                       union code c = block->opcode_list[i].c;
+
+                       if (opcode_reads_register(c, reg))
+                               last_r_id = i;
+                       if (opcode_writes_register(c, reg))
+                               last_w_id = i;
                }
 
-               if (last_w_id > last_r_id) {
-                       if (has_delay_slot(last_w->c) &&
-                           !(last_w->flags & LIGHTREC_NO_DS))
-                               last_w = last_w->next;
+               if (last_w_id > last_r_id)
+                       offset = (unsigned int)last_w_id;
+               else if (last_r_id >= 0)
+                       offset = (unsigned int)last_r_id;
+               else
+                       continue;
 
-                       if (last_w->next) {
-                               ret = lightrec_add_unload(block, last_w, i);
-                               if (ret)
-                                       return ret;
-                       }
-               } else if (last_r) {
-                       if (has_delay_slot(last_r->c) &&
-                           !(last_r->flags & LIGHTREC_NO_DS))
-                               last_r = last_r->next;
+               op = &block->opcode_list[offset];
 
-                       if (last_r->next) {
-                               ret = lightrec_add_unload(block, last_r, i);
-                               if (ret)
-                                       return ret;
-                       }
-               }
+               if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS))
+                       offset++;
+
+               if (offset == block->nb_ops)
+                       continue;
+
+               lightrec_add_unload(&block->opcode_list[offset], reg);
        }
 
        return 0;
 }
 
-static int lightrec_flag_stores(struct block *block)
+static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
 {
+       const struct lightrec_mem_map *map;
        struct opcode *list;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
+       unsigned int i;
+       u32 val;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               list = &block->opcode_list[i];
 
-       for (list = block->opcode_list; list; list = list->next) {
                /* Register $zero is always, well, zero */
                known |= BIT(0);
                values[0] = 0;
@@ -877,145 +1246,464 @@ static int lightrec_flag_stores(struct block *block)
                case OP_SB:
                case OP_SH:
                case OP_SW:
-                       /* Mark all store operations that target $sp or $gp
-                        * as not requiring code invalidation. This is based
-                        * on the heuristic that stores using one of these
-                        * registers as address will never hit a code page. */
-                       if (list->i.rs >= 28 && list->i.rs <= 29 &&
-                           !block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
-                               pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
-                                        list->opcode);
-                               list->flags |= LIGHTREC_NO_INVALIDATE;
-                       }
-
-                       /* Detect writes whose destination address is inside the
-                        * current block, using constant propagation. When these
-                        * occur, we mark the blocks as not compilable. */
-                       if ((known & BIT(list->i.rs)) &&
-                           kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
-                           kunseg(values[list->i.rs]) < (kunseg(block->pc) +
-                                                         block->nb_ops * 4)) {
-                               pr_debug("Self-modifying block detected\n");
-                               block->flags |= BLOCK_NEVER_COMPILE;
-                               list->flags |= LIGHTREC_SMC;
+                       if (OPT_FLAG_STORES) {
+                               /* Mark all store operations that target $sp or $gp
+                                * as not requiring code invalidation. This is based
+                                * on the heuristic that stores using one of these
+                                * registers as address will never hit a code page. */
+                               if (list->i.rs >= 28 && list->i.rs <= 29 &&
+                                   !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
+                                       pr_debug("Flaging opcode 0x%08x as not "
+                                                "requiring invalidation\n",
+                                                list->opcode);
+                                       list->flags |= LIGHTREC_NO_INVALIDATE;
+                               }
+
+                               /* Detect writes whose destination address is inside the
+                                * current block, using constant propagation. When these
+                                * occur, we mark the blocks as not compilable. */
+                               if ((known & BIT(list->i.rs)) &&
+                                   kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
+                                   kunseg(values[list->i.rs]) < (kunseg(block->pc) +
+                                                                 block->nb_ops * 4)) {
+                                       pr_debug("Self-modifying block detected\n");
+                                       block->flags |= BLOCK_NEVER_COMPILE;
+                                       list->flags |= LIGHTREC_SMC;
+                               }
+                       }
+               case OP_SWL: /* fall-through */
+               case OP_SWR:
+               case OP_SWC2:
+               case OP_LB:
+               case OP_LBU:
+               case OP_LH:
+               case OP_LHU:
+               case OP_LW:
+               case OP_LWL:
+               case OP_LWR:
+               case OP_LWC2:
+                       if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
+                               val = kunseg(values[list->i.rs] + (s16) list->i.imm);
+                               map = lightrec_get_map(state, NULL, val);
+
+                               if (!map || map->ops ||
+                                   map == &state->maps[PSX_MAP_PARALLEL_PORT]) {
+                                       pr_debug("Flagging opcode %u as accessing I/O registers\n",
+                                                i);
+                                       list->flags |= LIGHTREC_HW_IO;
+                               } else {
+                                       pr_debug("Flaging opcode %u as direct memory access\n", i);
+                                       list->flags |= LIGHTREC_DIRECT_IO;
+                               }
                        }
                default: /* fall-through */
                        break;
                }
 
-               known = lightrec_propagate_consts(list->c, known, values);
+               known = lightrec_propagate_consts(list, known, values);
        }
 
        return 0;
 }
 
-static bool is_mult32(const struct block *block, const struct opcode *op)
+static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
+                           const struct opcode *last,
+                           u32 mask, bool sync, bool mflo, bool another)
 {
-       const struct opcode *next, *last = NULL;
-       u32 offset;
+       const struct opcode *op, *next = &block->opcode_list[offset];
+       u32 old_mask;
+       u8 reg2, reg = mflo ? REG_LO : REG_HI;
+       u16 branch_offset;
+       unsigned int i;
+
+       for (i = offset; i < block->nb_ops; i++) {
+               op = next;
+               next = &block->opcode_list[i + 1];
+               old_mask = mask;
+
+               /* If any other opcode writes or reads to the register
+                * we'd use, then we cannot use it anymore. */
+               mask |= opcode_read_mask(op->c);
+               mask |= opcode_write_mask(op->c);
+
+               if (op->flags & LIGHTREC_SYNC)
+                       sync = true;
 
-       for (op = op->next; op != last; op = op->next) {
                switch (op->i.op) {
                case OP_BEQ:
                case OP_BNE:
                case OP_BLEZ:
                case OP_BGTZ:
                case OP_REGIMM:
-               case OP_META_BEQZ:
-               case OP_META_BNEZ:
                        /* TODO: handle backwards branches too */
-                       if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
+                       if (!last &&
+                           (op->flags & LIGHTREC_LOCAL_BRANCH) &&
                            (s16)op->c.i.imm >= 0) {
-                               offset = op->offset + 1 + (s16)op->c.i.imm;
-
-                               for (next = op; next->offset != offset;
-                                    next = next->next);
-
-                               if (!is_mult32(block, next))
-                                       return false;
-
-                               last = next;
-                               continue;
-                       } else {
-                               return false;
+                               branch_offset = i + 1 + (s16)op->c.i.imm
+                                       - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+
+                               reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
+                                                       mask, sync, mflo, false);
+                               reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
+                                                        mask, sync, mflo, false);
+                               if (reg > 0 && reg == reg2)
+                                       return reg;
+                               if (!reg && !reg2)
+                                       return 0;
                        }
+
+                       return mflo ? REG_LO : REG_HI;
                case OP_SPECIAL:
                        switch (op->r.op) {
                        case OP_SPECIAL_MULT:
                        case OP_SPECIAL_MULTU:
                        case OP_SPECIAL_DIV:
                        case OP_SPECIAL_DIVU:
+                               return 0;
                        case OP_SPECIAL_MTHI:
-                               return true;
+                               if (!mflo)
+                                       return 0;
+                               continue;
+                       case OP_SPECIAL_MTLO:
+                               if (mflo)
+                                       return 0;
+                               continue;
                        case OP_SPECIAL_JR:
-                               return op->r.rs == 31 &&
-                                       ((op->flags & LIGHTREC_NO_DS) ||
-                                        !(op->next->i.op == OP_SPECIAL &&
-                                          op->next->r.op == OP_SPECIAL_MFHI));
+                               if (op->r.rs != 31)
+                                       return reg;
+
+                               if (!sync &&
+                                   !(op->flags & LIGHTREC_NO_DS) &&
+                                   (next->i.op == OP_SPECIAL) &&
+                                   ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
+                                   (mflo && next->r.op == OP_SPECIAL_MFLO)))
+                                       return next->r.rd;
+
+                               return 0;
                        case OP_SPECIAL_JALR:
+                               return reg;
                        case OP_SPECIAL_MFHI:
-                               return false;
-                       default:
+                               if (!mflo) {
+                                       if (another)
+                                               return op->r.rd;
+                                       /* Must use REG_HI if there is another MFHI target*/
+                                       reg2 = get_mfhi_mflo_reg(block, i + 1, next,
+                                                        0, sync, mflo, true);
+                                       if (reg2 > 0 && reg2 != REG_HI)
+                                               return REG_HI;
+
+                                       if (!sync && !(old_mask & BIT(op->r.rd)))
+                                               return op->r.rd;
+                                       else
+                                               return REG_HI;
+                               }
+                               continue;
+                       case OP_SPECIAL_MFLO:
+                               if (mflo) {
+                                       if (another)
+                                               return op->r.rd;
+                                       /* Must use REG_LO if there is another MFLO target*/
+                                       reg2 = get_mfhi_mflo_reg(block, i + 1, next,
+                                                        0, sync, mflo, true);
+                                       if (reg2 > 0 && reg2 != REG_LO)
+                                               return REG_LO;
+
+                                       if (!sync && !(old_mask & BIT(op->r.rd)))
+                                               return op->r.rd;
+                                       else
+                                               return REG_LO;
+                               }
                                continue;
+                       default:
+                               break;
                        }
+
+                       /* fall-through */
                default:
                        continue;
                }
        }
 
-       return last != NULL;
+       return reg;
+}
+
+static void lightrec_replace_lo_hi(struct block *block, u16 offset,
+                                  u16 last, bool lo)
+{
+       unsigned int i;
+       u32 branch_offset;
+
+       /* This function will remove the following MFLO/MFHI. It must be called
+        * only if get_mfhi_mflo_reg() returned a non-zero value. */
+
+       for (i = offset; i < last; i++) {
+               struct opcode *op = &block->opcode_list[i];
+
+               switch (op->i.op) {
+               case OP_BEQ:
+               case OP_BNE:
+               case OP_BLEZ:
+               case OP_BGTZ:
+               case OP_REGIMM:
+                       /* TODO: handle backwards branches too */
+                       if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
+                           (s16)op->c.i.imm >= 0) {
+                               branch_offset = i + 1 + (s16)op->c.i.imm
+                                       - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+
+                               lightrec_replace_lo_hi(block, branch_offset, last, lo);
+                               lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
+                       }
+                       break;
+
+               case OP_SPECIAL:
+                       if (lo && op->r.op == OP_SPECIAL_MFLO) {
+                               pr_debug("Removing MFLO opcode at offset 0x%x\n",
+                                        i << 2);
+                               op->opcode = 0;
+                               return;
+                       } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
+                               pr_debug("Removing MFHI opcode at offset 0x%x\n",
+                                        i << 2);
+                               op->opcode = 0;
+                               return;
+                       }
+
+                       /* fall-through */
+               default:
+                       break;
+               }
+       }
 }
 
-static int lightrec_flag_mults(struct block *block)
+static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
 {
-       struct opcode *list, *prev;
+       struct opcode *list;
+       u8 reg_hi, reg_lo;
+       unsigned int i;
+
+       for (i = 0; i < block->nb_ops - 1; i++) {
+               list = &block->opcode_list[i];
 
-       for (list = block->opcode_list, prev = NULL; list;
-            prev = list, list = list->next) {
                if (list->i.op != OP_SPECIAL)
                        continue;
 
                switch (list->r.op) {
                case OP_SPECIAL_MULT:
                case OP_SPECIAL_MULTU:
+               case OP_SPECIAL_DIV:
+               case OP_SPECIAL_DIVU:
                        break;
                default:
                        continue;
                }
 
-               /* Don't support MULT(U) opcodes in delay slots */
-               if (prev && has_delay_slot(prev->c))
+               /* Don't support opcodes in delay slots */
+               if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
+                   (list->flags & LIGHTREC_NO_DS))
                        continue;
 
-               if (is_mult32(block, list)) {
-                       pr_debug("Mark MULT(U) opcode at offset 0x%x as"
-                                " 32-bit\n", list->offset << 2);
-                       list->flags |= LIGHTREC_MULT32;
+               reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
+               if (reg_lo == 0) {
+                       pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
+                                " not writing LO\n", i << 2);
+                       list->flags |= LIGHTREC_NO_LO;
+               }
+
+               reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
+               if (reg_hi == 0) {
+                       pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
+                                " not writing HI\n", i << 2);
+                       list->flags |= LIGHTREC_NO_HI;
+               }
+
+               if (!reg_lo && !reg_hi) {
+                       pr_debug("Both LO/HI unused in this block, they will "
+                                "probably be used in parent block - removing "
+                                "flags.\n");
+                       list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
+               }
+
+               if (reg_lo > 0 && reg_lo != REG_LO) {
+                       pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
+                                lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
+
+                       lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
+                       list->r.rd = reg_lo;
+               } else {
+                       list->r.rd = 0;
+               }
+
+               if (reg_hi > 0 && reg_hi != REG_HI) {
+                       pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
+                                lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
+
+                       lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
+                       list->r.imm = reg_hi;
+               } else {
+                       list->r.imm = 0;
+               }
+       }
+
+       return 0;
+}
+
+static bool remove_div_sequence(struct block *block, unsigned int offset)
+{
+       struct opcode *op;
+       unsigned int i, found = 0;
+
+       /*
+        * Scan for the zero-checking sequence that GCC automatically introduced
+        * after most DIV/DIVU opcodes. This sequence checks the value of the
+        * divisor, and if zero, executes a BREAK opcode, causing the BIOS
+        * handler to crash the PS1.
+        *
+        * For DIV opcodes, this sequence additionally checks that the signed
+        * operation does not overflow.
+        *
+        * With the assumption that the games never crashed the PS1, we can
+        * therefore assume that the games never divided by zero or overflowed,
+        * and these sequences can be removed.
+        */
+
+       for (i = offset; i < block->nb_ops; i++) {
+               op = &block->opcode_list[i];
+
+               if (!found) {
+                       if (op->i.op == OP_SPECIAL &&
+                           (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
+                               break;
+
+                       if ((op->opcode & 0xfc1fffff) == 0x14000002) {
+                               /* BNE ???, zero, +8 */
+                               found++;
+                       } else {
+                               offset++;
+                       }
+               } else if (found == 1 && !op->opcode) {
+                       /* NOP */
+                       found++;
+               } else if (found == 2 && op->opcode == 0x0007000d) {
+                       /* BREAK 0x1c00 */
+                       found++;
+               } else if (found == 3 && op->opcode == 0x2401ffff) {
+                       /* LI at, -1 */
+                       found++;
+               } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
+                       /* BNE ???, at, +16 */
+                       found++;
+               } else if (found == 5 && op->opcode == 0x3c018000) {
+                       /* LUI at, 0x8000 */
+                       found++;
+               } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
+                       /* BNE ???, at, +16 */
+                       found++;
+               } else if (found == 7 && !op->opcode) {
+                       /* NOP */
+                       found++;
+               } else if (found == 8 && op->opcode == 0x0006000d) {
+                       /* BREAK 0x1800 */
+                       found++;
+                       break;
+               } else {
+                       break;
+               }
+       }
+
+       if (found >= 3) {
+               if (found != 9)
+                       found = 3;
+
+               pr_debug("Removing DIV%s sequence at offset 0x%x\n",
+                        found == 9 ? "" : "U", offset << 2);
+
+               for (i = 0; i < found; i++)
+                       block->opcode_list[offset + i].opcode = 0;
+
+               return true;
+       }
+
+       return false;
+}
+
+static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
+                                                     struct block *block)
+{
+       struct opcode *op;
+       unsigned int i;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               op = &block->opcode_list[i];
+
+               if (op->i.op == OP_SPECIAL &&
+                   (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
+                   remove_div_sequence(block, i + 1))
+                       op->flags |= LIGHTREC_NO_DIV_CHECK;
+       }
+
+       return 0;
+}
+
+static const u32 memset_code[] = {
+       0x10a00006,     // beqz         a1, 2f
+       0x24a2ffff,     // addiu        v0,a1,-1
+       0x2403ffff,     // li           v1,-1
+       0xac800000,     // 1: sw        zero,0(a0)
+       0x2442ffff,     // addiu        v0,v0,-1
+       0x1443fffd,     // bne          v0,v1, 1b
+       0x24840004,     // addiu        a0,a0,4
+       0x03e00008,     // 2: jr        ra
+       0x00000000,     // nop
+};
+
+static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
+{
+       unsigned int i;
+       union code c;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               c = block->opcode_list[i].c;
+
+               if (c.opcode != memset_code[i])
+                       return 0;
+
+               if (i == ARRAY_SIZE(memset_code) - 1) {
+                       /* success! */
+                       pr_debug("Block at PC 0x%x is a memset\n", block->pc);
+                       block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;
+
+                       /* Return non-zero to skip other optimizers. */
+                       return 1;
                }
        }
 
        return 0;
 }
 
-static int (*lightrec_optimizers[])(struct block *) = {
-       &lightrec_detect_impossible_branches,
-       &lightrec_transform_ops,
-       &lightrec_local_branches,
-       &lightrec_switch_delay_slots,
-       &lightrec_flag_stores,
-       &lightrec_flag_mults,
-       &lightrec_early_unload,
+static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
+       IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
+       IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
+       IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
+       IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
+       IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
+       IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
+       IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
+       IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
+       IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
 };
 
-int lightrec_optimize(struct block *block)
+int lightrec_optimize(struct lightrec_state *state, struct block *block)
 {
        unsigned int i;
+       int ret;
 
        for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
-               int ret = lightrec_optimizers[i](block);
-
-               if (ret)
-                       return ret;
+               if (lightrec_optimizers[i]) {
+                       ret = (*lightrec_optimizers[i])(state, block);
+                       if (ret)
+                               return ret;
+               }
        }
 
        return 0;
index 84a8fc9..c829028 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __OPTIMIZER_H__
 #include "disassembler.h"
 
 struct block;
+struct opcode;
 
 _Bool opcode_reads_register(union code op, u8 reg);
 _Bool opcode_writes_register(union code op, u8 reg);
 _Bool has_delay_slot(union code op);
 _Bool load_in_delay_slot(union code op);
+_Bool opcode_is_io(union code op);
+_Bool is_unconditional_jump(union code c);
+_Bool is_syscall(union code c);
 
-int lightrec_optimize(struct block *block);
+_Bool should_emulate(const struct opcode *op);
+
+int lightrec_optimize(struct lightrec_state *state, struct block *block);
 
 #endif /* __OPTIMIZER_H__ */
index 377685c..2e32cae 100644 (file)
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "blockcache.h"
@@ -21,6 +12,7 @@
 
 #include <errno.h>
 #include <pthread.h>
+#include <stdatomic.h>
 #include <stdbool.h>
 
 struct reaper_elm {
@@ -33,6 +25,8 @@ struct reaper {
        struct lightrec_state *state;
        pthread_mutex_t mutex;
        struct slist_elm reap_list;
+
+       atomic_uint sem;
 };
 
 struct reaper *lightrec_reaper_init(struct lightrec_state *state)
@@ -47,6 +41,7 @@ struct reaper *lightrec_reaper_init(struct lightrec_state *state)
        }
 
        reaper->state = state;
+       reaper->sem = 0;
        slist_init(&reaper->reap_list);
 
        ret = pthread_mutex_init(&reaper->mutex, NULL);
@@ -98,6 +93,11 @@ out_unlock:
        return ret;
 }
 
+/* Tell whether reaping is currently allowed, which is the case as long as
+ * the pause counter (reaper->sem) reads zero. */
+static bool lightrec_reaper_can_reap(struct reaper *reaper)
+{
+       unsigned int pauses = atomic_load_explicit(&reaper->sem,
+                                                  memory_order_relaxed);
+
+       return pauses == 0;
+}
+
 void lightrec_reaper_reap(struct reaper *reaper)
 {
        struct reaper_elm *reaper_elm;
@@ -105,13 +105,14 @@ void lightrec_reaper_reap(struct reaper *reaper)
 
        pthread_mutex_lock(&reaper->mutex);
 
-       while (!!(elm = slist_first(&reaper->reap_list))) {
+       while (lightrec_reaper_can_reap(reaper) &&
+              !!(elm = slist_first(&reaper->reap_list))) {
                slist_remove(&reaper->reap_list, elm);
                pthread_mutex_unlock(&reaper->mutex);
 
                reaper_elm = container_of(elm, struct reaper_elm, slist);
 
-               (*reaper_elm->func)(reaper_elm->data);
+               (*reaper_elm->func)(reaper->state, reaper_elm->data);
 
                lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
                              sizeof(*reaper_elm), reaper_elm);
@@ -121,3 +122,13 @@ void lightrec_reaper_reap(struct reaper *reaper)
 
        pthread_mutex_unlock(&reaper->mutex);
 }
+
+/* Suspend reaping: increment the pause counter. lightrec_reaper_reap()
+ * stops taking elements off its list while the counter is non-zero. Each
+ * call is meant to be balanced by one lightrec_reaper_continue(). */
+void lightrec_reaper_pause(struct reaper *reaper)
+{
+       atomic_uint *counter = &reaper->sem;
+
+       (void) atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
+}
+
+/* Undo one lightrec_reaper_pause(): decrement the pause counter. Reaping
+ * is allowed again once the counter is back to zero. */
+void lightrec_reaper_continue(struct reaper *reaper)
+{
+       atomic_uint *counter = &reaper->sem;
+
+       (void) atomic_fetch_sub_explicit(counter, 1, memory_order_relaxed);
+}
index 0309b64..49b6a1a 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_REAPER_H__
@@ -18,7 +9,7 @@
 struct lightrec_state;
 struct reaper;
 
-typedef void (*reap_func_t)(void *);
+typedef void (*reap_func_t)(struct lightrec_state *state, void *);
 
 struct reaper *lightrec_reaper_init(struct lightrec_state *state);
 void lightrec_reaper_destroy(struct reaper *reaper);
@@ -26,4 +17,7 @@ void lightrec_reaper_destroy(struct reaper *reaper);
 int lightrec_reaper_add(struct reaper *reaper, reap_func_t f, void *data);
 void lightrec_reaper_reap(struct reaper *reaper);
 
+void lightrec_reaper_pause(struct reaper *reaper);
+void lightrec_reaper_continue(struct reaper *reaper);
+
 #endif /* __LIGHTREC_REAPER_H__ */
index 634d3d0..a6c8cd1 100644 (file)
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "debug.h"
 #include <stdbool.h>
 #include <stdlib.h>
 #include <pthread.h>
+#ifdef __linux__
+#include <unistd.h>
+#endif
 
 struct block_rec {
        struct block *block;
        struct slist_elm slist;
+       bool compiling; /* set by lightrec_compile_list() once a worker has picked this entry up */
+};
+
+struct recompiler_thd { /* state of one recompiler worker thread */
+       struct lightrec_cstate *cstate; /* compiler state created for this worker; passed to lightrec_compile_block() */
+       unsigned int tid; /* index of this entry inside struct recompiler's thds[] */
+       pthread_t thd; /* thread handle filled in by pthread_create() */
 };
 
 struct recompiler {
        struct lightrec_state *state;
-       pthread_t thd;
        pthread_cond_t cond;
+       pthread_cond_t cond2; /* signalled whenever a worker is done with a queue entry; lightrec_recompiler_remove() waits on it */
        pthread_mutex_t mutex;
        bool stop;
-       struct block *current_block;
        struct slist_elm slist;
+
+       unsigned int nb_recs; /* number of worker threads, i.e. number of entries in thds[] */
+       struct recompiler_thd thds[]; /* flexible array, allocated together with the struct */
 };
 
-static void lightrec_compile_list(struct recompiler *rec)
+/* Return the number of processors reported by the platform, and never less
+ * than 1: platforms without a known query, and failed queries, count as a
+ * single processor. */
+static unsigned int get_processors_count(void)
+{
+       /* Signed and pre-set to 1: the fallback must be well-defined when
+        * none of the branches below is compiled in, and a negative error
+        * return (such as -1 from sysconf()) must not wrap around to a huge
+        * unsigned count. */
+       long nb = 1;
+
+#if defined(PTW32_VERSION)
+       nb = pthread_num_processors_np();
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+       int count;
+       size_t size = sizeof(count);
+
+       /* sysctlbyname() returns 0 on success; keep the fallback otherwise */
+       if (!sysctlbyname("hw.ncpu", &count, &size, NULL, 0))
+               nb = count;
+#elif defined(__linux__)
+       nb = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+
+       return nb < 1 ? 1 : (unsigned int) nb;
+}
+
+/* Walk the list starting at head and return the first element whose block
+ * is not flagged as being compiled, or NULL if there is no such element
+ * (which includes the empty list). */
+static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head)
+{
+       struct slist_elm *cur = slist_first(head);
+       struct block_rec *entry;
+
+       while (cur) {
+               entry = container_of(cur, struct block_rec, slist);
+               if (!entry->compiling)
+                       break;
+
+               cur = cur->next;
+       }
+
+       return cur;
+}
+
+static void lightrec_compile_list(struct recompiler *rec,
+                                 struct recompiler_thd *thd)
 {
        struct block_rec *block_rec;
        struct slist_elm *next;
        struct block *block;
        int ret;
 
-       while (!!(next = slist_first(&rec->slist))) {
+       while (!!(next = lightrec_get_first_elm(&rec->slist))) {
                block_rec = container_of(next, struct block_rec, slist);
+               block_rec->compiling = true;
                block = block_rec->block;
-               rec->current_block = block;
 
                pthread_mutex_unlock(&rec->mutex);
 
-               ret = lightrec_compile_block(block);
-               if (ret) {
-                       pr_err("Unable to compile block at PC 0x%x: %d\n",
-                              block->pc, ret);
+               if (likely(!(block->flags & BLOCK_IS_DEAD))) {
+                       ret = lightrec_compile_block(thd->cstate, block);
+                       if (ret) {
+                               pr_err("Unable to compile block at PC 0x%x: %d\n",
+                                      block->pc, ret);
+                       }
                }
 
                pthread_mutex_lock(&rec->mutex);
@@ -64,15 +103,14 @@ static void lightrec_compile_list(struct recompiler *rec)
                slist_remove(&rec->slist, next);
                lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                              sizeof(*block_rec), block_rec);
-               pthread_cond_signal(&rec->cond);
+               pthread_cond_signal(&rec->cond2);
        }
-
-       rec->current_block = NULL;
 }
 
 static void * lightrec_recompiler_thd(void *d)
 {
-       struct recompiler *rec = d;
+       struct recompiler_thd *thd = d;
+       struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]);
 
        pthread_mutex_lock(&rec->mutex);
 
@@ -85,7 +123,7 @@ static void * lightrec_recompiler_thd(void *d)
 
                } while (slist_empty(&rec->slist));
 
-               lightrec_compile_list(rec);
+               lightrec_compile_list(rec, thd);
        }
 
 out_unlock:
@@ -96,60 +134,104 @@ out_unlock:
+/*
+ * Create the threaded recompiler. The number of worker threads is the
+ * processor count from get_processors_count() minus one, with a minimum of
+ * one worker; each worker gets its own compiler state.
+ *
+ * Returns the new recompiler, or NULL on failure, in which case the cause
+ * is logged and everything set up so far is released again.
+ */
 struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
 {
        struct recompiler *rec;
+       unsigned int i, nb_recs, nb_cpus;
        int ret;
 
-       rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec));
+       nb_cpus = get_processors_count();
+       nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1;
+
+       rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)
+                             + nb_recs * sizeof(*rec->thds));
        if (!rec) {
                pr_err("Cannot create recompiler: Out of memory\n");
                return NULL;
        }
 
+       /* Clear every cstate pointer first, so that the error path can tell
+        * which compiler states were actually created. */
+       for (i = 0; i < nb_recs; i++) {
+               rec->thds[i].tid = i;
+               rec->thds[i].cstate = NULL;
+       }
+
+       for (i = 0; i < nb_recs; i++) {
+               rec->thds[i].cstate = lightrec_create_cstate(state);
+               /* Test the pointer that was just assigned; rec->state is
+                * not even initialized at this point. */
+               if (!rec->thds[i].cstate) {
+                       pr_err("Cannot create recompiler: Out of memory\n");
+                       goto err_free_cstates;
+               }
+       }
+
        rec->state = state;
        rec->stop = false;
-       rec->current_block = NULL;
+       rec->nb_recs = nb_recs;
        slist_init(&rec->slist);
 
        ret = pthread_cond_init(&rec->cond, NULL);
        if (ret) {
                pr_err("Cannot init cond variable: %d\n", ret);
-               goto err_free_rec;
+               goto err_free_cstates;
        }
 
-       ret = pthread_mutex_init(&rec->mutex, NULL);
+       ret = pthread_cond_init(&rec->cond2, NULL);
        if (ret) {
-               pr_err("Cannot init mutex variable: %d\n", ret);
+               pr_err("Cannot init cond variable: %d\n", ret);
                goto err_cnd_destroy;
        }
 
-       ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec);
+       ret = pthread_mutex_init(&rec->mutex, NULL);
        if (ret) {
-               pr_err("Cannot create recompiler thread: %d\n", ret);
-               goto err_mtx_destroy;
+               pr_err("Cannot init mutex variable: %d\n", ret);
+               goto err_cnd2_destroy;
        }
 
+       for (i = 0; i < nb_recs; i++) {
+               ret = pthread_create(&rec->thds[i].thd, NULL,
+                                    lightrec_recompiler_thd, &rec->thds[i]);
+               if (ret) {
+                       pr_err("Cannot create recompiler thread: %d\n", ret);
+                       goto err_stop_threads;
+               }
+       }
+
+       pr_info("Threaded recompiler started with %u workers.\n", nb_recs);
+
        return rec;
 
-err_mtx_destroy:
+err_stop_threads:
+       /* Workers with an index below i are already running and use the
+        * mutex, the condition variables and rec itself: stop and join them
+        * before tearing anything down. This is the same stop sequence as
+        * in lightrec_free_recompiler().
+        * NOTE(review): relies on the worker loop testing rec->stop under
+        * the mutex before its first wait - confirm against
+        * lightrec_recompiler_thd(). */
+       pthread_mutex_lock(&rec->mutex);
+       rec->stop = true;
+       pthread_cond_broadcast(&rec->cond);
+       pthread_mutex_unlock(&rec->mutex);
+
+       while (i > 0)
+               pthread_join(rec->thds[--i].thd, NULL);
+
        pthread_mutex_destroy(&rec->mutex);
+err_cnd2_destroy:
+       pthread_cond_destroy(&rec->cond2);
 err_cnd_destroy:
        pthread_cond_destroy(&rec->cond);
-err_free_rec:
+err_free_cstates:
+       for (i = 0; i < nb_recs; i++) {
+               if (rec->thds[i].cstate)
+                       lightrec_free_cstate(rec->thds[i].cstate);
+       }
-       lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+       /* Pass the same size that was given to lightrec_malloc() above */
+       lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec)
+                     + nb_recs * sizeof(*rec->thds), rec);
        return NULL;
 }
 
+/*
+ * Stop and join every worker thread, then release the workers' compiler
+ * states, the synchronization objects and the recompiler itself. rec must
+ * not be used after this call.
+ */
 void lightrec_free_recompiler(struct recompiler *rec)
 {
+       unsigned int i;
+
-       rec->stop = true;
-
-       /* Stop the thread */
+       /* Stop the threads. The flag is raised while holding the mutex that
+        * the workers take around their wait loop, so that the write does
+        * not race with them; the broadcast wakes all of them up. */
        pthread_mutex_lock(&rec->mutex);
+       rec->stop = true;
-       pthread_cond_signal(&rec->cond);
+       pthread_cond_broadcast(&rec->cond);
        pthread_mutex_unlock(&rec->mutex);
-       pthread_join(rec->thd, NULL);
+
+       for (i = 0; i < rec->nb_recs; i++)
+               pthread_join(rec->thds[i].thd, NULL);
+
+       for (i = 0; i < rec->nb_recs; i++)
+               lightrec_free_cstate(rec->thds[i].cstate);
 
        pthread_mutex_destroy(&rec->mutex);
        pthread_cond_destroy(&rec->cond);
+       pthread_cond_destroy(&rec->cond2);
-       lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+       /* The allocation included the thds[] flexible array: account for it */
+       lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec)
+                     + rec->nb_recs * sizeof(*rec->thds), rec);
 }
 
@@ -174,7 +256,8 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
                        /* The block to compile is already in the queue - bump
                         * it to the top of the list, unless the block is being
                         * recompiled. */
-                       if (prev && !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
+                       if (prev && !block_rec->compiling &&
+                           !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
                                slist_remove_next(prev);
                                slist_append(&rec->slist, elm);
                        }
@@ -198,6 +281,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
        pr_debug("Adding block PC 0x%x to recompiler\n", block->pc);
 
        block_rec->block = block;
+       block_rec->compiling = false;
 
        elm = &rec->slist;
 
@@ -213,6 +297,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 
 out_unlock:
        pthread_mutex_unlock(&rec->mutex);
+
        return ret;
 }
 
@@ -223,36 +308,59 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
 
        pthread_mutex_lock(&rec->mutex);
 
-       for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
-               block_rec = container_of(elm, struct block_rec, slist);
+       while (true) {
+               for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
+                       block_rec = container_of(elm, struct block_rec, slist);
 
-               if (block_rec->block == block) {
-                       if (block == rec->current_block) {
+                       if (block_rec->block != block)
+                               continue;
+
+                       if (block_rec->compiling) {
                                /* Block is being recompiled - wait for
                                 * completion */
-                               do {
-                                       pthread_cond_wait(&rec->cond,
-                                                         &rec->mutex);
-                               } while (block == rec->current_block);
+                               pthread_cond_wait(&rec->cond2, &rec->mutex);
+
+                               /* We can't guarantee the signal was for us.
+                                * Since block_rec may have been removed while
+                                * we were waiting on the condition, we cannot
+                                * check block_rec->compiling again. The best
+                                * thing is just to restart the function. */
+                               break;
                        } else {
                                /* Block is not yet being processed - remove it
                                 * from the list */
                                slist_remove(&rec->slist, elm);
                                lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                                              sizeof(*block_rec), block_rec);
+
+                               goto out_unlock;
                        }
+               }
 
+               if (!elm)
                        break;
-               }
        }
 
+out_unlock:
        pthread_mutex_unlock(&rec->mutex);
 }
 
-void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
+void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
+                                         struct block *block, u32 *pc)
 {
        bool freed;
 
+       /* There's no point in running the first pass if the block will never
+        * be compiled. Let the main loop run the interpreter instead. */
+       if (block->flags & BLOCK_NEVER_COMPILE)
+               return NULL;
+
+       /* If the block is already fully tagged, there is no point in running
+        * the first pass. Request a recompilation of the block, and maybe the
+        * interpreter will run the block in the meantime. */
+       if (block->flags & BLOCK_FULLY_TAGGED)
+               lightrec_recompiler_add(state->rec, block);
+
        if (likely(block->function)) {
                if (block->flags & BLOCK_FULLY_TAGGED) {
                        freed = atomic_flag_test_and_set(&block->op_list_freed);
@@ -263,8 +371,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
 
                                /* The block was already compiled but the opcode list
                                 * didn't get freed yet - do it now */
-                               lightrec_free_opcode_list(block->state,
-                                                         block->opcode_list);
+                               lightrec_free_opcode_list(state, block);
                                block->opcode_list = NULL;
                        }
                }
@@ -277,7 +384,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
        freed = atomic_flag_test_and_set(&block->op_list_freed);
 
        /* Block wasn't compiled yet - run the interpreter */
-       *pc = lightrec_emulate_block(block, *pc);
+       *pc = lightrec_emulate_block(state, block, *pc);
 
        if (!freed)
                atomic_flag_clear(&block->op_list_freed);
@@ -289,7 +396,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
                pr_debug("Block PC 0x%08x is fully tagged"
                         " - free opcode list\n", block->pc);
 
-               lightrec_free_opcode_list(block->state, block->opcode_list);
+               lightrec_free_opcode_list(state, block);
                block->opcode_list = NULL;
        }
 
index 999a49f..9bc522d 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_RECOMPILER_H__
@@ -24,6 +15,7 @@ void lightrec_free_recompiler(struct recompiler *rec);
 int lightrec_recompiler_add(struct recompiler *rec, struct block *block);
 void lightrec_recompiler_remove(struct recompiler *rec, struct block *block);
 
-void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc);
+void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
+                                         struct block *block, u32 *pc);
 
 #endif /* __LIGHTREC_RECOMPILER_H__ */
index 0256015..c018870 100644 (file)
@@ -1,27 +1,19 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "debug.h"
 #include "memmanager.h"
+#include "lightning-wrapper.h"
 #include "regcache.h"
 
-#include <lightning.h>
 #include <stdbool.h>
 #include <stddef.h>
 
 struct native_register {
-       bool used, loaded, dirty, output, extend, extended, locked;
+       bool used, loaded, dirty, output, extend, extended, /* "extend" is the pending sign-extension state; free_reg() copies it into "extended" for output registers */
+            zero_extend, zero_extended, locked; /* same pending/current pair, for zero-extension */
        s8 emulated_register;
 };
 
@@ -48,6 +40,24 @@ const char * lightrec_reg_name(u8 reg)
        return mips_regs[reg];
 }
 
+/* Tell whether the native register jit_reg is _ZERO. That register is only
+ * considered on MIPS, Alpha and RISC-V builds; the answer is always false
+ * on any other target. */
+static inline bool lightrec_reg_is_zero(u8 jit_reg)
+{
+#if defined(__mips__) || defined(__alpha__) || defined(__riscv)
+       return jit_reg == _ZERO;
+#else
+       return false;
+#endif
+}
+
+/* Return the native register that always backs emulated register reg, or
+ * -1 when there is none. Only emulated register 0 has one (_ZERO), and only
+ * on MIPS, Alpha and RISC-V builds. */
+static inline s8 lightrec_get_hardwired_reg(u8 reg)
+{
+#if defined(__mips__) || defined(__alpha__) || defined(__riscv)
+       if (reg != 0)
+               return -1;
+
+       return _ZERO;
+#else
+       return -1;
+#endif
+}
+
 static inline u8 lightrec_reg_number(const struct regcache *cache,
                const struct native_register *nreg)
 {
@@ -79,6 +89,34 @@ static inline struct native_register * lightning_reg_to_lightrec(
        }
 }
 
+/* Report how the value currently held in native register jit_reg is
+ * extended, as a combination of REG_EXT (sign-extended) and REG_ZEXT
+ * (zero-extended). The zero register is reported as both. */
+u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg)
+{
+       const struct native_register *nreg;
+
+       if (lightrec_reg_is_zero(jit_reg))
+               return REG_EXT | REG_ZEXT;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+
+       return (nreg->extended ? REG_EXT : 0) |
+              (nreg->zero_extended ? REG_ZEXT : 0);
+}
+
+/* Record the extension state (a combination of REG_EXT / REG_ZEXT) that the
+ * value being written to native register jit_reg will have. The state is
+ * stored in the pending "extend" / "zero_extend" fields. Nothing is
+ * recorded for the zero register. */
+void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags)
+{
+       struct native_register *nreg;
+
+       if (lightrec_reg_is_zero(jit_reg))
+               return;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+       nreg->zero_extend = flags & REG_ZEXT;
+       nreg->extend = flags & REG_EXT;
+}
+
 static struct native_register * alloc_temp(struct regcache *cache)
 {
        unsigned int i;
@@ -157,6 +195,7 @@ static struct native_register * alloc_in_out(struct regcache *cache,
 static void lightrec_discard_nreg(struct native_register *nreg)
 {
        nreg->extended = false;
+       nreg->zero_extended = false;
        nreg->loaded = false;
        nreg->output = false;
        nreg->dirty = false;
@@ -170,7 +209,7 @@ static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit,
 {
        /* If we get a dirty register, store back the old value */
        if (nreg->dirty) {
-               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+               s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (nreg->emulated_register << 2);
 
                jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
@@ -181,6 +220,9 @@ static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit,
 
 void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 {
+       if (lightrec_reg_is_zero(jit_reg)) /* nothing to unload for the hard-wired zero register */
+               return;
+
        lightrec_unload_nreg(cache, _jit,
                        lightning_reg_to_lightrec(cache, jit_reg), jit_reg);
 }
@@ -189,8 +231,12 @@ void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
  * A locked register cannot only be used as input, not output. */
 void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 {
-       struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
+       struct native_register *reg;
+
+       if (lightrec_reg_is_zero(jit_reg))
+               return;
 
+       reg = lightning_reg_to_lightrec(cache, jit_reg);
        lightrec_clean_reg(cache, _jit, jit_reg);
 
        reg->locked = true;
@@ -198,8 +244,12 @@ void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 
 u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 {
-       struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
+       struct native_register *reg;
 
+       if (lightrec_reg_is_zero(jit_reg))
+               return jit_reg;
+
+       reg = lightning_reg_to_lightrec(cache, jit_reg);
        lightrec_unload_nreg(cache, _jit, reg, jit_reg);
 
        reg->used = true;
@@ -223,10 +273,18 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit)
        return jit_reg;
 }
 
-u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
+                         u8 reg, u8 flags)
 {
+       struct native_register *nreg;
        u8 jit_reg;
-       struct native_register *nreg = alloc_in_out(cache, reg, true);
+       s8 hw_reg;
+
+       hw_reg = lightrec_get_hardwired_reg(reg);
+       if (hw_reg >= 0)
+               return (u8) hw_reg;
+
+       nreg = alloc_in_out(cache, reg, true);
        if (!nreg) {
                /* No free register, no dirty register to free. */
                pr_err("No more registers! Abandon ship!\n");
@@ -240,18 +298,27 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
        if (nreg->emulated_register != reg)
                lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
 
-       nreg->extend = false;
        nreg->used = true;
        nreg->output = true;
        nreg->emulated_register = reg;
+       nreg->extend = flags & REG_EXT;
+       nreg->zero_extend = flags & REG_ZEXT;
        return jit_reg;
 }
 
-u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
+                        u8 reg, u8 flags)
 {
+       struct native_register *nreg;
        u8 jit_reg;
        bool reg_changed;
-       struct native_register *nreg = alloc_in_out(cache, reg, false);
+       s8 hw_reg;
+
+       hw_reg = lightrec_get_hardwired_reg(reg);
+       if (hw_reg >= 0)
+               return (u8) hw_reg;
+
+       nreg = alloc_in_out(cache, reg, false);
        if (!nreg) {
                /* No free register, no dirty register to free. */
                pr_err("No more registers! Abandon ship!\n");
@@ -267,55 +334,44 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
                lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
 
        if (!nreg->loaded && !nreg->dirty && reg != 0) {
-               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+               s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (reg << 2);
 
+               nreg->zero_extended = flags & REG_ZEXT;
+               nreg->extended = !nreg->zero_extended;
+
                /* Load previous value from register cache */
-               jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+               if (nreg->zero_extended)
+                       jit_ldxi_ui(jit_reg, LIGHTREC_REG_STATE, offset);
+               else
+                       jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+
                nreg->loaded = true;
-               nreg->extended = true;
        }
 
        /* Clear register r0 before use */
        if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
                jit_movi(jit_reg, 0);
                nreg->extended = true;
+               nreg->zero_extended = true;
                nreg->loaded = true;
        }
 
        nreg->used = true;
        nreg->output = false;
        nreg->emulated_register = reg;
-       return jit_reg;
-}
-
-u8 lightrec_alloc_reg_out_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
-{
-       struct native_register *nreg;
-       u8 jit_reg;
-
-       jit_reg = lightrec_alloc_reg_out(cache, _jit, reg);
-       nreg = lightning_reg_to_lightrec(cache, jit_reg);
-
-       nreg->extend = true;
 
-       return jit_reg;
-}
-
-u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
-{
-       struct native_register *nreg;
-       u8 jit_reg;
-
-       jit_reg = lightrec_alloc_reg_in(cache, _jit, reg);
-       nreg = lightning_reg_to_lightrec(cache, jit_reg);
-
-#if __WORDSIZE == 64
-       if (!nreg->extended) {
+       if ((flags & REG_EXT) && !nreg->extended &&
+           (!nreg->zero_extended || !(flags & REG_ZEXT))) {
                nreg->extended = true;
+               nreg->zero_extended = false;
                jit_extr_i(jit_reg, jit_reg);
+       } else if (!(flags & REG_EXT) && (flags & REG_ZEXT) &&
+                  !nreg->zero_extended) {
+               nreg->zero_extended = true;
+               nreg->extended = false;
+               jit_extr_ui(jit_reg, jit_reg);
        }
-#endif
 
        return jit_reg;
 }
@@ -337,10 +393,11 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
        lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
 
        /* Load previous value from register cache */
-       offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2);
+       offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2);
        jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
 
        nreg->extended = true;
+       nreg->zero_extended = false;
        nreg->used = true;
        nreg->loaded = true;
        nreg->emulated_register = reg;
@@ -353,14 +410,17 @@ static void free_reg(struct native_register *nreg)
        /* Set output registers as dirty */
        if (nreg->used && nreg->output && nreg->emulated_register > 0)
                nreg->dirty = true;
-       if (nreg->output)
+       if (nreg->output) {
                nreg->extended = nreg->extend;
+               nreg->zero_extended = nreg->zero_extend;
+       }
        nreg->used = false;
 }
 
 void lightrec_free_reg(struct regcache *cache, u8 jit_reg)
 {
-       free_reg(lightning_reg_to_lightrec(cache, jit_reg));
+       if (!lightrec_reg_is_zero(jit_reg)) /* the hard-wired zero register is never looked up in the cache, so there is nothing to free */
+               free_reg(lightning_reg_to_lightrec(cache, jit_reg));
 }
 
 void lightrec_free_regs(struct regcache *cache)
@@ -375,7 +435,7 @@ static void clean_reg(jit_state_t *_jit,
                struct native_register *nreg, u8 jit_reg, bool clean)
 {
        if (nreg->dirty) {
-               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+               s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (nreg->emulated_register << 2);
 
                jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
@@ -408,8 +468,12 @@ void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit)
 
 void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 {
-       struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
-       clean_reg(_jit, reg, jit_reg, true);
+       struct native_register *reg;
+
+       if (!lightrec_reg_is_zero(jit_reg)) {
+               reg = lightning_reg_to_lightrec(cache, jit_reg);
+               clean_reg(_jit, reg, jit_reg, true);
+       }
 }
 
 void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
index 8678cc6..835c9c9 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __REGCACHE_H__
@@ -22,8 +13,9 @@
 #define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
 #define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
 
-#define REG_LO 32
-#define REG_HI 33
+/* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. */
+#define REG_EXT                BIT(0) /* register is sign-extended */
+#define REG_ZEXT       BIT(1) /* register is zero-extended */
 
 struct register_value {
        _Bool known;
@@ -35,15 +27,17 @@ struct regcache;
 
 u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
 u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit);
-u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_out_ext(struct regcache *cache,
-                             jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg);
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
+                         u8 reg, u8 flags);
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
+                        u8 reg, u8 flags);
 
 u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
                           u8 reg, u8 jit_reg);
 
+u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg);
+void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
+
 void lightrec_regcache_reset(struct regcache *cache);
 
 void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
index 18195e8..ae7e5d3 100644 (file)
@@ -1,15 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
 /*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #ifndef __LIGHTREC_SLIST_H__
diff --git a/include/lightrec/lightrec-config.h b/include/lightrec/lightrec-config.h
new file mode 100644 (file)
index 0000000..bbb2329
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_CONFIG_H__
+#define __LIGHTREC_CONFIG_H__
+
+#define ENABLE_THREADED_COMPILER 1
+#define ENABLE_FIRST_PASS 1
+#define ENABLE_DISASSEMBLER 0
+#define ENABLE_TINYMM 0
+
+#define HAS_DEFAULT_ELM 1
+
+#define OPT_REMOVE_DIV_BY_ZERO_SEQ 1
+#define OPT_REPLACE_MEMSET 1
+#define OPT_DETECT_IMPOSSIBLE_BRANCHES 1
+#define OPT_TRANSFORM_OPS 1
+#define OPT_LOCAL_BRANCHES 1
+#define OPT_SWITCH_DELAY_SLOTS 1
+#define OPT_FLAG_STORES 1
+#define OPT_FLAG_IO 1
+#define OPT_FLAG_MULT_DIV 1
+#define OPT_EARLY_UNLOAD 1
+
+#endif /* __LIGHTREC_CONFIG_H__ */
+
index 5164a89..4459600 100644 (file)
 //  sign-extended by bug in original hardware, according to Nocash docs
 //  GTE section 'Screen Offset and Distance'. The emulator does this
 //  sign extension when it is loaded to GTE by CTC2.
-//#define gteH   (psxRegs.CP2C.p[26].sw.l)
-#define gteH   (psxRegs.CP2C.p[26].w.l)
+//#define gteH   (regs->CP2C.p[26].sw.l)
+#define gteH   (regs->CP2C.p[26].w.l)
 #define gteDQA (regs->CP2C.p[27].sw.l)
 #define gteDQB (((s32 *)regs->CP2C.r)[28])
 #define gteZSF3 (regs->CP2C.p[29].sw.l)
index 2648cc3..99d7175 100644 (file)
@@ -104,27 +104,19 @@ static void (*cp2_ops[])(struct psxCP2Regs *) = {
 
 static char cache_buf[64 * 1024];
 
-static u32 cop0_mfc(struct lightrec_state *state, u32 op, u8 reg)
+static void cop2_op(struct lightrec_state *state, u32 func)
 {
-       return psxRegs.CP0.r[reg];
-}
+       struct lightrec_registers *regs = lightrec_get_registers(state);
 
-static u32 cop2_mfc_cfc(struct lightrec_state *state, u8 reg, bool cfc)
-{
-       if (cfc)
-               return psxRegs.CP2C.r[reg];
-       else
-               return MFC2(reg);
-}
-
-static u32 cop2_mfc(struct lightrec_state *state, u32 op, u8 reg)
-{
-       return cop2_mfc_cfc(state, reg, false);
-}
+       psxRegs.code = func;
 
-static u32 cop2_cfc(struct lightrec_state *state, u32 op, u8 reg)
-{
-       return cop2_mfc_cfc(state, reg, true);
+       if (unlikely(!cp2_ops[func & 0x3f])) {
+               fprintf(stderr, "Invalid CP2 function %u\n", func);
+       } else {
+               /* This works because regs->cp2c comes right after regs->cp2d,
+                * so it can be cast to a psxCP2Regs pointer. */
+               cp2_ops[func & 0x3f]((psxCP2Regs *) regs->cp2d);
+       }
 }
 
 static bool has_interrupt(void)
@@ -144,85 +136,6 @@ static void lightrec_restore_state(struct lightrec_state *state)
                lightrec_set_target_cycle_count(state, next_interupt);
 }
 
-static void cop0_mtc_ctc(struct lightrec_state *state,
-                        u8 reg, u32 value, bool ctc)
-{
-       psxRegs.cycle = lightrec_current_cycle_count(state);
-
-       switch (reg) {
-       case 1:
-       case 4:
-       case 8:
-       case 14:
-       case 15:
-               /* Those registers are read-only */
-               break;
-       case 12: /* Status */
-               if ((psxRegs.CP0.n.Status & ~value) & (1 << 16)) {
-                       memcpy(psxM, cache_buf, sizeof(cache_buf));
-                       lightrec_invalidate_all(state);
-               } else if ((~psxRegs.CP0.n.Status & value) & (1 << 16)) {
-                       memcpy(cache_buf, psxM, sizeof(cache_buf));
-               }
-
-               psxRegs.CP0.n.Status = value;
-               break;
-       case 13: /* Cause */
-               psxRegs.CP0.n.Cause &= ~0x0300;
-               psxRegs.CP0.n.Cause |= value & 0x0300;
-               break;
-       default:
-               psxRegs.CP0.r[reg] = value;
-               break;
-       }
-
-       lightrec_restore_state(state);
-}
-
-static void cop2_mtc_ctc(struct lightrec_state *state,
-                        u8 reg, u32 value, bool ctc)
-{
-       if (ctc)
-               CTC2(value, reg);
-       else
-               MTC2(value, reg);
-}
-
-static void cop0_mtc(struct lightrec_state *state, u32 op, u8 reg, u32 value)
-{
-       cop0_mtc_ctc(state, reg, value, false);
-}
-
-static void cop0_ctc(struct lightrec_state *state, u32 op, u8 reg, u32 value)
-{
-       cop0_mtc_ctc(state, reg, value, true);
-}
-
-static void cop2_mtc(struct lightrec_state *state, u32 op, u8 reg, u32 value)
-{
-       cop2_mtc_ctc(state, reg, value, false);
-}
-
-static void cop2_ctc(struct lightrec_state *state, u32 op, u8 reg, u32 value)
-{
-       cop2_mtc_ctc(state, reg, value, true);
-}
-
-static void cop0_op(struct lightrec_state *state, u32 func)
-{
-       fprintf(stderr, "Invalid access to COP0\n");
-}
-
-static void cop2_op(struct lightrec_state *state, u32 func)
-{
-       psxRegs.code = func;
-
-       if (unlikely(!cp2_ops[func & 0x3f]))
-               fprintf(stderr, "Invalid CP2 function %u\n", func);
-       else
-               cp2_ops[func & 0x3f](&psxRegs.CP2);
-}
-
 static void hw_write_byte(struct lightrec_state *state,
                          u32 op, void *host, u32 mem, u8 val)
 {
@@ -374,21 +287,17 @@ static struct lightrec_mem_map lightrec_map[] = {
        },
 };
 
+static void lightrec_enable_ram(struct lightrec_state *state, bool enable)
+{
+       if (enable)
+               memcpy(psxM, cache_buf, sizeof(cache_buf));
+       else
+               memcpy(cache_buf, psxM, sizeof(cache_buf));
+}
+
 static const struct lightrec_ops lightrec_ops = {
-       .cop0_ops = {
-               .mfc = cop0_mfc,
-               .cfc = cop0_mfc,
-               .mtc = cop0_mtc,
-               .ctc = cop0_ctc,
-               .op = cop0_op,
-       },
-       .cop2_ops = {
-               .mfc = cop2_mfc,
-               .cfc = cop2_cfc,
-               .mtc = cop2_mtc,
-               .ctc = cop2_ctc,
-               .op = cop2_op,
-       },
+       .cop2_op = cop2_op,
+       .enable_ram = lightrec_enable_ram,
 };
 
 static int lightrec_plugin_init(void)
@@ -505,12 +414,30 @@ static void print_for_big_ass_debugger(void)
        printf("\n");
 }
 
+static void lightrec_dump_regs(struct lightrec_state *state)
+{
+       struct lightrec_registers *regs = lightrec_get_registers(state);
+
+       if (unlikely(booting))
+               memcpy(&psxRegs.GPR, regs->gpr, sizeof(regs->gpr));
+       psxRegs.CP0.n.Status = regs->cp0[12];
+       psxRegs.CP0.n.Cause = regs->cp0[13];
+}
+
+static void lightrec_restore_regs(struct lightrec_state *state)
+{
+       struct lightrec_registers *regs = lightrec_get_registers(state);
+
+       if (unlikely(booting))
+               memcpy(regs->gpr, &psxRegs.GPR, sizeof(regs->gpr));
+       regs->cp0[12] = psxRegs.CP0.n.Status;
+       regs->cp0[13] = psxRegs.CP0.n.Cause;
+       regs->cp0[14] = psxRegs.CP0.n.EPC;
+}
 
 extern void intExecuteBlock();
 extern void gen_interupt();
 
-static u32 old_cycle_counter;
-
 static void lightrec_plugin_execute_block(void)
 {
        u32 old_pc = psxRegs.pc;
@@ -522,13 +449,13 @@ static void lightrec_plugin_execute_block(void)
                intExecuteBlock();
        } else {
                lightrec_reset_cycle_count(lightrec_state, psxRegs.cycle);
-               lightrec_restore_registers(lightrec_state, psxRegs.GPR.r);
+               lightrec_restore_regs(lightrec_state);
 
                if (unlikely(use_lightrec_interpreter))
                        psxRegs.pc = lightrec_run_interpreter(lightrec_state,
                                                              psxRegs.pc);
                // step during early boot so that 0x80030000 fastboot hack works
-               else if (unlikely(booting))
+               else if (unlikely(booting || lightrec_debug))
                        psxRegs.pc = lightrec_execute_one(lightrec_state,
                                                          psxRegs.pc);
                else
@@ -537,7 +464,7 @@ static void lightrec_plugin_execute_block(void)
 
                psxRegs.cycle = lightrec_current_cycle_count(lightrec_state);
 
-               lightrec_dump_registers(lightrec_state, psxRegs.GPR.r);
+               lightrec_dump_regs(lightrec_state);
                flags = lightrec_exit_flags(lightrec_state);
 
                if (flags & LIGHTREC_EXIT_SEGFAULT) {
@@ -563,18 +490,6 @@ static void lightrec_plugin_execute_block(void)
                psxRegs.CP0.n.Cause &= ~0x7c;
                psxException(psxRegs.CP0.n.Cause, 0);
        }
-
-       if ((psxRegs.cycle & ~0xfffffff) != old_cycle_counter) {
-               SysDLog("RAM usage: Lightrec %u KiB, IR %u KiB, CODE %u KiB, "
-                      "MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
-                      lightrec_get_mem_usage(MEM_FOR_LIGHTREC) / 1024,
-                      lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
-                      lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
-                      lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
-                      lightrec_get_total_mem_usage() / 1024,
-                      lightrec_get_average_ipi());
-               old_cycle_counter = psxRegs.cycle & ~0xfffffff;
-       }
 }
 
 static void lightrec_plugin_execute(void)
@@ -622,8 +537,16 @@ static void lightrec_plugin_shutdown(void)
 
 static void lightrec_plugin_reset(void)
 {
+       struct lightrec_registers *regs;
+
        lightrec_plugin_shutdown();
        lightrec_plugin_init();
+
+       regs = lightrec_get_registers(lightrec_state);
+
+       regs->cp0[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1
+       regs->cp0[15] = 0x00000002; // PRevID = Revision ID, same as R3000A
+
        booting = true;
 }