git subrepo clone https://github.com/pcercuei/lightrec.git deps/lightrec
author: Paul Cercueil <paul@crapouillou.net>
Thu, 30 Jan 2020 15:34:17 +0000 (12:34 -0300)
committer: Paul Cercueil <paul@crapouillou.net>
Sat, 8 Feb 2020 14:44:52 +0000 (11:44 -0300)
subrepo:
  subdir:   "deps/lightrec"
  merged:   "6c69e10"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "6c69e10"
git-subrepo:
  version:  "0.4.1"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "a04d8c2"

26 files changed:
deps/lightrec/.gitignore [new file with mode: 0644]
deps/lightrec/.gitrepo [new file with mode: 0644]
deps/lightrec/CMakeLists.txt [new file with mode: 0644]
deps/lightrec/COPYING [new file with mode: 0644]
deps/lightrec/README [new file with mode: 0644]
deps/lightrec/blockcache.c [new file with mode: 0644]
deps/lightrec/blockcache.h [new file with mode: 0644]
deps/lightrec/config.h.cmakein [new file with mode: 0644]
deps/lightrec/debug.h [new file with mode: 0644]
deps/lightrec/disassembler.c [new file with mode: 0644]
deps/lightrec/disassembler.h [new file with mode: 0644]
deps/lightrec/emitter.c [new file with mode: 0644]
deps/lightrec/emitter.h [new file with mode: 0644]
deps/lightrec/interpreter.c [new file with mode: 0644]
deps/lightrec/interpreter.h [new file with mode: 0644]
deps/lightrec/lightrec-private.h [new file with mode: 0644]
deps/lightrec/lightrec.c [new file with mode: 0644]
deps/lightrec/lightrec.h [new file with mode: 0644]
deps/lightrec/memmanager.c [new file with mode: 0644]
deps/lightrec/memmanager.h [new file with mode: 0644]
deps/lightrec/optimizer.c [new file with mode: 0644]
deps/lightrec/optimizer.h [new file with mode: 0644]
deps/lightrec/recompiler.c [new file with mode: 0644]
deps/lightrec/recompiler.h [new file with mode: 0644]
deps/lightrec/regcache.c [new file with mode: 0644]
deps/lightrec/regcache.h [new file with mode: 0644]

diff --git a/deps/lightrec/.gitignore b/deps/lightrec/.gitignore
new file mode 100644 (file)
index 0000000..bae14b5
--- /dev/null
@@ -0,0 +1,2 @@
+*.o
+*.so*
diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo
new file mode 100644 (file)
index 0000000..871f638
--- /dev/null
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
+;
+[subrepo]
+       remote = https://github.com/pcercuei/lightrec.git
+       branch = master
+       commit = 6c69e104d0827e45b8c094d6a61f95c96e9efb15
+       parent = b7ee664796db949b417754d11d4ae405cf5144a5
+       method = merge
+       cmdver = 0.4.1
diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt
new file mode 100644 (file)
index 0000000..6ac5cd4
--- /dev/null
@@ -0,0 +1,119 @@
+cmake_minimum_required(VERSION 3.0)
+project(lightrec LANGUAGES C VERSION 0.3)
+
+set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
+if (NOT BUILD_SHARED_LIBS)
+       add_definitions(-DLIGHTREC_STATIC)
+endif (NOT BUILD_SHARED_LIBS)
+
+if (NOT LOG_LEVEL)
+       set(LOG_LEVEL Info CACHE STRING "Log level" FORCE)
+       set_property(CACHE LOG_LEVEL PROPERTY STRINGS NoLog Error Warning Info Debug)
+endif()
+
+if (NOT CMAKE_BUILD_TYPE)
+       set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
+               "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
+               FORCE)
+       set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS None Debug Release RelWithDebInfo MinSizeRel)
+endif()
+
+string(TOUPPER ${LOG_LEVEL} LIGHTREC_LOG_LEVEL)
+add_definitions(-DLOG_LEVEL=${LIGHTREC_LOG_LEVEL}_L)
+
+if (CMAKE_COMPILER_IS_GNUCC)
+       add_compile_options(-fvisibility=hidden)
+endif()
+
+list(APPEND LIGHTREC_SOURCES
+       blockcache.c
+       disassembler.c
+       emitter.c
+       interpreter.c
+       lightrec.c
+       memmanager.c
+       optimizer.c
+       regcache.c
+)
+list(APPEND LIGHTREC_HEADERS
+       blockcache.h
+       debug.h
+       disassembler.h
+       emitter.h
+       interpreter.h
+       lightrec-private.h
+       lightrec.h
+       memmanager.h
+       optimizer.h
+       recompiler.h
+       regcache.h
+)
+
+option(ENABLE_FIRST_PASS "Run the interpreter as first-pass optimization" ON)
+
+option(ENABLE_THREADED_COMPILER "Enable threaded compiler" ON)
+if (ENABLE_THREADED_COMPILER)
+       list(APPEND LIGHTREC_SOURCES recompiler.c)
+
+       if (NOT ENABLE_FIRST_PASS)
+               message(SEND_ERROR "Threaded compiler requires first-pass optimization")
+       endif (NOT ENABLE_FIRST_PASS)
+endif (ENABLE_THREADED_COMPILER)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+add_library(${PROJECT_NAME} ${LIGHTREC_SOURCES} ${LIGHTREC_HEADERS})
+set_target_properties(${PROJECT_NAME} PROPERTIES
+       VERSION ${PROJECT_VERSION}
+       SOVERSION ${PROJECT_VERSION_MAJOR}
+       PUBLIC_HEADER lightrec.h
+       C_STANDARD 11
+       C_STANDARD_REQUIRED ON
+       C_EXTENSIONS OFF
+)
+
+option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF)
+if (ENABLE_TINYMM)
+       find_library(TINYMM_LIBRARIES tinymm REQUIRED)
+       find_path(TINYMM_INCLUDE_DIR tinymm.h REQUIRED)
+
+       include_directories(${TINYMM_INCLUDE_DIR})
+       target_link_libraries(${PROJECT_NAME} PRIVATE ${TINYMM_LIBRARIES})
+endif (ENABLE_TINYMM)
+
+if (ENABLE_THREADED_COMPILER)
+       find_library(PTHREAD_LIBRARIES pthread REQUIRED)
+       find_path(PTHREAD_INCLUDE_DIR pthread.h REQUIRED)
+
+       include_directories(${PTHREAD_INCLUDE_DIR})
+       target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES})
+endif (ENABLE_THREADED_COMPILER)
+
+find_library(LIBLIGHTNING lightning REQUIRED)
+find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED)
+
+include_directories(${LIBLIGHTNING_INCLUDE_DIR})
+target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBLIGHTNING})
+
+if (LOG_LEVEL STREQUAL Debug)
+       find_library(LIBOPCODES NAMES opcodes-multiarch opcodes)
+       find_path(LIBOPCODES_INCLUDE_DIR dis-asm.h)
+
+       if (NOT LIBOPCODES OR NOT LIBOPCODES_INCLUDE_DIR)
+               message(SEND_ERROR "Debug log level requires libopcodes (from binutils) to be installed.")
+       endif ()
+
+       set(ENABLE_DISASSEMBLER ON)
+       include_directories(${LIBOPCODES_INCLUDE_DIR})
+       target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBOPCODES})
+endif()
+
+configure_file(config.h.cmakein config.h @ONLY)
+
+include(GNUInstallDirs)
+install(TARGETS ${PROJECT_NAME}
+       ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+       LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+       RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+       PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
diff --git a/deps/lightrec/COPYING b/deps/lightrec/COPYING
new file mode 100644 (file)
index 0000000..161a3d1
--- /dev/null
@@ -0,0 +1,482 @@
+                 GNU LIBRARY GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1991 Free Software Foundation, Inc.
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the library GPL.  It is
+ numbered 2 because it goes with version 2 of the ordinary GPL.]
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Library General Public License, applies to some
+specially designated Free Software Foundation software, and to any
+other libraries whose authors decide to use it.  You can use it for
+your libraries, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if
+you distribute copies of the library, or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link a program with the library, you must provide
+complete object files to the recipients so that they can relink them
+with the library, after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  Our method of protecting your rights has two steps: (1) copyright
+the library, and (2) offer you this license which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  Also, for each distributor's protection, we want to make certain
+that everyone understands that there is no warranty for this free
+library.  If the library is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original
+version, so that any problems introduced by others will not reflect on
+the original authors' reputations.
+\f
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that companies distributing free
+software will individually obtain patent licenses, thus in effect
+transforming the program into proprietary software.  To prevent this,
+we have made it clear that any patent must be licensed for everyone's
+free use or not licensed at all.
+
+  Most GNU software, including some libraries, is covered by the ordinary
+GNU General Public License, which was designed for utility programs.  This
+license, the GNU Library General Public License, applies to certain
+designated libraries.  This license is quite different from the ordinary
+one; be sure to read it in full, and don't assume that anything in it is
+the same as in the ordinary license.
+
+  The reason we have a separate public license for some libraries is that
+they blur the distinction we usually make between modifying or adding to a
+program and simply using it.  Linking a program with a library, without
+changing the library, is in some sense simply using the library, and is
+analogous to running a utility program or application program.  However, in
+a textual and legal sense, the linked executable is a combined work, a
+derivative of the original library, and the ordinary General Public License
+treats it as such.
+
+  Because of this blurred distinction, using the ordinary General
+Public License for libraries did not effectively promote software
+sharing, because most developers did not use the libraries.  We
+concluded that weaker conditions might promote sharing better.
+
+  However, unrestricted linking of non-free programs would deprive the
+users of those programs of all benefit from the free status of the
+libraries themselves.  This Library General Public License is intended to
+permit developers of non-free programs to use free libraries, while
+preserving your freedom as a user of such programs to change the free
+libraries that are incorporated in them.  (We have not seen how to achieve
+this as regards changes in header files, but we have achieved it as regards
+changes in the actual functions of the Library.)  The hope is that this
+will lead to faster development of free libraries.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, while the latter only
+works together with the library.
+
+  Note that it is possible for a library to be covered by the ordinary
+General Public License rather than by this special one.
+\f
+                 GNU LIBRARY GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library which
+contains a notice placed by the copyright holder or other authorized
+party saying it may be distributed under the terms of this Library
+General Public License (also called "this License").  Each licensee is
+addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+  
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+\f
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+\f
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+\f
+  6. As an exception to the Sections above, you may also compile or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    c) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    d) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the source code distributed need not include anything that is normally
+distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+\f
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+\f
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Library General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+\f
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                           NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+     Appendix: How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+    MA 02111-1307, USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/deps/lightrec/README b/deps/lightrec/README
new file mode 100644 (file)
index 0000000..5bc4627
--- /dev/null
@@ -0,0 +1 @@
+LightRec is my attempt at creating a dynamic recompiler for MIPS, powered by GNU Lightning.
diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c
new file mode 100644 (file)
index 0000000..833a8e1
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2015-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "blockcache.h"
+#include "debug.h"
+#include "lightrec-private.h"
+#include "memmanager.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+/* Must be power of two */
+#define LUT_SIZE 0x4000
+
+struct blockcache {
+       struct lightrec_state *state;
+       struct block * lut[LUT_SIZE];
+};
+
+/* Look up the compiled block starting at the given PC.
+ * The PC is stripped of its segment bits (kunseg) and hashed into the
+ * LUT; colliding blocks are chained through block->next.
+ * Returns NULL when no matching block is cached. */
+struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
+{
+       struct block *block;
+
+       pc = kunseg(pc);
+
+       /* Word-aligned PC, so drop the two low bits before hashing */
+       for (block = cache->lut[(pc >> 2) & (LUT_SIZE - 1)];
+            block; block = block->next)
+               if (kunseg(block->pc) == pc)
+                       return block;
+
+       return NULL;
+}
+
+/* Reset the code LUT entries covered by this block, so that the next
+ * execution at these addresses goes back through the dispatcher.
+ * NOTE: the 'cache' parameter is unused here. */
+void remove_from_code_lut(struct blockcache *cache, struct block *block)
+{
+       struct lightrec_state *state = block->state;
+       const struct opcode *op;
+       u32 offset = lut_offset(block->pc);
+
+       /* Use state->get_next_block in the code LUT, which basically
+        * calls back get_next_block_func(), until the compiler
+        * overrides this. This is required, as a NULL value in the code
+        * LUT means an outdated block. */
+       state->code_lut[offset] = state->get_next_block;
+
+       /* Also invalidate the LUT entries of intra-block entry points,
+        * which are marked with OP_META_SYNC opcodes */
+       for (op = block->opcode_list; op; op = op->next)
+               if (op->c.i.op == OP_META_SYNC)
+                       state->code_lut[offset + op->offset] = NULL;
+
+}
+
+/* Flag the block so that the recompiler regenerates its native code.
+ * NOTE: the 'cache' parameter is unused here. */
+void lightrec_mark_for_recompilation(struct blockcache *cache,
+                                    struct block *block)
+{
+       block->flags |= BLOCK_SHOULD_RECOMPILE;
+}
+
+/* Insert the block at the head of its LUT hash bucket, then clear its
+ * code LUT entries so execution first goes through the dispatcher. */
+void lightrec_register_block(struct blockcache *cache, struct block *block)
+{
+       u32 pc = kunseg(block->pc);
+       struct block *old;
+
+       /* Prepend to the bucket's collision chain */
+       old = cache->lut[(pc >> 2) & (LUT_SIZE - 1)];
+       if (old)
+               block->next = old;
+
+       cache->lut[(pc >> 2) & (LUT_SIZE - 1)] = block;
+
+       remove_from_code_lut(cache, block);
+}
+
+/* Unlink the block from its LUT hash bucket and invalidate its code
+ * LUT entries. Logs an error if the block was not in the cache. */
+void lightrec_unregister_block(struct blockcache *cache, struct block *block)
+{
+       u32 pc = kunseg(block->pc);
+       struct block *old = cache->lut[(pc >> 2) & (LUT_SIZE - 1)];
+
+       remove_from_code_lut(cache, block);
+
+       /* Fast path: the block is the head of its collision chain */
+       if (old == block) {
+               cache->lut[(pc >> 2) & (LUT_SIZE - 1)] = old->next;
+               return;
+       }
+
+       /* Otherwise walk the chain looking for its predecessor */
+       for (; old; old = old->next) {
+               if (old->next == block) {
+                       old->next = block->next;
+                       return;
+               }
+       }
+
+       pr_err("Block at PC 0x%x is not in cache\n", block->pc);
+}
+
+/* Free every block in every LUT bucket, then free the cache itself. */
+void lightrec_free_block_cache(struct blockcache *cache)
+{
+       struct block *block, *next;
+       unsigned int i;
+
+       for (i = 0; i < LUT_SIZE; i++) {
+               /* Save 'next' before the block is freed */
+               for (block = cache->lut[i]; block; block = next) {
+                       next = block->next;
+                       lightrec_free_block(block);
+               }
+       }
+
+       lightrec_free(cache->state, MEM_FOR_LIGHTREC, sizeof(*cache), cache);
+}
+
+/* Allocate a zero-initialized block cache tied to the given state.
+ * Returns NULL on allocation failure. */
+struct blockcache * lightrec_blockcache_init(struct lightrec_state *state)
+{
+       struct blockcache *cache;
+
+       cache = lightrec_calloc(state, MEM_FOR_LIGHTREC, sizeof(*cache));
+       if (!cache)
+               return NULL;
+
+       cache->state = state;
+
+       return cache;
+}
+
+/* Hash the source opcodes of a block (nb_ops words), used to detect
+ * whether the code the block was compiled from has been modified. */
+u32 lightrec_calculate_block_hash(const struct block *block)
+{
+       const struct lightrec_mem_map *map = block->map;
+       u32 pc, hash = 0xffffffff;
+       const u32 *code;
+       unsigned int i;
+
+       /* Offset of the block within its map, computed against the
+        * (possibly mirrored) map the block was created with */
+       pc = kunseg(block->pc) - map->pc;
+
+       /* Resolve mirrors to read from the real backing memory */
+       while (map->mirror_of)
+               map = map->mirror_of;
+
+       code = map->address + pc;
+
+       /* Jenkins one-at-a-time hash algorithm */
+       for (i = 0; i < block->nb_ops; i++) {
+               hash += *code++;
+               hash += (hash << 10);
+               hash ^= (hash >> 6);
+       }
+
+       hash += (hash << 3);
+       hash ^= (hash >> 11);
+       hash += (hash << 15);
+
+       return hash;
+}
+
+/* Check whether a block flagged as possibly outdated (NULL code LUT
+ * entry) really needs recompiling, by re-hashing its source code.
+ * If the code is unchanged, the LUT entry is restored. */
+bool lightrec_block_is_outdated(struct block *block)
+{
+       void **lut_entry = &block->state->code_lut[lut_offset(block->pc)];
+       bool outdated;
+
+       /* A non-NULL LUT entry means the block is known to be valid */
+       if (*lut_entry)
+               return false;
+
+       outdated = block->hash != lightrec_calculate_block_hash(block);
+       if (likely(!outdated)) {
+               /* The block was marked as outdated, but the content is still
+                * the same */
+               if (block->function)
+                       *lut_entry = block->function;
+               else
+                       *lut_entry = block->state->get_next_block;
+       }
+
+       return outdated;
+}
diff --git a/deps/lightrec/blockcache.h b/deps/lightrec/blockcache.h
new file mode 100644 (file)
index 0000000..0c57ffc
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __BLOCKCACHE_H__
+#define __BLOCKCACHE_H__
+
+#include "lightrec.h"
+
+struct blockcache;
+
+/* Block lookup / (un)registration in the block cache */
+struct block * lightrec_find_block(struct blockcache *cache, u32 pc);
+void lightrec_register_block(struct blockcache *cache, struct block *block);
+void lightrec_unregister_block(struct blockcache *cache, struct block *block);
+
+/* Cache lifetime management */
+struct blockcache * lightrec_blockcache_init(struct lightrec_state *state);
+void lightrec_free_block_cache(struct blockcache *cache);
+
+/* Self-modifying-code detection helpers */
+u32 lightrec_calculate_block_hash(const struct block *block);
+_Bool lightrec_block_is_outdated(struct block *block);
+
+void lightrec_mark_for_recompilation(struct blockcache *cache,
+                                    struct block *block);
+
+#endif /* __BLOCKCACHE_H__ */
diff --git a/deps/lightrec/config.h.cmakein b/deps/lightrec/config.h.cmakein
new file mode 100644 (file)
index 0000000..1eac007
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __LIGHTREC_CONFIG_H__
+#define __LIGHTREC_CONFIG_H__
+
+/* Feature switches substituted by CMake; each becomes 0 or 1 */
+#cmakedefine01 ENABLE_THREADED_COMPILER
+#cmakedefine01 ENABLE_FIRST_PASS
+#cmakedefine01 ENABLE_DISASSEMBLER
+#cmakedefine01 ENABLE_TINYMM
+
+#endif /* __LIGHTREC_CONFIG_H__ */
+
diff --git a/deps/lightrec/debug.h b/deps/lightrec/debug.h
new file mode 100644 (file)
index 0000000..4048d43
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef DEBUG_H
+#define DEBUG_H
+
+#include <stdio.h>
+#include <unistd.h>
+
+/* Log verbosity levels; LOG_LEVEL is the compile-time threshold */
+#define NOLOG_L 0
+#define ERROR_L 1
+#define WARNING_L 2
+#define INFO_L 3
+#define DEBUG_L 4
+
+#ifndef LOG_LEVEL
+#define LOG_LEVEL INFO_L
+#endif
+
+// -------------
+
+#ifndef COLOR_DEBUG
+#define COLOR_DEBUG   "\e[0;32m"
+#endif
+#ifndef COLOR_WARNING
+#define COLOR_WARNING "\e[01;35m"
+#endif
+#ifndef COLOR_ERROR
+#define COLOR_ERROR   "\e[01;31m"
+#endif
+
+#define COLOR_END "\e[0m"
+
+#if (LOG_LEVEL >= DEBUG_L)
+# ifdef COLOR_DEBUG
+#  define pr_debug(str, ...) do {                                      \
+       if (isatty(STDOUT_FILENO))                                      \
+               fprintf(stdout, COLOR_DEBUG "DEBUG: " str COLOR_END,    \
+                       ##__VA_ARGS__);                                 \
+       else                                                            \
+               fprintf(stdout, "DEBUG: " str, ##__VA_ARGS__);          \
+       } while (0)
+# else
+#  define pr_debug(...) \
+    fprintf(stdout, "DEBUG: " __VA_ARGS__)
+# endif
+#else
+#define pr_debug(...)
+#endif
+
+#if (LOG_LEVEL >= INFO_L)
+# ifdef COLOR_INFO
+#  define pr_info(str, ...) \
+    fprintf(stdout, COLOR_INFO str COLOR_END, ##__VA_ARGS__)
+# else
+#  define pr_info(...) \
+    fprintf(stdout, __VA_ARGS__)
+# endif
+#else
+#define pr_info(...)
+#endif
+
+#if (LOG_LEVEL >= WARNING_L)
+# ifdef COLOR_WARNING
+#  define pr_warn(str, ...) do {                                       \
+       if (isatty(STDERR_FILENO))                                      \
+               fprintf(stderr, COLOR_WARNING "WARNING: " str COLOR_END,\
+                       ##__VA_ARGS__);                                 \
+       else                                                            \
+               fprintf(stderr, "WARNING: " str, ##__VA_ARGS__);        \
+       } while (0)
+# else
+#  define pr_warn(...) \
+    fprintf(stderr, "WARNING: " __VA_ARGS__)
+# endif
+#else
+#define pr_warn(...)
+#endif
+
+#if (LOG_LEVEL >= ERROR_L)
+# ifdef COLOR_ERROR
+#  define pr_err(str, ...) do {                                                \
+       if (isatty(STDERR_FILENO))                                      \
+               fprintf(stderr, COLOR_ERROR "ERROR: " str COLOR_END,    \
+                       ##__VA_ARGS__);                                 \
+       else                                                            \
+               fprintf(stderr, "ERROR: " str, ##__VA_ARGS__);          \
+       } while (0)
+# else
+#  define pr_err(...) \
+    fprintf(stderr, "ERROR: " __VA_ARGS__)
+# endif
+#else
+#define pr_err(...)
+#endif
+
+#endif
diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c
new file mode 100644 (file)
index 0000000..06fcec9
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "config.h"
+
+#if ENABLE_DISASSEMBLER
+#include <dis-asm.h>
+#endif
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "debug.h"
+#include "disassembler.h"
+#include "lightrec-private.h"
+#include "memmanager.h"
+
+/* Return true if the opcode is a jump/branch that is always taken,
+ * i.e. one that unconditionally ends the block. */
+static bool is_unconditional_jump(const struct opcode *op)
+{
+       switch (op->i.op) {
+       case OP_SPECIAL:
+               return op->r.op == OP_SPECIAL_JR || op->r.op == OP_SPECIAL_JALR;
+       case OP_J:
+       case OP_JAL:
+               return true;
+       case OP_BEQ:
+       case OP_BLEZ:
+               /* BEQ with rs == rt always branches; for BLEZ the rt
+                * field is zero, so rs == rt means BLEZ $zero */
+               return op->i.rs == op->i.rt;
+       case OP_REGIMM:
+               /* BGEZ/BGEZAL on $zero: 0 >= 0 is always true */
+               return (op->r.rt == OP_REGIMM_BGEZ ||
+                       op->r.rt == OP_REGIMM_BGEZAL) && op->i.rs == 0;
+       default:
+               return false;
+       }
+}
+
+/* Return true for opcodes that can raise an exception and therefore
+ * end the block: SYSCALL, BREAK, and writes to the cop0 status (12)
+ * or cause (13) registers. */
+static bool is_syscall(const struct opcode *op)
+{
+       return (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_SYSCALL ||
+                                          op->r.op == OP_SPECIAL_BREAK)) ||
+               (op->i.op == OP_CP0 && (op->r.rs == OP_CP0_MTC0 ||
+                                       op->r.rs == OP_CP0_CTC0) &&
+                (op->r.rd == 12 || op->r.rd == 13));
+}
+
+/* Free every node of a singly-linked opcode list. */
+void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list)
+{
+       struct opcode *next;
+
+       while (list) {
+               next = list->next;
+               lightrec_free(state, MEM_FOR_IR, sizeof(*list), list);
+               list = next;
+       }
+}
+
+/* Read MIPS opcodes from 'src' and build a linked list of struct
+ * opcode, stopping after the delay slot of an unconditional jump or
+ * at an exception-raising opcode. On success, *len (if non-NULL)
+ * receives the number of bytes consumed. Returns NULL on allocation
+ * failure. */
+struct opcode * lightrec_disassemble(struct lightrec_state *state,
+                                    const u32 *src, unsigned int *len)
+{
+       struct opcode *head = NULL;
+       bool stop_next = false;
+       struct opcode *curr, *last;
+       unsigned int i;
+
+       for (i = 0, last = NULL; ; i++, last = curr) {
+               curr = lightrec_calloc(state, MEM_FOR_IR, sizeof(*curr));
+               if (!curr) {
+                       pr_err("Unable to allocate memory\n");
+                       lightrec_free_opcode_list(state, head);
+                       return NULL;
+               }
+
+               /* Append to the list (or start it) */
+               if (!last)
+                       head = curr;
+               else
+                       last->next = curr;
+
+               /* TODO: Take care of endianness */
+               curr->opcode = LE32TOH(*src++);
+               curr->offset = i;
+
+               /* NOTE: The block disassembly ends after the opcode that
+                * follows an unconditional jump (delay slot) */
+               if (stop_next || is_syscall(curr))
+                       break;
+               else if (is_unconditional_jump(curr))
+                       stop_next = true;
+       }
+
+       if (len)
+               *len = (i + 1) * sizeof(u32);
+
+       return head;
+}
+
+/* Emulated cycle cost of one opcode: meta opcodes (which do not exist
+ * in the original code stream) cost nothing, everything else costs 2. */
+unsigned int lightrec_cycles_of_opcode(union code code)
+{
+       switch (code.i.op) {
+       case OP_META_REG_UNLOAD:
+       case OP_META_SYNC:
+               return 0;
+       default:
+               return 2;
+       }
+}
+
+#if ENABLE_DISASSEMBLER
+/* Print a disassembly of 'length' bytes of MIPS code to stdout, using
+ * the binutils opcodes library (dis-asm.h).
+ * NOTE: the 'block' parameter is unused here. */
+void lightrec_print_disassembly(const struct block *block,
+                               const u32 *code, unsigned int length)
+{
+       struct disassemble_info info;
+       unsigned int i;
+
+       memset(&info, 0, sizeof(info));
+       init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+
+       info.buffer = (bfd_byte *) code;
+       info.buffer_vma = (bfd_vma)(uintptr_t) code;
+       info.buffer_length = length;
+       info.flavour = bfd_target_unknown_flavour;
+       info.arch = bfd_arch_mips;
+       info.mach = bfd_mach_mips3000;
+       disassemble_init_for_target(&info);
+
+       /* One 4-byte instruction per line */
+       for (i = 0; i < length; i += 4) {
+               /* Local declaration: print_insn_little_mips comes from
+                * the opcodes library but is not in dis-asm.h */
+               void print_insn_little_mips(bfd_vma, struct disassemble_info *);
+               putc('\t', stdout);
+               print_insn_little_mips((bfd_vma)(uintptr_t) code++, &info);
+               putc('\n', stdout);
+       }
+}
+#endif
diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h
new file mode 100644 (file)
index 0000000..e4c4403
--- /dev/null
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __DISASSEMBLER_H__
+#define __DISASSEMBLER_H__
+
+#include "debug.h"
+#include "lightrec.h"
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+/* Per-opcode flags stored in struct opcode's 'flags' field */
+#define LIGHTREC_DIRECT_IO     (1 << 0)
+#define LIGHTREC_NO_INVALIDATE (1 << 1)
+#define LIGHTREC_NO_DS         (1 << 2)
+#define LIGHTREC_SMC           (1 << 3)
+#define LIGHTREC_EMULATE_BRANCH        (1 << 4)
+#define LIGHTREC_LOCAL_BRANCH  (1 << 5)
+#define LIGHTREC_HW_IO         (1 << 6)
+#define LIGHTREC_MULT32                (1 << 7)
+
+struct block;
+
+enum standard_opcodes {
+       OP_SPECIAL              = 0x00,
+       OP_REGIMM               = 0x01,
+       OP_J                    = 0x02,
+       OP_JAL                  = 0x03,
+       OP_BEQ                  = 0x04,
+       OP_BNE                  = 0x05,
+       OP_BLEZ                 = 0x06,
+       OP_BGTZ                 = 0x07,
+       OP_ADDI                 = 0x08,
+       OP_ADDIU                = 0x09,
+       OP_SLTI                 = 0x0a,
+       OP_SLTIU                = 0x0b,
+       OP_ANDI                 = 0x0c,
+       OP_ORI                  = 0x0d,
+       OP_XORI                 = 0x0e,
+       OP_LUI                  = 0x0f,
+       OP_CP0                  = 0x10,
+       OP_CP2                  = 0x12,
+       OP_LB                   = 0x20,
+       OP_LH                   = 0x21,
+       OP_LWL                  = 0x22,
+       OP_LW                   = 0x23,
+       OP_LBU                  = 0x24,
+       OP_LHU                  = 0x25,
+       OP_LWR                  = 0x26,
+       OP_SB                   = 0x28,
+       OP_SH                   = 0x29,
+       OP_SWL                  = 0x2a,
+       OP_SW                   = 0x2b,
+       OP_SWR                  = 0x2e,
+       OP_LWC2                 = 0x32,
+       OP_SWC2                 = 0x3a,
+
+       OP_META_REG_UNLOAD      = 0x11,
+
+       OP_META_BEQZ            = 0x14,
+       OP_META_BNEZ            = 0x15,
+
+       OP_META_MOV             = 0x16,
+       OP_META_SYNC            = 0x17,
+};
+
+enum special_opcodes {
+       OP_SPECIAL_SLL          = 0x00,
+       OP_SPECIAL_SRL          = 0x02,
+       OP_SPECIAL_SRA          = 0x03,
+       OP_SPECIAL_SLLV         = 0x04,
+       OP_SPECIAL_SRLV         = 0x06,
+       OP_SPECIAL_SRAV         = 0x07,
+       OP_SPECIAL_JR           = 0x08,
+       OP_SPECIAL_JALR         = 0x09,
+       OP_SPECIAL_SYSCALL      = 0x0c,
+       OP_SPECIAL_BREAK        = 0x0d,
+       OP_SPECIAL_MFHI         = 0x10,
+       OP_SPECIAL_MTHI         = 0x11,
+       OP_SPECIAL_MFLO         = 0x12,
+       OP_SPECIAL_MTLO         = 0x13,
+       OP_SPECIAL_MULT         = 0x18,
+       OP_SPECIAL_MULTU        = 0x19,
+       OP_SPECIAL_DIV          = 0x1a,
+       OP_SPECIAL_DIVU         = 0x1b,
+       OP_SPECIAL_ADD          = 0x20,
+       OP_SPECIAL_ADDU         = 0x21,
+       OP_SPECIAL_SUB          = 0x22,
+       OP_SPECIAL_SUBU         = 0x23,
+       OP_SPECIAL_AND          = 0x24,
+       OP_SPECIAL_OR           = 0x25,
+       OP_SPECIAL_XOR          = 0x26,
+       OP_SPECIAL_NOR          = 0x27,
+       OP_SPECIAL_SLT          = 0x2a,
+       OP_SPECIAL_SLTU         = 0x2b,
+};
+
+enum regimm_opcodes {
+       OP_REGIMM_BLTZ          = 0x00,
+       OP_REGIMM_BGEZ          = 0x01,
+       OP_REGIMM_BLTZAL        = 0x10,
+       OP_REGIMM_BGEZAL        = 0x11,
+};
+
+enum cp0_opcodes {
+       OP_CP0_MFC0             = 0x00,
+       OP_CP0_CFC0             = 0x02,
+       OP_CP0_MTC0             = 0x04,
+       OP_CP0_CTC0             = 0x06,
+       OP_CP0_RFE              = 0x10,
+};
+
+enum cp2_opcodes {
+       OP_CP2_BASIC            = 0x00,
+};
+
+enum cp2_basic_opcodes {
+       OP_CP2_BASIC_MFC2       = 0x00,
+       OP_CP2_BASIC_CFC2       = 0x02,
+       OP_CP2_BASIC_MTC2       = 0x04,
+       OP_CP2_BASIC_CTC2       = 0x06,
+};
+
+/* R-type (register) instruction encoding. The bitfields are declared
+ * in opposite order depending on host endianness, so the struct maps
+ * directly onto the raw 32-bit opcode word. */
+struct opcode_r {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       u32 zero :6;
+       u32 rs   :5;
+       u32 rt   :5;
+       u32 rd   :5;
+       u32 imm  :5;
+       u32 op   :6;
+#else
+       u32 op   :6;
+       u32 imm  :5;
+       u32 rd   :5;
+       u32 rt   :5;
+       u32 rs   :5;
+       u32 zero :6;
+#endif
+} __packed;
+
+/* I-type (immediate) instruction encoding */
+struct opcode_i {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       u32 op  :6;
+       u32 rs  :5;
+       u32 rt  :5;
+       u32 imm :16;
+#else
+       u32 imm :16;
+       u32 rt  :5;
+       u32 rs  :5;
+       u32 op  :6;
+#endif
+} __packed;
+
+/* J-type (jump) instruction encoding */
+struct opcode_j {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       u32 op  :6;
+       u32 imm :26;
+#else
+       u32 imm :26;
+       u32 op  :6;
+#endif
+} __packed;
+
+/* One 32-bit opcode, viewable as a raw word or as any encoding */
+union code {
+       /* Keep in sync with struct opcode */
+       u32 opcode;
+       struct opcode_r r;
+       struct opcode_i i;
+       struct opcode_j j;
+};
+
+/* Node of the intermediate-representation opcode list */
+struct opcode {
+       /* Keep this union at the first position */
+       union {
+               union code c;
+
+               /* Keep in sync with union code */
+               u32 opcode;
+               struct opcode_r r;
+               struct opcode_i i;
+               struct opcode_j j;
+       };
+       u16 flags;      /* LIGHTREC_* flags set by the optimizer */
+       u16 offset;     /* word offset of this opcode within the block */
+       struct opcode *next;
+};
+
+struct opcode * lightrec_disassemble(struct lightrec_state *state,
+                                    const u32 *src, unsigned int *len);
+void lightrec_free_opcode_list(struct lightrec_state *state,
+                              struct opcode *list);
+
+unsigned int lightrec_cycles_of_opcode(union code code);
+
+void lightrec_print_disassembly(const struct block *block,
+                               const u32 *code, unsigned int length);
+
+#endif /* __DISASSEMBLER_H__ */
diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c
new file mode 100644 (file)
index 0000000..b09dc94
--- /dev/null
@@ -0,0 +1,1577 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "blockcache.h"
+#include "debug.h"
+#include "disassembler.h"
+#include "emitter.h"
+#include "optimizer.h"
+#include "regcache.h"
+
+#include <lightning.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+typedef void (*lightrec_rec_func_t)(const struct block *,
+                                   const struct opcode *, u32);
+
+/* Forward declarations */
+static void rec_SPECIAL(const struct block *block,
+                      const struct opcode *op, u32 pc);
+static void rec_REGIMM(const struct block *block,
+                     const struct opcode *op, u32 pc);
+static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc);
+static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc);
+
+
+/* Fallback recompilation handler: logs a warning, emits nothing. */
+static void unknown_opcode(const struct block *block,
+                          const struct opcode *op, u32 pc)
+{
+       pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n", op->opcode, pc);
+}
+
+/* Emit the end-of-block sequence: optionally write 'link' (the return
+ * address) to register 'ra_reg', load the branch target (from native
+ * register 'reg_new_pc' when >= 0, otherwise from the immediate
+ * 'imm'), recompile the delay-slot opcode if any, store back dirty
+ * registers, move the target PC to JIT_V0 and subtract the elapsed
+ * cycles when 'update_cycles' is set. */
+static void lightrec_emit_end_of_block(const struct block *block,
+                                      const struct opcode *op, u32 pc,
+                                      s8 reg_new_pc, u32 imm, u8 ra_reg,
+                                      u32 link, bool update_cycles)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       u32 cycles = state->cycles;
+       jit_state_t *_jit = block->_jit;
+
+       jit_note(__FILE__, __LINE__);
+
+       if (link) {
+               /* Update the $ra register */
+               u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg);
+               jit_movi(link_reg, link);
+               lightrec_free_reg(reg_cache, link_reg);
+       }
+
+       /* No native register holds the target PC: load 'imm' into a
+        * locked register instead */
+       if (reg_new_pc < 0) {
+               reg_new_pc = lightrec_alloc_reg(reg_cache, _jit, JIT_V0);
+               lightrec_lock_reg(reg_cache, _jit, reg_new_pc);
+
+               jit_movi(reg_new_pc, imm);
+       }
+
+       if (has_delay_slot(op->c) &&
+           !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
+               cycles += lightrec_cycles_of_opcode(op->next->c);
+
+               /* Recompile the delay slot */
+               if (op->next->c.opcode)
+                       lightrec_rec_opcode(block, op->next, pc + 4);
+       }
+
+       /* Store back remaining registers */
+       lightrec_storeback_regs(reg_cache, _jit);
+
+       jit_movr(JIT_V0, reg_new_pc);
+
+       if (cycles && update_cycles) {
+               jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
+               pr_debug("EOB: %u cycles\n", cycles);
+       }
+
+       /* Jump to the epilogue, patched later, unless this is already
+        * the last opcode of the block */
+       if (op->next && ((op->flags & LIGHTREC_NO_DS) || op->next->next))
+               state->branches[state->nb_branches++] = jit_jmpi();
+}
+
+/* Emit an early end-of-block at the given PC: store back registers,
+ * put 'pc' in JIT_V0, account the cycles elapsed so far (minus the
+ * current opcode's own cost) and jump to the epilogue. */
+void lightrec_emit_eob(const struct block *block,
+                      const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+
+       lightrec_storeback_regs(reg_cache, _jit);
+
+       jit_movi(JIT_V0, pc);
+       jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE,
+                state->cycles - lightrec_cycles_of_opcode(op->c));
+
+       state->branches[state->nb_branches++] = jit_jmpi();
+}
+
+/* JR: jump to the address held in rs; no link register is written */
+static void rec_special_JR(const struct block *block,
+                          const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+
+       _jit_name(block->_jit, __func__);
+       lightrec_lock_reg(reg_cache, _jit, rs);
+       lightrec_emit_end_of_block(block, op, pc, rs, 0, 31, 0, true);
+}
+
+/* JALR: jump to the address in rs, writing pc + 8 to register rd */
+static void rec_special_JALR(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+
+       _jit_name(block->_jit, __func__);
+       lightrec_lock_reg(reg_cache, _jit, rs);
+       lightrec_emit_end_of_block(block, op, pc, rs, 0, op->r.rd, pc + 8, true);
+}
+
+/* J: jump to the 26-bit target within the current 256 MiB region */
+static void rec_J(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       lightrec_emit_end_of_block(block, op, pc, -1,
+                                  (pc & 0xf0000000) | (op->j.imm << 2), 31, 0, true);
+}
+
+/* JAL: same target as J, writing pc + 8 to $ra (register 31) */
+static void rec_JAL(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       lightrec_emit_end_of_block(block, op, pc, -1,
+                                  (pc & 0xf0000000) | (op->j.imm << 2),
+                                  31, pc + 8, true);
+}
+
+static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
+                 jit_code_t code, u32 link, bool unconditional, bool bz)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       struct native_register *regs_backup;
+       jit_state_t *_jit = block->_jit;
+       struct lightrec_branch *branch;
+       jit_node_t *addr;
+       u8 link_reg;
+       u32 offset, cycles = block->state->cycles;
+       bool is_forward = (s16)op->i.imm >= -1;
+
+       jit_note(__FILE__, __LINE__);
+
+       if (!(op->flags & LIGHTREC_NO_DS))
+               cycles += lightrec_cycles_of_opcode(op->next->c);
+
+       block->state->cycles = 0;
+
+       if (cycles)
+               jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
+
+       if (!unconditional) {
+               u8 rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs),
+                  rt = bz ? 0 : lightrec_alloc_reg_in_ext(reg_cache,
+                                                          _jit, op->i.rt);
+
+               /* Generate the branch opcode */
+               addr = jit_new_node_pww(code, NULL, rs, rt);
+
+               lightrec_free_regs(reg_cache);
+               regs_backup = lightrec_regcache_enter_branch(reg_cache);
+       }
+
+       if (op->flags & LIGHTREC_LOCAL_BRANCH) {
+               if (op->next && !(op->flags & LIGHTREC_NO_DS)) {
+                       /* Recompile the delay slot */
+                       if (op->next->opcode)
+                               lightrec_rec_opcode(block, op->next, pc + 4);
+               }
+
+               if (link) {
+                       /* Update the $ra register */
+                       link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31);
+                       jit_movi(link_reg, link);
+                       lightrec_free_reg(reg_cache, link_reg);
+               }
+
+               /* Store back remaining registers */
+               lightrec_storeback_regs(reg_cache, _jit);
+
+               offset = op->offset + 1 + (s16)op->i.imm;
+               pr_debug("Adding local branch to offset 0x%x\n", offset << 2);
+               branch = &block->state->local_branches[
+                       block->state->nb_local_branches++];
+
+               branch->target = offset;
+               if (is_forward)
+                       branch->branch = jit_jmpi();
+               else
+                       branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
+       }
+
+       if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
+               lightrec_emit_end_of_block(block, op, pc, -1,
+                                          pc + 4 + ((s16)op->i.imm << 2),
+                                          31, link, false);
+       }
+
+       if (!unconditional) {
+               jit_patch(addr);
+               lightrec_regcache_leave_branch(reg_cache, regs_backup);
+
+               if (bz && link) {
+                       /* Update the $ra register */
+                       link_reg = lightrec_alloc_reg_out_ext(reg_cache,
+                                                             _jit, 31);
+                       jit_movi(link_reg, (s32)link);
+                       lightrec_free_reg(reg_cache, link_reg);
+               }
+
+               if (!(op->flags & LIGHTREC_NO_DS) && op->next->opcode)
+                       lightrec_rec_opcode(block, op->next, pc + 4);
+       }
+}
+
+/* Each wrapper passes the INVERSE jit condition to rec_b(), since the
+ * emitted compare jumps over the branch-taken code (see rec_b). */
+
+/* BNE: inverse is "branch if equal" */
+static void rec_BNE(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_beqr, 0, false, false);
+}
+
+/* BEQ: inverse is "branch if not equal"; always taken when rs == rt */
+static void rec_BEQ(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_bner, 0,
+                       op->i.rs == op->i.rt, false);
+}
+
+/* BLEZ: inverse is "branch if > 0"; always taken when rs is $zero */
+static void rec_BLEZ(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_bgti, 0, op->i.rs == 0, true);
+}
+
+/* BGTZ: inverse is "branch if <= 0" */
+static void rec_BGTZ(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_blei, 0, false, true);
+}
+
+/* BLTZ: inverse is "branch if >= 0" */
+static void rec_regimm_BLTZ(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_bgei, 0, false, true);
+}
+
+/* BLTZAL: BLTZ variant that links pc + 8 into $ra */
+static void rec_regimm_BLTZAL(const struct block *block,
+                             const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_bgei, pc + 8, false, true);
+}
+
+/* BGEZ: inverse is "branch if < 0"; always taken when rs is $zero */
+static void rec_regimm_BGEZ(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_blti, 0, !op->i.rs, true);
+}
+
+/* BGEZAL: BGEZ variant that links pc + 8 into $ra */
+static void rec_regimm_BGEZAL(const struct block *block,
+                             const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_blti, pc + 8, !op->i.rs, true);
+}
+
+/* Common helper for I-type ALU opcodes: rt = rs <code> imm.
+ *
+ * sign_extend selects how the 16-bit immediate is widened before being
+ * emitted: sign-extension for arithmetic/compare opcodes (ADDI(U),
+ * SLTI(U)), zero-extension for logical ones (ORI, XORI).  Both
+ * registers use the "_ext" allocators, which presumably mark the value
+ * as sign-extended in the host register on 64-bit hosts -- confirm in
+ * regcache.c. */
+static void rec_alu_imm(const struct block *block, const struct opcode *op,
+                       jit_code_t code, bool sign_extend)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rs, rt;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs);
+       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+
+       if (sign_extend)
+               jit_new_node_www(code, rt, rs, (s32)(s16) op->i.imm);
+       else
+               jit_new_node_www(code, rt, rs, (u32)(u16) op->i.imm);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+}
+
+/* Common helper for R-type three-register ALU opcodes:
+ * rd = rs <code> rt.
+ *
+ * out_ext requests a sign-extended output allocation; it is set by the
+ * SLT/SLTU handlers below, whose 0/1 result must stay canonical on
+ * 64-bit hosts. */
+static void rec_alu_special(const struct block *block, const struct opcode *op,
+                           jit_code_t code, bool out_ext)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rd, rt, rs;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
+       rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
+
+       if (out_ext)
+          rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+       else
+          rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+
+       jit_new_node_www(code, rd, rs, rt);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, rd);
+}
+
+/* Variable-shift helper (SLLV/SRLV/SRAV): rd = rt <code> (rs & 0x1f).
+ * Only the low 5 bits of rs are used as the shift amount, as shown by
+ * the jit_andi() below. */
+static void rec_alu_shiftv(const struct block *block,
+                          const struct opcode *op, jit_code_t code)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rd, rt, rs, temp;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
+       temp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       /* Arithmetic right shift needs a sign-extended input/output so
+        * the sign bits shift in correctly on 64-bit hosts. */
+       if (code == jit_code_rshr) {
+               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
+               rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+       } else {
+               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+               rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+       }
+
+       jit_andi(temp, rs, 0x1f);
+
+#if __WORDSIZE == 64
+       /* Logical right shift on a 64-bit host: first zero-extend the
+        * 32-bit value into rd, then shift rd in place. */
+       if (code == jit_code_rshr_u) {
+               jit_extr_ui(rd, rt);
+               jit_new_node_www(code, rd, rd, temp);
+       }
+#endif
+
+       /* All remaining cases shift rt directly into rd. */
+       if (__WORDSIZE == 32 || code != jit_code_rshr_u)
+               jit_new_node_www(code, rd, rt, temp);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, temp);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, rd);
+}
+
+/* I-type ALU wrappers.  Last argument of rec_alu_imm() selects
+ * sign-extension of the 16-bit immediate. */
+static void rec_ADDIU(const struct block *block,
+                     const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_imm(block, op, jit_code_addi, true);
+}
+
+static void rec_ADDI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       /* TODO: Handle the exception? */
+       _jit_name(block->_jit, __func__);
+       rec_alu_imm(block, op, jit_code_addi, true);
+}
+
+static void rec_SLTIU(const struct block *block,
+                     const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       /* Immediate is sign-extended even though the comparison itself
+        * is unsigned, as per the SLTIU encoding. */
+       rec_alu_imm(block, op, jit_code_lti_u, true);
+}
+
+static void rec_SLTI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_imm(block, op, jit_code_lti, true);
+}
+
+/* ANDI: rt = rs & zero-extended imm.  Open-coded (instead of going
+ * through rec_alu_imm) so the common 0xff/0xffff masks can use the
+ * cheaper byte/halfword zero-extension opcodes. */
+static void rec_ANDI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rs, rt;
+
+       _jit_name(block->_jit, __func__);
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+
+       /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
+        * casts to uint8_t / uint16_t. */
+       if (op->i.imm == 0xff)
+               jit_extr_uc(rt, rs);
+       else if (op->i.imm == 0xffff)
+               jit_extr_us(rt, rs);
+       else
+               jit_andi(rt, rs, (u32)(u16) op->i.imm);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+}
+
+/* Logical immediates: the 16-bit immediate is zero-extended
+ * (sign_extend == false). */
+static void rec_ORI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_imm(block, op, jit_code_ori, false);
+}
+
+static void rec_XORI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_imm(block, op, jit_code_xori, false);
+}
+
+/* LUI: rt = imm << 16, lower half zeroed.  The constant is computed at
+ * compile time and emitted as a single move; the (s32) cast keeps the
+ * value sign-extended on 64-bit hosts, matching the "_ext" output
+ * allocation. */
+static void rec_LUI(const struct block *block, const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rt;
+
+       /* Use the explicit _jit_name() form for consistency with every
+        * other handler in this file; jit_name() is only the macro
+        * shorthand expanding to the same call through the local _jit. */
+       _jit_name(block->_jit, __func__);
+       jit_note(__FILE__, __LINE__);
+       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+
+       jit_movi(rt, (s32)(op->i.imm << 16));
+
+       lightrec_free_reg(reg_cache, rt);
+}
+
+/* R-type three-register ALU wrappers; all delegate to
+ * rec_alu_special() with the matching JIT opcode. */
+static void rec_special_ADDU(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_addr, false);
+}
+
+static void rec_special_ADD(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       /* TODO: Handle the exception? */
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_addr, false);
+}
+
+static void rec_special_SUBU(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_subr, false);
+}
+
+static void rec_special_SUB(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       /* TODO: Handle the exception? */
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_subr, false);
+}
+
+static void rec_special_AND(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_andr, false);
+}
+
+static void rec_special_OR(const struct block *block,
+                          const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_orr, false);
+}
+
+static void rec_special_XOR(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_xorr, false);
+}
+
+/* NOR: rd = ~(rs | rt).  Emitted as the OR opcode followed by an
+ * in-place complement of the destination register. */
+static void rec_special_NOR(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rd;
+
+       /* Use _jit_name() like every other handler in this file;
+        * jit_name() is only the macro shorthand for the same call. */
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_orr, false);
+       rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+
+       /* NOTE(review): on 64-bit hosts jit_comr() flips all 64 bits of
+        * the host register; rd is allocated without the "_ext" flag, so
+        * presumably the upper half is treated as don't-care -- confirm
+        * against the regcache sign-extension rules. */
+       jit_comr(rd, rd);
+
+       lightrec_free_reg(reg_cache, rd);
+}
+
+/* SLT/SLTU request a sign-extended output (out_ext == true) so the 0/1
+ * result stays canonical on 64-bit hosts. */
+static void rec_special_SLTU(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_ltr_u, true);
+}
+
+static void rec_special_SLT(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_special(block, op, jit_code_ltr, true);
+}
+
+/* Variable-amount shifts; see rec_alu_shiftv(). */
+static void rec_special_SLLV(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shiftv(block, op, jit_code_lshr);
+}
+
+static void rec_special_SRLV(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shiftv(block, op, jit_code_rshr_u);
+}
+
+static void rec_special_SRAV(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shiftv(block, op, jit_code_rshr);
+}
+
+/* Immediate-shift helper (SLL/SRL/SRA): rd = rt <code> shamt.
+ * Mirrors rec_alu_shiftv() but takes the 5-bit amount from the opcode
+ * encoding (op->r.imm) instead of a register. */
+static void rec_alu_shift(const struct block *block,
+                         const struct opcode *op, jit_code_t code)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rd, rt;
+
+       jit_note(__FILE__, __LINE__);
+
+       /* Arithmetic right shift needs sign-extended registers so the
+        * correct bits shift in on 64-bit hosts. */
+       if (code == jit_code_rshi) {
+               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
+               rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+       } else {
+               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+               rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+       }
+
+#if __WORDSIZE == 64
+       /* Logical right shift on 64-bit hosts: zero-extend into rd
+        * first, then shift rd in place. */
+       if (code == jit_code_rshi_u) {
+               jit_extr_ui(rd, rt);
+               jit_new_node_www(code, rd, rd, op->r.imm);
+       }
+#endif
+       if (__WORDSIZE == 32 || code != jit_code_rshi_u)
+               jit_new_node_www(code, rd, rt, op->r.imm);
+
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, rd);
+}
+
+/* Immediate-amount shifts; see rec_alu_shift(). */
+static void rec_special_SLL(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shift(block, op, jit_code_lshi);
+}
+
+static void rec_special_SRL(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shift(block, op, jit_code_rshi_u);
+}
+
+static void rec_special_SRA(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_shift(block, op, jit_code_rshi);
+}
+
+/* MULT/MULTU: {hi, lo} = rs * rt.
+ *
+ * When the optimizer tagged the opcode LIGHTREC_MULT32 (the high part
+ * is never read), $hi is skipped entirely on 32-bit hosts, and on
+ * 64-bit hosts a temporary stands in for it. */
+static void rec_alu_mult(const struct block *block,
+                        const struct opcode *op, bool is_signed)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 lo, hi, rs, rt;
+
+       jit_note(__FILE__, __LINE__);
+
+       lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
+       /* hi stays unset on a 32-bit host with MULT32: it is neither
+        * written nor freed on that path. */
+       if (!(op->flags & LIGHTREC_MULT32))
+               hi = lightrec_alloc_reg_out_ext(reg_cache, _jit, REG_HI);
+       else if (__WORDSIZE == 64)
+               hi = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       /* Only the 64-bit signed path needs sign-extended inputs. */
+       if (__WORDSIZE == 32 || !is_signed) {
+               rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
+               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+       } else {
+               rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
+               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
+       }
+
+#if __WORDSIZE == 32
+       /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
+        * operation if the MULT was detected a 32-bit only. */
+       if (!(op->flags & LIGHTREC_MULT32)) {
+               if (is_signed)
+                       jit_qmulr(lo, hi, rs, rt);
+               else
+                       jit_qmulr_u(lo, hi, rs, rt);
+       } else {
+               jit_mulr(lo, rs, rt);
+       }
+#else
+       /* On 64-bit systems, do a 64*64->64 bit operation.
+        * The input registers must be 32 bits, so we first sign-extend (if
+        * mult) or clear (if multu) the input registers. */
+       if (is_signed) {
+               jit_mulr(lo, rs, rt);
+       } else {
+               /* lo and hi double as scratch for the zero-extended
+                * inputs before receiving the product. */
+               jit_extr_ui(lo, rt);
+               jit_extr_ui(hi, rs);
+               jit_mulr(lo, hi, lo);
+       }
+
+       /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
+       if (!(op->flags & LIGHTREC_MULT32))
+               jit_rshi(hi, lo, 32);
+#endif
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, lo);
+       /* Matches the allocation condition above. */
+       if (__WORDSIZE == 64 || !(op->flags & LIGHTREC_MULT32))
+               lightrec_free_reg(reg_cache, hi);
+}
+
+/* DIV/DIVU: $lo = rs / rt, $hi = rs % rt, with an inline handler for
+ * division by zero (which must not trap).  The handler below emits:
+ * signed:   $lo = (rs < 0) ? 1 : -1, $hi = rs
+ * unsigned: $lo = 0xffffffff,        $hi = rs */
+static void rec_alu_div(const struct block *block,
+                       const struct opcode *op, bool is_signed)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *branch, *to_end;
+       u8 lo, hi, rs, rt;
+
+       jit_note(__FILE__, __LINE__);
+       lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
+       hi = lightrec_alloc_reg_out(reg_cache, _jit, REG_HI);
+
+       /* Only the 64-bit signed path needs sign-extended inputs. */
+       if (__WORDSIZE == 32 || !is_signed) {
+               rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
+               rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+       } else {
+               rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
+               rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
+       }
+
+       /* Jump to special handler if dividing by zero  */
+       branch = jit_beqi(rt, 0);
+
+#if __WORDSIZE == 32
+       /* qdivr writes quotient to lo and remainder to hi in one op. */
+       if (is_signed)
+               jit_qdivr(lo, hi, rs, rt);
+       else
+               jit_qdivr_u(lo, hi, rs, rt);
+#else
+       /* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend
+        * (if div) or clear (if divu) the input registers. */
+       if (is_signed) {
+               jit_qdivr(lo, hi, rs, rt);
+       } else {
+               /* lo/hi double as scratch for the zero-extended inputs. */
+               jit_extr_ui(lo, rt);
+               jit_extr_ui(hi, rs);
+               jit_qdivr_u(lo, hi, hi, lo);
+       }
+#endif
+
+       /* Jump above the div-by-zero handler */
+       to_end = jit_jmpi();
+
+       jit_patch(branch);
+
+       if (is_signed) {
+               /* lo = (rs < 0) -> 0/1, then *2 - 1 -> -1/+1. */
+               jit_lti(lo, rs, 0);
+               jit_lshi(lo, lo, 1);
+               jit_subi(lo, lo, 1);
+       } else {
+               jit_movi(lo, 0xffffffff);
+       }
+
+       /* Remainder of a divide-by-zero is the dividend. */
+       jit_movr(hi, rs);
+
+       jit_patch(to_end);
+
+       lightrec_free_reg(reg_cache, rs);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, lo);
+       lightrec_free_reg(reg_cache, hi);
+}
+
+/* MULT/MULTU/DIV/DIVU wrappers; the boolean selects signedness. */
+static void rec_special_MULT(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mult(block, op, true);
+}
+
+static void rec_special_MULTU(const struct block *block,
+                             const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mult(block, op, false);
+}
+
+static void rec_special_DIV(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_div(block, op, true);
+}
+
+static void rec_special_DIVU(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_div(block, op, false);
+}
+
+/* Move between a general-purpose register and $lo/$hi (MFHI, MTHI,
+ * MFLO, MTLO).  On 64-bit hosts the move also sign-extends the 32-bit
+ * value, matching the "_ext" output allocation. */
+static void rec_alu_mv_lo_hi(const struct block *block, u8 dst, u8 src)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+
+       jit_note(__FILE__, __LINE__);
+       src = lightrec_alloc_reg_in(reg_cache, _jit, src);
+       dst = lightrec_alloc_reg_out_ext(reg_cache, _jit, dst);
+
+#if __WORDSIZE == 32
+       jit_movr(dst, src);
+#else
+       jit_extr_i(dst, src);
+#endif
+
+       lightrec_free_reg(reg_cache, src);
+       lightrec_free_reg(reg_cache, dst);
+}
+
+/* $lo/$hi move wrappers; see rec_alu_mv_lo_hi(). */
+static void rec_special_MFHI(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mv_lo_hi(block, op->r.rd, REG_HI);
+}
+
+static void rec_special_MTHI(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mv_lo_hi(block, REG_HI, op->r.rs);
+}
+
+static void rec_special_MFLO(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mv_lo_hi(block, op->r.rd, REG_LO);
+}
+
+static void rec_special_MTLO(const struct block *block,
+                            const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_alu_mv_lo_hi(block, REG_LO, op->r.rs);
+}
+
+/* Slow-path memory access: emit an indirect call into the state's C
+ * handler instead of inlining the access.
+ *
+ * Opcodes tagged HW_IO/DIRECT_IO use the lean rw_func and receive the
+ * raw opcode word; untagged ones use rw_generic_func and receive the
+ * opcode struct and block pointers (presumably so the handler can tag
+ * the opcode for next time -- confirm in lightrec.c).
+ * load_rt: rt is an input of the access (stores);
+ * read_rt: rt is written by the access (loads). */
+static void rec_io(const struct block *block, const struct opcode *op,
+                  bool load_rt, bool read_rt)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       bool is_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+       u32 offset;
+       u8 tmp, tmp2, tmp3;
+
+       jit_note(__FILE__, __LINE__);
+
+       /* Arguments are pinned to JIT_R0/JIT_R1; the C handler
+        * presumably reads them from those fixed registers. */
+       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+
+       if (is_tagged) {
+               offset = offsetof(struct lightrec_state, rw_func);
+       } else {
+               tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
+               offset = offsetof(struct lightrec_state, rw_generic_func);
+       }
+
+       /* Load the handler's function pointer from the state struct. */
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+       jit_ldxi(tmp2, LIGHTREC_REG_STATE, offset);
+
+       /* Flush the registers the C handler will read/write back to
+        * memory; the last argument presumably means "discard". */
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
+
+       if (read_rt && likely(op->i.rt))
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+       else if (load_rt)
+               lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+
+       if (is_tagged) {
+               jit_movi(tmp, op->opcode);
+       } else {
+               jit_movi(tmp, (uintptr_t)op);
+               jit_movi(tmp3, (uintptr_t)block);
+       }
+
+       jit_callr(tmp2);
+
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp2);
+       if (!is_tagged)
+               lightrec_free_reg(reg_cache, tmp3);
+       /* The call may have clobbered cached registers. */
+       lightrec_regcache_mark_live(reg_cache, _jit);
+}
+
+/* Inline store to RAM/scratchpad that skips code-LUT invalidation
+ * (used when the opcode is tagged NO_INVALIDATE, or when invalidation
+ * only happens through DMA). */
+static void rec_store_direct_no_invalidate(const struct block *block,
+                                          const struct opcode *op,
+                                          jit_code_t code)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *to_not_ram, *to_end;
+       u8 tmp, tmp2, rs, rt;
+       s16 imm;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       /* Convert to KUNSEG and avoid RAM mirrors */
+       if (state->mirrors_mapped) {
+               /* Mirrors are mapped: keep the immediate in the store
+                * itself and allow the wider 4*RAM_SIZE range. */
+               imm = (s16)op->i.imm;
+               jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
+       } else if (op->i.imm) {
+               /* Fold the immediate into the address before masking so
+                * the mask also de-mirrors the offset address. */
+               imm = 0;
+               jit_addi(tmp, rs, (s16)op->i.imm);
+               jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
+       } else {
+               imm = 0;
+               jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1));
+       }
+
+       lightrec_free_reg(reg_cache, rs);
+
+       /* Pick the host base offset: bit 28 set means the address is
+        * outside RAM (scratchpad/HW range here). */
+       if (state->offset_ram != state->offset_scratch) {
+               to_not_ram = jit_bmsi(tmp, BIT(28));
+
+               jit_movi(tmp2, state->offset_ram);
+
+               to_end = jit_jmpi();
+               jit_patch(to_not_ram);
+
+               jit_movi(tmp2, state->offset_scratch);
+               jit_patch(to_end);
+       } else if (state->offset_ram) {
+               jit_movi(tmp2, state->offset_ram);
+       }
+
+       /* Zero offsets mean the guest address is the host address. */
+       if (state->offset_ram || state->offset_scratch)
+               jit_addr(tmp, tmp, tmp2);
+
+       lightrec_free_reg(reg_cache, tmp2);
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+       jit_new_node_www(code, imm, tmp, rt);
+
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, tmp);
+}
+
+/* Inline store that also invalidates the code LUT entry covering the
+ * written address, so self-modifying code forces a recompile. */
+static void rec_store_direct(const struct block *block, const struct opcode *op,
+                            jit_code_t code)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *to_not_ram, *to_end;
+       u8 tmp, tmp2, tmp3, rs, rt;
+
+       jit_note(__FILE__, __LINE__);
+
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+       /* Guest register 0 is always zero -- used below as the NULL
+        * value stored into the LUT. */
+       tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0);
+
+       /* Convert to KUNSEG and avoid RAM mirrors */
+       if (op->i.imm) {
+               jit_addi(tmp2, rs, (s16)op->i.imm);
+               jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1));
+       } else {
+               jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1));
+       }
+
+       lightrec_free_reg(reg_cache, rs);
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       /* Addresses above RAM (scratchpad/HW) skip the LUT write. */
+       to_not_ram = jit_bgti(tmp2, RAM_SIZE);
+
+       /* Compute the offset to the code LUT */
+       jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
+#if __WORDSIZE == 64
+       /* LUT entries are pointer-sized: double the index on 64-bit. */
+       jit_lshi(tmp, tmp, 1);
+#endif
+       jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
+
+       /* Write NULL to the code LUT to invalidate any block that's there */
+       jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
+
+       /* Select the host base offset for the actual store. */
+       if (state->offset_ram != state->offset_scratch) {
+               jit_movi(tmp, state->offset_ram);
+
+               to_end = jit_jmpi();
+       }
+
+       jit_patch(to_not_ram);
+
+       if (state->offset_ram || state->offset_scratch)
+               jit_movi(tmp, state->offset_scratch);
+
+       /* to_end is only created (and patched) when the two offsets
+        * differ -- the guards must stay in sync. */
+       if (state->offset_ram != state->offset_scratch)
+               jit_patch(to_end);
+
+       if (state->offset_ram || state->offset_scratch)
+               jit_addr(tmp2, tmp2, tmp);
+
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp3);
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+       jit_new_node_www(code, 0, tmp2, rt);
+
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, tmp2);
+}
+
+/* Store dispatcher: pick the inline fast path (with or without code
+ * LUT invalidation) based on the optimizer's opcode flags, otherwise
+ * fall back to the C handler via rec_io(). */
+static void rec_store(const struct block *block, const struct opcode *op,
+                    jit_code_t code)
+{
+       if (op->flags & LIGHTREC_NO_INVALIDATE) {
+               rec_store_direct_no_invalidate(block, op, code);
+       } else if (op->flags & LIGHTREC_DIRECT_IO) {
+               /* If only DMA can invalidate blocks, the per-store LUT
+                * write is unnecessary. */
+               if (block->state->invalidate_from_dma_only)
+                       rec_store_direct_no_invalidate(block, op, code);
+               else
+                       rec_store_direct(block, op, code);
+       } else {
+               rec_io(block, op, true, false);
+       }
+}
+
+/* Store wrappers.  SWL/SWR (unaligned stores) and SWC2 (coprocessor
+ * store) always go through the C handler; rt is an input for all
+ * except SWC2, which reads the GTE register instead. */
+static void rec_SB(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_store(block, op, jit_code_stxi_c);
+}
+
+static void rec_SH(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_store(block, op, jit_code_stxi_s);
+}
+
+static void rec_SW(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_store(block, op, jit_code_stxi_i);
+}
+
+static void rec_SWL(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, true, false);
+}
+
+static void rec_SWR(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, true, false);
+}
+
+static void rec_SWC2(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, false, false);
+}
+
+/* Inline load from directly-mapped memory.  Decodes the guest address
+ * into one of three regions -- RAM (bit 28 clear), BIOS (bit 22 set),
+ * scratchpad -- then adds the matching host base offset and performs
+ * the load.  The destination register rt doubles as address scratch. */
+static void rec_load_direct(const struct block *block, const struct opcode *op,
+                           jit_code_t code)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
+       u8 tmp, rs, rt, addr_reg;
+       s16 imm;
+
+       /* Load to $zero is a no-op. */
+       if (!op->i.rt)
+               return;
+
+       jit_note(__FILE__, __LINE__);
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+       rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+
+       /* If all regions share one offset and mirrors are mapped (or
+        * there is no immediate), the immediate can stay in the load
+        * itself; otherwise fold it into the address first. */
+       if ((state->offset_ram == state->offset_bios &&
+           state->offset_ram == state->offset_scratch &&
+           state->mirrors_mapped) || !op->i.imm) {
+               addr_reg = rs;
+               imm = (s16)op->i.imm;
+       } else {
+               jit_addi(rt, rs, (s16)op->i.imm);
+               addr_reg = rt;
+               imm = 0;
+
+               if (op->i.rs != op->i.rt)
+                       lightrec_free_reg(reg_cache, rs);
+       }
+
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       if (state->offset_ram == state->offset_bios &&
+           state->offset_ram == state->offset_scratch) {
+               /* Single shared offset: no branches needed, only KUNSEG
+                * conversion / de-mirroring. */
+               if (!state->mirrors_mapped) {
+                       jit_andi(tmp, addr_reg, BIT(28));
+                       jit_rshi_u(tmp, tmp, 28 - 22);
+                       jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
+                       jit_andr(rt, addr_reg, tmp);
+               } else {
+                       jit_andi(rt, addr_reg, 0x1fffffff);
+               }
+
+               if (state->offset_ram)
+                       jit_movi(tmp, state->offset_ram);
+       } else {
+               to_not_ram = jit_bmsi(addr_reg, BIT(28));
+
+               /* Convert to KUNSEG and avoid RAM mirrors */
+               jit_andi(rt, addr_reg, RAM_SIZE - 1);
+
+               if (state->offset_ram)
+                       jit_movi(tmp, state->offset_ram);
+
+               to_end = jit_jmpi();
+
+               jit_patch(to_not_ram);
+
+               /* to_not_bios only exists when BIOS and scratchpad use
+                * different offsets -- guards below must stay in sync. */
+               if (state->offset_bios != state->offset_scratch)
+                       to_not_bios = jit_bmci(addr_reg, BIT(22));
+
+               /* Convert to KUNSEG */
+               jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1));
+
+               jit_movi(tmp, state->offset_bios);
+
+               if (state->offset_bios != state->offset_scratch) {
+                       to_end2 = jit_jmpi();
+
+                       jit_patch(to_not_bios);
+
+                       /* Convert to KUNSEG */
+                       jit_andi(rt, addr_reg, 0x1f800fff);
+
+                       if (state->offset_scratch)
+                               jit_movi(tmp, state->offset_scratch);
+
+                       jit_patch(to_end2);
+               }
+
+               jit_patch(to_end);
+       }
+
+       /* All-zero offsets mean guest addresses are host addresses. */
+       if (state->offset_ram || state->offset_bios || state->offset_scratch)
+               jit_addr(rt, rt, tmp);
+
+       jit_new_node_www(code, rt, rt, imm);
+
+       lightrec_free_reg(reg_cache, addr_reg);
+       lightrec_free_reg(reg_cache, rt);
+       lightrec_free_reg(reg_cache, tmp);
+}
+
+/* Load dispatcher: inline fast path for opcodes tagged DIRECT_IO,
+ * otherwise the C handler via rec_io() (rt written by the access). */
+static void rec_load(const struct block *block, const struct opcode *op,
+                   jit_code_t code)
+{
+       if (op->flags & LIGHTREC_DIRECT_IO)
+               rec_load_direct(block, op, code);
+       else
+               rec_io(block, op, false, true);
+}
+
+/* Load wrappers.  LWL/LWR (unaligned loads, which also read rt) and
+ * LWC2 (coprocessor load) always go through the C handler. */
+static void rec_LB(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_load(block, op, jit_code_ldxi_c);
+}
+
+static void rec_LBU(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_load(block, op, jit_code_ldxi_uc);
+}
+
+static void rec_LH(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_load(block, op, jit_code_ldxi_s);
+}
+
+static void rec_LHU(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_load(block, op, jit_code_ldxi_us);
+}
+
+static void rec_LWL(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, true, true);
+}
+
+static void rec_LWR(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, true, true);
+}
+
+static void rec_LW(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_load(block, op, jit_code_ldxi_i);
+}
+
+static void rec_LWC2(const struct block *block, const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_io(block, op, false, false);
+}
+
+/* BREAK/SYSCALL: call the state's C exception handler, then terminate
+ * the block (the handler presumably redirects the PC -- confirm in
+ * lightrec.c). */
+static void rec_break_syscall(const struct block *block,
+                             const struct opcode *op, u32 pc, bool is_break)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u32 offset;
+       u8 tmp;
+
+       jit_note(__FILE__, __LINE__);
+
+       if (is_break)
+               offset = offsetof(struct lightrec_state, break_func);
+       else
+               offset = offsetof(struct lightrec_state, syscall_func);
+
+       /* Load the handler pointer from the state and call it. */
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+       jit_ldxi(tmp, LIGHTREC_REG_STATE, offset);
+       jit_callr(tmp);
+       lightrec_free_reg(reg_cache, tmp);
+
+       /* The call may have clobbered cached registers. */
+       lightrec_regcache_mark_live(reg_cache, _jit);
+
+       /* TODO: the return address should be "pc - 4" if we're a delay slot */
+       lightrec_emit_end_of_block(block, op, pc, -1, pc, 31, 0, true);
+}
+
+/* Exception-raising wrappers; the boolean selects BREAK vs SYSCALL. */
+static void rec_special_SYSCALL(const struct block *block,
+                               const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_break_syscall(block, op, pc, false);
+}
+
+static void rec_special_BREAK(const struct block *block,
+                             const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_break_syscall(block, op, pc, true);
+}
+
+/* Move-from-coprocessor (MFC0/CFC0/...): call the state's mfc_func C
+ * handler with the raw opcode word in JIT_R0.  rt is flushed and
+ * discarded beforehand, since the handler writes the result directly
+ * to the register file in memory. */
+static void rec_mfc(const struct block *block, const struct opcode *op)
+{
+       u8 tmp, tmp2;
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+
+       jit_note(__FILE__, __LINE__);
+
+       /* Argument is pinned to JIT_R0 for the C handler. */
+       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, mfc_func));
+
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+
+       jit_movi(tmp, op->opcode);
+       jit_callr(tmp2);
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp2);
+
+       /* The call may have clobbered cached registers. */
+       lightrec_regcache_mark_live(reg_cache, _jit);
+}
+
+/* Move-to-coprocessor (MTC0/CTC0/MTC2/CTC2): emit a call to the state's
+ * mtc_func callback with the raw opcode word in JIT_R0. Writes to CP0
+ * Status (r12) or Cause (r13) terminate the block so the emulator can
+ * re-check interrupt state. */
+static void rec_mtc(const struct block *block, const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 tmp, tmp2;
+
+       jit_note(__FILE__, __LINE__);
+
+       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, mtc_func));
+
+       /* Flush rs/rt to memory (without invalidating) so the callback
+        * reads up-to-date values. */
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+
+       jit_movi(tmp, op->opcode);
+       jit_callr(tmp2);
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp2);
+
+       lightrec_regcache_mark_live(reg_cache, _jit);
+
+       /* End the block after a write to CP0 Status/Cause, mirroring the
+        * interpreter's int_ctc behaviour. */
+       if (op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
+               lightrec_emit_end_of_block(block, op, pc, -1, pc + 4, 0, 0, true);
+}
+
+/* Thin per-opcode wrappers: all CP0/CP2 register moves funnel into the
+ * shared rec_mfc (reads) or rec_mtc (writes) emitters above. */
+static void rec_cp0_MFC0(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mfc(block, op);
+}
+
+static void rec_cp0_CFC0(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mfc(block, op);
+}
+
+static void rec_cp0_MTC0(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mtc(block, op, pc);
+}
+
+static void rec_cp0_CTC0(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mtc(block, op, pc);
+}
+
+static void rec_cp2_basic_MFC2(const struct block *block,
+                              const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mfc(block, op);
+}
+
+static void rec_cp2_basic_CFC2(const struct block *block,
+                              const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mfc(block, op);
+}
+
+static void rec_cp2_basic_MTC2(const struct block *block,
+                              const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mtc(block, op, pc);
+}
+
+static void rec_cp2_basic_CTC2(const struct block *block,
+                              const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_mtc(block, op, pc);
+}
+
+/* RFE (return from exception): emit a call to the state's rfe_func
+ * callback, which performs the CP0 Status bit rotation natively. */
+static void rec_cp0_RFE(const struct block *block,
+                       const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       jit_state_t *_jit = block->_jit;
+       u8 tmp;
+
+       jit_name(__func__);
+       jit_note(__FILE__, __LINE__);
+
+       tmp = lightrec_alloc_reg_temp(state->reg_cache, _jit);
+       jit_ldxi(tmp, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, rfe_func));
+       jit_callr(tmp);
+       lightrec_free_reg(state->reg_cache, tmp);
+
+       lightrec_regcache_mark_live(state->reg_cache, _jit);
+}
+
+/* Generic coprocessor command: forward the raw opcode word (in JIT_R0)
+ * to the state's cp_func callback. Used as the fallback for CP0/CP2
+ * sub-opcodes with no dedicated emitter. */
+static void rec_CP(const struct block *block, const struct opcode *op, u32 pc)
+{
+       struct regcache *reg_cache = block->state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 tmp, tmp2;
+
+       jit_name(__func__);
+       jit_note(__FILE__, __LINE__);
+
+       tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+       tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, cp_func));
+
+       jit_movi(tmp, op->opcode);
+       jit_callr(tmp2);
+       lightrec_free_reg(reg_cache, tmp);
+       lightrec_free_reg(reg_cache, tmp2);
+
+       lightrec_regcache_mark_live(reg_cache, _jit);
+}
+
+/* Meta opcode (inserted by the optimizer): write back and evict the
+ * cached copy of register rs. Emits no guest-visible code. */
+static void rec_meta_unload(const struct block *block,
+                           const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+
+       jit_name(__func__);
+       jit_note(__FILE__, __LINE__);
+
+       pr_debug("Unloading reg %s\n", lightrec_reg_name(op->i.rs));
+       lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
+}
+
+/* Meta branch-if-zero opcodes: compare rs against the immediate 0.
+ * Note the jit codes are inverted (bnei for BEQZ, beqi for BNEZ) —
+ * presumably rec_b branches over the taken path; confirm against rec_b. */
+static void rec_meta_BEQZ(const struct block *block,
+                         const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_bnei, 0, false, true);
+}
+
+static void rec_meta_BNEZ(const struct block *block,
+                         const struct opcode *op, u32 pc)
+{
+       _jit_name(block->_jit, __func__);
+       rec_b(block, op, pc, jit_code_beqi, 0, false, true);
+}
+
+/* Meta register-to-register move (optimizer-synthesized): rd = rs,
+ * with rs == 0 special-cased to load the constant 0. On 64-bit hosts
+ * the value is sign-extended from 32 bits via jit_extr_i. */
+static void rec_meta_MOV(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 rs, rd;
+
+       _jit_name(block->_jit, __func__);
+       jit_note(__FILE__, __LINE__);
+       /* $zero never needs a source register allocation. */
+       rs = op->r.rs ? lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs) : 0;
+       rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+
+       if (op->r.rs == 0) {
+               jit_movi(rd, 0);
+       } else {
+#if __WORDSIZE == 32
+               jit_movr(rd, rs);
+#else
+               jit_extr_i(rd, rs);
+#endif
+       }
+
+       lightrec_free_reg(state->reg_cache, rs);
+       lightrec_free_reg(state->reg_cache, rd);
+}
+
+/* Meta sync point: flush the pending cycle count into the cycle counter
+ * register, write back and reset the register cache, and record a local
+ * branch target label at this opcode's offset. */
+static void rec_meta_sync(const struct block *block,
+                         const struct opcode *op, u32 pc)
+{
+       struct lightrec_state *state = block->state;
+       struct lightrec_branch_target *target;
+       jit_state_t *_jit = block->_jit;
+
+       jit_name(__func__);
+       jit_note(__FILE__, __LINE__);
+
+       /* Commit the cycles accumulated at compile time. */
+       jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
+       state->cycles = 0;
+
+       /* All cached registers must be in memory at a branch target. */
+       lightrec_storeback_regs(state->reg_cache, _jit);
+       lightrec_regcache_reset(state->reg_cache);
+
+       pr_debug("Adding branch target at offset 0x%x\n",
+                op->offset << 2);
+       target = &state->targets[state->nb_targets++];
+       target->offset = op->offset;
+       target->label = jit_label();
+}
+
+/* Primary dispatch table, indexed by the 6-bit main opcode field.
+ * NULL entries fall through to unknown_opcode() in lightrec_rec_opcode().
+ * The OP_META_* entries are synthetic opcodes added by the optimizer. */
+static const lightrec_rec_func_t rec_standard[64] = {
+       [OP_SPECIAL]            = rec_SPECIAL,
+       [OP_REGIMM]             = rec_REGIMM,
+       [OP_J]                  = rec_J,
+       [OP_JAL]                = rec_JAL,
+       [OP_BEQ]                = rec_BEQ,
+       [OP_BNE]                = rec_BNE,
+       [OP_BLEZ]               = rec_BLEZ,
+       [OP_BGTZ]               = rec_BGTZ,
+       [OP_ADDI]               = rec_ADDI,
+       [OP_ADDIU]              = rec_ADDIU,
+       [OP_SLTI]               = rec_SLTI,
+       [OP_SLTIU]              = rec_SLTIU,
+       [OP_ANDI]               = rec_ANDI,
+       [OP_ORI]                = rec_ORI,
+       [OP_XORI]               = rec_XORI,
+       [OP_LUI]                = rec_LUI,
+       [OP_CP0]                = rec_CP0,
+       [OP_CP2]                = rec_CP2,
+       [OP_LB]                 = rec_LB,
+       [OP_LH]                 = rec_LH,
+       [OP_LWL]                = rec_LWL,
+       [OP_LW]                 = rec_LW,
+       [OP_LBU]                = rec_LBU,
+       [OP_LHU]                = rec_LHU,
+       [OP_LWR]                = rec_LWR,
+       [OP_SB]                 = rec_SB,
+       [OP_SH]                 = rec_SH,
+       [OP_SWL]                = rec_SWL,
+       [OP_SW]                 = rec_SW,
+       [OP_SWR]                = rec_SWR,
+       [OP_LWC2]               = rec_LWC2,
+       [OP_SWC2]               = rec_SWC2,
+
+       [OP_META_REG_UNLOAD]    = rec_meta_unload,
+       [OP_META_BEQZ]          = rec_meta_BEQZ,
+       [OP_META_BNEZ]          = rec_meta_BNEZ,
+       [OP_META_MOV]           = rec_meta_MOV,
+       [OP_META_SYNC]          = rec_meta_sync,
+};
+
+/* Secondary dispatch tables for the SPECIAL, REGIMM, CP0 and CP2-basic
+ * sub-opcode spaces. NULL entries are handled by the dispatchers below
+ * (unknown_opcode() or the rec_CP fallback for coprocessor opcodes). */
+static const lightrec_rec_func_t rec_special[64] = {
+       [OP_SPECIAL_SLL]        = rec_special_SLL,
+       [OP_SPECIAL_SRL]        = rec_special_SRL,
+       [OP_SPECIAL_SRA]        = rec_special_SRA,
+       [OP_SPECIAL_SLLV]       = rec_special_SLLV,
+       [OP_SPECIAL_SRLV]       = rec_special_SRLV,
+       [OP_SPECIAL_SRAV]       = rec_special_SRAV,
+       [OP_SPECIAL_JR]         = rec_special_JR,
+       [OP_SPECIAL_JALR]       = rec_special_JALR,
+       [OP_SPECIAL_SYSCALL]    = rec_special_SYSCALL,
+       [OP_SPECIAL_BREAK]      = rec_special_BREAK,
+       [OP_SPECIAL_MFHI]       = rec_special_MFHI,
+       [OP_SPECIAL_MTHI]       = rec_special_MTHI,
+       [OP_SPECIAL_MFLO]       = rec_special_MFLO,
+       [OP_SPECIAL_MTLO]       = rec_special_MTLO,
+       [OP_SPECIAL_MULT]       = rec_special_MULT,
+       [OP_SPECIAL_MULTU]      = rec_special_MULTU,
+       [OP_SPECIAL_DIV]        = rec_special_DIV,
+       [OP_SPECIAL_DIVU]       = rec_special_DIVU,
+       [OP_SPECIAL_ADD]        = rec_special_ADD,
+       [OP_SPECIAL_ADDU]       = rec_special_ADDU,
+       [OP_SPECIAL_SUB]        = rec_special_SUB,
+       [OP_SPECIAL_SUBU]       = rec_special_SUBU,
+       [OP_SPECIAL_AND]        = rec_special_AND,
+       [OP_SPECIAL_OR]         = rec_special_OR,
+       [OP_SPECIAL_XOR]        = rec_special_XOR,
+       [OP_SPECIAL_NOR]        = rec_special_NOR,
+       [OP_SPECIAL_SLT]        = rec_special_SLT,
+       [OP_SPECIAL_SLTU]       = rec_special_SLTU,
+};
+
+static const lightrec_rec_func_t rec_regimm[64] = {
+       [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
+       [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
+       [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
+       [OP_REGIMM_BGEZAL]      = rec_regimm_BGEZAL,
+};
+
+static const lightrec_rec_func_t rec_cp0[64] = {
+       [OP_CP0_MFC0]           = rec_cp0_MFC0,
+       [OP_CP0_CFC0]           = rec_cp0_CFC0,
+       [OP_CP0_MTC0]           = rec_cp0_MTC0,
+       [OP_CP0_CTC0]           = rec_cp0_CTC0,
+       [OP_CP0_RFE]            = rec_cp0_RFE,
+};
+
+static const lightrec_rec_func_t rec_cp2_basic[64] = {
+       [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
+       [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
+       [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
+       [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
+};
+
+/* Dispatcher for the SPECIAL sub-opcode space; unmatched entries are
+ * reported via unknown_opcode(). */
+static void rec_SPECIAL(const struct block *block,
+                       const struct opcode *op, u32 pc)
+{
+       lightrec_rec_func_t f = rec_special[op->r.op];
+       if (likely(f))
+               (*f)(block, op, pc);
+       else
+               unknown_opcode(block, op, pc);
+}
+
+/* Dispatcher for REGIMM opcodes; the sub-opcode lives in the rt field. */
+static void rec_REGIMM(const struct block *block,
+                      const struct opcode *op, u32 pc)
+{
+       lightrec_rec_func_t f = rec_regimm[op->r.rt];
+       if (likely(f))
+               (*f)(block, op, pc);
+       else
+               unknown_opcode(block, op, pc);
+}
+
+/* Dispatcher for CP0 opcodes; unmatched entries fall back to the generic
+ * coprocessor-command emitter rec_CP. */
+static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc)
+{
+       lightrec_rec_func_t f = rec_cp0[op->r.rs];
+       if (likely(f))
+               (*f)(block, op, pc);
+       else
+               rec_CP(block, op, pc);
+}
+
+/* Dispatcher for CP2 (GTE) opcodes: register moves go through the
+ * "basic" table, everything else is a GTE command handled by rec_CP. */
+static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc)
+{
+       if (op->r.op == OP_CP2_BASIC) {
+               lightrec_rec_func_t f = rec_cp2_basic[op->r.rs];
+               if (likely(f)) {
+                       (*f)(block, op, pc);
+                       return;
+               }
+       }
+
+       rec_CP(block, op, pc);
+}
+
+/* Public entry point of the emitter: recompile one opcode of a block. */
+void lightrec_rec_opcode(const struct block *block,
+                        const struct opcode *op, u32 pc)
+{
+       lightrec_rec_func_t f = rec_standard[op->i.op];
+       if (likely(f))
+               (*f)(block, op, pc);
+       else
+               unknown_opcode(block, op, pc);
+}
diff --git a/deps/lightrec/emitter.h b/deps/lightrec/emitter.h
new file mode 100644 (file)
index 0000000..57ededf
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __EMITTER_H__
+#define __EMITTER_H__
+
+#include "lightrec.h"
+
+struct block;
+struct opcode;
+
+/* Recompile a single opcode of the given block (defined in emitter.c). */
+void lightrec_rec_opcode(const struct block *block,
+                        const struct opcode *op, u32 pc);
+/* Emit the end-of-block epilogue for the given opcode. */
+void lightrec_emit_eob(const struct block *block,
+                      const struct opcode *op, u32 pc);
+
+#endif /* __EMITTER_H__ */
diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c
new file mode 100644 (file)
index 0000000..acc41ea
--- /dev/null
@@ -0,0 +1,1124 @@
+/*
+ * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "disassembler.h"
+#include "interpreter.h"
+#include "lightrec-private.h"
+#include "optimizer.h"
+#include "regcache.h"
+
+#include <stdbool.h>
+
+struct interpreter;
+
+/* Forward declarations: these handlers are referenced from the dispatch
+ * tables before they are defined. */
+static u32 int_CP0(struct interpreter *inter);
+static u32 int_CP2(struct interpreter *inter);
+static u32 int_SPECIAL(struct interpreter *inter);
+static u32 int_REGIMM(struct interpreter *inter);
+static u32 int_branch(struct interpreter *inter, u32 pc,
+                     union code code, bool branch);
+
+/* Every opcode handler takes the interpreter context and returns the
+ * next PC (or tail-calls into the next handler via jump_next/jump_skip). */
+typedef u32 (*lightrec_int_func_t)(struct interpreter *inter);
+
+static const lightrec_int_func_t int_standard[64];
+
+/* Per-invocation interpreter context. */
+struct interpreter {
+       struct lightrec_state *state;
+       struct block *block;    /* block being interpreted; NULL for synthesized ops */
+       struct opcode *op;      /* opcode currently being executed */
+       u32 cycles;             /* cycles accumulated since the last flush */
+       bool delay_slot;        /* true while executing a branch delay slot */
+};
+
+/* Invoke an opcode handler. Kept as a helper so the tail-call chain is
+ * explicit in one place. */
+static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
+{
+       return (*func)(inter);
+}
+
+/* Advance to the next opcode in the block and execute it, without
+ * accounting cycles for the current one. */
+static inline u32 jump_skip(struct interpreter *inter)
+{
+       inter->op = inter->op->next;
+
+       return execute(int_standard[inter->op->i.op], inter);
+}
+
+/* Account the current opcode's cycles, then continue with the next one.
+ * Returns 0 instead when running inside a delay slot: the caller
+ * (int_delay_slot) resumes control flow itself. */
+static inline u32 jump_next(struct interpreter *inter)
+{
+       inter->cycles += lightrec_cycles_of_opcode(inter->op->c);
+
+       if (unlikely(inter->delay_slot))
+               return 0;
+
+       return jump_skip(inter);
+}
+
+/* Like jump_next, but skips one extra opcode — used after a non-taken
+ * branch whose delay slot has already been executed. */
+static inline u32 jump_after_branch(struct interpreter *inter)
+{
+       inter->cycles += lightrec_cycles_of_opcode(inter->op->c);
+
+       if (unlikely(inter->delay_slot))
+               return 0;
+
+       inter->op = inter->op->next;
+
+       return jump_skip(inter);
+}
+
+/* Flush the accumulated cycle count into state->current_cycle before a
+ * branch, pre-counting the branch (and its delay slot, when one will be
+ * executed). inter->cycles is set negative so that the normal accounting
+ * in jump_next() nets out to zero for those pre-counted opcodes. */
+static void update_cycles_before_branch(struct interpreter *inter)
+{
+       u32 cycles;
+
+       if (!inter->delay_slot) {
+               cycles = lightrec_cycles_of_opcode(inter->op->c);
+
+               if (has_delay_slot(inter->op->c) &&
+                   !(inter->op->flags & LIGHTREC_NO_DS))
+                       cycles += lightrec_cycles_of_opcode(inter->op->next->c);
+
+               inter->cycles += cycles;
+               inter->state->current_cycle += inter->cycles;
+               inter->cycles = -cycles;
+       }
+}
+
+/* Predict, from the current register values, whether the given branch
+ * opcode would be taken. Unconditional jumps (J/JAL/JR/JALR) are always
+ * "taken"; anything unrecognized falls through to false. */
+static bool is_branch_taken(const u32 *reg_cache, union code op)
+{
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               /* Only JR/JALR branch within the SPECIAL space. */
+               return op.r.op == OP_SPECIAL_JR || op.r.op == OP_SPECIAL_JALR;
+       case OP_J:
+       case OP_JAL:
+               return true;
+       case OP_BEQ:
+       case OP_META_BEQZ:
+               return reg_cache[op.r.rs] == reg_cache[op.r.rt];
+       case OP_BNE:
+       case OP_META_BNEZ:
+               return reg_cache[op.r.rs] != reg_cache[op.r.rt];
+       case OP_REGIMM:
+               switch (op.r.rt) {
+               case OP_REGIMM_BLTZ:
+               case OP_REGIMM_BLTZAL:
+                       return (s32)reg_cache[op.r.rs] < 0;
+               case OP_REGIMM_BGEZ:
+               case OP_REGIMM_BGEZAL:
+                       return (s32)reg_cache[op.r.rs] >= 0;
+               }
+               /* Unknown REGIMM sub-opcodes fall through to "not taken". */
+       default:
+               break;
+       }
+
+       return false;
+}
+
+/* Execute the delay slot of the current branch opcode and return the PC
+ * to continue from. Handles the awkward cases: load-delay interactions
+ * with the first opcode of the target block, branches located in delay
+ * slots ("impossible branches"), and RFE return-address adjustment.
+ * 'pc' is the branch target; 'branch' tells whether the branch is taken. */
+static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
+{
+       struct lightrec_state *state = inter->state;
+       u32 *reg_cache = state->native_reg_cache;
+       struct opcode new_op, *op = inter->op->next;
+       union code op_next;
+       struct interpreter inter2 = {
+               .state = state,
+               .cycles = inter->cycles,
+               .delay_slot = true,
+               .block = NULL,
+       };
+       bool run_first_op = false, dummy_ld = false, save_rs = false,
+            load_in_ds, branch_in_ds = false, branch_at_addr = false,
+            branch_taken;
+       u32 old_rs, new_rs, new_rt;
+       u32 next_pc, ds_next_pc;
+       u32 cause, epc;
+
+       if (op->i.op == OP_CP0 && op->r.rs == OP_CP0_RFE) {
+               /* When an IRQ happens, the PSX exception handlers (when done)
+                * will jump back to the instruction that was executed right
+                * before the IRQ, unless it was a GTE opcode; in that case, it
+                * jumps to the instruction right after.
+                * Since we will never handle the IRQ right after a GTE opcode,
+                * but on branch boundaries, we need to adjust the return
+                * address so that the GTE opcode is effectively executed.
+                */
+               cause = (*state->ops.cop0_ops.cfc)(state, 13);
+               epc = (*state->ops.cop0_ops.cfc)(state, 14);
+
+               if (!(cause & 0x7c) && epc == pc - 4)
+                       pc -= 4;
+       }
+
+       if (inter->delay_slot) {
+               /* The branch opcode was in a delay slot of another branch
+                * opcode. Just return the target address of the second
+                * branch. */
+               return pc;
+       }
+
+       /* An opcode located in the delay slot performing a delayed read
+        * requires special handling; we will always resort to using the
+        * interpreter in that case.
+        * Same goes for when we have a branch in a delay slot of another
+        * branch. */
+       load_in_ds = load_in_delay_slot(op->c);
+       branch_in_ds = has_delay_slot(op->c);
+
+       if (branch) {
+               if (load_in_ds || branch_in_ds)
+                       op_next = lightrec_read_opcode(state, pc);
+
+               if (load_in_ds) {
+                       /* Verify that the next block actually reads the
+                        * destination register of the delay slot opcode. */
+                       run_first_op = opcode_reads_register(op_next, op->r.rt);
+               }
+
+               if (branch_in_ds) {
+                       run_first_op = true;
+                       next_pc = pc + 4;
+               }
+
+               if (load_in_ds && run_first_op) {
+                       next_pc = pc + 4;
+
+                       /* If the first opcode of the next block writes the
+                        * regiser used as the address for the load, we need to
+                        * reset to the old value after it has been executed,
+                        * then restore the new value after the delay slot
+                        * opcode has been executed. */
+                       save_rs = opcode_reads_register(op->c, op->r.rs) &&
+                               opcode_writes_register(op_next, op->r.rs);
+                       if (save_rs)
+                               old_rs = reg_cache[op->r.rs];
+
+                       /* If both the first opcode of the next block and the
+                        * delay slot opcode write to the same register, the
+                        * value written by the delay slot opcode is
+                        * discarded. */
+                       dummy_ld = opcode_writes_register(op_next, op->r.rt);
+               }
+
+               if (!run_first_op) {
+                       next_pc = pc;
+               } else if (has_delay_slot(op_next)) {
+                       /* The first opcode of the next block is a branch, so we
+                        * cannot execute it here, because of the load delay.
+                        * Just check whether or not the branch would be taken,
+                        * and save that info into the interpreter struct. */
+                       branch_at_addr = true;
+                       branch_taken = is_branch_taken(reg_cache, op_next);
+                       pr_debug("Target of impossible branch is a branch, "
+                                "%staken.\n", branch_taken ? "" : "not ");
+               } else {
+                       /* Build a standalone opcode for the target block's
+                        * first instruction and run it in a sub-context. */
+                       new_op.c = op_next;
+                       new_op.flags = 0;
+                       new_op.offset = 0;
+                       new_op.next = NULL;
+                       inter2.op = &new_op;
+
+                       /* Execute the first opcode of the next block */
+                       (*int_standard[inter2.op->i.op])(&inter2);
+
+                       if (save_rs) {
+                               new_rs = reg_cache[op->r.rs];
+                               reg_cache[op->r.rs] = old_rs;
+                       }
+
+                       inter->cycles += lightrec_cycles_of_opcode(op_next);
+               }
+       } else {
+               /* Branch not taken: continue right after the delay slot. */
+               next_pc = inter->block->pc
+                       + (inter->op->offset + 2) * sizeof(u32);
+       }
+
+       inter2.block = inter->block;
+       inter2.op = op;
+       inter2.cycles = inter->cycles;
+
+       if (dummy_ld)
+               new_rt = reg_cache[op->r.rt];
+
+       /* Execute delay slot opcode */
+       if (branch_at_addr)
+               ds_next_pc = int_branch(&inter2, pc, op_next, branch_taken);
+       else
+               ds_next_pc = (*int_standard[inter2.op->i.op])(&inter2);
+
+       if (branch_at_addr && !branch_taken) {
+               /* If the branch at the target of the branch opcode is not
+                * taken, we jump to its delay slot */
+               next_pc = pc + sizeof(u32);
+       } else if (!branch && branch_in_ds) {
+               next_pc = ds_next_pc;
+       }
+
+       /* Undo/redo the register shuffling set up for the load-delay case. */
+       if (save_rs)
+               reg_cache[op->r.rs] = new_rs;
+       if (dummy_ld)
+               reg_cache[op->r.rt] = new_rt;
+
+       inter->cycles += lightrec_cycles_of_opcode(op->c);
+
+       if (branch_at_addr && branch_taken) {
+               /* If the branch at the target of the branch opcode is taken,
+                * we execute its delay slot here, and jump to its target
+                * address. */
+               op_next = lightrec_read_opcode(state, pc + 4);
+
+               new_op.c = op_next;
+               new_op.flags = 0;
+               new_op.offset = sizeof(u32);
+               new_op.next = NULL;
+               inter2.op = &new_op;
+               inter2.block = NULL;
+
+               inter->cycles += lightrec_cycles_of_opcode(op_next);
+
+               pr_debug("Running delay slot of branch at target of impossible "
+                        "branch\n");
+               (*int_standard[inter2.op->i.op])(&inter2);
+       }
+
+       return next_pc;
+}
+
+/* Fallback handler: warn and treat the opcode as a no-op. */
+static u32 int_unimplemented(struct interpreter *inter)
+{
+       pr_warn("Unimplemented opcode 0x%08x\n", inter->op->opcode);
+
+       return jump_next(inter);
+}
+
+/* J/JAL: compute the 256 MB-region absolute target; JAL also stores the
+ * return address (branch PC + 8) in $ra. */
+static u32 int_jump(struct interpreter *inter, bool link)
+{
+       struct lightrec_state *state = inter->state;
+       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);
+
+       if (link)
+               state->native_reg_cache[31] = old_pc + 8;
+
+       /* LIGHTREC_NO_DS: the optimizer determined there is no delay slot
+        * to execute (e.g. it was a NOP). */
+       if (inter->op->flags & LIGHTREC_NO_DS)
+               return pc;
+
+       return int_delay_slot(inter, pc, true);
+}
+
+static u32 int_J(struct interpreter *inter)
+{
+       return int_jump(inter, false);
+}
+
+static u32 int_JAL(struct interpreter *inter)
+{
+       return int_jump(inter, true);
+}
+
+/* JR/JALR: jump to the address held in rs; when link_reg is non-zero
+ * (JALR), store the return address there first. */
+static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
+{
+       struct lightrec_state *state = inter->state;
+       u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];
+
+       if (link_reg) {
+               old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+               state->native_reg_cache[link_reg] = old_pc + 8;
+       }
+
+       if (inter->op->flags & LIGHTREC_NO_DS)
+               return next_pc;
+
+       return int_delay_slot(inter, next_pc, true);
+}
+
+static u32 int_special_JR(struct interpreter *inter)
+{
+       return int_jumpr(inter, 0);
+}
+
+static u32 int_special_JALR(struct interpreter *inter)
+{
+       return int_jumpr(inter, inter->op->r.rd);
+}
+
+/* Follow a taken branch. For backward local branches (flagged
+ * LIGHTREC_LOCAL_BRANCH with a non-negative offset) the target lies
+ * inside the current block, so keep emulating it directly. */
+static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
+{
+       if (!inter->delay_slot &&
+           (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
+           (s16)inter->op->c.i.imm >= 0) {
+               next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
+               next_pc = lightrec_emulate_block(inter->block, next_pc);
+       }
+
+       return next_pc;
+}
+
+/* Common conditional-branch logic: compute the target from the signed
+ * 16-bit immediate, run the delay slot (unless LIGHTREC_NO_DS), and
+ * continue on the taken or fall-through path. */
+static u32 int_branch(struct interpreter *inter, u32 pc,
+                     union code code, bool branch)
+{
+       u32 next_pc = pc + 4 + ((s16)code.i.imm << 2);
+
+       update_cycles_before_branch(inter);
+
+       if (inter->op->flags & LIGHTREC_NO_DS) {
+               if (branch)
+                       return int_do_branch(inter, pc, next_pc);
+               else
+                       return jump_next(inter);
+       }
+
+       if (!inter->delay_slot)
+               next_pc = int_delay_slot(inter, next_pc, branch);
+
+       if (branch)
+               return int_do_branch(inter, pc, next_pc);
+
+       /* LIGHTREC_EMULATE_BRANCH: return to the caller past the delay
+        * slot instead of continuing within this block. */
+       if (inter->op->flags & LIGHTREC_EMULATE_BRANCH)
+               return pc + 8;
+       else
+               return jump_after_branch(inter);
+}
+
+/* BEQ/BNE: compare rs and rt; 'bne' inverts the equality test. */
+static u32 int_beq(struct interpreter *inter, bool bne)
+{
+       u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+
+       rs = inter->state->native_reg_cache[inter->op->i.rs];
+       rt = inter->state->native_reg_cache[inter->op->i.rt];
+
+       return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
+}
+
+static u32 int_BEQ(struct interpreter *inter)
+{
+       return int_beq(inter, false);
+}
+
+static u32 int_BNE(struct interpreter *inter)
+{
+       return int_beq(inter, true);
+}
+
+/* Shared handler for the sign-compare branches. 'lt' inverts the test
+ * (BLTZ/BLEZ vs BGEZ/BGTZ); 'regimm' makes zero count as "taken" for the
+ * non-inverted test (>= 0 vs > 0); 'link' writes the return address to
+ * $ra (BLTZAL/BGEZAL — note the link happens before the condition is
+ * evaluated, matching MIPS behaviour). */
+static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
+{
+       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       s32 rs;
+
+       if (link)
+               inter->state->native_reg_cache[31] = old_pc + 8;
+
+       rs = (s32)inter->state->native_reg_cache[inter->op->i.rs];
+
+       return int_branch(inter, old_pc, inter->op->c,
+                         ((regimm && !rs) || rs > 0) ^ lt);
+}
+
+static u32 int_regimm_BLTZ(struct interpreter *inter)
+{
+       return int_bgez(inter, false, true, true);
+}
+
+static u32 int_regimm_BGEZ(struct interpreter *inter)
+{
+       return int_bgez(inter, false, false, true);
+}
+
+static u32 int_regimm_BLTZAL(struct interpreter *inter)
+{
+       return int_bgez(inter, true, true, true);
+}
+
+static u32 int_regimm_BGEZAL(struct interpreter *inter)
+{
+       return int_bgez(inter, true, false, true);
+}
+
+static u32 int_BLEZ(struct interpreter *inter)
+{
+       return int_bgez(inter, false, true, false);
+}
+
+static u32 int_BGTZ(struct interpreter *inter)
+{
+       return int_bgez(inter, false, false, false);
+}
+
+/* MFC/CFC: read a coprocessor register via lightrec_mfc and store it in
+ * rt (writes to $zero are discarded). */
+static u32 int_cfc(struct interpreter *inter)
+{
+       struct lightrec_state *state = inter->state;
+       const struct opcode *op = inter->op;
+       u32 val;
+
+       val = lightrec_mfc(state, op->c);
+
+       if (likely(op->r.rt))
+               state->native_reg_cache[op->r.rt] = val;
+
+       return jump_next(inter);
+}
+
+/* MTC/CTC: write rt's value to a coprocessor register. */
+static u32 int_ctc(struct interpreter *inter)
+{
+       struct lightrec_state *state = inter->state;
+       const struct opcode *op = inter->op;
+
+       lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]);
+
+       /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
+        * return early so that the emulator will be able to check software
+        * interrupt status. */
+       if (op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
+               return inter->block->pc + (op->offset + 1) * sizeof(u32);
+       else
+               return jump_next(inter);
+}
+
+/* RFE: pop the CP0 Status interrupt/mode stack by shifting the low
+ * bit pairs down (KUp/IEp -> KUc/IEc, KUo/IEo -> KUp/IEp). */
+static u32 int_cp0_RFE(struct interpreter *inter)
+{
+       struct lightrec_state *state = inter->state;
+       u32 status;
+
+       /* Read CP0 Status register (r12) */
+       status = state->ops.cop0_ops.mfc(state, 12);
+
+       /* Switch the bits */
+       status = ((status & 0x3c) >> 2) | (status & ~0xf);
+
+       /* Write it back */
+       state->ops.cop0_ops.ctc(state, 12, status);
+
+       return jump_next(inter);
+}
+
+/* Generic coprocessor command: bit 25 of the 26-bit payload selects CP2
+ * (GTE) vs CP0; the remaining bits are passed to the op callback. */
+static u32 int_CP(struct interpreter *inter)
+{
+       struct lightrec_state *state = inter->state;
+       const struct lightrec_cop_ops *ops;
+       const struct opcode *op = inter->op;
+
+       if ((op->j.imm >> 25) & 1)
+               ops = &state->ops.cop2_ops;
+       else
+               ops = &state->ops.cop0_ops;
+
+       (*ops->op)(state, (op->j.imm) & ~(1 << 25));
+
+       return jump_next(inter);
+}
+
+static u32 int_ADDI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = reg_cache[op->rs] + (s32)(s16)op->imm;
+
+       return jump_next(inter);
+}
+
+/* SLTI: rt = ((s32)rs < sign-extended imm) ? 1 : 0; skipped if rt is $zero */
+static u32 int_SLTI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = (s32)reg_cache[op->rs] < (s32)(s16)op->imm;
+
+       return jump_next(inter);
+}
+
+/* SLTIU: unsigned compare, but the immediate is sign-extended first,
+ * as mandated by the MIPS ISA */
+static u32 int_SLTIU(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = reg_cache[op->rs] < (u32)(s32)(s16)op->imm;
+
+       return jump_next(inter);
+}
+
+/* ANDI: bitwise AND with the zero-extended 16-bit immediate */
+static u32 int_ANDI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = reg_cache[op->rs] & op->imm;
+
+       return jump_next(inter);
+}
+
+/* ORI: bitwise OR with the zero-extended 16-bit immediate */
+static u32 int_ORI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = reg_cache[op->rs] | op->imm;
+
+       return jump_next(inter);
+}
+
+/* XORI: bitwise XOR with the zero-extended 16-bit immediate */
+static u32 int_XORI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               reg_cache[op->rt] = reg_cache[op->rs] ^ op->imm;
+
+       return jump_next(inter);
+}
+
+/* LUI: load the 16-bit immediate into the upper half of rt (low half 0).
+ * Guard against rt == 0 so that the emulated $zero register is never
+ * clobbered, consistent with the other immediate handlers above. */
+static u32 int_LUI(struct interpreter *inter)
+{
+       struct opcode_i *op = &inter->op->i;
+
+       if (likely(op->rt))
+               inter->state->native_reg_cache[op->rt] = op->imm << 16;
+
+       return jump_next(inter);
+}
+
+/* Common load/store handler: lightrec_rw() performs the memory access and
+ * tags the opcode's flags; for loads the returned value is written back to
+ * rt (unless rt is $zero). */
+static u32 int_io(struct interpreter *inter, bool is_load)
+{
+       struct opcode_i *op = &inter->op->i;
+       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 val;
+
+       val = lightrec_rw(inter->state, inter->op->c,
+                         reg_cache[op->rs], reg_cache[op->rt],
+                         &inter->op->flags);
+
+       if (is_load && op->rt)
+               reg_cache[op->rt] = val;
+
+       return jump_next(inter);
+}
+
+static u32 int_load(struct interpreter *inter)
+{
+       return int_io(inter, true);
+}
+
+/* Store handler: the common case goes through int_io(); stores flagged as
+ * self-modifying code additionally invalidate the following PC and exit
+ * the block, so the (possibly modified) remainder gets re-translated. */
+static u32 int_store(struct interpreter *inter)
+{
+       u32 next_pc;
+
+       if (likely(!(inter->op->flags & LIGHTREC_SMC)))
+               return int_io(inter, false);
+
+       lightrec_rw(inter->state, inter->op->c,
+                   inter->state->native_reg_cache[inter->op->i.rs],
+                   inter->state->native_reg_cache[inter->op->i.rt],
+                   &inter->op->flags);
+
+       next_pc = inter->block->pc + (inter->op->offset + 1) * 4;
+
+       /* Invalidate next PC, to force the rest of the block to be rebuilt */
+       lightrec_invalidate(inter->state, next_pc, 4);
+
+       return next_pc;
+}
+
+/* LWC2 writes to a COP2 register (handled inside lightrec_rw()), not to a
+ * GPR, hence is_load == false here */
+static u32 int_LWC2(struct interpreter *inter)
+{
+       return int_io(inter, false);
+}
+
+/* SLL: rd = rt << shamt. A fully-zero opcode encodes NOP.
+ * NOTE(review): unlike the ALU handlers below, the shift handlers do not
+ * skip writes when rd == 0 — confirm r0 cannot be clobbered here. */
+static u32 int_special_SLL(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       u32 rt;
+
+       if (op->opcode) { /* Handle NOPs */
+               rt = inter->state->native_reg_cache[op->r.rt];
+               inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm;
+       }
+
+       return jump_next(inter);
+}
+
+/* SRL: logical (zero-filling) right shift by the immediate shift amount */
+static u32 int_special_SRL(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       u32 rt = inter->state->native_reg_cache[op->r.rt];
+
+       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+
+       return jump_next(inter);
+}
+
+/* SRA: arithmetic right shift — rt is read as s32 so the sign bit is
+ * replicated (relies on the compiler implementing signed >> as
+ * arithmetic shift, which GCC/Clang do) */
+static u32 int_special_SRA(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       s32 rt = inter->state->native_reg_cache[op->r.rt];
+
+       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+
+       return jump_next(inter);
+}
+
+/* SLLV: variable left shift; only the low 5 bits of rs are used */
+static u32 int_special_SLLV(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       u32 rs = inter->state->native_reg_cache[op->r.rs];
+       u32 rt = inter->state->native_reg_cache[op->r.rt];
+
+       inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f);
+
+       return jump_next(inter);
+}
+
+/* SRLV: variable logical right shift; only the low 5 bits of rs are used */
+static u32 int_special_SRLV(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       u32 rs = inter->state->native_reg_cache[op->r.rs];
+       u32 rt = inter->state->native_reg_cache[op->r.rt];
+
+       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+
+       return jump_next(inter);
+}
+
+/* SRAV: variable arithmetic right shift (rt read as s32, see SRA) */
+static u32 int_special_SRAV(struct interpreter *inter)
+{
+       struct opcode *op = inter->op;
+       u32 rs = inter->state->native_reg_cache[op->r.rs];
+       s32 rt = inter->state->native_reg_cache[op->r.rt];
+
+       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+
+       return jump_next(inter);
+}
+
+/* SYSCALL / BREAK: record the corresponding exit flag and return the PC
+ * of this very instruction, so the caller can raise the exception there */
+static u32 int_syscall_break(struct interpreter *inter)
+{
+
+       if (inter->op->r.op == OP_SPECIAL_BREAK)
+               inter->state->exit_flags |= LIGHTREC_EXIT_BREAK;
+       else
+               inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
+
+       return inter->block->pc + inter->op->offset * sizeof(u32);
+}
+
+/* MFHI: copy the HI register into rd (skipped when rd is $zero) */
+static u32 int_special_MFHI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = reg_cache[REG_HI];
+
+       return jump_next(inter);
+}
+
+/* MTHI: copy rs into the HI register */
+static u32 int_special_MTHI(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+
+       reg_cache[REG_HI] = reg_cache[inter->op->r.rs];
+
+       return jump_next(inter);
+}
+
+/* MFLO: copy the LO register into rd (skipped when rd is $zero) */
+static u32 int_special_MFLO(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = reg_cache[REG_LO];
+
+       return jump_next(inter);
+}
+
+/* MTLO: copy rs into the LO register */
+static u32 int_special_MTLO(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+
+       reg_cache[REG_LO] = reg_cache[inter->op->r.rs];
+
+       return jump_next(inter);
+}
+
+/* MULT: signed 32x32 -> 64 multiply, low word to LO, high word to HI.
+ * The HI write is skipped when the optimizer proved only the low 32 bits
+ * are consumed (LIGHTREC_MULT32 flag). */
+static u32 int_special_MULT(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       s32 rs = reg_cache[inter->op->r.rs];
+       s32 rt = reg_cache[inter->op->r.rt];
+       u64 res = (s64)rs * (s64)rt;
+
+       if (!(inter->op->flags & LIGHTREC_MULT32))
+               reg_cache[REG_HI] = res >> 32;
+       reg_cache[REG_LO] = res;
+
+       return jump_next(inter);
+}
+
+/* MULTU: unsigned 32x32 -> 64 multiply; same LIGHTREC_MULT32 shortcut */
+static u32 int_special_MULTU(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 rs = reg_cache[inter->op->r.rs];
+       u32 rt = reg_cache[inter->op->r.rt];
+       u64 res = (u64)rs * (u64)rt;
+
+       if (!(inter->op->flags & LIGHTREC_MULT32))
+               reg_cache[REG_HI] = res >> 32;
+       reg_cache[REG_LO] = res;
+
+       return jump_next(inter);
+}
+
+/* DIV: signed 32-bit division, quotient to LO, remainder to HI.
+ * Mirrors the R3000A edge cases: division by zero leaves rs in HI and
+ * 1 or -1 in LO depending on the sign of rs; INT_MIN / -1 — which is
+ * undefined behaviour for C's '/' operator — yields INT_MIN with a
+ * remainder of 0, as on real hardware. */
+static u32 int_special_DIV(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       s32 rs = reg_cache[inter->op->r.rs];
+       s32 rt = reg_cache[inter->op->r.rt];
+       u32 lo, hi;
+
+       if (rt == 0) {
+               hi = rs;
+               lo = (rs < 0) * 2 - 1;
+       } else if (rs == (s32)0x80000000 && rt == -1) {
+               /* Avoid signed-overflow UB in the C division below */
+               hi = 0;
+               lo = 0x80000000;
+       } else {
+               lo = rs / rt;
+               hi = rs % rt;
+       }
+
+       reg_cache[REG_HI] = hi;
+       reg_cache[REG_LO] = lo;
+
+       return jump_next(inter);
+}
+
+/* DIVU: unsigned 32-bit division, quotient to LO, remainder to HI.
+ * Division by zero follows the R3000A: HI = rs, LO = 0xffffffff. */
+static u32 int_special_DIVU(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 rs = reg_cache[inter->op->r.rs];
+       u32 rt = reg_cache[inter->op->r.rt];
+       u32 lo, hi;
+
+       if (rt == 0) {
+               hi = rs;
+               lo = (u32)-1;
+       } else {
+               lo = rs / rt;
+               hi = rs % rt;
+       }
+
+       reg_cache[REG_HI] = hi;
+       reg_cache[REG_LO] = lo;
+
+       return jump_next(inter);
+}
+
+/* ADD / ADDU: rd = rs + rt. The ADD overflow trap is not emulated, so
+ * both opcodes share this handler (see the dispatch table below).
+ * All R-type ALU handlers skip the write when rd is $zero. */
+static u32 int_special_ADD(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       s32 rs = reg_cache[op->rs];
+       s32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs + rt;
+
+       return jump_next(inter);
+}
+
+/* SUB / SUBU: rd = rs - rt (no overflow trap, shared handler) */
+static u32 int_special_SUB(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs - rt;
+
+       return jump_next(inter);
+}
+
+/* AND: rd = rs & rt */
+static u32 int_special_AND(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs & rt;
+
+       return jump_next(inter);
+}
+
+/* OR: rd = rs | rt */
+static u32 int_special_OR(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs | rt;
+
+       return jump_next(inter);
+}
+
+/* XOR: rd = rs ^ rt */
+static u32 int_special_XOR(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs ^ rt;
+
+       return jump_next(inter);
+}
+
+/* NOR: rd = ~(rs | rt) */
+static u32 int_special_NOR(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = ~(rs | rt);
+
+       return jump_next(inter);
+}
+
+/* SLT: rd = ((s32)rs < (s32)rt) ? 1 : 0 */
+static u32 int_special_SLT(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       s32 rs = reg_cache[op->rs];
+       s32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs < rt;
+
+       return jump_next(inter);
+}
+
+/* SLTU: rd = ((u32)rs < (u32)rt) ? 1 : 0 */
+static u32 int_special_SLTU(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+       u32 rs = reg_cache[op->rs];
+       u32 rt = reg_cache[op->rt];
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = rs < rt;
+
+       return jump_next(inter);
+}
+
+/* Meta-opcode inserted by the optimizer: nothing to execute, just skip */
+static u32 int_META_SKIP(struct interpreter *inter)
+{
+       return jump_skip(inter);
+}
+
+/* Meta-opcode: plain register move rd = rs (skipped when rd is $zero) */
+static u32 int_META_MOV(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->native_reg_cache;
+       struct opcode_r *op = &inter->op->r;
+
+       if (likely(op->rd))
+               reg_cache[op->rd] = reg_cache[op->rs];
+
+       return jump_next(inter);
+}
+
+/* Meta-opcode: flush the cycle counter accumulated so far into the
+ * state's current_cycle, then continue */
+static u32 int_META_SYNC(struct interpreter *inter)
+{
+       inter->state->current_cycle += inter->cycles;
+       inter->cycles = 0;
+
+       return jump_skip(inter);
+}
+
+/* Top-level dispatch table, indexed by the 6-bit primary opcode field.
+ * NULL entries are routed to int_unimplemented by the dispatchers below. */
+static const lightrec_int_func_t int_standard[64] = {
+       [OP_SPECIAL]            = int_SPECIAL,
+       [OP_REGIMM]             = int_REGIMM,
+       [OP_J]                  = int_J,
+       [OP_JAL]                = int_JAL,
+       [OP_BEQ]                = int_BEQ,
+       [OP_BNE]                = int_BNE,
+       [OP_BLEZ]               = int_BLEZ,
+       [OP_BGTZ]               = int_BGTZ,
+       [OP_ADDI]               = int_ADDI,
+       [OP_ADDIU]              = int_ADDI,
+       [OP_SLTI]               = int_SLTI,
+       [OP_SLTIU]              = int_SLTIU,
+       [OP_ANDI]               = int_ANDI,
+       [OP_ORI]                = int_ORI,
+       [OP_XORI]               = int_XORI,
+       [OP_LUI]                = int_LUI,
+       [OP_CP0]                = int_CP0,
+       [OP_CP2]                = int_CP2,
+       [OP_LB]                 = int_load,
+       [OP_LH]                 = int_load,
+       [OP_LWL]                = int_load,
+       [OP_LW]                 = int_load,
+       [OP_LBU]                = int_load,
+       [OP_LHU]                = int_load,
+       [OP_LWR]                = int_load,
+       [OP_SB]                 = int_store,
+       [OP_SH]                 = int_store,
+       [OP_SWL]                = int_store,
+       [OP_SW]                 = int_store,
+       [OP_SWR]                = int_store,
+       [OP_LWC2]               = int_LWC2,
+       [OP_SWC2]               = int_store,
+
+       [OP_META_REG_UNLOAD]    = int_META_SKIP,
+       [OP_META_BEQZ]          = int_BEQ,
+       [OP_META_BNEZ]          = int_BNE,
+       [OP_META_MOV]           = int_META_MOV,
+       [OP_META_SYNC]          = int_META_SYNC,
+};
+
+/* SPECIAL sub-table, indexed by the 6-bit function field */
+static const lightrec_int_func_t int_special[64] = {
+       [OP_SPECIAL_SLL]        = int_special_SLL,
+       [OP_SPECIAL_SRL]        = int_special_SRL,
+       [OP_SPECIAL_SRA]        = int_special_SRA,
+       [OP_SPECIAL_SLLV]       = int_special_SLLV,
+       [OP_SPECIAL_SRLV]       = int_special_SRLV,
+       [OP_SPECIAL_SRAV]       = int_special_SRAV,
+       [OP_SPECIAL_JR]         = int_special_JR,
+       [OP_SPECIAL_JALR]       = int_special_JALR,
+       [OP_SPECIAL_SYSCALL]    = int_syscall_break,
+       [OP_SPECIAL_BREAK]      = int_syscall_break,
+       [OP_SPECIAL_MFHI]       = int_special_MFHI,
+       [OP_SPECIAL_MTHI]       = int_special_MTHI,
+       [OP_SPECIAL_MFLO]       = int_special_MFLO,
+       [OP_SPECIAL_MTLO]       = int_special_MTLO,
+       [OP_SPECIAL_MULT]       = int_special_MULT,
+       [OP_SPECIAL_MULTU]      = int_special_MULTU,
+       [OP_SPECIAL_DIV]        = int_special_DIV,
+       [OP_SPECIAL_DIVU]       = int_special_DIVU,
+       [OP_SPECIAL_ADD]        = int_special_ADD,
+       [OP_SPECIAL_ADDU]       = int_special_ADD,
+       [OP_SPECIAL_SUB]        = int_special_SUB,
+       [OP_SPECIAL_SUBU]       = int_special_SUB,
+       [OP_SPECIAL_AND]        = int_special_AND,
+       [OP_SPECIAL_OR]         = int_special_OR,
+       [OP_SPECIAL_XOR]        = int_special_XOR,
+       [OP_SPECIAL_NOR]        = int_special_NOR,
+       [OP_SPECIAL_SLT]        = int_special_SLT,
+       [OP_SPECIAL_SLTU]       = int_special_SLTU,
+};
+
+/* REGIMM sub-table, indexed by the rt field (branch variants) */
+static const lightrec_int_func_t int_regimm[64] = {
+       [OP_REGIMM_BLTZ]        = int_regimm_BLTZ,
+       [OP_REGIMM_BGEZ]        = int_regimm_BGEZ,
+       [OP_REGIMM_BLTZAL]      = int_regimm_BLTZAL,
+       [OP_REGIMM_BGEZAL]      = int_regimm_BGEZAL,
+};
+
+/* COP0 sub-table, indexed by the rs field */
+static const lightrec_int_func_t int_cp0[64] = {
+       [OP_CP0_MFC0]           = int_cfc,
+       [OP_CP0_CFC0]           = int_cfc,
+       [OP_CP0_MTC0]           = int_ctc,
+       [OP_CP0_CTC0]           = int_ctc,
+       [OP_CP0_RFE]            = int_cp0_RFE,
+};
+
+/* COP2 "basic" sub-table, indexed by the rs field */
+static const lightrec_int_func_t int_cp2_basic[64] = {
+       [OP_CP2_BASIC_MFC2]     = int_cfc,
+       [OP_CP2_BASIC_CFC2]     = int_cfc,
+       [OP_CP2_BASIC_MTC2]     = int_ctc,
+       [OP_CP2_BASIC_CTC2]     = int_ctc,
+};
+
+/* Second-level dispatch for SPECIAL opcodes; unknown functions fall back
+ * to int_unimplemented */
+static u32 int_SPECIAL(struct interpreter *inter)
+{
+       lightrec_int_func_t f = int_special[inter->op->r.op];
+       if (likely(f))
+               return execute(f, inter);
+       else
+               return int_unimplemented(inter);
+}
+
+/* Second-level dispatch for REGIMM branch opcodes */
+static u32 int_REGIMM(struct interpreter *inter)
+{
+       lightrec_int_func_t f = int_regimm[inter->op->r.rt];
+       if (likely(f))
+               return execute(f, inter);
+       else
+               return int_unimplemented(inter);
+}
+
+/* Second-level dispatch for COP0; anything not in the table is handled
+ * as a generic coprocessor operation */
+static u32 int_CP0(struct interpreter *inter)
+{
+       lightrec_int_func_t f = int_cp0[inter->op->r.rs];
+       if (likely(f))
+               return execute(f, inter);
+       else
+               return int_CP(inter);
+}
+
+/* Second-level dispatch for COP2: "basic" move ops go through the table,
+ * everything else (GTE commands) goes to the generic handler */
+static u32 int_CP2(struct interpreter *inter)
+{
+       if (inter->op->r.op == OP_CP2_BASIC) {
+               lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];
+               if (likely(f))
+                       return execute(f, inter);
+       }
+
+       return int_CP(inter);
+}
+
+/* Execute a single opcode via the top-level dispatch table */
+static u32 lightrec_int_op(struct interpreter *inter)
+{
+       return execute(int_standard[inter->op->i.op], inter);
+}
+
+/* Interpret a block starting at the given opcode; returns the PC of the
+ * next block to run, after accounting the consumed cycles in the state */
+static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
+{
+       struct interpreter inter;
+       u32 pc;
+
+       inter.block = block;
+       inter.state = block->state;
+       inter.op = op;
+       inter.cycles = 0;
+       inter.delay_slot = false;
+
+       pc = lightrec_int_op(&inter);
+
+       /* Add the cycles of the last branch */
+       inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
+
+       block->state->current_cycle += inter.cycles;
+
+       return pc;
+}
+
+/* Public entry point: interpret 'block' starting at address 'pc'.
+ * The start opcode is located by a linear walk of the opcode list;
+ * returns 0 if pc does not belong to the block. */
+u32 lightrec_emulate_block(struct block *block, u32 pc)
+{
+       u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
+       struct opcode *op;
+
+       for (op = block->opcode_list;
+            op && (op->offset < offset); op = op->next);
+       if (op)
+               return lightrec_emulate_block_list(block, op);
+
+       pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
+
+       return 0;
+}
diff --git a/deps/lightrec/interpreter.h b/deps/lightrec/interpreter.h
new file mode 100644 (file)
index 0000000..d4177b3
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __LIGHTREC_INTERPRETER_H__
+#define __LIGHTREC_INTERPRETER_H__
+
+#include "lightrec.h"
+
+struct block;
+
+u32 lightrec_emulate_block(struct block *block, u32 pc);
+
+#endif /* __LIGHTREC_INTERPRETER_H__ */
diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h
new file mode 100644 (file)
index 0000000..4c9c269
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2016 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __LIGHTREC_PRIVATE_H__
+#define __LIGHTREC_PRIVATE_H__
+
+#include "config.h"
+#include "disassembler.h"
+#include "lightrec.h"
+
+#if ENABLE_THREADED_COMPILER
+#include <stdatomic.h>
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0)
+#define BIT(x) (1 << (x))
+
+#ifdef __GNUC__
+#      define likely(x)       __builtin_expect(!!(x),1)
+#      define unlikely(x)     __builtin_expect(!!(x),0)
+#else
+#      define likely(x)       (x)
+#      define unlikely(x)     (x)
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#      define LE32TOH(x)       __builtin_bswap32(x)
+#      define HTOLE32(x)       __builtin_bswap32(x)
+#      define LE16TOH(x)       __builtin_bswap16(x)
+#      define HTOLE16(x)       __builtin_bswap16(x)
+#else
+#      define LE32TOH(x)       (x)
+#      define HTOLE32(x)       (x)
+#      define LE16TOH(x)       (x)
+#      define HTOLE16(x)       (x)
+#endif
+
+/* Flags for (struct block *)->flags */
+#define BLOCK_NEVER_COMPILE    BIT(0)
+#define BLOCK_SHOULD_RECOMPILE BIT(1)
+#define BLOCK_FULLY_TAGGED     BIT(2)
+
+#define RAM_SIZE       0x200000
+#define BIOS_SIZE      0x80000
+
+#define CODE_LUT_SIZE  ((RAM_SIZE + BIOS_SIZE) >> 2)
+
+/* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
+struct jit_node;
+struct jit_state;
+typedef struct jit_state jit_state_t;
+
+struct blockcache;
+struct recompiler;
+struct regcache;
+struct opcode;
+struct tinymm;
+
+/* One translated (or translatable) unit of guest code */
+struct block {
+       jit_state_t *_jit;              /* lightning JIT context */
+       struct lightrec_state *state;
+       struct opcode *opcode_list;     /* disassembled guest opcodes */
+       void (*function)(void);         /* entry point of the emitted code */
+       u32 pc;                         /* guest start address */
+       u32 hash;                       /* hash of the guest code, for outdated-block detection */
+#if ENABLE_THREADED_COMPILER
+       atomic_flag op_list_freed;
+#endif
+       unsigned int code_size;
+       u16 flags;                      /* BLOCK_* flags defined above */
+       u16 nb_ops;
+       const struct lightrec_mem_map *map;
+       struct block *next;
+};
+
+struct lightrec_branch {
+       struct jit_node *branch;
+       u32 target;
+};
+
+struct lightrec_branch_target {
+       struct jit_node *label;
+       u32 offset;
+};
+
+struct lightrec_state {
+       u32 native_reg_cache[34];
+       u32 next_pc;
+       u32 current_cycle;
+       u32 target_cycle;
+       u32 exit_flags;
+       struct block *dispatcher, *rw_wrapper, *rw_generic_wrapper,
+                    *mfc_wrapper, *mtc_wrapper, *rfe_wrapper, *cp_wrapper,
+                    *syscall_wrapper, *break_wrapper;
+       void *rw_func, *rw_generic_func, *mfc_func, *mtc_func, *rfe_func,
+            *cp_func, *syscall_func, *break_func;
+       struct jit_node *branches[512];
+       struct lightrec_branch local_branches[512];
+       struct lightrec_branch_target targets[512];
+       unsigned int nb_branches;
+       unsigned int nb_local_branches;
+       unsigned int nb_targets;
+       struct tinymm *tinymm;
+       struct blockcache *block_cache;
+       struct regcache *reg_cache;
+       struct recompiler *rec;
+       void (*eob_wrapper_func)(void);
+       void (*get_next_block)(void);
+       struct lightrec_ops ops;
+       unsigned int cycles;
+       unsigned int nb_maps;
+       const struct lightrec_mem_map *maps;
+       uintptr_t offset_ram, offset_bios, offset_scratch;
+       _Bool mirrors_mapped;
+       _Bool invalidate_from_dma_only;
+       void *code_lut[];
+};
+
+u32 lightrec_rw(struct lightrec_state *state, union code op,
+               u32 addr, u32 data, u16 *flags);
+
+void lightrec_free_block(struct block *block);
+
+void remove_from_code_lut(struct blockcache *cache, struct block *block);
+
+/* Strip the KSEG segment bits from a guest address: subtract the KSEG1
+ * base for 0xa0000000+ addresses, otherwise clear bit 31 (KSEG0) */
+static inline u32 kunseg(u32 addr)
+{
+       if (unlikely(addr >= 0xa0000000))
+               return addr - 0xa0000000;
+       else
+               return addr &~ 0x80000000;
+}
+
+/* Map an (unsegged) PC to its slot in the code lookup table; bit 28
+ * distinguishes BIOS (0x1fc00000 region) from RAM */
+static inline u32 lut_offset(u32 pc)
+{
+       if (pc & BIT(28))
+               return ((pc & (BIOS_SIZE - 1)) + RAM_SIZE) >> 2; // BIOS
+       else
+               return (pc & (RAM_SIZE - 1)) >> 2; // RAM
+}
+
+void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
+u32 lightrec_mfc(struct lightrec_state *state, union code op);
+
+union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
+
+struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
+int lightrec_compile_block(struct block *block);
+
+#endif /* __LIGHTREC_PRIVATE_H__ */
diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c
new file mode 100644 (file)
index 0000000..47c49c8
--- /dev/null
@@ -0,0 +1,1265 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "blockcache.h"
+#include "config.h"
+#include "debug.h"
+#include "disassembler.h"
+#include "emitter.h"
+#include "interpreter.h"
+#include "lightrec.h"
+#include "memmanager.h"
+#include "recompiler.h"
+#include "regcache.h"
+#include "optimizer.h"
+
+#include <errno.h>
+#include <lightning.h>
+#include <limits.h>
+#if ENABLE_THREADED_COMPILER
+#include <stdatomic.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#if ENABLE_TINYMM
+#include <tinymm.h>
+#endif
+
+#define GENMASK(h, l) \
+       (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))
+
+static struct block * lightrec_precompile_block(struct lightrec_state *state,
+                                               u32 pc);
+
+/* Report an access to an address covered by no memory map */
+static void __segfault_cb(struct lightrec_state *state, u32 addr)
+{
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+       pr_err("Segmentation fault in recompiled code: invalid "
+              "load/store at address 0x%08x\n", addr);
+}
+
+/* Perform a load/store through a map's hardware I/O callbacks.
+ * NOTE(review): SWL/SWR are forwarded as plain 32-bit stores here —
+ * confirm no I/O region is accessed with unaligned word stores. */
+static u32 lightrec_rw_ops(struct lightrec_state *state, union code op,
+               const struct lightrec_mem_map_ops *ops, u32 addr, u32 data)
+{
+       switch (op.i.op) {
+       case OP_SB:
+               ops->sb(state, addr, (u8) data);
+               return 0;
+       case OP_SH:
+               ops->sh(state, addr, (u16) data);
+               return 0;
+       case OP_SWL:
+       case OP_SWR:
+       case OP_SW:
+               ops->sw(state, addr, data);
+               return 0;
+       case OP_LB:
+               return (s32) (s8) ops->lb(state, addr);
+       case OP_LBU:
+               return ops->lb(state, addr);
+       case OP_LH:
+               return (s32) (s16) ops->lh(state, addr);
+       case OP_LHU:
+               return ops->lh(state, addr);
+       case OP_LW:
+       default:
+               return ops->lw(state, addr);
+       }
+}
+
+/* Drop the code-LUT entry for a written address, but only for writes
+ * into kernel/user RAM where translated code may live */
+static void lightrec_invalidate_map(struct lightrec_state *state,
+               const struct lightrec_mem_map *map, u32 addr)
+{
+       if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
+               state->code_lut[lut_offset(addr)] = NULL;
+}
+
+/* Linear search for the memory map covering the given kunseg'd address;
+ * returns NULL when the address is unmapped */
+static const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, u32 kaddr)
+{
+       unsigned int i;
+
+       for (i = 0; i < state->nb_maps; i++) {
+               const struct lightrec_mem_map *map = &state->maps[i];
+
+               if (kaddr >= map->pc && kaddr < map->pc + map->length)
+                       return map;
+       }
+
+       return NULL;
+}
+
+/* Core memory access routine. 'addr' is the base register value; the
+ * opcode's sign-extended immediate is added here. Accesses to mapped
+ * hardware go through the map's I/O callbacks (and tag LIGHTREC_HW_IO),
+ * everything else is done directly on host memory (LIGHTREC_DIRECT_IO),
+ * with little-endian conversion and code-LUT invalidation on stores.
+ * Returns the loaded value for loads, 0 for stores. */
+u32 lightrec_rw(struct lightrec_state *state, union code op,
+               u32 addr, u32 data, u16 *flags)
+{
+       const struct lightrec_mem_map *map;
+       u32 shift, mem_data, mask, pc;
+       uintptr_t new_addr;
+       u32 kaddr;
+
+       addr += (s16) op.i.imm;
+       kaddr = kunseg(addr);
+
+       map = lightrec_get_map(state, kaddr);
+       if (!map) {
+               __segfault_cb(state, addr);
+               return 0;
+       }
+
+       pc = map->pc;
+
+       if (unlikely(map->ops)) {
+               if (flags)
+                       *flags |= LIGHTREC_HW_IO;
+
+               return lightrec_rw_ops(state, op, map->ops, addr, data);
+       }
+
+       /* Resolve mirrors to the canonical map before computing offsets */
+       while (map->mirror_of)
+               map = map->mirror_of;
+
+       if (flags)
+               *flags |= LIGHTREC_DIRECT_IO;
+
+       kaddr -= pc;
+       new_addr = (uintptr_t) map->address + kaddr;
+
+       switch (op.i.op) {
+       case OP_SB:
+               *(u8 *) new_addr = (u8) data;
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr);
+               return 0;
+       case OP_SH:
+               *(u16 *) new_addr = HTOLE16((u16) data);
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr);
+               return 0;
+       case OP_SWL:
+               /* Store Word Left: merge the high bytes of 'data' into the
+                * enclosing aligned word, keeping the bytes below 'shift' */
+               shift = kaddr & 3;
+               mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+               mask = GENMASK(31, (shift + 1) * 8);
+
+               *(u32 *)(new_addr & ~3) = HTOLE32((data >> ((3 - shift) * 8))
+                                                 | (mem_data & mask));
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr & ~0x3);
+               return 0;
+       case OP_SWR:
+               /* Store Word Right: merge the low bytes of 'data' into the
+                * enclosing aligned word, keeping the bytes below 'shift' */
+               shift = kaddr & 3;
+               mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+               mask = (1 << (shift * 8)) - 1;
+
+               *(u32 *)(new_addr & ~3) = HTOLE32((data << (shift * 8))
+                                                 | (mem_data & mask));
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr & ~0x3);
+               return 0;
+       case OP_SW:
+               *(u32 *) new_addr = HTOLE32(data);
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr);
+               return 0;
+       case OP_SWC2:
+               /* Store COP2 data register rt to memory */
+               *(u32 *) new_addr = HTOLE32(state->ops.cop2_ops.mfc(state,
+                                                                   op.i.rt));
+               if (!state->invalidate_from_dma_only)
+                       lightrec_invalidate_map(state, map, kaddr);
+               return 0;
+       case OP_LB:
+               return (s32) *(s8 *) new_addr;
+       case OP_LBU:
+               return *(u8 *) new_addr;
+       case OP_LH:
+               return (s32)(s16) LE16TOH(*(u16 *) new_addr);
+       case OP_LHU:
+               return LE16TOH(*(u16 *) new_addr);
+       case OP_LWL:
+               /* Load Word Left: 'data' is the old rt value, whose low
+                * bytes are preserved */
+               shift = kaddr & 3;
+               mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+               mask = (1 << (24 - shift * 8)) - 1;
+
+               return (data & mask) | (mem_data << (24 - shift * 8));
+       case OP_LWR:
+               /* Load Word Right: 'data' is the old rt value, whose high
+                * bytes are preserved */
+               shift = kaddr & 3;
+               mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+               mask = GENMASK(31, 32 - shift * 8);
+
+               return (data & mask) | (mem_data >> (shift * 8));
+       case OP_LWC2:
+               /* Load from memory straight into COP2 data register rt */
+               state->ops.cop2_ops.mtc(state, op.i.rt,
+                                       LE32TOH(*(u32 *) new_addr));
+               return 0;
+       case OP_LW:
+       default:
+               return LE32TOH(*(u32 *) new_addr);
+       }
+}
+
+/* Perform the access described by 'op' using the cached register values,
+ * and write the result back to rt for load opcodes (unless rt is $zero) */
+static void lightrec_rw_helper(struct lightrec_state *state,
+                              union code op, u16 *flags)
+{
+       u32 ret = lightrec_rw(state, op,
+                         state->native_reg_cache[op.i.rs],
+                         state->native_reg_cache[op.i.rt], flags);
+
+       switch (op.i.op) {
+       case OP_LB:
+       case OP_LBU:
+       case OP_LH:
+       case OP_LHU:
+       case OP_LWL:
+       case OP_LWR:
+       case OP_LW:
+               if (op.i.rt)
+                       state->native_reg_cache[op.i.rt] = ret;
+               /* fall-through */
+       default:
+               break;
+       }
+}
+
+static void lightrec_rw_cb(struct lightrec_state *state, union code op)
+{
+       lightrec_rw_helper(state, op, NULL);
+}
+
+static void lightrec_rw_generic_cb(struct lightrec_state *state,
+                                  struct opcode *op, struct block *block)
+{
+       bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+
+       lightrec_rw_helper(state, op->c, &op->flags);
+
+       if (!was_tagged) {
+               pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
+                        "tagged - flag for recompilation\n",
+                        block->pc, op->offset << 2);
+
+               lightrec_mark_for_recompilation(state->block_cache, block);
+       }
+}
+
+u32 lightrec_mfc(struct lightrec_state *state, union code op)
+{
+       bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
+                     (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
+       u32 (*func)(struct lightrec_state *, u8);
+       const struct lightrec_cop_ops *ops;
+
+       if (op.i.op == OP_CP0)
+               ops = &state->ops.cop0_ops;
+       else
+               ops = &state->ops.cop2_ops;
+
+       if (is_cfc)
+               func = ops->cfc;
+       else
+               func = ops->mfc;
+
+       return (*func)(state, op.r.rd);
+}
+
+static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
+{
+       u32 rt = lightrec_mfc(state, op);
+
+       if (op.r.rt)
+               state->native_reg_cache[op.r.rt] = rt;
+}
+
+void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+{
+       bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
+                     (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
+       void (*func)(struct lightrec_state *, u8, u32);
+       const struct lightrec_cop_ops *ops;
+
+       if (op.i.op == OP_CP0)
+               ops = &state->ops.cop0_ops;
+       else
+               ops = &state->ops.cop2_ops;
+
+       if (is_ctc)
+               func = ops->ctc;
+       else
+               func = ops->mtc;
+
+       (*func)(state, op.r.rd, data);
+}
+
+/* C callback for MTC/CTC opcodes: write the value of the source MIPS
+ * register to the targeted coprocessor register. */
+static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
+{
+       lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
+}
+
+/* C callback for the RFE (return from exception) opcode: shifts the
+ * interrupt/mode bit stack of the CP0 Status register, moving bits [5:2]
+ * down into bits [3:0] while preserving the upper bits. */
+static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
+{
+       u32 status;
+
+       /* Read CP0 Status register (r12) */
+       status = state->ops.cop0_ops.mfc(state, 12);
+
+       /* Switch the bits */
+       status = ((status & 0x3c) >> 2) | (status & ~0xf);
+
+       /* Write it back */
+       state->ops.cop0_ops.ctc(state, 12, status);
+}
+
+/* C callback for coprocessor command opcodes: forwards the raw opcode to
+ * the COP0 or COP2 'op' handler registered by the client. */
+static void lightrec_cp_cb(struct lightrec_state *state, union code op)
+{
+       void (*cp_fn)(struct lightrec_state *, u32);
+
+       /* Bit 25 of the opcode selects COP2 over COP0. */
+       if (op.opcode & (1u << 25))
+               cp_fn = state->ops.cop2_ops.op;
+       else
+               cp_fn = state->ops.cop0_ops.op;
+
+       cp_fn(state, op.opcode);
+}
+
+/* C callback for the SYSCALL opcode: request an exit from the emulation
+ * loop so the client can raise the exception. */
+static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
+{
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
+}
+
+/* C callback for the BREAK opcode: request an exit from the emulation
+ * loop so the client can raise the exception. */
+static void lightrec_break_cb(struct lightrec_state *state, union code op)
+{
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
+}
+
+/* Look up (or create) the block starting at the given PC.
+ * An outdated block (its MIPS code was overwritten) is destroyed and
+ * recompiled from scratch. Returns NULL and sets LIGHTREC_EXIT_SEGFAULT
+ * on failure. */
+struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
+{
+       struct block *block = lightrec_find_block(state->block_cache, pc);
+
+       if (block && lightrec_block_is_outdated(block)) {
+               pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
+
+               /* Make sure the recompiler isn't processing the block we'll
+                * destroy */
+               if (ENABLE_THREADED_COMPILER)
+                       lightrec_recompiler_remove(state->rec, block);
+
+               lightrec_unregister_block(state->block_cache, block);
+               lightrec_free_block(block);
+               block = NULL;
+       }
+
+       /* Not cached (or just invalidated): disassemble and register a fresh
+        * block for this PC. */
+       if (!block) {
+               block = lightrec_precompile_block(state, pc);
+               if (!block) {
+                       pr_err("Unable to recompile block at PC 0x%x\n", pc);
+                       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+                       return NULL;
+               }
+
+               lightrec_register_block(state->block_cache, block);
+       }
+
+       return block;
+}
+
+/* Resolve the PC to a pointer to compiled native code, called from the
+ * dispatcher's slow path. May run the interpreter (first-pass profiling)
+ * and trigger (re)compilation before native code is available. Returns
+ * NULL when the emulation loop must stop (exit flag raised, target cycle
+ * reached, or a block could not be created). */
+static void * get_next_block_func(struct lightrec_state *state, u32 pc)
+{
+       struct block *block;
+       bool should_recompile;
+       void *func;
+
+       for (;;) {
+               /* Fast path: the LUT already holds compiled code for this PC
+                * (the dispatcher's own address is a "pending" placeholder). */
+               func = state->code_lut[lut_offset(pc)];
+               if (func && func != state->get_next_block)
+                       return func;
+
+               block = lightrec_get_block(state, pc);
+
+               if (unlikely(!block))
+                       return NULL;
+
+               should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE;
+
+               /* Discard the stale native code so the block gets recompiled
+                * with the newly tagged opcodes. */
+               if (unlikely(should_recompile)) {
+                       pr_debug("Block at PC 0x%08x should recompile"
+                                " - freeing old code\n", pc);
+
+                       if (ENABLE_THREADED_COMPILER)
+                               lightrec_recompiler_remove(state->rec, block);
+
+                       remove_from_code_lut(state->block_cache, block);
+                       lightrec_unregister(MEM_FOR_CODE, block->code_size);
+                       if (block->_jit)
+                               _jit_destroy_state(block->_jit);
+                       block->_jit = NULL;
+                       block->function = NULL;
+                       block->flags &= ~BLOCK_SHOULD_RECOMPILE;
+               }
+
+               if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
+                       func = lightrec_recompiler_run_first_pass(block, &pc);
+               else
+                       func = block->function;
+
+               if (likely(func))
+                       return func;
+
+               /* Block wasn't compiled yet - run the interpreter */
+               if (!ENABLE_THREADED_COMPILER &&
+                   ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
+                    unlikely(block->flags & BLOCK_NEVER_COMPILE)))
+                       pc = lightrec_emulate_block(block, pc);
+
+               if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
+                       /* Then compile it using the profiled data */
+                       if (ENABLE_THREADED_COMPILER)
+                               lightrec_recompiler_add(state->rec, block);
+                       else
+                               lightrec_compile_block(block);
+               }
+
+               /* The interpreter pass may have raised an exit flag or burned
+                * through the cycle budget - stop and report the new PC. */
+               if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
+                   state->current_cycle >= state->target_cycle) {
+                       state->next_pc = pc;
+                       return NULL;
+               }
+       }
+}
+
+/* Trampoline target for "generic" wrappers (callbacks taking an opcode and
+ * a block pointer). Converts the JIT's remaining-cycles value into an
+ * absolute cycle count before the call, and back into a delta afterwards,
+ * since the callback may modify either counter. */
+static s32 c_generic_function_wrapper(struct lightrec_state *state,
+                                     s32 cycles_delta,
+                                     void (*f)(struct lightrec_state *,
+                                               struct opcode *,
+                                               struct block *),
+                                     struct opcode *op, struct block *block)
+{
+       state->current_cycle = state->target_cycle - cycles_delta;
+
+       (*f)(state, op, block);
+
+       return state->target_cycle - state->current_cycle;
+}
+
+/* Trampoline target for simple wrappers (callbacks taking only the raw
+ * opcode). Converts the JIT's remaining-cycles value into an absolute
+ * cycle count before the call, and back into a delta afterwards, since
+ * the callback may modify either counter. */
+static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
+                             void (*f)(struct lightrec_state *, union code),
+                             union code op)
+{
+       u32 cycle = state->target_cycle - cycles_delta;
+
+       state->current_cycle = cycle;
+
+       f(state, op);
+
+       return state->target_cycle - state->current_cycle;
+}
+
+/* Emit a small native wrapper block that saves the JIT temporaries, then
+ * trampolines into c_function_wrapper() (or c_generic_function_wrapper()
+ * when 'generic' is set) to invoke the C callback 'f'. Returns NULL on
+ * allocation failure. */
+static struct block * generate_wrapper(struct lightrec_state *state,
+                                      void *f, bool generic)
+{
+       struct block *block;
+       jit_state_t *_jit;
+       unsigned int i;
+       int stack_ptr;
+       jit_word_t code_size;
+       jit_node_t *to_tramp, *to_fn_epilog;
+
+       block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+       if (!block)
+               goto err_no_mem;
+
+       _jit = jit_new_state();
+       if (!_jit)
+               goto err_free_block;
+
+       jit_name("RW wrapper");
+       jit_note(__FILE__, __LINE__);
+
+       /* Wrapper entry point */
+       jit_prolog();
+
+       /* Spill all JIT temporaries to the stack; the C callback is free to
+        * clobber them. */
+       stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
+
+       for (i = 0; i < NUM_TEMPS; i++)
+               jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
+
+       /* Jump to the trampoline */
+       to_tramp = jit_jmpi();
+
+       /* The trampoline will jump back here */
+       to_fn_epilog = jit_label();
+
+       for (i = 0; i < NUM_TEMPS; i++)
+               jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
+
+       jit_ret();
+       jit_epilog();
+
+       /* Trampoline entry point.
+        * The sole purpose of the trampoline is to cheese Lightning not to
+        * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
+        * do want to return to the caller with this register modified. */
+       jit_prolog();
+       jit_tramp(256);
+       jit_patch(to_tramp);
+
+       /* Call the C-side wrapper; JIT_R0 carries the opcode (and JIT_R1 the
+        * struct opcode/block pointers in the generic case). */
+       jit_prepare();
+       jit_pushargr(LIGHTREC_REG_STATE);
+       jit_pushargr(LIGHTREC_REG_CYCLE);
+       jit_pushargi((uintptr_t)f);
+       jit_pushargr(JIT_R0);
+       if (generic) {
+               jit_pushargr(JIT_R1);
+               jit_finishi(c_generic_function_wrapper);
+       } else {
+               jit_finishi(c_function_wrapper);
+       }
+
+#if __WORDSIZE == 64
+       jit_retval_i(LIGHTREC_REG_CYCLE);
+#else
+       jit_retval(LIGHTREC_REG_CYCLE);
+#endif
+
+       jit_patch_at(jit_jmpi(), to_fn_epilog);
+       jit_epilog();
+
+       block->state = state;
+       block->_jit = _jit;
+       block->function = jit_emit();
+       block->opcode_list = NULL;
+       block->flags = 0;
+       block->nb_ops = 0;
+
+       jit_get_code(&code_size);
+       lightrec_register(MEM_FOR_CODE, code_size);
+
+       block->code_size = code_size;
+
+       if (ENABLE_DISASSEMBLER) {
+               pr_debug("Wrapper block:\n");
+               jit_disassemble();
+       }
+
+       jit_clear_state();
+       return block;
+
+err_free_block:
+       lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+err_no_mem:
+       pr_err("Unable to compile wrapper: Out of memory\n");
+       return NULL;
+}
+
+/* Emit the dispatcher: the native loop that jumps into compiled blocks,
+ * looks up the next block in the code LUT (fast path) or calls
+ * get_next_block_func() (slow path), and exits when the cycle budget is
+ * exhausted or no next block is available. Also publishes the
+ * eob_wrapper_func and get_next_block entry points into 'state'.
+ * Returns NULL on allocation failure. */
+static struct block * generate_dispatcher(struct lightrec_state *state)
+{
+       struct block *block;
+       jit_state_t *_jit;
+       jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
+       unsigned int i;
+       u32 offset, ram_len;
+       jit_word_t code_size;
+
+       block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+       if (!block)
+               goto err_no_mem;
+
+       _jit = jit_new_state();
+       if (!_jit)
+               goto err_free_block;
+
+       jit_name("dispatcher");
+       jit_note(__FILE__, __LINE__);
+
+       jit_prolog();
+       jit_frame(256);
+
+       /* Arguments: pointer to the first block's code, then the cycle
+        * budget (delta to the target cycle). */
+       jit_getarg(JIT_R0, jit_arg());
+#if __WORDSIZE == 64
+       jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
+#else
+       jit_getarg(LIGHTREC_REG_CYCLE, jit_arg());
+#endif
+
+       /* Force all callee-saved registers to be pushed on the stack */
+       for (i = 0; i < NUM_REGS; i++)
+               jit_movr(JIT_V(i), JIT_V(i));
+
+       /* Pass lightrec_state structure to blocks, using the last callee-saved
+        * register that Lightning provides */
+       jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
+
+       loop = jit_label();
+
+       /* Call the block's code */
+       jit_jmpr(JIT_R0);
+
+       /* The block will jump here, with the number of cycles remaining in
+        * LIGHTREC_REG_CYCLE */
+       addr2 = jit_indirect();
+
+       /* Jump to end if state->target_cycle < state->current_cycle */
+       to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
+
+       /* Convert next PC to KUNSEG and avoid mirrors */
+       ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length;
+       jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1));
+       to_c = jit_bgei(JIT_R0, ram_len);
+
+       /* Fast path: code is running from RAM, use the code LUT */
+#if __WORDSIZE == 64
+       /* LUT entries are pointers: scale the word index to a byte offset. */
+       jit_lshi(JIT_R0, JIT_R0, 1);
+#endif
+       jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
+       jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
+
+       /* If we get non-NULL, loop */
+       jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+
+       /* Slow path: call C function get_next_block_func() */
+       jit_patch(to_c);
+
+       if (ENABLE_FIRST_PASS) {
+               /* We may call the interpreter - update state->current_cycle */
+               jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, target_cycle));
+               jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
+               jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
+                          LIGHTREC_REG_STATE, JIT_R1);
+       }
+
+       /* The code LUT will be set to this address when the block at the target
+        * PC has been preprocessed but not yet compiled by the threaded
+        * recompiler */
+       addr = jit_indirect();
+
+       /* Get the next block */
+       jit_prepare();
+       jit_pushargr(LIGHTREC_REG_STATE);
+       jit_pushargr(JIT_V0);
+       jit_finishi(&get_next_block_func);
+       jit_retval(JIT_R0);
+
+       if (ENABLE_FIRST_PASS) {
+               /* The interpreter may have updated state->current_cycle and
+                * state->target_cycle - recalc the delta */
+               jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, current_cycle));
+               jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, target_cycle));
+               jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+       }
+
+       /* If we get non-NULL, loop */
+       jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+
+       to_end2 = jit_jmpi();
+
+       /* When exiting, the recompiled code will jump to that address */
+       jit_note(__FILE__, __LINE__);
+       jit_patch(to_end);
+
+       /* Store back the next_pc to the lightrec_state structure */
+       offset = offsetof(struct lightrec_state, next_pc);
+       jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
+
+       jit_patch(to_end2);
+
+       /* Return the remaining cycle count to the caller of the dispatcher. */
+       jit_retr(LIGHTREC_REG_CYCLE);
+       jit_epilog();
+
+       block->state = state;
+       block->_jit = _jit;
+       block->function = jit_emit();
+       block->opcode_list = NULL;
+       block->flags = 0;
+       block->nb_ops = 0;
+
+       jit_get_code(&code_size);
+       lightrec_register(MEM_FOR_CODE, code_size);
+
+       block->code_size = code_size;
+
+       /* Resolve the indirect labels now that the code is emitted. */
+       state->eob_wrapper_func = jit_address(addr2);
+       state->get_next_block = jit_address(addr);
+
+       if (ENABLE_DISASSEMBLER) {
+               pr_debug("Dispatcher block:\n");
+               jit_disassemble();
+       }
+
+       /* We're done! */
+       jit_clear_state();
+       return block;
+
+err_free_block:
+       lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+err_no_mem:
+       pr_err("Unable to compile dispatcher: Out of memory\n");
+       return NULL;
+}
+
+/* Fetch the raw MIPS opcode located at the given (possibly mirrored,
+ * possibly KSEG-addressed) PC. */
+union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
+{
+       u32 addr, kunseg_pc = kunseg(pc);
+       const u32 *code;
+       const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+
+       /* NOTE(review): the offset is computed against the mirror's own base
+        * PC before resolving to the backing map - assumes mirrors share the
+        * backing map's layout; confirm against the map definitions. */
+       addr = kunseg_pc - map->pc;
+
+       while (map->mirror_of)
+               map = map->mirror_of;
+
+       code = map->address + addr;
+
+       return (union code) *code;
+}
+
+/* Create a new block for the given PC: locate the MIPS code in the memory
+ * maps, disassemble it into an opcode list, run the optimizer, and fill in
+ * the block metadata. Does not generate native code (that happens in
+ * lightrec_compile_block()). Returns NULL on failure. */
+static struct block * lightrec_precompile_block(struct lightrec_state *state,
+                                               u32 pc)
+{
+       struct opcode *list;
+       struct block *block;
+       const u32 *code;
+       u32 addr, kunseg_pc = kunseg(pc);
+       const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+       unsigned int length;
+
+       if (!map)
+               return NULL;
+
+       /* Resolve mirrors to the backing map, keeping the offset computed
+        * against the mirror's base PC. */
+       addr = kunseg_pc - map->pc;
+
+       while (map->mirror_of)
+               map = map->mirror_of;
+
+       code = map->address + addr;
+
+       block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+       if (!block) {
+               pr_err("Unable to recompile block: Out of memory\n");
+               return NULL;
+       }
+
+       /* 'length' is set to the number of bytes disassembled. */
+       list = lightrec_disassemble(state, code, &length);
+       if (!list) {
+               lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+               return NULL;
+       }
+
+       block->pc = pc;
+       block->state = state;
+       block->_jit = NULL;
+       block->function = NULL;
+       block->opcode_list = list;
+       block->map = map;
+       block->next = NULL;
+       block->flags = 0;
+       block->code_size = 0;
+#if ENABLE_THREADED_COMPILER
+       block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
+#endif
+       block->nb_ops = length / sizeof(u32);
+
+       /* The optimizer may shrink the opcode list - recompute the length. */
+       lightrec_optimize(block);
+
+       length = block->nb_ops * sizeof(u32);
+
+       lightrec_register(MEM_FOR_MIPS_CODE, length);
+
+       if (ENABLE_DISASSEMBLER) {
+               pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
+               lightrec_print_disassembly(block, code, length);
+       }
+
+       pr_debug("Block size: %lu opcodes\n", block->nb_ops);
+
+       /* If the first opcode is an 'impossible' branch, never compile the
+        * block */
+       if (list->flags & LIGHTREC_EMULATE_BRANCH)
+               block->flags |= BLOCK_NEVER_COMPILE;
+
+       block->hash = lightrec_calculate_block_hash(block);
+
+       return block;
+}
+
+/* Return true if every load/store opcode of the block has been tagged with
+ * an I/O flag, i.e. the block can be compiled to its final form and its
+ * opcode list freed afterwards. */
+static bool lightrec_block_is_fully_tagged(struct block *block)
+{
+       struct opcode *op;
+
+       for (op = block->opcode_list; op; op = op->next) {
+               /* Check all loads/stores of the opcode list; the block counts
+                * as fully tagged only if every one of them carries a
+                * LIGHTREC_DIRECT_IO or LIGHTREC_HW_IO flag. */
+               switch (op->c.i.op) {
+               case OP_LB:
+               case OP_LH:
+               case OP_LWL:
+               case OP_LW:
+               case OP_LBU:
+               case OP_LHU:
+               case OP_LWR:
+               case OP_SB:
+               case OP_SH:
+               case OP_SWL:
+               case OP_SW:
+               case OP_SWR:
+               case OP_LWC2:
+               case OP_SWC2:
+                       if (!(op->flags & (LIGHTREC_DIRECT_IO |
+                                          LIGHTREC_HW_IO)))
+                               return false;
+               default: /* fall-through */
+                       continue;
+               }
+       }
+
+       return true;
+}
+
+/* Generate native code for a previously precompiled block: walk the opcode
+ * list emitting code per opcode, patch all recorded branches, and publish
+ * the result in the code LUT. If every load/store was tagged, the opcode
+ * list is freed after compilation. Returns 0 or -ENOMEM. */
+int lightrec_compile_block(struct block *block)
+{
+       struct lightrec_state *state = block->state;
+       bool op_list_freed = false, fully_tagged = false;
+       struct opcode *elm;
+       jit_state_t *_jit;
+       jit_node_t *start_of_block;
+       bool skip_next = false;
+       jit_word_t code_size;
+       unsigned int i, j;
+       u32 next_pc;
+
+       fully_tagged = lightrec_block_is_fully_tagged(block);
+       if (fully_tagged)
+               block->flags |= BLOCK_FULLY_TAGGED;
+
+       _jit = jit_new_state();
+       if (!_jit)
+               return -ENOMEM;
+
+       block->_jit = _jit;
+
+       /* Reset the per-compilation bookkeeping (register cache, branch and
+        * target lists) before emitting any code. */
+       lightrec_regcache_reset(state->reg_cache);
+       state->cycles = 0;
+       state->nb_branches = 0;
+       state->nb_local_branches = 0;
+       state->nb_targets = 0;
+
+       jit_prolog();
+       jit_tramp(256);
+
+       start_of_block = jit_label();
+
+       for (elm = block->opcode_list; elm; elm = elm->next) {
+               next_pc = block->pc + elm->offset * sizeof(u32);
+
+               /* Delay slots already handled with their branch are skipped
+                * here. */
+               if (skip_next) {
+                       skip_next = false;
+                       continue;
+               }
+
+               state->cycles += lightrec_cycles_of_opcode(elm->c);
+
+               if (elm->flags & LIGHTREC_EMULATE_BRANCH) {
+                       pr_debug("Branch at offset 0x%x will be emulated\n",
+                                elm->offset << 2);
+                       lightrec_emit_eob(block, elm, next_pc);
+                       skip_next = !(elm->flags & LIGHTREC_NO_DS);
+               } else if (elm->opcode) {
+                       lightrec_rec_opcode(block, elm, next_pc);
+                       skip_next = has_delay_slot(elm->c) &&
+                               !(elm->flags & LIGHTREC_NO_DS);
+#if _WIN32
+                       /* FIXME: GNU Lightning on Windows seems to use our
+                        * mapped registers as temporaries. Until the actual bug
+                        * is found and fixed, unconditionally mark our
+                        * registers as live here. */
+                       lightrec_regcache_mark_live(state->reg_cache, _jit);
+#endif
+               }
+       }
+
+       /* Out-of-block branches recorded during emission. */
+       for (i = 0; i < state->nb_branches; i++)
+               jit_patch(state->branches[i]);
+
+       /* Branches targeting an offset inside this same block. */
+       for (i = 0; i < state->nb_local_branches; i++) {
+               struct lightrec_branch *branch = &state->local_branches[i];
+
+               pr_debug("Patch local branch to offset 0x%x\n",
+                        branch->target << 2);
+
+               if (branch->target == 0) {
+                       jit_patch_at(branch->branch, start_of_block);
+                       continue;
+               }
+
+               for (j = 0; j < state->nb_targets; j++) {
+                       if (state->targets[j].offset == branch->target) {
+                               jit_patch_at(branch->branch,
+                                            state->targets[j].label);
+                               break;
+                       }
+               }
+
+               if (j == state->nb_targets)
+                       pr_err("Unable to find branch target\n");
+       }
+
+       /* Fallthrough exit: jump to the end-of-block wrapper. */
+       jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, eob_wrapper_func));
+
+       jit_jmpr(JIT_R0);
+
+       jit_ret();
+       jit_epilog();
+
+       block->function = jit_emit();
+
+       /* Add compiled function to the LUT */
+       state->code_lut[lut_offset(block->pc)] = block->function;
+
+       jit_get_code(&code_size);
+       lightrec_register(MEM_FOR_CODE, code_size);
+
+       block->code_size = code_size;
+
+       if (ENABLE_DISASSEMBLER) {
+               pr_debug("Compiling block at PC: 0x%x\n", block->pc);
+               jit_disassemble();
+       }
+
+       jit_clear_state();
+
+#if ENABLE_THREADED_COMPILER
+       /* Atomically claim the right to free the opcode list, so it is not
+        * freed twice when racing with the recompiler thread. */
+       if (fully_tagged)
+               op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
+#endif
+       if (fully_tagged && !op_list_freed) {
+               pr_debug("Block PC 0x%08x is fully tagged"
+                        " - free opcode list\n", block->pc);
+               lightrec_free_opcode_list(state, block->opcode_list);
+               block->opcode_list = NULL;
+       }
+
+       return 0;
+}
+
+/* Run the emulated CPU from 'pc' until 'target_cycle' is reached or an
+ * exit flag is raised. Returns the PC at which execution stopped. */
+u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
+{
+       s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
+       void *block_trace;
+       s32 cycles_delta;
+
+       state->exit_flags = LIGHTREC_EXIT_NORMAL;
+
+       /* Handle the cycle counter overflowing */
+       if (unlikely(target_cycle < state->current_cycle))
+               target_cycle = UINT_MAX;
+
+       state->target_cycle = target_cycle;
+
+       /* Resolve the first block here; get_next_block_func() may already
+        * stop execution (NULL return) before entering the dispatcher. */
+       block_trace = get_next_block_func(state, pc);
+       if (block_trace) {
+               cycles_delta = state->target_cycle - state->current_cycle;
+
+               /* The dispatcher returns the remaining cycle budget. */
+               cycles_delta = (*func)(block_trace, cycles_delta);
+
+               state->current_cycle = state->target_cycle - cycles_delta;
+       }
+
+       return state->next_pc;
+}
+
+/* Execute with a target cycle equal to the current cycle count, i.e. with
+ * an empty cycle budget - presumably intended to run a single block before
+ * the budget check stops the dispatcher. */
+u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
+{
+       return lightrec_execute(state, pc, state->current_cycle);
+}
+
+/* Run one block through the interpreter instead of the dynarec. Returns
+ * the next PC, or 0 if the block could not be created. */
+u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
+{
+       struct block *block = lightrec_get_block(state, pc);
+       if (!block)
+               return 0;
+
+       state->exit_flags = LIGHTREC_EXIT_NORMAL;
+
+       return lightrec_emulate_block(block, pc);
+}
+
+/* Release every resource owned by a block: opcode list, JIT state, memory
+ * accounting, and the block structure itself. */
+void lightrec_free_block(struct block *block)
+{
+       lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
+       if (block->opcode_list)
+               lightrec_free_opcode_list(block->state, block->opcode_list);
+       if (block->_jit)
+               _jit_destroy_state(block->_jit);
+       lightrec_unregister(MEM_FOR_CODE, block->code_size);
+       lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
+}
+
+/* Create and fully initialize a lightrec state: validates the client's
+ * coprocessor callbacks, sets up GNU Lightning, allocates the caches and
+ * (optionally) the threaded recompiler, then generates the dispatcher and
+ * all the C-callback wrapper blocks. On any failure, everything allocated
+ * so far is torn down via the error-unwind chain and NULL is returned. */
+struct lightrec_state * lightrec_init(char *argv0,
+                                     const struct lightrec_mem_map *map,
+                                     size_t nb,
+                                     const struct lightrec_ops *ops)
+{
+       struct lightrec_state *state;
+
+       /* Sanity-check ops */
+       if (!ops ||
+           !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc ||
+           !ops->cop0_ops.ctc || !ops->cop0_ops.op ||
+           !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc ||
+           !ops->cop2_ops.ctc || !ops->cop2_ops.op) {
+               pr_err("Missing callbacks in lightrec_ops structure\n");
+               return NULL;
+       }
+
+       init_jit(argv0);
+
+       /* The code LUT is allocated as a flexible tail of the state. */
+       state = calloc(1, sizeof(*state) +
+                      sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       if (!state)
+               goto err_finish_jit;
+
+       lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) +
+                         sizeof(*state->code_lut) * CODE_LUT_SIZE);
+
+#if ENABLE_TINYMM
+       state->tinymm = tinymm_init(malloc, free, 4096);
+       if (!state->tinymm)
+               goto err_free_state;
+#endif
+
+       state->block_cache = lightrec_blockcache_init(state);
+       if (!state->block_cache)
+               goto err_free_tinymm;
+
+       state->reg_cache = lightrec_regcache_init(state);
+       if (!state->reg_cache)
+               goto err_free_block_cache;
+
+       if (ENABLE_THREADED_COMPILER) {
+               state->rec = lightrec_recompiler_init(state);
+               if (!state->rec)
+                       goto err_free_reg_cache;
+       }
+
+       state->nb_maps = nb;
+       state->maps = map;
+
+       memcpy(&state->ops, ops, sizeof(*ops));
+
+       state->dispatcher = generate_dispatcher(state);
+       if (!state->dispatcher)
+               goto err_free_recompiler;
+
+       /* Generate one wrapper block per C callback type; only the generic
+        * R/W wrapper takes the extra opcode/block arguments. */
+       state->rw_generic_wrapper = generate_wrapper(state,
+                                                    lightrec_rw_generic_cb,
+                                                    true);
+       if (!state->rw_generic_wrapper)
+               goto err_free_dispatcher;
+
+       state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
+       if (!state->rw_wrapper)
+               goto err_free_generic_rw_wrapper;
+
+       state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
+       if (!state->mfc_wrapper)
+               goto err_free_rw_wrapper;
+
+       state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
+       if (!state->mtc_wrapper)
+               goto err_free_mfc_wrapper;
+
+       state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
+       if (!state->rfe_wrapper)
+               goto err_free_mtc_wrapper;
+
+       state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
+       if (!state->cp_wrapper)
+               goto err_free_rfe_wrapper;
+
+       state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
+                                                 false);
+       if (!state->syscall_wrapper)
+               goto err_free_cp_wrapper;
+
+       state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
+                                               false);
+       if (!state->break_wrapper)
+               goto err_free_syscall_wrapper;
+
+       /* Cache the native entry points of the wrapper blocks. */
+       state->rw_generic_func = state->rw_generic_wrapper->function;
+       state->rw_func = state->rw_wrapper->function;
+       state->mfc_func = state->mfc_wrapper->function;
+       state->mtc_func = state->mtc_wrapper->function;
+       state->rfe_func = state->rfe_wrapper->function;
+       state->cp_func = state->cp_wrapper->function;
+       state->syscall_func = state->syscall_wrapper->function;
+       state->break_func = state->break_wrapper->function;
+
+       /* Precompute host-address offsets for the direct-access fast paths. */
+       map = &state->maps[PSX_MAP_BIOS];
+       state->offset_bios = (uintptr_t)map->address - map->pc;
+
+       map = &state->maps[PSX_MAP_SCRATCH_PAD];
+       state->offset_scratch = (uintptr_t)map->address - map->pc;
+
+       map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
+       state->offset_ram = (uintptr_t)map->address - map->pc;
+
+       /* Detect whether the client mapped the RAM mirrors contiguously, so
+        * mirror accesses need no extra translation. */
+       if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
+           state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
+           state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
+               state->mirrors_mapped = true;
+
+       return state;
+
+err_free_syscall_wrapper:
+       lightrec_free_block(state->syscall_wrapper);
+err_free_cp_wrapper:
+       lightrec_free_block(state->cp_wrapper);
+err_free_rfe_wrapper:
+       lightrec_free_block(state->rfe_wrapper);
+err_free_mtc_wrapper:
+       lightrec_free_block(state->mtc_wrapper);
+err_free_mfc_wrapper:
+       lightrec_free_block(state->mfc_wrapper);
+err_free_rw_wrapper:
+       lightrec_free_block(state->rw_wrapper);
+err_free_generic_rw_wrapper:
+       lightrec_free_block(state->rw_generic_wrapper);
+err_free_dispatcher:
+       lightrec_free_block(state->dispatcher);
+err_free_recompiler:
+       if (ENABLE_THREADED_COMPILER)
+               lightrec_free_recompiler(state->rec);
+err_free_reg_cache:
+       lightrec_free_regcache(state->reg_cache);
+err_free_block_cache:
+       lightrec_free_block_cache(state->block_cache);
+err_free_tinymm:
+#if ENABLE_TINYMM
+       tinymm_shutdown(state->tinymm);
+err_free_state:
+#endif
+       lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
+                           sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       free(state);
+err_finish_jit:
+       finish_jit();
+       return NULL;
+}
+
+/* Tear down a lightrec state: stop the threaded recompiler first (so no
+ * block is in flight), then free caches, all generated blocks, the JIT,
+ * and finally the state itself. Mirror of lightrec_init(). */
+void lightrec_destroy(struct lightrec_state *state)
+{
+       if (ENABLE_THREADED_COMPILER)
+               lightrec_free_recompiler(state->rec);
+
+       lightrec_free_regcache(state->reg_cache);
+       lightrec_free_block_cache(state->block_cache);
+       lightrec_free_block(state->dispatcher);
+       lightrec_free_block(state->rw_generic_wrapper);
+       lightrec_free_block(state->rw_wrapper);
+       lightrec_free_block(state->mfc_wrapper);
+       lightrec_free_block(state->mtc_wrapper);
+       lightrec_free_block(state->rfe_wrapper);
+       lightrec_free_block(state->cp_wrapper);
+       lightrec_free_block(state->syscall_wrapper);
+       lightrec_free_block(state->break_wrapper);
+       finish_jit();
+
+#if ENABLE_TINYMM
+       tinymm_shutdown(state->tinymm);
+#endif
+       lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
+                           sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       free(state);
+}
+
+/* Invalidate any compiled code overlapping [addr, addr + len). Only RAM
+ * (and its mirrors) can hold recompiled code; other regions are ignored. */
+void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
+{
+       u32 kaddr = kunseg(addr & ~0x3);
+       const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
+
+       if (map) {
+               while (map->mirror_of)
+                       map = map->mirror_of;
+
+               if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
+                       return;
+
+               /* Handle mirrors */
+               kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
+
+               /* Invalidate one word at a time; the trailing call covers the
+                * last word, so len <= 4 still invalidates exactly one. */
+               for (; len > 4; len -= 4, kaddr += 4)
+                       lightrec_invalidate_map(state, map, kaddr);
+
+               lightrec_invalidate_map(state, map, kaddr);
+       }
+}
+
+/* Drop every cached native-code pointer by zeroing the whole code LUT. */
+void lightrec_invalidate_all(struct lightrec_state *state)
+{
+       memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE);
+}
+
+/* Select whether invalidation is driven by DMA only. Changing the mode
+ * flushes the whole code LUT, since entries cached under the previous
+ * policy can no longer be trusted. */
+void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
+{
+       bool changed = state->invalidate_from_dma_only != dma_only;
+
+       state->invalidate_from_dma_only = dma_only;
+
+       if (changed)
+               lightrec_invalidate_all(state);
+}
+
+/* Raise exit flag(s) and force the dispatcher to stop by collapsing the
+ * target cycle onto the current one. LIGHTREC_EXIT_NORMAL is a no-op. */
+void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags)
+{
+       if (flags != LIGHTREC_EXIT_NORMAL) {
+               state->exit_flags |= flags;
+               state->target_cycle = state->current_cycle;
+       }
+}
+
+/* Return the exit flags accumulated during the last execution. */
+u32 lightrec_exit_flags(struct lightrec_state *state)
+{
+       return state->exit_flags;
+}
+
+/* Copy the emulated MIPS register file into the caller's buffer. */
+void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34])
+{
+       memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache));
+}
+
+/* Overwrite the emulated MIPS register file from the caller's buffer. */
+void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34])
+{
+       memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache));
+}
+
+/* Return the current emulated cycle counter. */
+u32 lightrec_current_cycle_count(const struct lightrec_state *state)
+{
+       return state->current_cycle;
+}
+
+/* Reset the current cycle counter, bumping the target forward if needed
+ * so it never lies in the past. */
+void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles)
+{
+       state->current_cycle = cycles;
+
+       if (state->target_cycle < cycles)
+               state->target_cycle = cycles;
+}
+
+/* Set a new target cycle count, clamped so it never lies in the past.
+ * Ignored while an exit flag is pending, since lightrec_set_exit_flags()
+ * pinned the target to the current cycle to stop the dispatcher. */
+void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
+{
+       if (state->exit_flags != LIGHTREC_EXIT_NORMAL)
+               return;
+
+       state->target_cycle = cycles < state->current_cycle ?
+               state->current_cycle : cycles;
+}
diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h
new file mode 100644 (file)
index 0000000..d3d896c
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2016 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __LIGHTREC_H__
+#define __LIGHTREC_H__
+
+#ifdef __cplusplus
+#define _Bool bool
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef _WIN32
+#   ifdef lightrec_EXPORTS
+#      define __api __declspec(dllexport)
+#   elif !defined(LIGHTREC_STATIC)
+#      define __api __declspec(dllimport)
+#   else
+#      define __api
+#   endif
+#elif __GNUC__ >= 4
+#   define __api __attribute__((visibility ("default")))
+#else
+#   define __api
+#endif
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t  u8;
+
+typedef int64_t s64;
+typedef int32_t s32;
+typedef int16_t s16;
+typedef int8_t  s8;
+
+struct lightrec_state;
+struct lightrec_mem_map;
+
+/* Exit flags */
+#define LIGHTREC_EXIT_NORMAL   (0)
+#define LIGHTREC_EXIT_SYSCALL  (1 << 0)
+#define LIGHTREC_EXIT_BREAK    (1 << 1)
+#define LIGHTREC_EXIT_CHECK_INTERRUPT  (1 << 2)
+#define LIGHTREC_EXIT_SEGFAULT (1 << 3)
+
+enum psx_map {
+       PSX_MAP_KERNEL_USER_RAM,
+       PSX_MAP_BIOS,
+       PSX_MAP_SCRATCH_PAD,
+       PSX_MAP_PARALLEL_PORT,
+       PSX_MAP_HW_REGISTERS,
+       PSX_MAP_CACHE_CONTROL,
+       PSX_MAP_MIRROR1,
+       PSX_MAP_MIRROR2,
+       PSX_MAP_MIRROR3,
+};
+
+enum mem_type {
+       MEM_FOR_CODE,
+       MEM_FOR_MIPS_CODE,
+       MEM_FOR_IR,
+       MEM_FOR_LIGHTREC,
+       MEM_TYPE_END,
+};
+
+struct lightrec_mem_map_ops {
+       void (*sb)(struct lightrec_state *, u32 addr, u8 data);
+       void (*sh)(struct lightrec_state *, u32 addr, u16 data);
+       void (*sw)(struct lightrec_state *, u32 addr, u32 data);
+       u8 (*lb)(struct lightrec_state *, u32 addr);
+       u16 (*lh)(struct lightrec_state *, u32 addr);
+       u32 (*lw)(struct lightrec_state *, u32 addr);
+};
+
+struct lightrec_mem_map {
+       u32 pc;
+       u32 length;
+       void *address;
+       const struct lightrec_mem_map_ops *ops;
+       const struct lightrec_mem_map *mirror_of;
+};
+
+struct lightrec_cop_ops {
+       u32 (*mfc)(struct lightrec_state *state, u8 reg);
+       u32 (*cfc)(struct lightrec_state *state, u8 reg);
+       void (*mtc)(struct lightrec_state *state, u8 reg, u32 value);
+       void (*ctc)(struct lightrec_state *state, u8 reg, u32 value);
+       void (*op)(struct lightrec_state *state, u32 opcode);
+};
+
+struct lightrec_ops {
+       struct lightrec_cop_ops cop0_ops;
+       struct lightrec_cop_ops cop2_ops;
+};
+
+__api struct lightrec_state *lightrec_init(char *argv0,
+                                          const struct lightrec_mem_map *map,
+                                          size_t nb,
+                                          const struct lightrec_ops *ops);
+
+__api void lightrec_destroy(struct lightrec_state *state);
+
+__api u32 lightrec_execute(struct lightrec_state *state,
+                          u32 pc, u32 target_cycle);
+__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc);
+__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc);
+
+__api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len);
+__api void lightrec_invalidate_all(struct lightrec_state *state);
+__api void lightrec_set_invalidate_mode(struct lightrec_state *state,
+                                       _Bool dma_only);
+
+__api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags);
+__api u32 lightrec_exit_flags(struct lightrec_state *state);
+
+__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]);
+__api void lightrec_restore_registers(struct lightrec_state *state,
+                                     u32 regs[34]);
+
+__api u32 lightrec_current_cycle_count(const struct lightrec_state *state);
+__api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles);
+__api void lightrec_set_target_cycle_count(struct lightrec_state *state,
+                                          u32 cycles);
+
+__api unsigned int lightrec_get_mem_usage(enum mem_type type);
+__api unsigned int lightrec_get_total_mem_usage(void);
+__api float lightrec_get_average_ipi(void);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* __LIGHTREC_H__ */
diff --git a/deps/lightrec/memmanager.c b/deps/lightrec/memmanager.c
new file mode 100644 (file)
index 0000000..2e6b99b
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "config.h"
+#include "lightrec-private.h"
+#include "memmanager.h"
+
+#include <stdlib.h>
+#if ENABLE_TINYMM
+#include <tinymm.h>
+#endif
+
+#ifdef ENABLE_THREADED_COMPILER
+#include <stdatomic.h>
+
+static atomic_uint lightrec_bytes[MEM_TYPE_END];
+
+void lightrec_register(enum mem_type type, unsigned int len)
+{
+       atomic_fetch_add(&lightrec_bytes[type], len);
+}
+
+void lightrec_unregister(enum mem_type type, unsigned int len)
+{
+       atomic_fetch_sub(&lightrec_bytes[type], len);
+}
+
+unsigned int lightrec_get_mem_usage(enum mem_type type)
+{
+       return atomic_load(&lightrec_bytes[type]);
+}
+
+#else /* ENABLE_THREADED_COMPILER */
+
+static unsigned int lightrec_bytes[MEM_TYPE_END];
+
+void lightrec_register(enum mem_type type, unsigned int len)
+{
+       lightrec_bytes[type] += len;
+}
+
+void lightrec_unregister(enum mem_type type, unsigned int len)
+{
+       lightrec_bytes[type] -= len;
+}
+
+unsigned int lightrec_get_mem_usage(enum mem_type type)
+{
+       return lightrec_bytes[type];
+}
+#endif /* ENABLE_THREADED_COMPILER */
+
+unsigned int lightrec_get_total_mem_usage(void)
+{
+       unsigned int i, count;
+
+       for (i = 0, count = 0; i < MEM_TYPE_END; i++)
+               count += lightrec_get_mem_usage((enum mem_type)i);
+
+       return count;
+}
+
+void * lightrec_malloc(struct lightrec_state *state,
+                      enum mem_type type, unsigned int len)
+{
+       void *ptr;
+
+#if ENABLE_TINYMM
+       if (type == MEM_FOR_IR)
+               ptr = tinymm_malloc(state->tinymm, len);
+       else
+#endif
+               ptr = malloc(len);
+       if (!ptr)
+               return NULL;
+
+       lightrec_register(type, len);
+
+       return ptr;
+}
+
+void * lightrec_calloc(struct lightrec_state *state,
+                      enum mem_type type, unsigned int len)
+{
+       void *ptr;
+
+#if ENABLE_TINYMM
+       if (type == MEM_FOR_IR)
+               ptr = tinymm_zalloc(state->tinymm, len);
+       else
+#endif
+               ptr = calloc(1, len);
+       if (!ptr)
+               return NULL;
+
+       lightrec_register(type, len);
+
+       return ptr;
+}
+
+void lightrec_free(struct lightrec_state *state,
+                  enum mem_type type, unsigned int len, void *ptr)
+{
+       lightrec_unregister(type, len);
+#if ENABLE_TINYMM
+       if (type == MEM_FOR_IR)
+               tinymm_free(state->tinymm, ptr);
+       else
+#endif
+               free(ptr);
+}
+
+float lightrec_get_average_ipi(void)
+{
+       unsigned int code_mem = lightrec_get_mem_usage(MEM_FOR_CODE);
+       unsigned int native_mem = lightrec_get_mem_usage(MEM_FOR_MIPS_CODE);
+
+       return native_mem ? (float)code_mem / (float)native_mem : 0.0f;
+}
diff --git a/deps/lightrec/memmanager.h b/deps/lightrec/memmanager.h
new file mode 100644 (file)
index 0000000..956e7c7
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __MEMMANAGER_H__
+#define __MEMMANAGER_H__
+
+#include "lightrec.h"
+
+void * lightrec_malloc(struct lightrec_state *state,
+                      enum mem_type type, unsigned int len);
+void * lightrec_calloc(struct lightrec_state *state,
+                      enum mem_type type, unsigned int len);
+void lightrec_free(struct lightrec_state *state,
+                  enum mem_type type, unsigned int len, void *ptr);
+
+void lightrec_register(enum mem_type type, unsigned int len);
+void lightrec_unregister(enum mem_type type, unsigned int len);
+
+#endif /* __MEMMANAGER_H__ */
diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c
new file mode 100644 (file)
index 0000000..92b4daa
--- /dev/null
@@ -0,0 +1,1021 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "disassembler.h"
+#include "lightrec.h"
+#include "memmanager.h"
+#include "optimizer.h"
+#include "regcache.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+struct optimizer_list {
+       void (**optimizers)(struct opcode *);
+       unsigned int nb_optimizers;
+};
+
+bool opcode_reads_register(union code op, u8 reg)
+{
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               switch (op.r.op) {
+               case OP_SPECIAL_SYSCALL:
+               case OP_SPECIAL_BREAK:
+                       return false;
+               case OP_SPECIAL_JR:
+               case OP_SPECIAL_JALR:
+               case OP_SPECIAL_MTHI:
+               case OP_SPECIAL_MTLO:
+                       return op.r.rs == reg;
+               case OP_SPECIAL_MFHI:
+                       return reg == REG_HI;
+               case OP_SPECIAL_MFLO:
+                       return reg == REG_LO;
+               case OP_SPECIAL_SLL:
+               case OP_SPECIAL_SRL:
+               case OP_SPECIAL_SRA:
+                       return op.r.rt == reg;
+               default:
+                       return op.r.rs == reg || op.r.rt == reg;
+               }
+       case OP_CP0:
+               switch (op.r.rs) {
+               case OP_CP0_MTC0:
+               case OP_CP0_CTC0:
+                       return op.r.rt == reg;
+               default:
+                       return false;
+               }
+       case OP_CP2:
+               if (op.r.op == OP_CP2_BASIC) {
+                       switch (op.r.rs) {
+                       case OP_CP2_BASIC_MTC2:
+                       case OP_CP2_BASIC_CTC2:
+                               return op.r.rt == reg;
+                       default:
+                               return false;
+                       }
+               } else {
+                       return false;
+               }
+       case OP_J:
+       case OP_JAL:
+       case OP_LUI:
+               return false;
+       case OP_BEQ:
+       case OP_BNE:
+       case OP_LWL:
+       case OP_LWR:
+       case OP_SB:
+       case OP_SH:
+       case OP_SWL:
+       case OP_SW:
+       case OP_SWR:
+               return op.i.rs == reg || op.i.rt == reg;
+       default:
+               return op.i.rs == reg;
+       }
+}
+
+bool opcode_writes_register(union code op, u8 reg)
+{
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               switch (op.r.op) {
+               case OP_SPECIAL_JR:
+               case OP_SPECIAL_JALR:
+               case OP_SPECIAL_SYSCALL:
+               case OP_SPECIAL_BREAK:
+                       return false;
+               case OP_SPECIAL_MULT:
+               case OP_SPECIAL_MULTU:
+               case OP_SPECIAL_DIV:
+               case OP_SPECIAL_DIVU:
+                       return reg == REG_LO || reg == REG_HI;
+               case OP_SPECIAL_MTHI:
+                       return reg == REG_HI;
+               case OP_SPECIAL_MTLO:
+                       return reg == REG_LO;
+               default:
+                       return op.r.rd == reg;
+               }
+       case OP_ADDI:
+       case OP_ADDIU:
+       case OP_SLTI:
+       case OP_SLTIU:
+       case OP_ANDI:
+       case OP_ORI:
+       case OP_XORI:
+       case OP_LUI:
+       case OP_LB:
+       case OP_LH:
+       case OP_LWL:
+       case OP_LW:
+       case OP_LBU:
+       case OP_LHU:
+       case OP_LWR:
+               return op.i.rt == reg;
+       case OP_CP0:
+               switch (op.r.rs) {
+               case OP_CP0_MFC0:
+               case OP_CP0_CFC0:
+                       return op.i.rt == reg;
+               default:
+                       return false;
+               }
+       case OP_CP2:
+               if (op.r.op == OP_CP2_BASIC) {
+                       switch (op.r.rs) {
+                       case OP_CP2_BASIC_MFC2:
+                       case OP_CP2_BASIC_CFC2:
+                               return op.i.rt == reg;
+                       default:
+                               return false;
+                       }
+               } else {
+                       return false;
+               }
+       case OP_META_MOV:
+               return op.r.rd == reg;
+       default:
+               return false;
+       }
+}
+
+/* TODO: Complete */
+static bool is_nop(union code op)
+{
+       if (opcode_writes_register(op, 0)) {
+               switch (op.i.op) {
+               case OP_CP0:
+                       return op.r.rs != OP_CP0_MFC0;
+               case OP_LB:
+               case OP_LH:
+               case OP_LWL:
+               case OP_LW:
+               case OP_LBU:
+               case OP_LHU:
+               case OP_LWR:
+                       return false;
+               default:
+                       return true;
+               }
+       }
+
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               switch (op.r.op) {
+               case OP_SPECIAL_AND:
+                       return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
+               case OP_SPECIAL_ADD:
+               case OP_SPECIAL_ADDU:
+                       return (op.r.rd == op.r.rt && op.r.rs == 0) ||
+                               (op.r.rd == op.r.rs && op.r.rt == 0);
+               case OP_SPECIAL_SUB:
+               case OP_SPECIAL_SUBU:
+                       return op.r.rd == op.r.rs && op.r.rt == 0;
+               case OP_SPECIAL_OR:
+                       if (op.r.rd == op.r.rt)
+                               return op.r.rd == op.r.rs || op.r.rs == 0;
+                       else
+                               return (op.r.rd == op.r.rs) && op.r.rt == 0;
+               case OP_SPECIAL_SLL:
+               case OP_SPECIAL_SRA:
+               case OP_SPECIAL_SRL:
+                       return op.r.rd == op.r.rt && op.r.imm == 0;
+               default:
+                       return false;
+               }
+       case OP_ORI:
+       case OP_ADDI:
+       case OP_ADDIU:
+               return op.i.rt == op.i.rs && op.i.imm == 0;
+       case OP_BGTZ:
+               return (op.i.rs == 0 || op.i.imm == 1);
+       case OP_REGIMM:
+               return (op.i.op == OP_REGIMM_BLTZ ||
+                               op.i.op == OP_REGIMM_BLTZAL) &&
+                       (op.i.rs == 0 || op.i.imm == 1);
+       case OP_BNE:
+               return (op.i.rs == op.i.rt || op.i.imm == 1);
+       default:
+               return false;
+       }
+}
+
+bool load_in_delay_slot(union code op)
+{
+       switch (op.i.op) {
+       case OP_CP0:
+               switch (op.r.rs) {
+               case OP_CP0_MFC0:
+               case OP_CP0_CFC0:
+                       return true;
+               default:
+                       break;
+               }
+
+               break;
+       case OP_CP2:
+               if (op.r.op == OP_CP2_BASIC) {
+                       switch (op.r.rs) {
+                       case OP_CP2_BASIC_MFC2:
+                       case OP_CP2_BASIC_CFC2:
+                               return true;
+                       default:
+                               break;
+                       }
+               }
+
+               break;
+       case OP_LB:
+       case OP_LH:
+       case OP_LW:
+       case OP_LWL:
+       case OP_LWR:
+       case OP_LBU:
+       case OP_LHU:
+               return true;
+       default:
+               break;
+       }
+
+       return false;
+}
+
+static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
+{
+       switch (c.i.op) {
+       case OP_SPECIAL:
+               switch (c.r.op) {
+               case OP_SPECIAL_SLL:
+                       if (known & BIT(c.r.rt)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] << c.r.imm;
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SRL:
+                       if (known & BIT(c.r.rt)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] >> c.r.imm;
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SRA:
+                       if (known & BIT(c.r.rt)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SLLV:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SRLV:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SRAV:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = (s32)v[c.r.rt]
+                                         >> (v[c.r.rs] & 0x1f);
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_ADD:
+               case OP_SPECIAL_ADDU:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SUB:
+               case OP_SPECIAL_SUBU:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_AND:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_OR:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_XOR:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_NOR:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SLT:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               case OP_SPECIAL_SLTU:
+                       if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
+                               known |= BIT(c.r.rd);
+                               v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
+                       } else {
+                               known &= ~BIT(c.r.rd);
+                       }
+                       break;
+               default:
+                       break;
+               }
+               break;
+       case OP_REGIMM:
+               break;
+       case OP_ADDI:
+       case OP_ADDIU:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_SLTI:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_SLTIU:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_ANDI:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = v[c.i.rs] & c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_ORI:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = v[c.i.rs] | c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_XORI:
+               if (known & BIT(c.i.rs)) {
+                       known |= BIT(c.i.rt);
+                       v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
+               } else {
+                       known &= ~BIT(c.i.rt);
+               }
+               break;
+       case OP_LUI:
+               known |= BIT(c.i.rt);
+               v[c.i.rt] = c.i.imm << 16;
+               break;
+       case OP_CP0:
+               switch (c.r.rs) {
+               case OP_CP0_MFC0:
+               case OP_CP0_CFC0:
+                       known &= ~BIT(c.r.rt);
+                       break;
+               }
+               break;
+       case OP_CP2:
+               if (c.r.op == OP_CP2_BASIC) {
+                       switch (c.r.rs) {
+                       case OP_CP2_BASIC_MFC2:
+                       case OP_CP2_BASIC_CFC2:
+                               known &= ~BIT(c.r.rt);
+                               break;
+                       }
+               }
+               break;
+       case OP_LB:
+       case OP_LH:
+       case OP_LWL:
+       case OP_LW:
+       case OP_LBU:
+       case OP_LHU:
+       case OP_LWR:
+       case OP_LWC2:
+               known &= ~BIT(c.i.rt);
+               break;
+       case OP_META_MOV:
+               if (known & BIT(c.r.rs)) {
+                       known |= BIT(c.r.rd);
+                       v[c.r.rd] = v[c.r.rs];
+               } else {
+                       known &= ~BIT(c.r.rd);
+               }
+               break;
+       default:
+               break;
+       }
+
+       return known;
+}
+
+static int lightrec_add_meta(struct block *block,
+                            struct opcode *op, union code code)
+{
+       struct opcode *meta;
+
+       meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
+       if (!meta)
+               return -ENOMEM;
+
+       meta->c = code;
+       meta->flags = 0;
+
+       if (op) {
+               meta->offset = op->offset;
+               meta->next = op->next;
+               op->next = meta;
+       } else {
+               meta->offset = 0;
+               meta->next = block->opcode_list;
+               block->opcode_list = meta;
+       }
+
+       return 0;
+}
+
+static int lightrec_add_sync(struct block *block, struct opcode *prev)
+{
+       return lightrec_add_meta(block, prev, (union code){
+                                .j.op = OP_META_SYNC,
+                                });
+}
+
+static int lightrec_transform_ops(struct block *block)
+{
+       struct opcode *list = block->opcode_list;
+
+       for (; list; list = list->next) {
+
+               /* Transform all opcodes detected as useless to real NOPs
+                * (0x0: SLL r0, r0, #0) */
+               if (list->opcode != 0 && is_nop(list->c)) {
+                       pr_debug("Converting useless opcode 0x%08x to NOP\n",
+                                       list->opcode);
+                       list->opcode = 0x0;
+               }
+
+               if (!list->opcode)
+                       continue;
+
+               switch (list->i.op) {
+               /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
+                * two registers is zero. */
+               case OP_BEQ:
+                       if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
+                               list->i.op = OP_META_BEQZ;
+                               if (list->i.rs == 0) {
+                                       list->i.rs = list->i.rt;
+                                       list->i.rt = 0;
+                               }
+                       } else if (list->i.rs == list->i.rt) {
+                               list->i.rs = 0;
+                               list->i.rt = 0;
+                       }
+                       break;
+               case OP_BNE:
+                       if (list->i.rs == 0) {
+                               list->i.op = OP_META_BNEZ;
+                               list->i.rs = list->i.rt;
+                               list->i.rt = 0;
+                       } else if (list->i.rt == 0) {
+                               list->i.op = OP_META_BNEZ;
+                       }
+                       break;
+
+               /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
+                * with register $zero to the MOV meta-opcode */
+               case OP_ORI:
+               case OP_ADDI:
+               case OP_ADDIU:
+                       if (list->i.imm == 0) {
+                               pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
+                               list->i.op = OP_META_MOV;
+                               list->r.rd = list->i.rt;
+                       }
+                       break;
+               case OP_SPECIAL:
+                       switch (list->r.op) {
+                       case OP_SPECIAL_SLL:
+                       case OP_SPECIAL_SRA:
+                       case OP_SPECIAL_SRL:
+                               if (list->r.imm == 0) {
+                                       pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
+                                       list->i.op = OP_META_MOV;
+                                       list->r.rs = list->r.rt;
+                               }
+                               break;
+                       case OP_SPECIAL_OR:
+                       case OP_SPECIAL_ADD:
+                       case OP_SPECIAL_ADDU:
+                               if (list->r.rs == 0) {
+                                       pr_debug("Convert OR/ADD $zero to MOV\n");
+                                       list->i.op = OP_META_MOV;
+                                       list->r.rs = list->r.rt;
+                               }
+                       case OP_SPECIAL_SUB: /* fall-through */
+                       case OP_SPECIAL_SUBU:
+                               if (list->r.rt == 0) {
+                                       pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
+                                       list->i.op = OP_META_MOV;
+                               }
+                       default: /* fall-through */
+                               break;
+                       }
+               default: /* fall-through */
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+/* Swap a branch opcode with the opcode in its delay slot whenever the
+ * delay slot opcode does not conflict with the registers the branch
+ * depends on (or, for linking branches, with $ra / the JALR link
+ * register). After the swap the branch no longer needs delay-slot
+ * handling and is flagged LIGHTREC_NO_DS. */
+static int lightrec_switch_delay_slots(struct block *block)
+{
+       struct opcode *list, *prev;
+       u8 flags;
+
+       for (list = block->opcode_list, prev = NULL; list->next;
+            prev = list, list = list->next) {
+               union code op = list->c;
+               union code next_op = list->next->c;
+
+               /* Only consider real branches that still own their delay
+                * slot and are not themselves emulated */
+               if (!has_delay_slot(op) ||
+                   list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
+                   op.opcode == 0)
+                       continue;
+
+               /* Skip a branch that sits in the delay slot of another */
+               if (prev && has_delay_slot(prev->c))
+                       continue;
+
+               /* 'continue' below means the swap is unsafe for this pair */
+               switch (list->i.op) {
+               case OP_SPECIAL:
+                       switch (op.r.op) {
+                       case OP_SPECIAL_JALR:
+                               /* JALR writes its link register rd */
+                               if (opcode_reads_register(next_op, op.r.rd) ||
+                                   opcode_writes_register(next_op, op.r.rd))
+                                       continue;
+                       case OP_SPECIAL_JR: /* fall-through */
+                               if (opcode_writes_register(next_op, op.r.rs))
+                                       continue;
+                       default: /* fall-through */
+                               break;
+                       }
+               case OP_J: /* fall-through */
+                       break;
+               case OP_JAL:
+                       /* JAL writes $ra (register 31) */
+                       if (opcode_reads_register(next_op, 31) ||
+                           opcode_writes_register(next_op, 31))
+                               continue;
+                       else
+                               break;
+               case OP_BEQ:
+               case OP_BNE:
+                       if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
+                               continue;
+               case OP_BLEZ: /* fall-through */
+               case OP_BGTZ:
+               case OP_META_BEQZ:
+               case OP_META_BNEZ:
+                       if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
+                               continue;
+                       break;
+               case OP_REGIMM:
+                       switch (op.r.rt) {
+                       case OP_REGIMM_BLTZAL:
+                       case OP_REGIMM_BGEZAL:
+                               /* Linking variants also touch $ra */
+                               if (opcode_reads_register(next_op, 31) ||
+                                   opcode_writes_register(next_op, 31))
+                                       continue;
+                       case OP_REGIMM_BLTZ: /* fall-through */
+                       case OP_REGIMM_BGEZ:
+                               if (op.i.rs &&
+                                   opcode_writes_register(next_op, op.i.rs))
+                                       continue;
+                               break;
+                       }
+               default: /* fall-through */
+                       break;
+               }
+
+               pr_debug("Swap branch and delay slot opcodes "
+                        "at offsets 0x%x / 0x%x\n", list->offset << 2,
+                        list->next->offset << 2);
+
+               /* Exchange the two opcodes and their flags; the branch
+                * (now second) is marked as having no delay slot */
+               flags = list->next->flags;
+               list->c = next_op;
+               list->next->c = op;
+               list->next->flags = list->flags | LIGHTREC_NO_DS;
+               list->flags = flags;
+               list->offset++;
+               list->next->offset--;
+       }
+
+       return 0;
+}
+
+/* Flag branches whose delay slot cannot be handled normally — the slot
+ * contains a load-in-delay-slot opcode, another branch, or a CP0 RFE —
+ * with LIGHTREC_EMULATE_BRANCH. If the delay slot is the exact same
+ * opcode as the branch, it is turned into a NOP instead. When the very
+ * first opcode of the block is such a branch, the block is truncated to
+ * just the branch and its delay slot. */
+static int lightrec_detect_impossible_branches(struct block *block)
+{
+       struct opcode *op, *next;
+
+       for (op = block->opcode_list, next = op->next; next;
+            op = next, next = op->next) {
+               if (!has_delay_slot(op->c) ||
+                   (!load_in_delay_slot(next->c) &&
+                    !has_delay_slot(next->c) &&
+                    !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
+                       continue;
+
+               if (op->c.opcode == next->c.opcode) {
+                       /* The delay slot is the exact same opcode as the branch
+                        * opcode: this is effectively a NOP */
+                       next->c.opcode = 0;
+                       continue;
+               }
+
+               if (op == block->opcode_list) {
+                       /* If the first opcode is an 'impossible' branch, we
+                        * only keep the first two opcodes of the block (the
+                        * branch itself + its delay slot) */
+                       lightrec_free_opcode_list(block->state, next->next);
+                       next->next = NULL;
+                       block->nb_ops = 2;
+               }
+
+               op->flags |= LIGHTREC_EMULATE_BRANCH;
+       }
+
+       return 0;
+}
+
+/* Detect conditional branches whose target offset lies within the
+ * current block and flag them LIGHTREC_LOCAL_BRANCH. A META_SYNC opcode
+ * is inserted just before the target when one is not already there. The
+ * flag is not set when the target must be emulated or is itself inside
+ * a delay slot. */
+static int lightrec_local_branches(struct block *block)
+{
+       struct opcode *list, *target, *prev;
+       s32 offset;
+       int ret;
+
+       for (list = block->opcode_list; list; list = list->next) {
+               if (list->flags & LIGHTREC_EMULATE_BRANCH)
+                       continue;
+
+               switch (list->i.op) {
+               case OP_BEQ:
+               case OP_BNE:
+               case OP_BLEZ:
+               case OP_BGTZ:
+               case OP_REGIMM:
+               case OP_META_BEQZ:
+               case OP_META_BNEZ:
+                       /* Branch target = next opcode + signed immediate */
+                       offset = list->offset + 1 + (s16)list->i.imm;
+                       if (offset >= 0 && offset < block->nb_ops)
+                               break;
+               default: /* fall-through */
+                       continue;
+               }
+
+               pr_debug("Found local branch to offset 0x%x\n", offset << 2);
+
+               for (target = block->opcode_list, prev = NULL;
+                    target; prev = target, target = target->next) {
+                       if (target->offset != offset ||
+                           target->j.op == OP_META_SYNC)
+                               continue;
+
+                       if (target->flags & LIGHTREC_EMULATE_BRANCH) {
+                               pr_debug("Branch target must be emulated"
+                                        " - skip\n");
+                               break;
+                       }
+
+                       if (prev && has_delay_slot(prev->c)) {
+                               pr_debug("Branch target is a delay slot"
+                                        " - skip\n");
+                               break;
+                       }
+
+                       /* Insert a sync point before the target so the
+                        * branch has somewhere to jump to */
+                       if (prev && prev->j.op != OP_META_SYNC) {
+                               pr_debug("Adding sync before offset "
+                                        "0x%x\n", offset << 2);
+                               ret = lightrec_add_sync(block, prev);
+                               if (ret)
+                                       return ret;
+
+                               prev->next->offset = target->offset;
+                       }
+
+                       list->flags |= LIGHTREC_LOCAL_BRANCH;
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+/* Return true if 'op' is a jump or branch opcode, i.e. an opcode that
+ * is followed by a delay slot. */
+bool has_delay_slot(union code op)
+{
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               switch (op.r.op) {
+               case OP_SPECIAL_JR:
+               case OP_SPECIAL_JALR:
+                       return true;
+               default:
+                       return false;
+               }
+       case OP_J:
+       case OP_JAL:
+       case OP_BEQ:
+       case OP_BNE:
+       case OP_BLEZ:
+       case OP_BGTZ:
+       case OP_REGIMM:
+       case OP_META_BEQZ:
+       case OP_META_BNEZ:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Insert a META_REG_UNLOAD opcode for register 'reg' right after 'op'. */
+static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
+{
+       return lightrec_add_meta(block, op, (union code){
+                                .i.op = OP_META_REG_UNLOAD,
+                                .i.rs = reg,
+                                });
+}
+
+/* For each register, find its last read and last write within the block
+ * and insert a META_REG_UNLOAD opcode after the last access, so the
+ * register can be released from the register cache early. Indices 1-31
+ * are the MIPS GPRs; 32 and 33 presumably map to LO/HI — TODO confirm
+ * against the register cache. */
+static int lightrec_early_unload(struct block *block)
+{
+       struct opcode *list = block->opcode_list;
+       u8 i;
+
+       for (i = 1; i < 34; i++) {
+               struct opcode *op, *last_r = NULL, *last_w = NULL;
+               unsigned int last_r_id = 0, last_w_id = 0, id = 0;
+               int ret;
+
+               for (op = list; op->next; op = op->next, id++) {
+                       if (opcode_reads_register(op->c, i)) {
+                               last_r = op;
+                               last_r_id = id;
+                       }
+
+                       if (opcode_writes_register(op->c, i)) {
+                               last_w = op;
+                               last_w_id = id;
+                       }
+               }
+
+               if (last_w_id > last_r_id) {
+                       /* Never unload between a branch and its delay slot */
+                       if (has_delay_slot(last_w->c) &&
+                           !(last_w->flags & LIGHTREC_NO_DS))
+                               last_w = last_w->next;
+
+                       if (last_w->next) {
+                               ret = lightrec_add_unload(block, last_w, i);
+                               if (ret)
+                                       return ret;
+                       }
+               } else if (last_r) {
+                       /* Never unload between a branch and its delay slot */
+                       if (has_delay_slot(last_r->c) &&
+                           !(last_r->flags & LIGHTREC_NO_DS))
+                               last_r = last_r->next;
+
+                       if (last_r->next) {
+                               ret = lightrec_add_unload(block, last_r, i);
+                               if (ret)
+                                       return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/* Tag store opcodes (SB/SH/SW) with optimisation flags:
+ * - LIGHTREC_NO_INVALIDATE when the address register is $k0/$k1/$gp/$sp
+ *   (registers 26-29), on the heuristic that stores through these
+ *   registers never hit a code page;
+ * - LIGHTREC_SMC when constant propagation proves the store lands inside
+ *   the current block, which also marks the whole block as never
+ *   compilable (BLOCK_NEVER_COMPILE). */
+static int lightrec_flag_stores(struct block *block)
+{
+       struct opcode *list;
+       u32 known = BIT(0);
+       u32 values[32] = { 0 };
+
+       for (list = block->opcode_list; list; list = list->next) {
+               /* Register $zero is always, well, zero */
+               known |= BIT(0);
+               values[0] = 0;
+
+               switch (list->i.op) {
+               case OP_SB:
+               case OP_SH:
+               case OP_SW:
+                       /* Mark all store operations that target $sp, $gp, $k0
+                        * or $k1 as not requiring code invalidation. This is
+                        * based on the heuristic that stores using one of these
+                        * registers as address will never hit a code page. */
+                       if (list->i.rs >= 26 && list->i.rs <= 29) {
+                               pr_debug("Flagging opcode 0x%08x as not requiring invalidation\n",
+                                        list->opcode);
+                               list->flags |= LIGHTREC_NO_INVALIDATE;
+                       }
+
+                       /* Detect writes whose destination address is inside the
+                        * current block, using constant propagation. When these
+                        * occur, we mark the blocks as not compilable. */
+                       if ((known & BIT(list->i.rs)) &&
+                           kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
+                           kunseg(values[list->i.rs]) < (kunseg(block->pc) +
+                                                         block->nb_ops * 4)) {
+                               pr_debug("Self-modifying block detected\n");
+                               block->flags |= BLOCK_NEVER_COMPILE;
+                               list->flags |= LIGHTREC_SMC;
+                       }
+               default: /* fall-through */
+                       break;
+               }
+
+               known = lightrec_propagate_consts(list->c, known, values);
+       }
+
+       return 0;
+}
+
+/* Walk the code following a MULT/MULTU opcode 'op' to decide whether its
+ * HI result can ever be observed. Returns true when HI gets overwritten
+ * (another MULT/DIV/MTHI) or the function returns (JR $ra, with no MFHI
+ * in the delay slot) before any MFHI is reached — in which case a 32-bit
+ * multiply is sufficient. Forward local branches are followed
+ * recursively; any other branch, an MFHI, or a JALR is treated
+ * conservatively and yields false. */
+static bool is_mult32(const struct block *block, const struct opcode *op)
+{
+       const struct opcode *next, *last = NULL;
+       u32 offset;
+
+       for (op = op->next; op != last; op = op->next) {
+               switch (op->i.op) {
+               case OP_BEQ:
+               case OP_BNE:
+               case OP_BLEZ:
+               case OP_BGTZ:
+               case OP_REGIMM:
+               case OP_META_BEQZ:
+               case OP_META_BNEZ:
+                       /* TODO: handle backwards branches too */
+                       if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
+                           (s16)op->c.i.imm >= 0) {
+                               offset = op->offset + 1 + (s16)op->c.i.imm;
+
+                               /* Find the branch target opcode */
+                               for (next = op; next->offset != offset;
+                                    next = next->next);
+
+                               /* Both the taken path and the fall-through
+                                * path (up to the target) must be 32-bit */
+                               if (!is_mult32(block, next))
+                                       return false;
+
+                               last = next;
+                               continue;
+                       } else {
+                               return false;
+                       }
+               case OP_SPECIAL:
+                       switch (op->r.op) {
+                       case OP_SPECIAL_MULT:
+                       case OP_SPECIAL_MULTU:
+                       case OP_SPECIAL_DIV:
+                       case OP_SPECIAL_DIVU:
+                       case OP_SPECIAL_MTHI:
+                               /* HI is overwritten before being read */
+                               return true;
+                       case OP_SPECIAL_JR:
+                               return op->r.rs == 31 &&
+                                       ((op->flags & LIGHTREC_NO_DS) ||
+                                        !(op->next->i.op == OP_SPECIAL &&
+                                          op->next->r.op == OP_SPECIAL_MFHI));
+                       case OP_SPECIAL_JALR:
+                       case OP_SPECIAL_MFHI:
+                               return false;
+                       default:
+                               continue;
+                       }
+               default:
+                       continue;
+               }
+       }
+
+       return last != NULL;
+}
+
+/* Flag every MULT/MULTU opcode whose HI result is provably never read
+ * (see is_mult32()) with LIGHTREC_MULT32, so it can be compiled as a
+ * 32-bit multiplication. */
+static int lightrec_flag_mults(struct block *block)
+{
+       struct opcode *list, *prev;
+
+       for (list = block->opcode_list, prev = NULL; list;
+            prev = list, list = list->next) {
+               if (list->i.op != OP_SPECIAL)
+                       continue;
+
+               switch (list->r.op) {
+               case OP_SPECIAL_MULT:
+               case OP_SPECIAL_MULTU:
+                       break;
+               default:
+                       continue;
+               }
+
+               /* Don't support MULT(U) opcodes in delay slots */
+               if (prev && has_delay_slot(prev->c))
+                       continue;
+
+               if (is_mult32(block, list)) {
+                       pr_debug("Mark MULT(U) opcode at offset 0x%x as"
+                                " 32-bit\n", list->offset << 2);
+                       list->flags |= LIGHTREC_MULT32;
+               }
+       }
+
+       return 0;
+}
+
+/* Optimisation passes, executed in this order by lightrec_optimize() */
+static int (*lightrec_optimizers[])(struct block *) = {
+       &lightrec_detect_impossible_branches,
+       &lightrec_transform_ops,
+       &lightrec_local_branches,
+       &lightrec_switch_delay_slots,
+       &lightrec_flag_stores,
+       &lightrec_flag_mults,
+       &lightrec_early_unload,
+};
+
+/* Run all optimisation passes on 'block', in order. Returns 0 on
+ * success, or the first non-zero error code returned by a pass. */
+int lightrec_optimize(struct block *block)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
+               int ret = lightrec_optimizers[i](block);
+
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
diff --git a/deps/lightrec/optimizer.h b/deps/lightrec/optimizer.h
new file mode 100644 (file)
index 0000000..d8def69
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __OPTIMIZER_H__
+#define __OPTIMIZER_H__
+
+#include "disassembler.h"
+
+struct block;
+
+_Bool opcode_reads_register(union code op, u8 reg);
+_Bool opcode_writes_register(union code op, u8 reg);
+_Bool has_delay_slot(union code op);
+_Bool load_in_delay_slot(union code op);
+
+int lightrec_optimize(struct block *block);
+
+#endif /* __OPTIMIZER_H__ */
diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c
new file mode 100644 (file)
index 0000000..379881a
--- /dev/null
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "debug.h"
+#include "interpreter.h"
+#include "lightrec-private.h"
+#include "memmanager.h"
+
+#include <errno.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+/* Node of the singly-linked list of blocks waiting to be compiled */
+struct block_rec {
+       struct block *block;
+       struct block_rec *next;
+};
+
+/* Background recompiler: a worker thread compiling queued blocks */
+struct recompiler {
+       struct lightrec_state *state;
+       pthread_t thd;          /* worker thread */
+       pthread_cond_t cond;    /* signaled when work is queued or done */
+       pthread_mutex_t mutex;  /* protects list, current_block, stop */
+       bool stop;              /* set to terminate the worker thread */
+       struct block *current_block;    /* block being compiled right now */
+       struct block_rec *list; /* pending blocks, most recently added first */
+};
+
+/* Unlink 'elm' from the recompiler's singly-linked list. The element is
+ * not freed. Callers hold rec->mutex when calling this. */
+static void slist_remove(struct recompiler *rec, struct block_rec *elm)
+{
+       struct block_rec *prev;
+
+       if (rec->list == elm) {
+               rec->list = elm->next;
+       } else {
+               for (prev = rec->list; prev && prev->next != elm; )
+                       prev = prev->next;
+               if (prev)
+                       prev->next = elm->next;
+       }
+}
+
+/* Compile every block queued in the recompiler's list. Called with
+ * rec->mutex held; the mutex is dropped while a block is compiled and
+ * re-acquired to update the list. rec->cond is signaled after each
+ * block so that waiters in lightrec_recompiler_remove() can re-check
+ * rec->current_block. */
+static void lightrec_compile_list(struct recompiler *rec)
+{
+       struct block_rec *next;
+       struct block *block;
+       int ret;
+
+       while (!!(next = rec->list)) {
+               block = next->block;
+               rec->current_block = block;
+
+               pthread_mutex_unlock(&rec->mutex);
+
+               ret = lightrec_compile_block(block);
+               if (ret) {
+                       pr_err("Unable to compile block at PC 0x%x: %d\n",
+                              block->pc, ret);
+               }
+
+               pthread_mutex_lock(&rec->mutex);
+
+               slist_remove(rec, next);
+               lightrec_free(rec->state, MEM_FOR_LIGHTREC,
+                             sizeof(*next), next);
+               pthread_cond_signal(&rec->cond);
+       }
+
+       rec->current_block = NULL;
+}
+
+/* Entry point of the background recompiler thread: sleep on the
+ * condition variable until work is queued (or stop is requested), then
+ * drain the work list. The mutex is held except while waiting on the
+ * condition or compiling a block (see lightrec_compile_list()). */
+static void * lightrec_recompiler_thd(void *d)
+{
+       struct recompiler *rec = d;
+
+       pthread_mutex_lock(&rec->mutex);
+
+       for (;;) {
+               do {
+                       pthread_cond_wait(&rec->cond, &rec->mutex);
+
+                       if (rec->stop) {
+                               pthread_mutex_unlock(&rec->mutex);
+                               return NULL;
+                       }
+
+               } while (!rec->list);
+
+               lightrec_compile_list(rec);
+       }
+}
+
+/* Allocate a recompiler, initialize its synchronization primitives and
+ * start the worker thread. Returns NULL on failure, with everything
+ * allocated so far torn down again. */
+struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
+{
+       struct recompiler *rec;
+       int ret;
+
+       rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec));
+       if (!rec) {
+               pr_err("Cannot create recompiler: Out of memory\n");
+               return NULL;
+       }
+
+       rec->state = state;
+       rec->stop = false;
+       rec->current_block = NULL;
+       rec->list = NULL;
+
+       ret = pthread_cond_init(&rec->cond, NULL);
+       if (ret) {
+               pr_err("Cannot init cond variable: %d\n", ret);
+               goto err_free_rec;
+       }
+
+       ret = pthread_mutex_init(&rec->mutex, NULL);
+       if (ret) {
+               pr_err("Cannot init mutex variable: %d\n", ret);
+               goto err_cnd_destroy;
+       }
+
+       ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec);
+       if (ret) {
+               pr_err("Cannot create recompiler thread: %d\n", ret);
+               goto err_mtx_destroy;
+       }
+
+       return rec;
+
+err_mtx_destroy:
+       pthread_mutex_destroy(&rec->mutex);
+err_cnd_destroy:
+       pthread_cond_destroy(&rec->cond);
+err_free_rec:
+       lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+       return NULL;
+}
+
+/* Stop the worker thread, join it, and release the recompiler and its
+ * synchronization primitives.
+ * NOTE(review): rec->stop is written before the mutex is taken; the
+ * worker only reads it with the mutex held after being signaled, so the
+ * flag is presumably observed through the lock — confirm. */
+void lightrec_free_recompiler(struct recompiler *rec)
+{
+       rec->stop = true;
+
+       /* Stop the thread */
+       pthread_mutex_lock(&rec->mutex);
+       pthread_cond_signal(&rec->cond);
+       pthread_mutex_unlock(&rec->mutex);
+       pthread_join(rec->thd, NULL);
+
+       pthread_mutex_destroy(&rec->mutex);
+       pthread_cond_destroy(&rec->cond);
+       lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+}
+
+/* Queue 'block' for background compilation and wake the worker thread.
+ * If the block is already queued it is moved to the front of the list
+ * instead; if it has already been compiled, nothing is done. Returns 0
+ * on success or -ENOMEM. */
+int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
+{
+       struct block_rec *block_rec, *prev;
+
+       pthread_mutex_lock(&rec->mutex);
+
+       for (block_rec = rec->list, prev = NULL; block_rec;
+            prev = block_rec, block_rec = block_rec->next) {
+               if (block_rec->block == block) {
+                       /* The block to compile is already in the queue - bump
+                        * it to the top of the list */
+                       if (prev) {
+                               prev->next = block_rec->next;
+                               block_rec->next = rec->list;
+                               rec->list = block_rec;
+                       }
+
+                       pthread_mutex_unlock(&rec->mutex);
+                       return 0;
+               }
+       }
+
+       /* By the time this function was called, the block has been recompiled
+        * and isn't in the wait list anymore. Just return here. */
+       if (block->function) {
+               pthread_mutex_unlock(&rec->mutex);
+               return 0;
+       }
+
+       block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
+                                   sizeof(*block_rec));
+       if (!block_rec) {
+               pthread_mutex_unlock(&rec->mutex);
+               return -ENOMEM;
+       }
+
+       pr_debug("Adding block PC 0x%x to recompiler\n", block->pc);
+
+       block_rec->block = block;
+       block_rec->next = rec->list;
+       rec->list = block_rec;
+
+       /* Signal the thread */
+       pthread_cond_signal(&rec->cond);
+       pthread_mutex_unlock(&rec->mutex);
+
+       return 0;
+}
+
+/* Remove 'block' from the compilation queue. If the worker thread is
+ * currently compiling that block, block until it is done (the worker
+ * signals rec->cond after each block). */
+void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
+{
+       struct block_rec *block_rec;
+
+       pthread_mutex_lock(&rec->mutex);
+
+       for (block_rec = rec->list; block_rec; block_rec = block_rec->next) {
+               if (block_rec->block == block) {
+                       if (block == rec->current_block) {
+                               /* Block is being recompiled - wait for
+                                * completion */
+                               do {
+                                       pthread_cond_wait(&rec->cond,
+                                                         &rec->mutex);
+                               } while (block == rec->current_block);
+                       } else {
+                               /* Block is not yet being processed - remove it
+                                * from the list */
+                               slist_remove(rec, block_rec);
+                               lightrec_free(rec->state, MEM_FOR_LIGHTREC,
+                                             sizeof(*block_rec), block_rec);
+                       }
+
+                       break;
+               }
+       }
+
+       pthread_mutex_unlock(&rec->mutex);
+}
+
+/* If 'block' is already compiled, return its native code pointer, after
+ * freeing its opcode list once the block is fully tagged. Otherwise run
+ * the block through the interpreter, updating *pc, and return NULL. The
+ * op_list_freed atomic flag is used as a lock on the opcode list so the
+ * background compiler cannot free it while the interpreter uses it. */
+void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
+{
+       bool freed;
+
+       if (likely(block->function)) {
+               if (block->flags & BLOCK_FULLY_TAGGED) {
+                       freed = atomic_flag_test_and_set(&block->op_list_freed);
+
+                       if (!freed) {
+                               pr_debug("Block PC 0x%08x is fully tagged"
+                                        " - free opcode list\n", block->pc);
+
+                               /* The block was already compiled but the opcode list
+                                * didn't get freed yet - do it now */
+                               lightrec_free_opcode_list(block->state,
+                                                         block->opcode_list);
+                               block->opcode_list = NULL;
+                       }
+               }
+
+               return block->function;
+       }
+
+       /* Mark the opcode list as freed, so that the threaded compiler won't
+        * free it while we're using it in the interpreter. */
+       freed = atomic_flag_test_and_set(&block->op_list_freed);
+
+       /* Block wasn't compiled yet - run the interpreter */
+       *pc = lightrec_emulate_block(block, *pc);
+
+       /* Release the flag only if we were the ones who set it */
+       if (!freed)
+               atomic_flag_clear(&block->op_list_freed);
+
+       /* The block got compiled while the interpreter was running.
+        * We can free the opcode list now. */
+       if (block->function && (block->flags & BLOCK_FULLY_TAGGED) &&
+           !atomic_flag_test_and_set(&block->op_list_freed)) {
+               pr_debug("Block PC 0x%08x is fully tagged"
+                        " - free opcode list\n", block->pc);
+
+               lightrec_free_opcode_list(block->state, block->opcode_list);
+               block->opcode_list = NULL;
+       }
+
+       return NULL;
+}
diff --git a/deps/lightrec/recompiler.h b/deps/lightrec/recompiler.h
new file mode 100644 (file)
index 0000000..99e82aa
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __LIGHTREC_RECOMPILER_H__
+#define __LIGHTREC_RECOMPILER_H__
+
+struct block;
+struct lightrec_state;
+struct recompiler;
+
+struct recompiler *lightrec_recompiler_init(struct lightrec_state *state);
+void lightrec_free_recompiler(struct recompiler *rec);
+int lightrec_recompiler_add(struct recompiler *rec, struct block *block);
+void lightrec_recompiler_remove(struct recompiler *rec, struct block *block);
+
+void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc);
+
+#endif /* __LIGHTREC_RECOMPILER_H__ */
diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c
new file mode 100644 (file)
index 0000000..0256015
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "debug.h"
+#include "memmanager.h"
+#include "regcache.h"
+
+#include <lightning.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+/* Book-keeping for one host (native) register. */
+struct native_register {
+       /* used: claimed by the emitter for the current opcode;
+        * loaded: holds the value of its emulated register;
+        * dirty: host value is newer than the in-memory register cache;
+        * output: allocated as an opcode output (written, not read);
+        * extend/extended: sign-extension requested / currently in effect;
+        * locked: may only be used as input until unlocked. */
+       bool used, loaded, dirty, output, extend, extended, locked;
+       /* Index of the emulated MIPS register mapped here, or -1 if none. */
+       s8 emulated_register;
+};
+
+struct regcache {
+       struct lightrec_state *state;
+       /* One slot per allocatable host register: NUM_REGS callee-saved
+        * (JIT_V*) followed by NUM_TEMPS caller-saved (JIT_R*). */
+       struct native_register lightrec_regs[NUM_REGS + NUM_TEMPS];
+};
+
+/* Names of the 32 MIPS GPRs, followed by "lo" and "hi"
+ * (indices 32/33 — REG_LO / REG_HI in regcache.h). */
+static const char * mips_regs[] = {
+       "zero",
+       "at",
+       "v0", "v1",
+       "a0", "a1", "a2", "a3",
+       "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+       "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+       "t8", "t9",
+       "k0", "k1",
+       "gp", "sp", "fp", "ra",
+       "lo", "hi",
+};
+
+/* Printable name of an emulated register, for debug output.
+ * No bounds check: 'reg' must be < ARRAY_SIZE(mips_regs) (i.e. 0..33). */
+const char * lightrec_reg_name(u8 reg)
+{
+       return mips_regs[reg];
+}
+
+/* Index of 'nreg' within the regcache's register array. */
+static inline u8 lightrec_reg_number(const struct regcache *cache,
+               const struct native_register *nreg)
+{
+       return (u8) (nreg - cache->lightrec_regs);
+}
+
+/* Translate a cache slot to the GNU lightning register number backing it. */
+static inline u8 lightrec_reg_to_lightning(const struct regcache *cache,
+               const struct native_register *nreg)
+{
+       u8 idx = lightrec_reg_number(cache, nreg);
+
+       /* Slots [0, NUM_REGS) live in JIT_V*, the remainder in JIT_R*. */
+       if (idx < NUM_REGS)
+               return JIT_V(idx);
+
+       return JIT_R(idx - NUM_REGS);
+}
+
+/* Reverse mapping: from a lightning register number back to its cache slot.
+ * GNU lightning does not fix how JIT_V* and JIT_R* are numbered relative to
+ * each other, nor whether successive registers count up or down — hence the
+ * ordering checks on the macro values below. */
+static inline struct native_register * lightning_reg_to_lightrec(
+               struct regcache *cache, u8 reg)
+{
+       /* True when 'reg' falls in the JIT_V range (callee-saved slots). */
+       if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) ||
+                       (JIT_V0 < JIT_R0 && reg < JIT_R0)) {
+               if (JIT_V1 > JIT_V0)
+                       return &cache->lightrec_regs[reg - JIT_V0];
+               else
+                       return &cache->lightrec_regs[JIT_V0 - reg];
+       } else {
+               if (JIT_R1 > JIT_R0)
+                       return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0];
+               else
+                       return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg];
+       }
+}
+
+/* Pick a host register usable as a scratch temporary, or NULL if all are
+ * claimed. */
+static struct native_register * alloc_temp(struct regcache *cache)
+{
+       struct native_register *fallback = NULL;
+       unsigned int i;
+
+       /* We search the register list in reverse order. As temporaries are
+        * meant to be used only in the emitter functions, they can be mapped to
+        * caller-saved registers, as they won't have to be saved back to
+        * memory. Prefer a register that is neither loaded nor dirty; failing
+        * that, fall back to the first unclaimed one found. */
+       for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
+               struct native_register *nreg = &cache->lightrec_regs[i - 1];
+
+               if (nreg->used)
+                       continue;
+
+               if (!nreg->loaded && !nreg->dirty)
+                       return nreg;
+
+               if (!fallback)
+                       fallback = nreg;
+       }
+
+       return fallback;
+}
+
+/* Locate the cache slot currently mapping emulated register 'reg', or NULL.
+ * When 'out' is set, locked registers are not eligible. */
+static struct native_register * find_mapped_reg(struct regcache *cache,
+                                               u8 reg, bool out)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
+               struct native_register *nreg = &cache->lightrec_regs[i];
+
+               if (nreg->emulated_register != reg)
+                       continue;
+
+               /* For any register but $zero, the mapping only counts if the
+                * value is actually present (loaded) or pending (dirty). */
+               if (reg && !nreg->loaded && !nreg->dirty)
+                       continue;
+
+               if (out && nreg->locked)
+                       continue;
+
+               return nreg;
+       }
+
+       return NULL;
+}
+
+/* Choose a host register to map emulated register 'reg' (input or output).
+ * Reuses an existing mapping when possible; otherwise prefers, in order:
+ * a clean unloaded register, a clean loaded one, then any unclaimed one.
+ * Returns NULL only when every register is in use. */
+static struct native_register * alloc_in_out(struct regcache *cache,
+                                            u8 reg, bool out)
+{
+       struct native_register *clean = NULL, *fallback = NULL;
+       struct native_register *nreg;
+       unsigned int i;
+
+       /* Try to find if the register is already mapped somewhere */
+       nreg = find_mapped_reg(cache, reg, out);
+       if (nreg)
+               return nreg;
+
+       for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
+               nreg = &cache->lightrec_regs[i];
+
+               if (nreg->used)
+                       continue;
+
+               /* Best case: neither dirty nor loaded. Loaded registers may
+                * be re-used later, so it's better to avoid re-using one if
+                * possible. */
+               if (!nreg->dirty && !nreg->loaded)
+                       return nreg;
+
+               /* Second best: not dirty (no store-back needed). */
+               if (!nreg->dirty && !clean)
+                       clean = nreg;
+
+               /* Last resort: any register that is not currently claimed. */
+               if (!fallback)
+                       fallback = nreg;
+       }
+
+       return clean ? clean : fallback;
+}
+
+/* Forget any mapping held by 'nreg' without storing anything back.
+ * NOTE(review): 'extend' is left untouched — it appears to be reset by
+ * lightrec_alloc_reg_out() on the next allocation; confirm intentional. */
+static void lightrec_discard_nreg(struct native_register *nreg)
+{
+       nreg->extended = false;
+       nreg->loaded = false;
+       nreg->output = false;
+       nreg->dirty = false;
+       nreg->used = false;
+       nreg->locked = false;
+       nreg->emulated_register = -1;
+}
+
+/* Evict 'nreg': write its value back to the in-memory register cache if
+ * dirty, then clear the mapping. 'jit_reg' must be the lightning register
+ * backing 'nreg'. */
+static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit,
+               struct native_register *nreg, u8 jit_reg)
+{
+       /* If we get a dirty register, store back the old value */
+       if (nreg->dirty) {
+               /* native_reg_cache entries are 32-bit wide, hence << 2 */
+               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+                       + (nreg->emulated_register << 2);
+
+               jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
+       }
+
+       lightrec_discard_nreg(nreg);
+}
+
+/* Public wrapper: evict whatever mapping occupies lightning register
+ * 'jit_reg', storing it back to memory first if dirty. */
+void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
+{
+       struct native_register *nreg;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+       lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+}
+
+/* lightrec_lock_reg: the register will be cleaned if dirty, then locked.
+ * A locked register can only be used as input, not output. */
+void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
+{
+       struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
+
+       lightrec_clean_reg(cache, _jit, jit_reg);
+
+       reg->locked = true;
+}
+
+/* Claim a specific lightning register for the emitter, evicting (and
+ * storing back, if dirty) whatever was mapped there. Returns 'jit_reg'. */
+u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
+{
+       struct native_register *nreg = lightning_reg_to_lightrec(cache, jit_reg);
+
+       lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+       nreg->used = true;
+
+       return jit_reg;
+}
+
+/* Allocate a scratch host register for use inside the emitter functions.
+ * Returns the lightning register number.
+ * NOTE(review): on exhaustion this returns 0, which may itself be a valid
+ * lightning register number — callers cannot detect failure; confirm. */
+u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit)
+{
+       u8 jit_reg;
+       struct native_register *nreg = alloc_temp(cache);
+       if (!nreg) {
+               /* No free register, no dirty register to free. */
+               pr_err("No more registers! Abandon ship!\n");
+               return 0;
+       }
+
+       jit_reg = lightrec_reg_to_lightning(cache, nreg);
+       lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+
+       nreg->used = true;
+       return jit_reg;
+}
+
+/* Map emulated register 'reg' to a host register for writing. The old value
+ * is NOT loaded from memory: the caller is expected to overwrite the whole
+ * register. Returns the lightning register number (0 on exhaustion). */
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
+{
+       u8 jit_reg;
+       struct native_register *nreg = alloc_in_out(cache, reg, true);
+       if (!nreg) {
+               /* No free register, no dirty register to free. */
+               pr_err("No more registers! Abandon ship!\n");
+               return 0;
+       }
+
+       jit_reg = lightrec_reg_to_lightning(cache, nreg);
+
+       /* If we get a dirty register that doesn't correspond to the one
+        * we're requesting, store back the old value */
+       if (nreg->emulated_register != reg)
+               lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+
+       nreg->extend = false;
+       nreg->used = true;
+       nreg->output = true;
+       nreg->emulated_register = reg;
+       return jit_reg;
+}
+
+/* Map emulated register 'reg' to a host register for reading, loading its
+ * current value from the in-memory register cache if it is not already in a
+ * host register. Register $zero is materialized as a 0 immediate instead of
+ * a load. Returns the lightning register number (0 on exhaustion). */
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
+{
+       u8 jit_reg;
+       bool reg_changed;
+       struct native_register *nreg = alloc_in_out(cache, reg, false);
+       if (!nreg) {
+               /* No free register, no dirty register to free. */
+               pr_err("No more registers! Abandon ship!\n");
+               return 0;
+       }
+
+       jit_reg = lightrec_reg_to_lightning(cache, nreg);
+
+       /* If we get a dirty register that doesn't correspond to the one
+        * we're requesting, store back the old value */
+       reg_changed = nreg->emulated_register != reg;
+       if (reg_changed)
+               lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+
+       if (!nreg->loaded && !nreg->dirty && reg != 0) {
+               /* 32-bit slots in native_reg_cache, hence << 2 */
+               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+                       + (reg << 2);
+
+               /* Load previous value from register cache */
+               jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+               nreg->loaded = true;
+               nreg->extended = true;
+       }
+
+       /* Clear register r0 before use */
+       if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
+               jit_movi(jit_reg, 0);
+               nreg->extended = true;
+               nreg->loaded = true;
+       }
+
+       nreg->used = true;
+       nreg->output = false;
+       nreg->emulated_register = reg;
+       return jit_reg;
+}
+
+/* Same as lightrec_alloc_reg_out(), but the value the caller writes will be
+ * flagged for sign-extension to the host word size. */
+u8 lightrec_alloc_reg_out_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
+{
+       u8 jit_reg = lightrec_alloc_reg_out(cache, _jit, reg);
+
+       lightning_reg_to_lightrec(cache, jit_reg)->extend = true;
+
+       return jit_reg;
+}
+
+/* Same as lightrec_alloc_reg_in(), but on 64-bit hosts the 32-bit value is
+ * guaranteed to be sign-extended to the full host word before returning. */
+u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
+{
+       struct native_register *nreg;
+       u8 jit_reg;
+
+       jit_reg = lightrec_alloc_reg_in(cache, _jit, reg);
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+
+#if __WORDSIZE == 64
+       /* Only emit the extension when the register isn't known-extended. */
+       if (!nreg->extended) {
+               nreg->extended = true;
+               jit_extr_i(jit_reg, jit_reg);
+       }
+#endif
+
+       return jit_reg;
+}
+
+/* Map emulated register 'reg' for reading, preferring the caller-supplied
+ * lightning register 'jit_reg'. If 'reg' is already mapped to some other
+ * host register, that existing mapping is reused and returned instead.
+ * NOTE(review): 'offset' is u16 here but s16 in clean_reg() and
+ * lightrec_unload_nreg() — presumably harmless for these offsets; confirm. */
+u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
+                          u8 reg, u8 jit_reg)
+{
+       struct native_register *nreg;
+       u16 offset;
+
+       nreg = find_mapped_reg(cache, reg, false);
+       if (nreg) {
+               jit_reg = lightrec_reg_to_lightning(cache, nreg);
+               nreg->used = true;
+               return jit_reg;
+       }
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+       lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+
+       /* Load previous value from register cache */
+       offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2);
+       jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+
+       nreg->extended = true;
+       nreg->used = true;
+       nreg->loaded = true;
+       nreg->emulated_register = reg;
+
+       return jit_reg;
+}
+
+/* Release a register claimed for the current opcode. */
+static void free_reg(struct native_register *nreg)
+{
+       /* Set output registers as dirty — but never for $zero (index 0),
+        * which must not be written back. */
+       if (nreg->used && nreg->output && nreg->emulated_register > 0)
+               nreg->dirty = true;
+       /* Latch the requested sign-extension state for output registers. */
+       if (nreg->output)
+               nreg->extended = nreg->extend;
+       nreg->used = false;
+}
+
+/* Release the mapping claimed on a single lightning register. */
+void lightrec_free_reg(struct regcache *cache, u8 jit_reg)
+{
+       struct native_register *nreg;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+       free_reg(nreg);
+}
+
+/* Release every host register claimed during the current opcode. */
+void lightrec_free_regs(struct regcache *cache)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_REGS + NUM_TEMPS; i++)
+               free_reg(&cache->lightrec_regs[i]);
+}
+
+/* Store a dirty register back to the in-memory cache. When 'clean' is true
+ * the dirty flag is dropped (host register and memory now agree); when
+ * false this is a plain store-back and the register stays dirty. */
+static void clean_reg(jit_state_t *_jit,
+               struct native_register *nreg, u8 jit_reg, bool clean)
+{
+       if (nreg->dirty) {
+               s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+                       + (nreg->emulated_register << 2);
+
+               jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
+               /* The stored value matches memory again, so it counts as
+                * loaded; dirty is only toggled off when 'clean' is set. */
+               nreg->loaded |= nreg->dirty;
+               nreg->dirty ^= clean;
+       }
+}
+
+/* Store back every dirty register; 'clean' selects whether the dirty flags
+ * are dropped afterwards. */
+static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_REGS + NUM_TEMPS; i++) {
+               u8 jit_reg = i < NUM_REGS ? JIT_V(i) : JIT_R(i - NUM_REGS);
+
+               clean_reg(_jit, &cache->lightrec_regs[i], jit_reg, clean);
+       }
+}
+
+/* Write every dirty register back to memory, keeping it marked dirty. */
+void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit)
+{
+       clean_regs(cache, _jit, false);
+}
+
+/* Write every dirty register back to memory and mark it clean. */
+void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit)
+{
+       clean_regs(cache, _jit, true);
+}
+
+/* Store back (and mark clean) a single lightning register, if dirty. */
+void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
+{
+       struct native_register *nreg;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg);
+       clean_reg(_jit, nreg, jit_reg, true);
+}
+
+/* If emulated register 'reg' is currently mapped, store it back to memory;
+ * additionally drop the mapping entirely when 'unload' is set. */
+void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
+                                 u8 reg, bool unload)
+{
+       struct native_register *nreg = find_mapped_reg(cache, reg, false);
+       u8 jit_reg;
+
+       if (!nreg)
+               return;
+
+       jit_reg = lightrec_reg_to_lightning(cache, nreg);
+
+       if (unload)
+               lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+       else
+               clean_reg(_jit, nreg, jit_reg, true);
+}
+
+/* Snapshot the current register mappings so they can be restored when code
+ * generation returns from a branch path (see _leave_branch below).
+ * NOTE(review): the lightrec_malloc() result is dereferenced unchecked —
+ * presumably it cannot fail or aborts internally; confirm in memmanager.c. */
+struct native_register * lightrec_regcache_enter_branch(struct regcache *cache)
+{
+       struct native_register *backup;
+
+       backup = lightrec_malloc(cache->state, MEM_FOR_LIGHTREC,
+                                sizeof(cache->lightrec_regs));
+       memcpy(backup, &cache->lightrec_regs, sizeof(cache->lightrec_regs));
+
+       return backup;
+}
+
+/* Restore the register mappings saved by _enter_branch and free the backup.
+ * 'regs' must not be used afterwards. */
+void lightrec_regcache_leave_branch(struct regcache *cache,
+                       struct native_register *regs)
+{
+       const size_t sz = sizeof(cache->lightrec_regs);
+
+       memcpy(&cache->lightrec_regs, regs, sz);
+       lightrec_free(cache->state, MEM_FOR_LIGHTREC, sz, regs);
+}
+
+/* Drop every mapping without storing anything back (all flags cleared). */
+void lightrec_regcache_reset(struct regcache *cache)
+{
+       memset(&cache->lightrec_regs, 0, sizeof(cache->lightrec_regs));
+}
+
+/* Allocate a zero-initialized register cache bound to 'state'.
+ * Returns NULL on allocation failure. */
+struct regcache * lightrec_regcache_init(struct lightrec_state *state)
+{
+       struct regcache *cache;
+
+       cache = lightrec_calloc(state, MEM_FOR_LIGHTREC, sizeof(*cache));
+       if (cache)
+               cache->state = state;
+
+       return cache;
+}
+
+/* Free a register cache allocated by lightrec_regcache_init().
+ * Note: the original returned the (void) result of lightrec_free(), which
+ * is a constraint violation in ISO C (C99 6.8.6.4); drop the 'return'. */
+void lightrec_free_regcache(struct regcache *cache)
+{
+       lightrec_free(cache->state, MEM_FOR_LIGHTREC,
+                     sizeof(*cache), cache);
+}
+
+/* Tell GNU lightning which registers currently hold meaningful values, so
+ * its own register allocation won't clobber them. */
+void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit)
+{
+       struct native_register *nreg;
+       unsigned int i;
+
+#ifdef _WIN32
+       /* FIXME: GNU Lightning on Windows seems to use our mapped registers as
+        * temporaries. Until the actual bug is found and fixed, unconditionally
+        * mark our registers as live here. */
+       for (i = 0; i < NUM_REGS; i++) {
+               nreg = &cache->lightrec_regs[i];
+
+               if (nreg->used || nreg->loaded || nreg->dirty)
+                       jit_live(JIT_V(i));
+       }
+#endif
+
+       /* Caller-saved temporaries always need to be declared live. */
+       for (i = 0; i < NUM_TEMPS; i++) {
+               nreg = &cache->lightrec_regs[NUM_REGS + i];
+
+               if (nreg->used || nreg->loaded || nreg->dirty)
+                       jit_live(JIT_R(i));
+       }
+}
diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h
new file mode 100644 (file)
index 0000000..956cc3c
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2014 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#ifndef __REGCACHE_H__
+#define __REGCACHE_H__
+
+#include "lightrec-private.h"
+
+/* Two callee-saved (JIT_V) registers are reserved below for the state
+ * pointer and the cycle counter; the rest are allocatable. */
+#define NUM_REGS (JIT_V_NUM - 2)
+#define NUM_TEMPS (JIT_R_NUM)
+#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
+#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
+
+/* Pseudo indices for the MIPS lo/hi registers, after the 32 GPRs. */
+#define REG_LO 32
+#define REG_HI 33
+
+/* A possibly compile-time-known register value. */
+struct register_value {
+       _Bool known;
+       u32 value;
+};
+
+struct native_register;
+struct regcache;
+
+/* Register allocation: 'jit_reg' parameters are lightning register numbers,
+ * 'reg' parameters are emulated MIPS register indices (0..33). The *_ext
+ * variants additionally handle sign-extension on 64-bit hosts. */
+u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
+u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit);
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg);
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg);
+u8 lightrec_alloc_reg_out_ext(struct regcache *cache,
+                             jit_state_t *_jit, u8 reg);
+u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg);
+
+u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
+                          u8 reg, u8 jit_reg);
+
+void lightrec_regcache_reset(struct regcache *cache);
+
+/* Store-back / eviction helpers; see regcache.c for exact semantics. */
+void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
+void lightrec_free_reg(struct regcache *cache, u8 jit_reg);
+void lightrec_free_regs(struct regcache *cache);
+void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
+void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit);
+void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
+void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit);
+
+void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
+                                 u8 reg, _Bool unload);
+
+/* NOTE(review): no definition of this function is visible in regcache.c —
+ * confirm it is implemented elsewhere or remove the declaration. */
+u8 lightrec_alloc_reg_in_address(struct regcache *cache,
+               jit_state_t *_jit, u8 reg, s16 offset);
+
+/* Save/restore the mapping state around branch code generation. */
+struct native_register * lightrec_regcache_enter_branch(struct regcache *cache);
+void lightrec_regcache_leave_branch(struct regcache *cache,
+                       struct native_register *regs);
+
+struct regcache * lightrec_regcache_init(struct lightrec_state *state);
+void lightrec_free_regcache(struct regcache *cache);
+
+const char * lightrec_reg_name(u8 reg);
+
+void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit);
+
+#endif /* __REGCACHE_H__ */