From fc5d46b49a19d41f9f2da5a9336daec452900475 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 24 Sep 2013 21:55:40 +0200 Subject: [PATCH] RSP LLE plugin. Compile and run (slowly, eat 50% CPU) on the OpenPandora --- source/mupen64plus-rsp-z64/.hg_archival.txt | 5 + source/mupen64plus-rsp-z64/.hgignore | 4 + source/mupen64plus-rsp-z64/.hgtags | 3 + source/mupen64plus-rsp-z64/COPYING | 339 ++ .../projects/unix/Makefile | 322 ++ source/mupen64plus-rsp-z64/src/main_rsp.cpp | 249 ++ source/mupen64plus-rsp-z64/src/rsp.cpp | 3093 +++++++++++++++++ source/mupen64plus-rsp-z64/src/rsp.h | 449 +++ .../src/rsp_api_export.ver | 8 + source/mupen64plus-rsp-z64/src/rsp_dasm.cpp | 377 ++ source/mupen64plus-rsp-z64/src/rsp_gen.cpp | 693 ++++ source/mupen64plus-rsp-z64/src/rsp_gen.h | 349 ++ source/mupen64plus-rsp-z64/src/rsp_opinfo.cpp | 560 +++ source/mupen64plus-rsp-z64/src/rsp_opinfo.h | 200 ++ source/mupen64plus-rsp-z64/src/rsp_recomp.cpp | 574 +++ source/mupen64plus-rsp-z64/src/rsp_recomp.h | 29 + source/mupen64plus-rsp-z64/src/z64.h | 90 + 17 files changed, 7344 insertions(+) create mode 100644 source/mupen64plus-rsp-z64/.hg_archival.txt create mode 100644 source/mupen64plus-rsp-z64/.hgignore create mode 100644 source/mupen64plus-rsp-z64/.hgtags create mode 100644 source/mupen64plus-rsp-z64/COPYING create mode 100755 source/mupen64plus-rsp-z64/projects/unix/Makefile create mode 100644 source/mupen64plus-rsp-z64/src/main_rsp.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp.h create mode 100644 source/mupen64plus-rsp-z64/src/rsp_api_export.ver create mode 100644 source/mupen64plus-rsp-z64/src/rsp_dasm.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp_gen.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp_gen.h create mode 100644 source/mupen64plus-rsp-z64/src/rsp_opinfo.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp_opinfo.h create mode 100644 
source/mupen64plus-rsp-z64/src/rsp_recomp.cpp create mode 100644 source/mupen64plus-rsp-z64/src/rsp_recomp.h create mode 100644 source/mupen64plus-rsp-z64/src/z64.h diff --git a/source/mupen64plus-rsp-z64/.hg_archival.txt b/source/mupen64plus-rsp-z64/.hg_archival.txt new file mode 100644 index 0000000..a9ef836 --- /dev/null +++ b/source/mupen64plus-rsp-z64/.hg_archival.txt @@ -0,0 +1,5 @@ +repo: f788ddc2d4711f6ac1125c99e9e4c5ebd3827ea7 +node: c44d274a4c4ab80d4b88353c045a78273ee76915 +branch: default +latesttag: 2.0.0 +latesttagdistance: 1 diff --git a/source/mupen64plus-rsp-z64/.hgignore b/source/mupen64plus-rsp-z64/.hgignore new file mode 100644 index 0000000..b93dc89 --- /dev/null +++ b/source/mupen64plus-rsp-z64/.hgignore @@ -0,0 +1,4 @@ +syntax: regexp + +^projects/unix/_obj/ +^projects/unix/mupen64plus-rsp-z64.so$ diff --git a/source/mupen64plus-rsp-z64/.hgtags b/source/mupen64plus-rsp-z64/.hgtags new file mode 100644 index 0000000..2bfd467 --- /dev/null +++ b/source/mupen64plus-rsp-z64/.hgtags @@ -0,0 +1,3 @@ +0990ff8d7dcc61cd88e049c91eeaedb0bfbe70d7 1.99.4 +04507d8b67d1817deacdf4f0ca05a6b16af02f10 1.99.5 +3f5658dd8b57d4f1414a7ab858986c2589253590 2.0.0 diff --git a/source/mupen64plus-rsp-z64/COPYING b/source/mupen64plus-rsp-z64/COPYING new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/source/mupen64plus-rsp-z64/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary.
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/source/mupen64plus-rsp-z64/projects/unix/Makefile b/source/mupen64plus-rsp-z64/projects/unix/Makefile new file mode 100755 index 0000000..92a1b8f --- /dev/null +++ b/source/mupen64plus-rsp-z64/projects/unix/Makefile @@ -0,0 +1,322 @@ +#/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +# * mupen64plus-rsp-z64 - Makefile * +# * http://bitbucket.org/wahrhaft/mupen64plus-rsp-z64/ * +# * Copyright (C) 2010 Jon Ring * +# * Copyright (C) 2008-2009 Richard Goedeken * +# * Copyright (C) 2007-2008 DarkJeztr Tillin9 * +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of the GNU General Public License as published by * +# * the Free Software Foundation; either version 2 of the License, or * +# * (at your option) any later version. * +# * * +# * This program is distributed in the hope that it will be useful, * +# * but WITHOUT ANY WARRANTY; without even the implied warranty of * +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +# * GNU General Public License for more details. * +# * * +# * You should have received a copy of the GNU General Public License * +# * along with this program; if not, write to the * +# * Free Software Foundation, Inc., * +# * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* +# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +# Makefile for Z64 RSP plugin in Mupen64plus. + +# detect operating system +UNAME ?= $(shell uname -s) +OS := NONE +ifeq ("$(UNAME)","Linux") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","linux") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifneq ("$(filter GNU hurd,$(UNAME))","") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","Darwin") + OS = OSX + SO_EXTENSION = dylib + SHARED = -bundle +endif +ifeq ("$(UNAME)","FreeBSD") + OS = FREEBSD + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","OpenBSD") + OS = FREEBSD + SO_EXTENSION = so + SHARED = -shared + $(warning OS type "$(UNAME)" not officially supported.') +endif +ifneq ("$(filter GNU/kFreeBSD kfreebsd,$(UNAME))","") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(patsubst MINGW%,MINGW,$(UNAME))","MINGW") + OS = MINGW + SO_EXTENSION = dll + SHARED = -shared + PIC = 0 +endif +ifeq ("$(OS)","NONE") + $(error OS type "$(UNAME)" not supported. 
Please file bug report at 'http://code.google.com/p/mupen64plus/issues') +endif + +# detect system architecture +HOST_CPU ?= $(shell uname -m) +NO_ASM ?= 1 +CPU := NONE +ifneq ("$(filter x86_64 amd64,$(HOST_CPU))","") + CPU := X86 + ifeq ("$(BITS)", "32") + ARCH_DETECTED := 64BITS_32 + PIC ?= 0 + else + ARCH_DETECTED := 64BITS + PIC ?= 1 + endif +endif +ifneq ("$(filter pentium i%86,$(HOST_CPU))","") + CPU := X86 + ARCH_DETECTED := 32BITS + PIC ?= 0 +endif +ifneq ("$(filter ppc macppc socppc powerpc,$(HOST_CPU))","") + CPU := PPC + ARCH_DETECTED := 32BITS + BIG_ENDIAN := 1 + PIC ?= 1 + $(warning Architecture "$(HOST_CPU)" not officially supported.') +endif +ifneq ("$(filter ppc64 powerpc64,$(HOST_CPU))","") + CPU := PPC + ARCH_DETECTED := 64BITS + BIG_ENDIAN := 1 + PIC ?= 1 + $(warning Architecture "$(HOST_CPU)" not officially supported.') +endif +ifneq ("$(filter arm%,$(HOST_CPU))","") + ifeq ("$(filter arm%b,$(HOST_CPU))","") + CPU := ARM + ARCH_DETECTED := 32BITS + PIC ?= 1 + HLEVIDEO ?= 1 + CFLAGS += -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -mtune=cortex-a8 -fsigned-char + $(warning Architecture "$(HOST_CPU)" not officially supported.') + endif +endif +ifeq ("$(CPU)","NONE") + $(error CPU type "$(HOST_CPU)" not supported. 
Please file bug report at 'http://code.google.com/p/mupen64plus/issues') +endif + +# base CFLAGS, LDLIBS, and LDFLAGS +OPTFLAGS ?= -O3 -flto +WARNFLAGS ?= -Wall +CFLAGS += $(OPTFLAGS) $(WARNFLAGS) -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src +CXXFLAGS += -fvisibility-inlines-hidden +LDFLAGS += $(SHARED) + +# Since we are building a shared library, we must compile with -fPIC on some architectures +# On 32-bit x86 systems we do not want to use -fPIC because we don't have to and it has a big performance penalty on this arch +ifeq ($(PIC), 1) + CFLAGS += -fPIC +else + CFLAGS += -fno-PIC +endif + +ifeq ($(HLEVIDEO), 1) + CFLAGS += -DVIDEO_HLE_ALLOWED + POSTFIX = -hlevideo +endif + +ifeq ($(BIG_ENDIAN), 1) + CFLAGS += -DM64P_BIG_ENDIAN +endif + +# tweak flags for 32-bit build on 64-bit system +ifeq ($(ARCH_DETECTED), 64BITS_32) + ifeq ($(OS), FREEBSD) + $(error Do not use the BITS=32 option with FreeBSD, use -m32 and -m elf_i386) + endif + CFLAGS += -m32 + LDFLAGS += -Wl,-m,elf_i386 +endif + +# set special flags per-system +ifeq ($(OS), LINUX) + # only export api symbols + LDFLAGS += -Wl,-version-script,$(SRCDIR)/rsp_api_export.ver + LDLIBS += -ldl +endif +ifeq ($(OS), FREEBSD) + LDLIBS += -lc +endif +ifeq ($(OS), OSX) + # Select the proper SDK + # Also, SDKs are stored in a different location since XCode 4.3 + OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .) + OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .) + OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .) 
+ OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`") + ifeq ($(OSX_XCODEGE43), 11) + OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs + else + OSX_SYSROOT := /Developer/SDKs + endif + + ifeq ($(CPU), X86) + ifeq ($(ARCH_DETECTED), 64BITS) + CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk + LDLIBS += -ldl + else + CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk + LDLIBS += -ldl + endif + endif +endif + +# set mupen64plus core API header path +ifneq ("$(APIDIR)","") + CFLAGS += "-I$(APIDIR)" +else + TRYDIR = ../../../mupen64plus-core/src/api + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + TRYDIR = /usr/local/include/mupen64plus + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + TRYDIR = /usr/include/mupen64plus + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + $(error Mupen64Plus API header files not found! Use makefile parameter APIDIR to force a location.) 
+ endif + endif + endif +endif + +# reduced compile output when running make without V=1 +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + Q_CC = @echo ' CC '$@; + Q_CXX = @echo ' CXX '$@; + Q_LD = @echo ' LD '$@; +endif +endif + +# set base program pointers and flags +CC = $(CROSS_COMPILE)gcc +CXX = $(CROSS_COMPILE)g++ +RM ?= rm -f +INSTALL ?= install +MKDIR ?= mkdir -p +COMPILE.c = $(Q_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +COMPILE.cc = $(Q_CXX)$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +LINK.o = $(Q_LD)$(CXX) $(CXXFLAGS) $(LDFLAGS) $(TARGET_ARCH) + +# set special flags for given Makefile parameters +ifeq ($(DEBUG),1) + CFLAGS += -g + INSTALL_STRIP_FLAG ?= +else + INSTALL_STRIP_FLAG ?= -s +endif + +# set installation options +ifeq ($(PREFIX),) + PREFIX := /usr/local +endif +ifeq ($(LIBDIR),) + LIBDIR := $(PREFIX)/lib +endif +ifeq ($(PLUGINDIR),) + PLUGINDIR := $(LIBDIR)/mupen64plus +endif + +SRCDIR = ../../src +OBJDIR = _obj$(POSTFIX) + +# list of source files to compile +SOURCE = \ + $(SRCDIR)/rsp.cpp \ + $(SRCDIR)/rsp_opinfo.cpp \ + $(SRCDIR)/rsp_recomp.cpp \ + $(SRCDIR)/rsp_dasm.cpp \ + $(SRCDIR)/main_rsp.cpp + +# generate a list of object files build, make a temporary directory for them +OBJECTS := $(patsubst $(SRCDIR)/%.c, $(OBJDIR)/%.o, $(filter %.c, $(SOURCE))) +OBJECTS += $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(filter %.cpp, $(SOURCE))) +OBJDIRS = $(dir $(OBJECTS)) +$(shell $(MKDIR) $(OBJDIRS)) + +# build targets +TARGET = mupen64plus-rsp-z64$(POSTFIX).$(SO_EXTENSION) + +targets: + @echo "Mupen64Plus-rsp-z64 makefile. 
" + @echo " Targets:" + @echo " all == Build Mupen64Plus rsp-hle plugin" + @echo " clean == remove object files" + @echo " rebuild == clean and re-build all" + @echo " install == Install Mupen64Plus rsp-hle plugin" + @echo " uninstall == Uninstall Mupen64Plus rsp-hle plugin" + @echo " Options:" + @echo " BITS=32 == build 32-bit binaries on 64-bit machine" + @echo " APIDIR=path == path to find Mupen64Plus Core headers" + @echo " OPTFLAGS=flag == compiler optimization (default: -O3 -flto)" + @echo " WARNFLAGS=flag == compiler warning levels (default: -Wall)" + @echo " PIC=(1|0) == Force enable/disable of position independent code" + @echo " HLEVIDEO=(1|0) == Move task of gfx emulation to a HLE video plugins" + @echo " POSTFIX=name == String added to the name of the the build (default: '')" + @echo " Install Options:" + @echo " PREFIX=path == install/uninstall prefix (default: /usr/local)" + @echo " LIBDIR=path == library prefix (default: PREFIX/lib)" + @echo " PLUGINDIR=path == path to install plugin libraries (default: LIBDIR/mupen64plus)" + @echo " DESTDIR=path == path to prepend to all installation paths (only for packagers)" + @echo " Debugging Options:" + @echo " DEBUG=1 == add debugging symbols" + @echo " V=1 == show verbose compiler output" + +all: $(TARGET) + +install: $(TARGET) + $(INSTALL) -d "$(DESTDIR)$(PLUGINDIR)" + $(INSTALL) -m 0644 $(INSTALL_STRIP_FLAG) $(TARGET) "$(DESTDIR)$(PLUGINDIR)" + +uninstall: + $(RM) "$(DESTDIR)$(PLUGINDIR)/$(TARGET)" + +clean: + $(RM) -r $(OBJDIR) $(TARGET) + +rebuild: clean all + +# build dependency files +CFLAGS += -MD +-include $(OBJECTS:.o=.d) + +CXXFLAGS += $(CFLAGS) + +# standard build rules +$(OBJDIR)/%.o: $(SRCDIR)/%.c + $(COMPILE.c) -o $@ $< + +$(OBJDIR)/%.o: $(SRCDIR)/%.cpp + $(COMPILE.cc) -o $@ $< + +$(TARGET): $(OBJECTS) + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + +.PHONY: all clean install uninstall targets diff --git a/source/mupen64plus-rsp-z64/src/main_rsp.cpp b/source/mupen64plus-rsp-z64/src/main_rsp.cpp 
new file mode 100644 index 0000000..c6bcf50 --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/main_rsp.cpp @@ -0,0 +1,249 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#include "rsp.h" +#include <assert.h> +#include <stdarg.h> + +#define RSP_Z64_VERSION 0x020000 +#define RSP_PLUGIN_API_VERSION 0x020000 + +static void (*l_DebugCallback)(void *, int, const char *) = NULL; +static void *l_DebugCallContext = NULL; +static bool l_PluginInit = false; + +#if 0 +static void dump() +{ + FILE * fp = fopen("rsp.dump", "w"); + assert(fp); + fwrite(rdram, 8*1024, 1024, fp); + fwrite(rsp_dmem, 0x2000, 1, fp); + fwrite(rsp.ext.MI_INTR_REG, 4, 1, fp); + + fwrite(rsp.ext.SP_MEM_ADDR_REG, 4, 1, fp); + fwrite(rsp.ext.SP_DRAM_ADDR_REG, 4, 1, fp); + fwrite(rsp.ext.SP_RD_LEN_REG, 4, 1, fp); + fwrite(rsp.ext.SP_WR_LEN_REG, 4, 1, fp); + fwrite(rsp.ext.SP_STATUS_REG, 4, 1, fp); + fwrite(rsp.ext.SP_DMA_FULL_REG, 4, 1, fp); + fwrite(rsp.ext.SP_DMA_BUSY_REG, 4, 1, fp); + fwrite(rsp.ext.SP_PC_REG, 4, 1, fp); + fwrite(rsp.ext.SP_SEMAPHORE_REG, 4, 1, fp); + + fwrite(rsp.ext.DPC_START_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_END_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_CURRENT_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_STATUS_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_CLOCK_REG, 4, 1, fp); +
fwrite(rsp.ext.DPC_BUFBUSY_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_PIPEBUSY_REG, 4, 1, fp); + fwrite(rsp.ext.DPC_TMEM_REG, 4, 1, fp); + fclose(fp); +} +#endif + +void log(m64p_msg_level level, const char *msg, ...) +{ + char buf[1024]; + va_list args; + va_start(args, msg); + vsnprintf(buf, 1023, msg, args); + buf[1023]='\0'; + va_end(args); + if (l_DebugCallback) + { + l_DebugCallback(l_DebugCallContext, level, buf); + } +} + +#ifdef __cplusplus +extern "C" { +#endif + + /* DLL-exported functions */ + EXPORT m64p_error CALL PluginStartup(m64p_dynlib_handle CoreLibHandle, void *Context, + void (*DebugCallback)(void *, int, const char *)) + { + + if (l_PluginInit) + return M64ERR_ALREADY_INIT; + + ///* first thing is to set the callback function for debug info */ + l_DebugCallback = DebugCallback; + l_DebugCallContext = Context; + + ///* this plugin doesn't use any Core library functions (ex for Configuration), so no need to keep the CoreLibHandle */ + + l_PluginInit = true; + return M64ERR_SUCCESS; + } + + EXPORT m64p_error CALL PluginShutdown(void) + { + if (!l_PluginInit) + return M64ERR_NOT_INIT; + + ///* reset some local variable */ + l_DebugCallback = NULL; + l_DebugCallContext = NULL; + + l_PluginInit = 0; + return M64ERR_SUCCESS; + } + + EXPORT m64p_error CALL PluginGetVersion(m64p_plugin_type *PluginType, int *PluginVersion, int *APIVersion, const char **PluginNamePtr, int *Capabilities) + { + /* set version info */ + if (PluginType != NULL) + *PluginType = M64PLUGIN_RSP; + + if (PluginVersion != NULL) + *PluginVersion = RSP_Z64_VERSION; + + if (APIVersion != NULL) + *APIVersion = RSP_PLUGIN_API_VERSION; + + if (PluginNamePtr != NULL) + *PluginNamePtr = "Z64 RSP Plugin"; + + if (Capabilities != NULL) + { + *Capabilities = 0; + } + + return M64ERR_SUCCESS; + } + + EXPORT unsigned int CALL DoRspCycles(unsigned int Cycles) + { + //#define VIDEO_HLE_ALLOWED + //#define AUDIO_HLE_ALLOWED + +#if defined (AUDIO_HLE_ALLOWED) || defined (VIDEO_HLE_ALLOWED) + unsigned 
int TaskType = *(unsigned int *)(z64_rspinfo.DMEM + 0xFC0); +#endif + +#ifdef VIDEO_HLE_ALLOWED +#if 0 + if (TaskType == 1) { + SDL_Event event; + while (SDL_PollEvent(&event)) { + switch (event.type) { + case SDL_KEYDOWN: + switch (event.key.keysym.sym) { + case 'd': + printf("Dumping !\n"); + dump(); + break; + } + break; + } + } + } +#endif + + if (TaskType == 1) { + if (z64_rspinfo.ProcessDlistList != NULL) { + z64_rspinfo.ProcessDlistList(); + } + *z64_rspinfo.SP_STATUS_REG |= (0x0203); + if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { + *z64_rspinfo.MI_INTR_REG |= R4300i_SP_Intr; + z64_rspinfo.CheckInterrupts(); + } + + *z64_rspinfo.DPC_STATUS_REG &= ~0x0002; + return Cycles; + } +#endif + +#ifdef AUDIO_HLE_ALLOWED + if (TaskType == 2) { + if (z64_rspinfo.ProcessAlistList != NULL) { + z64_rspinfo.ProcessAlistList(); + } + *z64_rspinfo.SP_STATUS_REG |= (0x0203); + if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { + *z64_rspinfo.MI_INTR_REG |= R4300i_SP_Intr; + z64_rspinfo.CheckInterrupts(); + } + return Cycles; + } +#endif + + if (z64_rspinfo.CheckInterrupts==NULL) + log(M64MSG_WARNING, "Emulator doesn't provide CheckInterrupts routine"); + return rsp_execute(0x100000); + //return Cycles; + } + + EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, unsigned int *CycleCount) + { + log(M64MSG_STATUS, "INITIATE RSP"); + rsp_init(Rsp_Info); + memset(((UINT32*)z64_rspinfo.DMEM), 0, 0x2000); + //*CycleCount = 0; //Causes segfault, doesn't seem to be used anyway + } + + EXPORT void CALL RomClosed(void) + { + extern int rsp_gen_cache_hit; + extern int rsp_gen_cache_miss; + log(M64MSG_STATUS, "cache hit %d miss %d %g%%", rsp_gen_cache_hit, rsp_gen_cache_miss, + rsp_gen_cache_miss*100.0f/rsp_gen_cache_hit); + rsp_gen_cache_hit = rsp_gen_cache_miss = 0; + +#ifdef RSPTIMING + int i,j; + UINT32 op, op2; + + for(i=0; i<0x140;i++) { + if (i>=0x100) + op = (0x12<<26) | (0x10 << 21) | (i&0x3f); + else if (i>=0xc0) + op = (0x3a<<26) | ((i&0x1f)<<11); 
+ else if (i>=0xa0) + op = (0x32<<26) | ((i&0x1f)<<11); + else if (i>=0x80) + op = (0x12<<26) | ((i&0x1f)<<21); + else if (i>=0x40) + op = (0<<26) | (i&0x3f); + else + op = (i&0x3f)<<26; + + char s[128], s2[128]; + rsp_dasm_one(s, 0x800, op); + //rsp_dasm_one(s2, 0x800, op2); + if (rsptimings[i]) + printf("%10g %10g %7d\t%30s\n" + /*"%10g %10g %7d\t%30s\n"*/, + rsptimings[i]/(rspcounts[i]*1.0f), rsptimings[i]*(1.0f), rspcounts[i], s//, + //timings[k]/1.0f/counts[k], counts[k], s2 + ); + } +#endif + + //rsp_init(z64_rspinfo); + } +#ifdef __cplusplus +} +#endif diff --git a/source/mupen64plus-rsp-z64/src/rsp.cpp b/source/mupen64plus-rsp-z64/src/rsp.cpp new file mode 100644 index 0000000..04e65ec --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp.cpp @@ -0,0 +1,3093 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * +**/ + +/* +Nintendo/SGI Reality Signal Processor (RSP) emulator + +Written by Ville Linde +*/ +// #include "z64.h" +#include "rsp.h" +#include "rsp_opinfo.h" +#include // sqrt +#include +#include + +#define INLINE inline + +#define LOG_INSTRUCTION_EXECUTION 0 +#define SAVE_DISASM 0 +#define SAVE_DMEM 0 + +#define PRINT_VECREG(x) printf("V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X\n", (x), \ + (UINT16)VREG_S((x),0), (UINT16)VREG_S((x),1), \ + (UINT16)VREG_S((x),2), (UINT16)VREG_S((x),3), \ + (UINT16)VREG_S((x),4), (UINT16)VREG_S((x),5), \ + (UINT16)VREG_S((x),6), (UINT16)VREG_S((x),7)) + + +extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); + +#if LOG_INSTRUCTION_EXECUTION +static FILE *exec_output; +#endif + + +// INLINE void sp_set_status(UINT32 status) +// { +// if (status & 0x1) +// { +// cpu_trigger(6789); + +// cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE); +// rsp_sp_status |= SP_STATUS_HALT; +// } +// if (status & 0x2) +// { +// rsp_sp_status |= SP_STATUS_BROKE; + +// if (rsp_sp_status & SP_STATUS_INTR_BREAK) +// { +// signal_rcp_interrupt(SP_INTERRUPT); +// } +// } +// } + + +#if 0 +enum +{ + RSP_PC = 1, + RSP_R0, + RSP_R1, + RSP_R2, + RSP_R3, + RSP_R4, + RSP_R5, + RSP_R6, + RSP_R7, + RSP_R8, + RSP_R9, + RSP_R10, + RSP_R11, + RSP_R12, + RSP_R13, + RSP_R14, + RSP_R15, + RSP_R16, + RSP_R17, + RSP_R18, + RSP_R19, + RSP_R20, + RSP_R21, + RSP_R22, + RSP_R23, + RSP_R24, + RSP_R25, + RSP_R26, + RSP_R27, + RSP_R28, + RSP_R29, + RSP_R30, + RSP_R31, +}; +#endif + + +#ifdef RSPTIMING +uint64_t rsptimings[512]; +int rspcounts[512]; +#endif + + +#define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); } +#define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; } +#define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); } +#define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; } +#define 
JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); } +#define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; } +#define LINK(l) rsp.r[l] = sp_pc + 4 + + +#define VDREG ((op >> 6) & 0x1f) +#define VS1REG ((op >> 11) & 0x1f) +#define VS2REG ((op >> 16) & 0x1f) +#define EL ((op >> 21) & 0xf) + +#define S_VREG_B(offset) (((15 - (offset)) & 0x07) << 3) +#define S_VREG_S(offset) (((7 - (offset)) & 0x03) << 4) +#define S_VREG_L(offset) (((3 - (offset)) & 0x01) << 5) + +#define M_VREG_B(offset) ((UINT64)0x00FF << S_VREG_B(offset)) +#define M_VREG_S(offset) ((UINT64)0x0000FFFFul << S_VREG_S(offset)) +#define M_VREG_L(offset) ((UINT64)0x00000000FFFFFFFFull << S_VREG_L(offset)) + +#define R_VREG_B(reg, offset) ((rsp.v[(reg)].d[(15 - (offset)) >> 3] >> S_VREG_B(offset)) & 0x00FF) +#define R_VREG_S(reg, offset) (INT16)((rsp.v[(reg)].d[(7 - (offset)) >> 2] >> S_VREG_S(offset)) & 0x0000FFFFul) +#define R_VREG_L(reg, offset) ((rsp.v[(reg)].d[(3 - (offset)) >> 1] >> S_VREG_L(offset)) & 0x00000000FFFFFFFFull) + +#define W_VREG_B(reg, offset, val) (rsp.v[(reg)].d[(15 - (offset)) >> 3] = (rsp.v[(reg)].d[(15 - (offset)) >> 3] & ~M_VREG_B(offset)) | (M_VREG_B(offset) & ((UINT64)(val) << S_VREG_B(offset)))) +#define W_VREG_S(reg, offset, val) (rsp.v[(reg)].d[(7 - (offset)) >> 2] = (rsp.v[(reg)].d[(7 - (offset)) >> 2] & ~M_VREG_S(offset)) | (M_VREG_S(offset) & ((UINT64)(val) << S_VREG_S(offset)))) +#define W_VREG_L(reg, offset, val) (rsp.v[(reg)].d[(3 - (offset)) >> 1] = (rsp.v[(reg)].d[(3 - (offset)) >> 1] & ~M_VREG_L(offset)) | (M_VREG_L(offset) & ((UINT64)(val) << S_VREG_L(offset)))) + + +#define VEC_EL_1(x,z) (z) +#define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) + +#define ACCUM(x) rsp.accum[((x))].q + +#define S_ACCUM_H (3 << 4) +#define S_ACCUM_M (2 << 4) +#define S_ACCUM_L (1 << 4) + +#define M_ACCUM_H (((INT64)0x0000FFFF) << S_ACCUM_H) +#define M_ACCUM_M (((INT64)0x0000FFFF) << S_ACCUM_M) +#define M_ACCUM_L (((INT64)0x0000FFFF) << 
S_ACCUM_L) + +#define R_ACCUM_H(x) ((INT16)((ACCUM(x) >> S_ACCUM_H) & 0x00FFFF)) +#define R_ACCUM_M(x) ((INT16)((ACCUM(x) >> S_ACCUM_M) & 0x00FFFF)) +#define R_ACCUM_L(x) ((INT16)((ACCUM(x) >> S_ACCUM_L) & 0x00FFFF)) + +#define W_ACCUM_H(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_H) | (M_ACCUM_H & ((INT64)(y) << S_ACCUM_H))) +#define W_ACCUM_M(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_M) | (M_ACCUM_M & ((INT64)(y) << S_ACCUM_M))) +#define W_ACCUM_L(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_L) | (M_ACCUM_L & ((INT64)(y) << S_ACCUM_L))) + + + +RSP_REGS rsp; +static int rsp_icount; +// RSP Interface + +#define rsp_sp_status (*(UINT32*)z64_rspinfo.SP_STATUS_REG) +#define sp_mem_addr (*(UINT32*)z64_rspinfo.SP_MEM_ADDR_REG) +#define sp_dram_addr (*(UINT32*)z64_rspinfo.SP_DRAM_ADDR_REG) +#define sp_semaphore (*(UINT32*)z64_rspinfo.SP_SEMAPHORE_REG) + +#define sp_dma_rlength (*(UINT32*)z64_rspinfo.SP_RD_LEN_REG) +#define sp_dma_wlength (*(UINT32*)z64_rspinfo.SP_WR_LEN_REG) + +INT32 sp_dma_length; + +/*****************************************************************************/ + +UINT32 get_cop0_reg(int reg) +{ + if (reg >= 0 && reg < 8) + { + return sp_read_reg(reg); + } + else if (reg >= 8 && reg < 16) + { + return n64_dp_reg_r(reg - 8, 0x00000000); + } + else + { + log(M64MSG_ERROR, "RSP: get_cop0_reg: %d", reg); + return ~0; + } +} + +void set_cop0_reg(int reg, UINT32 data) +{ + if (reg >= 0 && reg < 8) + { + sp_write_reg(reg, data); + } + else if (reg >= 8 && reg < 16) + { + n64_dp_reg_w(reg - 8, data, 0x00000000); + } + else + { + log(M64MSG_ERROR, "RSP: set_cop0_reg: %d, %08X\n", reg, data); + } +} + +static int got_unimp; +void unimplemented_opcode(UINT32 op) +{ + got_unimp = 1; +#ifdef MAME_DEBUG + char string[200]; + rsp_dasm_one(string, rsp.ppc, op); + printf("%08X: %s\n", rsp.ppc, string); +#endif + +#if SAVE_DISASM + { + char string[200]; + int i; + FILE *dasm; + dasm = fopen("rsp_disasm.txt", "wt"); + + for (i=0; i < 0x1000; i+=4) + { + UINT32 opcode = 
ROPCODE(0x04001000 + i); + rsp_dasm_one(string, 0x04001000 + i, opcode); + fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string); + } + fclose(dasm); + } +#endif +#if SAVE_DMEM + { + int i; + FILE *dmem; + dmem = fopen("rsp_dmem.bin", "wb"); + + for (i=0; i < 0x1000; i++) + { + fputc(READ8(0x04000000 + i), dmem); + } + fclose(dmem); + } +#endif + + log(M64MSG_ERROR, "RSP: unknown opcode %02X (%d) (%08X) at %08X\n", op >> 26, op >> 26, op, rsp.ppc); +} + +/*****************************************************************************/ + +const int vector_elements_1[16][8] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7 }, // none + { 0, 1, 2, 3, 4, 5, 6 ,7 }, // ??? + { 1, 3, 5, 7, 0, 2, 4, 6 }, // 0q + { 0, 2, 4, 6, 1, 3, 5, 7 }, // 1q + { 1, 2, 3, 5, 6, 7, 0, 4 }, // 0h + { 0, 2, 3, 4, 6, 7, 1, 5 }, // 1h + { 0, 1, 3, 4, 5, 7, 2, 6 }, // 2h + { 0, 1, 2, 4, 5, 6, 3, 7 }, // 3h + { 1, 2, 3, 4, 5, 6, 7, 0 }, // 0 + { 0, 2, 3, 4, 5, 6, 7, 1 }, // 1 + { 0, 1, 3, 4, 5, 6, 7, 2 }, // 2 + { 0, 1, 2, 4, 5, 6, 7, 3 }, // 3 + { 0, 1, 2, 3, 5, 6, 7, 4 }, // 4 + { 0, 1, 2, 3, 4, 6, 7, 5 }, // 5 + { 0, 1, 2, 3, 4, 5, 7, 6 }, // 6 + { 0, 1, 2, 3, 4, 5, 6, 7 }, // 7 +}; + +const int vector_elements_2[16][8] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7 }, // none + { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? 
+ { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q + { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q + { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h + { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h + { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h + { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h + { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 + { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 + { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 + { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 + { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 + { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 + { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 + { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 +}; + +void rsp_init(RSP_INFO info) +{ +#if LOG_INSTRUCTION_EXECUTION + exec_output = fopen("rsp_execute.txt", "wt"); +#endif + + memset(&rsp, 0, sizeof(rsp)); + rsp.ext = info; + + sp_pc = 0; //0x4001000; + rsp.nextpc = ~0U; + //rsp_invalidate(0, 0x1000); + rsp.step_count=0; +} + +void rsp_reset(void) +{ + rsp.nextpc = ~0U; +} + +void handle_lwc2(UINT32 op) +{ + int i, end; + UINT32 ea; + int dest = (op >> 16) & 0x1f; + int base = (op >> 21) & 0x1f; + int index = (op >> 7) & 0xf; + int offset = (op & 0x7f); + if (offset & 0x40) + offset |= 0xffffffc0; + + switch ((op >> 11) & 0x1f) + { + case 0x00: /* LBV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | + // -------------------------------------------------- + // + // Load 1 byte to vector byte index + + ea = (base) ? rsp.r[base] + offset : offset; + VREG_B(dest, index) = READ8(ea); + break; + } + case 0x01: /* LSV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | + // -------------------------------------------------- + // + // Loads 2 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2); + + end = index + 2; + + // VP need mask i and ea ? 
+ for (i=index; i < end; i++) + { + VREG_B(dest, i) = READ8(ea); + ea++; + } + break; + } + case 0x02: /* LLV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | + // -------------------------------------------------- + // + // Loads 4 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4); + + end = index + 4; + + // VP need mask i and ea ? + for (i=index; i < end; i++) + { + VREG_B(dest, i) = READ8(ea); + ea++; + } + break; + } + case 0x03: /* LDV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | + // -------------------------------------------------- + // + // Loads 8 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); + + end = index + 8; + + // VP need mask i and ea ? + for (i=index; i < end; i++) + { + VREG_B(dest, i) = READ8(ea); + ea++; + } + break; + } + case 0x04: /* LQV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | + // -------------------------------------------------- + // + // Loads up to 16 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + end = index + (16 - (ea & 0xf)); + if (end > 16) end = 16; + for (i=index; i < end; i++) + { + VREG_B(dest, i) = READ8(ea); + ea++; + } + break; + } + case 0x05: /* LRV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | + // -------------------------------------------------- + // + // Stores up to 16 bytes starting from right side until 16-byte boundary + + ea = (base) ? 
rsp.r[base] + (offset * 16) : (offset * 16); + + index = 16 - ((ea & 0xf) - index); + end = 16; + ea &= ~0xf; + //assert(index == 0); + + for (i=index; i < end; i++) + { + VREG_B(dest, i) = READ8(ea); + ea++; + } + break; + } + case 0x06: /* LPV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | + // -------------------------------------------------- + // + // Loads a byte as the upper 8 bits of each element + + ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); + + for (i=0; i < 8; i++) + { + VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8; + } + break; + } + case 0x07: /* LUV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | + // -------------------------------------------------- + // + // Loads a byte as the bits 14-7 of each element + + ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); + + for (i=0; i < 8; i++) + { + VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7; + } + break; + } + case 0x08: /* LHV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | + // -------------------------------------------------- + // + // Loads a byte as the bits 14-7 of each element, with 2-byte stride + + ea = (base) ? 
rsp.r[base] + (offset * 16) : (offset * 16); + + for (i=0; i < 8; i++) + { + VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; + } + break; + } + case 0x09: /* LFV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | + // -------------------------------------------------- + // + // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride + + // fatalerror("RSP: LFV\n"); + + //if (index & 0x7) fatalerror("RSP: LFV: index = %d at %08X\n", index, rsp.ppc); + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + // not sure what happens if 16-byte boundary is crossed... + //if ((ea & 0xf) > 0) fatalerror("RSP: LFV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc); + + end = (index >> 1) + 4; + + for (i=index >> 1; i < end; i++) + { + VREG_S(dest, i) = READ8(ea) << 7; + ea += 4; + } + break; + } + case 0x0a: /* LWV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | + // -------------------------------------------------- + // + // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 + // after byte index 15 + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + // not sure what happens if 16-byte boundary is crossed... 
+ //if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc); + + end = (16 - index) + 16; + + for (i=(16 - index); i < end; i++) + { + VREG_B(dest, i & 0xf) = READ8(ea); + ea += 4; + } + break; + } + case 0x0b: /* LTV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | + // -------------------------------------------------- + // + // Loads one element to maximum of 8 vectors, while incrementing element index + + // FIXME: has a small problem with odd indices + + int element; + int vs = dest; + int ve = dest + 8; + if (ve > 32) + ve = 32; + + element = 7 - (index >> 1); + + //if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + ea = ((ea + 8) & ~0xf) + (index & 1); + for (i=vs; i < ve; i++) + { + element = ((8 - (index >> 1) + (i-vs)) << 1); + VREG_B(i, (element & 0xf)) = READ8(ea); + VREG_B(i, ((element+1) & 0xf)) = READ8(ea+1); + + ea += 2; + } + break; + } + + default: + { + unimplemented_opcode(op); + break; + } + } +} + +void handle_swc2(UINT32 op) +{ + int i, end; + int eaoffset; + UINT32 ea; + int dest = (op >> 16) & 0x1f; + int base = (op >> 21) & 0x1f; + int index = (op >> 7) & 0xf; + int offset = (op & 0x7f); + if (offset & 0x40) + offset |= 0xffffffc0; + + switch ((op >> 11) & 0x1f) + { + case 0x00: /* SBV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | + // -------------------------------------------------- + // + // Stores 1 byte from vector byte index + + ea = (base) ? 
rsp.r[base] + offset : offset; + WRITE8(ea, VREG_B(dest, index)); + break; + } + case 0x01: /* SSV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | + // -------------------------------------------------- + // + // Stores 2 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2); + + end = index + 2; + + for (i=index; i < end; i++) + { + WRITE8(ea, VREG_B(dest, i)); + ea++; + } + break; + } + case 0x02: /* SLV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | + // -------------------------------------------------- + // + // Stores 4 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4); + + end = index + 4; + + for (i=index; i < end; i++) + { + WRITE8(ea, VREG_B(dest, i)); + ea++; + } + break; + } + case 0x03: /* SDV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | + // -------------------------------------------------- + // + // Stores 8 bytes starting from vector byte index + + ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); + + end = index + 8; + + for (i=index; i < end; i++) + { + WRITE8(ea, VREG_B(dest, i)); + ea++; + } + break; + } + case 0x04: /* SQV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | + // -------------------------------------------------- + // + // Stores up to 16 bytes starting from vector byte index until 16-byte boundary + + ea = (base) ? 
rsp.r[base] + (offset * 16) : (offset * 16); + + end = index + (16 - (ea & 0xf)); + // if (end != 16) + // printf("SQV %d\n", end-index); + //assert(end == 16); + + for (i=index; i < end; i++) + { + WRITE8(ea, VREG_B(dest, i & 0xf)); + ea++; + } + break; + } + case 0x05: /* SRV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | + // -------------------------------------------------- + // + // Stores up to 16 bytes starting from right side until 16-byte boundary + + int o; + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + end = index + (ea & 0xf); + o = (16 - (ea & 0xf)) & 0xf; + ea &= ~0xf; + // if (end != 16) + // printf("SRV %d\n", end-index); + //assert(end == 16); + + for (i=index; i < end; i++) + { + WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); + ea++; + } + break; + } + case 0x06: /* SPV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | + // -------------------------------------------------- + // + // Stores upper 8 bits of each element + + ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); + end = index + 8; + + for (i=index; i < end; i++) + { + if ((i & 0xf) < 8) + { + WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); + } + else + { + WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); + } + ea++; + } + break; + } + case 0x07: /* SUV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | + // -------------------------------------------------- + // + // Stores bits 14-7 of each element + + ea = (base) ? 
rsp.r[base] + (offset * 8) : (offset * 8); + end = index + 8; + + for (i=index; i < end; i++) + { + if ((i & 0xf) < 8) + { + WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); + } + else + { + WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); + } + ea++; + } + break; + } + case 0x08: /* SHV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | + // -------------------------------------------------- + // + // Stores bits 14-7 of each element, with 2-byte stride + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + for (i=0; i < 8; i++) + { + UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | + ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); + + WRITE8(ea, d); + ea += 2; + } + break; + } + case 0x09: /* SFV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | + // -------------------------------------------------- + // + // Stores bits 14-7 of upper or lower quad, with 4-byte stride + + // FIXME: only works for index 0 and index 8 + + if (index & 0x7) + log(M64MSG_WARNING, "SFV: index = %d at %08X\n", index, rsp.ppc); + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + eaoffset = ea & 0xf; + ea &= ~0xf; + + end = (index >> 1) + 4; + + for (i=index >> 1; i < end; i++) + { + WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); + eaoffset += 4; + } + break; + } + case 0x0a: /* SWV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | + // -------------------------------------------------- + // + // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 + // after byte index 15 + + ea = (base) ? 
rsp.r[base] + (offset * 16) : (offset * 16); + + eaoffset = ea & 0xf; + ea &= ~0xf; + + end = index + 16; + + for (i=index; i < end; i++) + { + WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); + eaoffset++; + } + break; + } + case 0x0b: /* STV */ + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | + // -------------------------------------------------- + // + // Stores one element from maximum of 8 vectors, while incrementing element index + + int element, eaoffset; + int vs = dest; + int ve = dest + 8; + if (ve > 32) + ve = 32; + + element = 8 - (index >> 1); + //if (index & 0x1) fatalerror("RSP: STV: index = %d at %08X\n", index, rsp.ppc); + + ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); + + //if (ea & 0x1) fatalerror("RSP: STV: ea = %08X at %08X\n", ea, rsp.ppc); + + eaoffset = (ea & 0xf) + (element * 2); + ea &= ~0xf; + + for (i=vs; i < ve; i++) + { + WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); + eaoffset += 2; + element++; + } + break; + } + + default: + { + unimplemented_opcode(op); + break; + } + } +} + +#define U16MIN 0x0000 +#define U16MAX 0xffff + +#define S16MIN 0x8000 +#define S16MAX 0x7fff + +INLINE UINT16 SATURATE_ACCUM_U(int accum) +{ + if ((INT16)ACCUM_H(accum) < 0) + { + if ((UINT16)(ACCUM_H(accum)) != 0xffff) + { + return U16MIN; + } + else + { + if ((INT16)ACCUM_M(accum) >= 0) + { + return U16MIN; + } + else + { + return ACCUM_L(accum); + } + } + } + else + { + if ((UINT16)(ACCUM_H(accum)) != 0) + { + return U16MAX; + } + else + { + if ((INT16)ACCUM_M(accum) < 0) + { + return U16MAX; + } + else + { + return ACCUM_L(accum); + } + } + } + + return 0; +} + +INLINE UINT16 SATURATE_ACCUM_S(int accum) +{ + if ((INT16)ACCUM_H(accum) < 0) + { + if ((UINT16)(ACCUM_H(accum)) != 0xffff) + return S16MIN; + else + { + if ((INT16)ACCUM_M(accum) >= 0) + return S16MIN; + else + return ACCUM_M(accum); + } + } + else + { + if 
((UINT16)(ACCUM_H(accum)) != 0) + return S16MAX; + else + { + if ((INT16)ACCUM_M(accum) < 0) + return S16MAX; + else + return ACCUM_M(accum); + } + } + + return 0; +} + +#define WRITEBACK_RESULT() \ + do { \ + VREG_S(VDREG, 0) = vres[0]; \ + VREG_S(VDREG, 1) = vres[1]; \ + VREG_S(VDREG, 2) = vres[2]; \ + VREG_S(VDREG, 3) = vres[3]; \ + VREG_S(VDREG, 4) = vres[4]; \ + VREG_S(VDREG, 5) = vres[5]; \ + VREG_S(VDREG, 6) = vres[6]; \ + VREG_S(VDREG, 7) = vres[7]; \ + } while(0) + + +void handle_vector_ops(UINT32 op) +{ + int i; + INT16 vres[8]; + + // Opcode legend: + // E = VS2 element type + // S = VS1, Source vector 1 + // T = VS2, Source vector 2 + // D = Destination vector + + switch (op & 0x3f) + { + case 0x00: /* VMULF */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | + // ------------------------------------------------------ + // + // Multiplies signed integer by signed integer * 2 + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + if (s1 == -32768 && s2 == -32768) + { + // overflow + ACCUM_H(del) = 0; + ACCUM_M(del) = -32768; + ACCUM_L(del) = -32768; + vres[del] = 0x7fff; + } + else + { + INT64 r = s1 * s2 * 2; + r += 0x8000; // rounding ? + ACCUM_H(del) = (r < 0) ? 
0xffff : 0; // sign-extend to 48-bit + ACCUM_M(del) = (INT16)(r >> 16); + ACCUM_L(del) = (UINT16)(r); + vres[del] = ACCUM_M(del); + } + } + WRITEBACK_RESULT(); + + break; + } + + case 0x01: /* VMULU */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | + // ------------------------------------------------------ + // + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT64 r = s1 * s2 * 2; + r += 0x8000; // rounding ? + + ACCUM_H(del) = (UINT16)(r >> 32); + ACCUM_M(del) = (UINT16)(r >> 16); + ACCUM_L(del) = (UINT16)(r); + + if (r < 0) + { + vres[del] = 0; + } + else if (((INT16)(ACCUM_H(del)) ^ (INT16)(ACCUM_M(del))) < 0) + { + vres[del] = -1; + } + else + { + vres[del] = ACCUM_M(del); + } + } + WRITEBACK_RESULT(); + break; + } + + case 0x04: /* VMUDL */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | + // ------------------------------------------------------ + // + // Multiplies unsigned fraction by unsigned fraction + // Stores the higher 16 bits of the 32-bit result to accumulator + // The low slice of accumulator is stored into destination element + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); + UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); + UINT32 r = s1 * s2; + + ACCUM_H(del) = 0; + ACCUM_M(del) = 0; + ACCUM_L(del) = (UINT16)(r >> 16); + + vres[del] = ACCUM_L(del); + } + WRITEBACK_RESULT(); + break; + } + + case 0x05: /* VMUDM */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | + // 
------------------------------------------------------ + // + // Multiplies signed integer by unsigned fraction + // The result is stored into accumulator + // The middle slice of accumulator is stored into destination element + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended + INT32 r = s1 * s2; + + ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit + ACCUM_M(del) = (INT16)(r >> 16); + ACCUM_L(del) = (UINT16)(r); + + vres[del] = ACCUM_M(del); + } + WRITEBACK_RESULT(); + break; + + } + + case 0x06: /* VMUDN */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | + // ------------------------------------------------------ + // + // Multiplies unsigned fraction by signed integer + // The result is stored into accumulator + // The low slice of accumulator is stored into destination element + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r = s1 * s2; + + ACCUM_H(del) = (r < 0) ? 
0xffff : 0; // sign-extend to 48-bit + ACCUM_M(del) = (INT16)(r >> 16); + ACCUM_L(del) = (UINT16)(r); + + vres[del] = ACCUM_L(del); + } + WRITEBACK_RESULT(); + break; + } + + case 0x07: /* VMUDH */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | + // ------------------------------------------------------ + // + // Multiplies signed integer by signed integer + // The result is stored into highest 32 bits of accumulator, the low slice is zero + // The highest 32 bits of accumulator is saturated into destination element + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r = s1 * s2; + + ACCUM_H(del) = (INT16)(r >> 16); + ACCUM_M(del) = (UINT16)(r); + ACCUM_L(del) = 0; + + if (r < -32768) r = -32768; + if (r > 32767) r = 32767; + vres[del] = (INT16)(r); + } + WRITEBACK_RESULT(); + break; + } + + case 0x08: /* VMACF */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | + // ------------------------------------------------------ + // + // Multiplies signed integer by signed integer * 2 + // The result is added to accumulator + + for (i=0; i < 8; i++) + { + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r = s1 * s2; + + ACCUM(del) += (INT64)(r) << 17; + res = SATURATE_ACCUM_S(del); + + vres[del] = res; + } + WRITEBACK_RESULT(); + break; + } + + case 0x09: /* VMACU */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | + // ------------------------------------------------------ + // + + for (i=0; i < 8; i++) + 
{ + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r1 = s1 * s2; + UINT32 r2 = (UINT16)ACCUM_L(del) + ((UINT16)(r1) * 2); + UINT32 r3 = (UINT16)ACCUM_M(del) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); + + ACCUM_L(del) = (UINT16)(r2); + ACCUM_M(del) = (UINT16)(r3); + ACCUM_H(del) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31); + + //res = SATURATE_ACCUM(del, 1, 0x0000, 0xffff); + if ((INT16)ACCUM_H(del) < 0) + { + res = 0; + } + else + { + if (ACCUM_H(del) != 0) + { + res = 0xffff; + } + else + { + if ((INT16)ACCUM_M(del) < 0) + { + res = 0xffff; + } + else + { + res = ACCUM_M(del); + } + } + } + + vres[del] = res; + } + WRITEBACK_RESULT(); + break; + } + + case 0x0c: /* VMADL */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | + // ------------------------------------------------------ + // + // Multiplies unsigned fraction by unsigned fraction + // Adds the higher 16 bits of the 32-bit result to accumulator + // The low slice of accumulator is stored into destination element + + for (i=0; i < 8; i++) + { + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); + UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); + UINT32 r1 = s1 * s2; + UINT32 r2 = (UINT16)ACCUM_L(del) + (r1 >> 16); + UINT32 r3 = (UINT16)ACCUM_M(del) + (r2 >> 16); + + ACCUM_L(del) = (UINT16)(r2); + ACCUM_M(del) = (UINT16)(r3); + ACCUM_H(del) += (INT16)(r3 >> 16); + + res = SATURATE_ACCUM_U(del); + + vres[del] = res; + } + WRITEBACK_RESULT(); + break; + } + + case 0x0d: /* VMADM */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | + // ------------------------------------------------------ + // + // 
Multiplies signed integer by unsigned fraction + // The result is added into accumulator + // The middle slice of accumulator is stored into destination element + + for (i=0; i < 8; i++) + { + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + UINT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended + UINT32 r1 = s1 * s2; + UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1); + UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16); + + ACCUM_L(del) = (UINT16)(r2); + ACCUM_M(del) = (UINT16)(r3); + ACCUM_H(del) += (UINT16)(r3 >> 16); + if ((INT32)(r1) < 0) + ACCUM_H(del) -= 1; + + res = SATURATE_ACCUM_S(del); + + vres[del] = res; + } + WRITEBACK_RESULT(); + break; + } + + case 0x0e: /* VMADN */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | + // ------------------------------------------------------ + // + // Multiplies unsigned fraction by signed integer + // The result is added into accumulator + // The low slice of accumulator is stored into destination element + +#if 1 + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + ACCUM(del) += (INT64)(s1*s2)<<16; + } + + for (i=0; i < 8; i++) + { + UINT16 res; + res = SATURATE_ACCUM_U(i); + //res = ACCUM_L(i); + + VREG_S(VDREG, i) = res; + } +#else + for (i=0; i < 8; i++) + { + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + UINT32 r1 = s1 * s2; + UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1); + UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16); + + ACCUM_L(del) = (UINT16)(r2); + ACCUM_M(del) = (UINT16)(r3); + ACCUM_H(del) += (UINT16)(r3 >> 16); + if 
((INT32)(r1) < 0) + ACCUM_H(del) -= 1; + + res = SATURATE_ACCUM_U(del); + + vres[del] = res; + } + WRITEBACK_RESULT(); +#endif + break; + } + + case 0x0f: /* VMADH */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | + // ------------------------------------------------------ + // + // Multiplies signed integer by signed integer + // The result is added into highest 32 bits of accumulator, the low slice is zero + // The highest 32 bits of accumulator is saturated into destination element + +#if 1 + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + + rsp.accum[del].l[1] += s1*s2; + + } + for (i=0; i < 8; i++) + { + UINT16 res; + res = SATURATE_ACCUM_S(i); + //res = ACCUM_M(i); + + VREG_S(VDREG, i) = res; + } +#else + for (i=0; i < 8; i++) + { + UINT16 res; + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT64 r = s1 * s2; + + ACCUM(del) += (INT64)(r) << 32; + + res = SATURATE_ACCUM_S(del); + + vres[del] = res; + } + WRITEBACK_RESULT(); +#endif + break; + } + + case 0x10: /* VADD */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | + // ------------------------------------------------------ + // + // Adds two vector registers and carry flag, the result is saturated to 32767 + + // TODO: check VS2REG == VDREG + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r = s1 + s2 + CARRY_FLAG(del); + + ACCUM_L(del) = (INT16)(r); + + if (r > 32767) r = 32767; + if (r < -32768) r = -32768; + vres[del] 
= (INT16)(r); + } + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x11: /* VSUB */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | + // ------------------------------------------------------ + // + // Subtracts two vector registers and carry flag, the result is saturated to -32768 + + // TODO: check VS2REG == VDREG + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); + INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); + INT32 r = s1 - s2 - CARRY_FLAG(del); + + ACCUM_L(del) = (INT16)(r); + + if (r > 32767) r = 32767; + if (r < -32768) r = -32768; + + vres[del] = (INT16)(r); + } + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x13: /* VABS */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | + // ------------------------------------------------------ + // + // Changes the sign of source register 2 if source register 1 is negative and stores + // the result to destination register + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT16 s1 = (INT16)VREG_S(VS1REG, del); + INT16 s2 = (INT16)VREG_S(VS2REG, sel); + + if (s1 < 0) + { + if (s2 == -32768) + { + vres[del] = 32767; + } + else + { + vres[del] = -s2; + } + } + else if (s1 > 0) + { + vres[del] = s2; + } + else + { + vres[del] = 0; + } + + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + + case 0x14: /* VADDC */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | + // ------------------------------------------------------ + // + // Adds two vector registers, the carry out is stored into carry 
register + + // TODO: check VS2REG = VDREG + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); + INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); + INT32 r = s1 + s2; + + vres[del] = (INT16)(r); + ACCUM_L(del) = (INT16)(r); + + if (r & 0xffff0000) + { + SET_CARRY_FLAG(del); + } + } + WRITEBACK_RESULT(); + break; + } + + case 0x15: /* VSUBC */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | + // ------------------------------------------------------ + // + // Subtracts two vector registers, the carry out is stored into carry register + + // TODO: check VS2REG = VDREG + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); + INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); + INT32 r = s1 - s2; + + vres[del] = (INT16)(r); + ACCUM_L(del) = (UINT16)(r); + + if ((UINT16)(r) != 0) + { + SET_ZERO_FLAG(del); + } + if (r & 0xffff0000) + { + SET_CARRY_FLAG(del); + } + } + WRITEBACK_RESULT(); + break; + } + + case 0x1d: /* VSAW */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | + // ------------------------------------------------------ + // + // Stores high, middle or low slice of accumulator to destination vector + + switch (EL) + { + case 0x08: // VSAWH + { + for (i=0; i < 8; i++) + { + VREG_S(VDREG, i) = ACCUM_H(i); + } + break; + } + case 0x09: // VSAWM + { + for (i=0; i < 8; i++) + { + VREG_S(VDREG, i) = ACCUM_M(i); + } + break; + } + case 0x0a: // VSAWL + { + for (i=0; i < 8; i++) + { + VREG_S(VDREG, i) = ACCUM_L(i); + } + break; + } + default: log(M64MSG_ERROR, "RSP: VSAW: el = %d\n", EL); + } + break; + } + 
+ case 0x20: /* VLT */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | + // ------------------------------------------------------ + // + // Sets compare flags if elements in VS1 are less than VS2 + // Moves the element in VS2 to destination vector + + rsp.flag[1] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + + if (VREG_S(VS1REG, del) < VREG_S(VS2REG, sel)) + { + vres[del] = VREG_S(VS1REG, del); + SET_COMPARE_FLAG(del); + } + else if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) + { + vres[del] = VREG_S(VS1REG, del); + if (ZERO_FLAG(del) != 0 && CARRY_FLAG(del) != 0) + { + SET_COMPARE_FLAG(del); + } + } + else + { + vres[del] = VREG_S(VS2REG, sel); + } + + ACCUM_L(del) = vres[del]; + } + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x21: /* VEQ */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | + // ------------------------------------------------------ + // + // Sets compare flags if elements in VS1 are equal with VS2 + // Moves the element in VS2 to destination vector + + rsp.flag[1] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + + vres[del] = VREG_S(VS2REG, sel); + ACCUM_L(del) = vres[del]; + + if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) + { + if (ZERO_FLAG(del) == 0) + { + SET_COMPARE_FLAG(del); + } + } + } + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x22: /* VNE */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | + // ------------------------------------------------------ + // + // Sets compare flags if elements in VS1 are not equal with VS2 + // Moves the element in 
VS2 to destination vector + + rsp.flag[1] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + + vres[del] = VREG_S(VS1REG, del); + ACCUM_L(del) = vres[del]; + + if (VREG_S(VS1REG, del) != VREG_S(VS2REG, sel)) + { + SET_COMPARE_FLAG(del); + } + else + { + if (ZERO_FLAG(del) != 0) + { + SET_COMPARE_FLAG(del); + } + } + } + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x23: /* VGE */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | + // ------------------------------------------------------ + // + // Sets compare flags if elements in VS1 are greater or equal with VS2 + // Moves the element in VS2 to destination vector + + rsp.flag[1] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + + if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) + { + if (ZERO_FLAG(del) == 0 || CARRY_FLAG(del) == 0) + { + SET_COMPARE_FLAG(del); + } + } + else if (VREG_S(VS1REG, del) > VREG_S(VS2REG, sel)) + { + SET_COMPARE_FLAG(del); + } + + if (COMPARE_FLAG(del) != 0) + { + vres[del] = VREG_S(VS1REG, del); + } + else + { + vres[del] = VREG_S(VS2REG, sel); + } + + ACCUM_L(del) = vres[del]; + } + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + WRITEBACK_RESULT(); + break; + } + + case 0x24: /* VCL */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | + // ------------------------------------------------------ + // + // Vector clip low + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT16 s1 = VREG_S(VS1REG, del); + INT16 s2 = VREG_S(VS2REG, sel); + + if (CARRY_FLAG(del) != 0) + { + if (ZERO_FLAG(del) != 0) + { + if (COMPARE_FLAG(del) != 0) + { + ACCUM_L(del) = -(UINT16)s2; + } + else + { + ACCUM_L(del) = s1; + } + } + else + 
{ + if (rsp.flag[2] & (1 << (del))) + { + if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) > 0x10000) + { + ACCUM_L(del) = s1; + CLEAR_COMPARE_FLAG(del); + } + else + { + ACCUM_L(del) = -((UINT16)s2); + SET_COMPARE_FLAG(del); + } + } + else + { + if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) != 0) + { + ACCUM_L(del) = s1; + CLEAR_COMPARE_FLAG(del); + } + else + { + ACCUM_L(del) = -((UINT16)s2); + SET_COMPARE_FLAG(del); + } + } + } + } + else + { + if (ZERO_FLAG(del) != 0) + { + if (rsp.flag[1] & (1 << (8+del))) + { + ACCUM_L(del) = s2; + } + else + { + ACCUM_L(del) = s1; + } + } + else + { + if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) + { + ACCUM_L(del) = s2; + rsp.flag[1] |= (1 << (8+del)); + } + else + { + ACCUM_L(del) = s1; + rsp.flag[1] &= ~(1 << (8+del)); + } + } + } + + vres[del] = ACCUM_L(del); + } + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + rsp.flag[2] = 0; + WRITEBACK_RESULT(); + break; + } + + case 0x25: /* VCH */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | + // ------------------------------------------------------ + // + // Vector clip high + + CLEAR_ZERO_FLAGS(); + CLEAR_CARRY_FLAGS(); + rsp.flag[1] = 0; + rsp.flag[2] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT16 s1 = VREG_S(VS1REG, del); + INT16 s2 = VREG_S(VS2REG, sel); + + if ((s1 ^ s2) < 0) + { + SET_CARRY_FLAG(del); + if (s2 < 0) + { + rsp.flag[1] |= (1 << (8+del)); + } + + if (s1 + s2 <= 0) + { + if (s1 + s2 == -1) + { + rsp.flag[2] |= (1 << (del)); + } + SET_COMPARE_FLAG(del); + vres[del] = -((UINT16)s2); + } + else + { + vres[del] = s1; + } + + if (s1 + s2 != 0) + { + if (s1 != ~s2) + { + SET_ZERO_FLAG(del); + } + } + } + else + { + if (s2 < 0) + { + SET_COMPARE_FLAG(del); + } + if (s1 - s2 >= 0) + { + rsp.flag[1] |= (1 << (8+del)); + vres[del] = s2; + } + else + { + vres[del] = s1; + } + + if ((s1 - s2) != 0) + { + if (s1 
!= ~s2) + { + SET_ZERO_FLAG(del); + } + } + } + + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + + case 0x26: /* VCR */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | + // ------------------------------------------------------ + // + // Vector clip reverse + + rsp.flag[0] = 0; + rsp.flag[1] = 0; + rsp.flag[2] = 0; + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + INT16 s1 = VREG_S(VS1REG, del); + INT16 s2 = VREG_S(VS2REG, sel); + + if ((INT16)(s1 ^ s2) < 0) + { + if (s2 < 0) + { + rsp.flag[1] |= (1 << (8+del)); + } + if ((s1 + s2) <= 0) + { + ACCUM_L(del) = ~((UINT16)s2); + SET_COMPARE_FLAG(del); + } + else + { + ACCUM_L(del) = s1; + } + } + else + { + if (s2 < 0) + { + SET_COMPARE_FLAG(del); + } + if ((s1 - s2) >= 0) + { + ACCUM_L(del) = s2; + rsp.flag[1] |= (1 << (8+del)); + } + else + { + ACCUM_L(del) = s1; + } + } + + vres[del] = ACCUM_L(del); + } + WRITEBACK_RESULT(); + break; + } + + case 0x27: /* VMRG */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | + // ------------------------------------------------------ + // + // Merges two vectors according to compare flags + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + if (COMPARE_FLAG(del) != 0) + { + vres[del] = VREG_S(VS1REG, del); + } + else + { + vres[del] = VREG_S(VS2REG, VEC_EL_2(EL, sel)); + } + + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x28: /* VAND */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | + // ------------------------------------------------------ + // + // Bitwise AND of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int 
sel = VEC_EL_2(EL, del); + vres[del] = VREG_S(VS1REG, del) & VREG_S(VS2REG, sel); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x29: /* VNAND */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | + // ------------------------------------------------------ + // + // Bitwise NOT AND of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + vres[del] = ~((VREG_S(VS1REG, del) & VREG_S(VS2REG, sel))); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x2a: /* VOR */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | + // ------------------------------------------------------ + // + // Bitwise OR of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + vres[del] = VREG_S(VS1REG, del) | VREG_S(VS2REG, sel); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x2b: /* VNOR */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | + // ------------------------------------------------------ + // + // Bitwise NOT OR of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + vres[del] = ~((VREG_S(VS1REG, del) | VREG_S(VS2REG, sel))); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x2c: /* VXOR */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | + // ------------------------------------------------------ + // + // Bitwise XOR of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel 
= VEC_EL_2(EL, del); + vres[del] = VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + case 0x2d: /* VNXOR */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | + // ------------------------------------------------------ + // + // Bitwise NOT XOR of two vector registers + + for (i=0; i < 8; i++) + { + int del = VEC_EL_1(EL, i); + int sel = VEC_EL_2(EL, del); + vres[del] = ~((VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel))); + ACCUM_L(del) = vres[del]; + } + WRITEBACK_RESULT(); + break; + } + + case 0x30: /* VRCP */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | + // ------------------------------------------------------ + // + // Calculates reciprocal + int del = (VS1REG & 7); + int sel = EL&7; //VEC_EL_2(EL, del); + INT32 rec; + + rec = (INT16)(VREG_S(VS2REG, sel)); + + if (rec == 0) + { + // divide by zero -> overflow + rec = 0x7fffffff; + } + else + { + int negative = 0; + if (rec < 0) + { + rec = ~rec+1; + negative = 1; + } + for (i = 15; i > 0; i--) + { + if (rec & (1 << i)) + { + rec &= ((0xffc0) >> (15 - i)); + i = 0; + } + } + rec = (INT32)(0x7fffffff / (double)rec); + for (i = 31; i > 0; i--) + { + if (rec & (1 << i)) + { + rec &= ((0xffff8000) >> (31 - i)); + i = 0; + } + } + if (negative) + { + rec = ~rec; + } + } + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + ACCUM_L(i) = VREG_S(VS2REG, element); + } + + rsp.reciprocal_res = rec; + + VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part + break; + } + + case 0x31: /* VRCPL */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | + // ------------------------------------------------------ + // + // Calculates reciprocal 
low part + + int del = (VS1REG & 7); + int sel = VEC_EL_2(EL, del); + INT32 rec; + + rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.reciprocal_high) << 16)); + + if (rec == 0) + { + // divide by zero -> overflow + rec = 0x7fffffff; + } + else + { + int negative = 0; + if (rec < 0) + { + if (((UINT32)(rec & 0xffff0000) == 0xffff0000) && ((INT16)(rec & 0xffff) < 0)) + { + rec = ~rec+1; + } + else + { + rec = ~rec; + } + negative = 1; + } + for (i = 31; i > 0; i--) + { + if (rec & (1 << i)) + { + rec &= ((0xffc00000) >> (31 - i)); + i = 0; + } + } + rec = (0x7fffffff / rec); + for (i = 31; i > 0; i--) + { + if (rec & (1 << i)) + { + rec &= ((0xffff8000) >> (31 - i)); + i = 0; + } + } + if (negative) + { + rec = ~rec; + } + } + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + ACCUM_L(i) = VREG_S(VS2REG, element); + } + + rsp.reciprocal_res = rec; + + VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part + break; + } + + case 0x32: /* VRCPH */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | + // ------------------------------------------------------ + // + // Calculates reciprocal high part + + int del = (VS1REG & 7); + int sel = VEC_EL_2(EL, del); + + rsp.reciprocal_high = VREG_S(VS2REG, sel); + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ? 
+ } + + VREG_S(VDREG, del) = (INT16)(rsp.reciprocal_res >> 16); // store high part + break; + } + + case 0x33: /* VMOV */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | + // ------------------------------------------------------ + // + // Moves element from vector to destination vector + + int element = VS1REG & 7; + VREG_S(VDREG, element) = VREG_S(VS2REG, VEC_EL_2(EL, 7-element)); + break; + } + + case 0x35: /* VRSQL */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | + // ------------------------------------------------------ + // + // Calculates reciprocal square-root low part + + int del = (VS1REG & 7); + int sel = VEC_EL_2(EL, del); + UINT32 sqr; + + sqr = (UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.square_root_high) << 16); + + if (sqr == 0) + { + // square root on 0 -> overflow + sqr = 0x7fffffff; + } + else if (sqr == 0xffff8000) + { + // overflow ? 
+ sqr = 0xffff8000; + } + else + { + int negative = 0; + if (sqr > 0x7fffffff) + { + if (((UINT32)(sqr & 0xffff0000) == 0xffff0000) && ((INT16)(sqr & 0xffff) < 0)) + { + sqr = ~sqr+1; + } + else + { + sqr = ~sqr; + } + negative = 1; + } + for (i = 31; i > 0; i--) + { + if (sqr & (1 << i)) + { + sqr &= (0xff800000 >> (31 - i)); + i = 0; + } + } + sqr = (INT32)(0x7fffffff / sqrt(sqr)); + for (i = 31; i > 0; i--) + { + if (sqr & (1 << i)) + { + sqr &= (0xffff8000 >> (31 - i)); + i = 0; + } + } + if (negative) + { + sqr = ~sqr; + } + } + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + ACCUM_L(i) = VREG_S(VS2REG, element); + } + + rsp.square_root_res = sqr; + + VREG_S(VDREG, del) = (UINT16)(rsp.square_root_res); // store low part + break; + } + + case 0x36: /* VRSQH */ + { + // 31 25 24 20 15 10 5 0 + // ------------------------------------------------------ + // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | + // ------------------------------------------------------ + // + // Calculates reciprocal square-root high part + + int del = (VS1REG & 7); + int sel = VEC_EL_2(EL, del); + + rsp.square_root_high = VREG_S(VS2REG, sel); + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ? 
+ } + + VREG_S(VDREG, del) = (INT16)(rsp.square_root_res >> 16); // store high part + break; + } + + default: unimplemented_opcode(op); break; + } +} + +int rsp_execute(int cycles) +{ + UINT32 op; + + rsp_icount=1; //cycles; + + UINT32 ExecutedCycles=0; + UINT32 BreakMarker=0; + UINT32 WDCHackFlag1=0; + UINT32 WDCHackFlag2=0; + + sp_pc = /*0x4001000 | */(sp_pc & 0xfff); + if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE)) + { + log(M64MSG_WARNING, "Quit due to SP halt/broke on start"); + rsp_icount = 0; + } + + + while (rsp_icount > 0) + { +#ifdef RSPTIMING + uint64_t lasttime; + lasttime = RDTSC(); +#endif + rsp.ppc = sp_pc; + + + op = ROPCODE(sp_pc); +#ifdef GENTRACE + char s[128]; + rsp_dasm_one(s, sp_pc, op); + GENTRACE("%2x %3x\t%s\n", ((UINT8*)rsp_dmem)[0x1934], sp_pc, s); +#endif + + if (rsp.nextpc != ~0U)///DELAY SLOT USAGE + { + sp_pc = /*0x4001000 | */(rsp.nextpc & 0xfff); //rsp.nextpc; + rsp.nextpc = ~0U; + } + else + { + sp_pc = /*0x4001000 | */((sp_pc+4)&0xfff); + } + + switch (op >> 26) + { + case 0x00: /* SPECIAL */ + { + switch (op & 0x3f) + { + case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break; + case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break; + case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break; + case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break; + case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break; + case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break; + case 0x08: /* JR */ JUMP_PC(RSVAL); break; + case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break; + case 0x0d: /* BREAK */ + { + *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE ); + if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { + *z64_rspinfo.MI_INTR_REG |= 1; + z64_rspinfo.CheckInterrupts(); + } + //sp_set_status(0x3); + rsp_icount = 0; + + BreakMarker=1; + +#if LOG_INSTRUCTION_EXECUTION + fprintf(exec_output, 
"\n---------- break ----------\n\n"); +#endif + break; + } + case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break; + case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break; + case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break; + case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break; + case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break; + case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break; + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x01: /* REGIMM */ + { + switch (RTREG) + { + case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break; + case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; + // VP according to the doc, link is performed even when condition fails, + // this sound pretty stupid but let's try it that way + case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; + //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break; + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x02: /* J */ JUMP_ABS(UIMM26); break; + case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break; + case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break; + case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break; + case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break; + case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break; + case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < 
((INT32)SIMM16); break; + case 0x0b: /* SLTIU */ if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break; + case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break; + case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break; + case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break; + case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break; + + case 0x10: /* COP0 */ + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break; + case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break; + default: + log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op); + break; + } + break; + } + + case 0x12: /* COP2 */ + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); + UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); + if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); + break; + } + case 0x02: /* CFC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + if (RTREG) + { + if (RDREG == 2) + { + // Anciliary clipping flags + RTVAL = rsp.flag[RDREG] & 0x00ff; + } + else + { + // All other flags are 16 bits but sign-extended at retrieval + RTVAL = (UINT32)rsp.flag[RDREG] | ( ( rsp.flag[RDREG] & 0x8000 ) ? 
0xffff0000 : 0 ); + } + } + break; + + } + case 0x04: /* MTC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; + VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; + break; + } + case 0x06: /* CTC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + rsp.flag[RDREG] = RTVAL & 0xffff; + break; + } + + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: + { + handle_vector_ops(op); + break; + } + + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break; + case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break; + case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break; + case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break; + case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break; + case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break; + case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break; + case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break; + case 0x32: /* LWC2 */ handle_lwc2(op); break; + case 0x3a: /* SWC2 */ handle_swc2(op); break; + + default: + { + unimplemented_opcode(op); + break; + } + } + +#ifdef RSPTIMING + uint64_t time = lasttime; + lasttime = RDTSC(); + rsp_opinfo_t info; + rsp_get_opinfo(op, &info); + rsptimings[info.op2] += lasttime - time; + rspcounts[info.op2]++; +#endif + +#if LOG_INSTRUCTION_EXECUTION + { + int i, l; + static UINT32 prev_regs[32]; + 
static VECTOR_REG prev_vecs[32]; + char string[200]; + rsp_dasm_one(string, rsp.ppc, op); + + fprintf(exec_output, "%08X: %s", rsp.ppc, string); + + l = strlen(string); + if (l < 36) + { + for (i=l; i < 36; i++) + { + fprintf(exec_output, " "); + } + } + + fprintf(exec_output, "| "); + + for (i=0; i < 32; i++) + { + if (rsp.r[i] != prev_regs[i]) + { + fprintf(exec_output, "R%d: %08X ", i, rsp.r[i]); + } + prev_regs[i] = rsp.r[i]; + } + + for (i=0; i < 32; i++) + { + if (rsp.v[i].d[0] != prev_vecs[i].d[0] || rsp.v[i].d[1] != prev_vecs[i].d[1]) + { + fprintf(exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i, + (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7)); + } + prev_vecs[i].d[0] = rsp.v[i].d[0]; + prev_vecs[i].d[1] = rsp.v[i].d[1]; + } + + fprintf(exec_output, "\n"); + + } +#endif + // --rsp_icount; + + ExecutedCycles++; + if( rsp_sp_status & SP_STATUS_SSTEP ) + { + if( rsp.step_count ) + { + rsp.step_count--; + } + else + { + rsp_sp_status |= SP_STATUS_BROKE; + } + } + + if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE)) + { + rsp_icount = 0; + + if(BreakMarker==0) + log(M64MSG_WARNING, "Quit due to SP halt/broke set by MTC0\n"); + } + + ///WDC&SR64 hack:VERSION3:1.8x -2x FASTER & safer + if((WDCHackFlag1==0)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D)) + WDCHackFlag1=ExecutedCycles; + if ((WDCHackFlag1!=0)&&((rsp.ppc<=0x137)||(rsp.ppc>=0x14D))) + WDCHackFlag1=0; + if ((WDCHackFlag1!=0)&&((ExecutedCycles-WDCHackFlag1)>=0x20)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D)) + { + // printf("WDC hack quit 1\n"); + rsp_icount=0;//32 cycles should be enough + } + if((WDCHackFlag2==0)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5)) + WDCHackFlag2=ExecutedCycles; + if ((WDCHackFlag2!=0)&&((rsp.ppc<=0xFCB)||(rsp.ppc>=0xFD5))) + WDCHackFlag2=0; + if ((WDCHackFlag2!=0)&&((ExecutedCycles-WDCHackFlag2)>=0x20)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5)) + { + // printf("WDC hack quit 
2\n");
+            rsp_icount=0;//32 cycles should be enough
+        }
+
+
+    }
+    //sp_pc -= 4;
+
+    return ExecutedCycles;
+}
+
+/*****************************************************************************/
+
+// Transfer direction passed to sp_dma().
+enum sp_dma_direction
+{
+    SP_DMA_RDRAM_TO_IDMEM,
+    SP_DMA_IDMEM_TO_RDRAM
+};
+
+// Run the SP DMA described by sp_dma_length: count rows of length bytes between RDRAM
+// (row stride skip) and the 4KB SP bank selected by bit 12 (0x1000) of sp_mem_addr.
+static void sp_dma(enum sp_dma_direction direction)
+{
+    UINT8 *src, *dst;
+    int i, j;
+    int length;
+    int count;
+    int skip;
+    UINT32 mem_base; // 8-byte aligned start offset inside the SP bank
+
+    UINT32 l = sp_dma_length;
+    length = ((l & 0xfff) | 7) + 1; // low 12 bits, rounded up to a multiple of 8
+    skip = (l >> 20) + length;
+    count = ((l >> 12) & 0xff) + 1;
+    mem_base = sp_mem_addr & ~7 & 0xfff;
+    if (direction == SP_DMA_RDRAM_TO_IDMEM) // RDRAM -> I/DMEM
+    {
+        // dst points at the start of the bank, not at mem_base: the offset is added per
+        // byte below so that it can be wrapped at the end of the bank.
+        src = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
+        dst = (sp_mem_addr & 0x1000) ? (UINT8*)rsp_imem : (UINT8*)rsp_dmem;
+        // Byte index swizzle, applied to both the source and the destination index.
+#define BYTE8_XOR_BE(a) ((a)^7)// JFG, Ocarina of Time
+
+        for (j=0; j < count; j++)
+        {
+            for (i=0; i < length; i++)
+            {
+                // Wrap the absolute bank offset at 0xfff; masking only the running index
+                // let a transfer that starts mid-bank write past the end of the bank.
+                dst[BYTE8_XOR_BE((mem_base + i + j*length) & 0xfff)] = src[BYTE8_XOR_BE(i + j*skip)];
+            }
+        }
+
+        // The copy is done synchronously, so clear the busy indications right away.
+        *z64_rspinfo.SP_DMA_BUSY_REG = 0;
+        *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
+    }
+    else if (direction == SP_DMA_IDMEM_TO_RDRAM) // I/DMEM -> RDRAM
+    {
+        // Mirror image of the branch above: src is the start of the selected bank and
+        // the wrapped bank offset is applied per byte.
+
+        dst = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
+        src = (sp_mem_addr & 0x1000) ? (UINT8*)rsp_imem : (UINT8*)rsp_dmem;
+        // BYTE8_XOR_BE is the macro defined in the first branch.
+
+        for (j=0; j < count; j++)
+        {
+            for (i=0; i < length; i++)
+            {
+                // Same wrap as above, on the read side: never read beyond the end of
+                // the bank, continue at its start instead.
+                dst[BYTE8_XOR_BE(i + j*skip)] = src[BYTE8_XOR_BE((mem_base + i + j*length) & 0xfff)];
+            }
+        }
+
+        // The copy is done synchronously, so clear the busy indications right away.
+        *z64_rspinfo.SP_DMA_BUSY_REG = 0;
+        *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
+    }
+}
+
+
+
+// Read handler for the SP registers. offset is a word index (register byte offset / 4);
+// unknown offsets are logged and read as 0. dummy is not used.
+UINT32 n64_sp_reg_r(UINT32 offset, UINT32 dummy)
+{
+    switch (offset)
+    {
+    case 0x00/4: // SP_MEM_ADDR_REG
+        return sp_mem_addr;
+
+    case 0x04/4: // SP_DRAM_ADDR_REG
+        return sp_dram_addr;
+
+    case 0x08/4: // SP_RD_LEN_REG
+        return sp_dma_rlength;
+
+    case 0x10/4: // SP_STATUS_REG
+        return rsp_sp_status;
+
+    case 0x14/4: // SP_DMA_FULL_REG
+        return 0;
+
+    case 0x18/4: // SP_DMA_BUSY_REG
+        return 0;
+
+    case 0x1c/4: // SP_SEMAPHORE_REG
+        return sp_semaphore;
+
+    default:
+        log(M64MSG_WARNING, "sp_reg_r: %08X\n", offset);
+        break;
+    }
+
+    return 0;
+}
+
+// Write handler for the SP registers; bit 0x10000 of offset selects the SP_PC block.
+void n64_sp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy)
+{
+    UINT32 InterruptPending=0;
+    if ((offset & 0x10000) == 0)
+    {
+        switch (offset & 0xffff)
+        {
+        case 0x00/4: // SP_MEM_ADDR_REG
+            sp_mem_addr = data;
+            break;
+
+        case 0x04/4: // SP_DRAM_ADDR_REG
+            sp_dram_addr = data & 0xffffff;
+            break;
+
+        case 0x08/4: // SP_RD_LEN_REG
+            // sp_dma_length = data & 0xfff;
+            // sp_dma_count = (data >> 12) & 0xff;
+            // sp_dma_skip = (data >> 20) & 0xfff;
+            sp_dma_length=data;
+            sp_dma(SP_DMA_RDRAM_TO_IDMEM);
+            break;
+
+        case 0x0c/4: // SP_WR_LEN_REG
+            // sp_dma_length = data & 0xfff;
+            // sp_dma_count = (data >> 12) & 0xff;
+            // sp_dma_skip = (data >> 20) & 0xfff;
+            sp_dma_length=data;
+            sp_dma(SP_DMA_IDMEM_TO_RDRAM);
+            break;
+
+        case 0x10/4: // SP_STATUS_REG
+        {
+            if((data&0x1)&&(data&0x2))
+                log(M64MSG_ERROR, "Clear halt and set halt simultaneously\n");
+            if((data&0x8)&&(data&0x10))
+                log(M64MSG_ERROR, "Clear int and set int simultaneously\n");
+            if((data&0x20)&&(data&0x40))
+                log(M64MSG_ERROR, "Clear sstep and set sstep simultaneously\n");
+            if (data & 0x00000001) // clear halt
+            {
+                rsp_sp_status &= ~SP_STATUS_HALT;
+
+                // if (first_rsp)
+                // {
+                // cpu_spinuntil_trigger(6789);
+
+                // cpunum_set_input_line(1, INPUT_LINE_HALT, CLEAR_LINE);
+                // rsp_sp_status &= ~SP_STATUS_HALT;
+                // }
+                // else
+                // {
+                // first_rsp = 1;
+                // }
+            }
+            if (data & 0x00000002) // set halt
+            {
+                // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
+                rsp_sp_status |= SP_STATUS_HALT;
+            }
+            if (data & 0x00000004) rsp_sp_status &= ~SP_STATUS_BROKE; // clear broke
+            if (data & 0x00000008) // clear interrupt
+            {
+                *z64_rspinfo.MI_INTR_REG &= ~R4300i_SP_Intr;
+                ///TEMPORARY COMMENTED FOR SPEED
+                /// printf("sp_reg_w clear interrupt");
+                //clear_rcp_interrupt(SP_INTERRUPT);
+            }
+            if (data & 0x00000010) // set interrupt
+            {
+                InterruptPending = 1; // was never set, which left the delivery code below unreachable
+            }
+            if (data & 0x00000020) rsp_sp_status &= ~SP_STATUS_SSTEP; // clear single step
+            if (data & 0x00000040) {
+                rsp_sp_status |= SP_STATUS_SSTEP; // set single step
+                log(M64MSG_STATUS, "RSP STATUS REG: SSTEP set\n");
+            }
+            if (data & 0x00000080) rsp_sp_status &= ~SP_STATUS_INTR_BREAK; // clear interrupt on break
+            if (data & 0x00000100) rsp_sp_status |= SP_STATUS_INTR_BREAK; // set interrupt on break
+            if (data & 0x00000200) rsp_sp_status &= ~SP_STATUS_SIGNAL0; // clear signal 0
+            if (data & 0x00000400) rsp_sp_status |= SP_STATUS_SIGNAL0; // set signal 0
+            if (data & 0x00000800) rsp_sp_status &= ~SP_STATUS_SIGNAL1; // clear signal 1
+            if (data & 0x00001000) rsp_sp_status |= SP_STATUS_SIGNAL1; // set signal 1
+            if (data & 0x00002000) rsp_sp_status &= ~SP_STATUS_SIGNAL2; // clear signal 2
+            if (data & 0x00004000) rsp_sp_status |= SP_STATUS_SIGNAL2; // set signal 2
+            if (data & 0x00008000) rsp_sp_status &= ~SP_STATUS_SIGNAL3; // clear signal 3
+            if (data & 0x00010000) rsp_sp_status |= SP_STATUS_SIGNAL3; // set signal 3
+            if (data & 0x00020000) rsp_sp_status &= ~SP_STATUS_SIGNAL4; // clear signal 4
+            if (data & 0x00040000) rsp_sp_status |= SP_STATUS_SIGNAL4; // set signal 4
+            if (data & 0x00080000) rsp_sp_status &= ~SP_STATUS_SIGNAL5; // clear signal 5
+            if (data & 0x00100000) rsp_sp_status |= SP_STATUS_SIGNAL5; // set signal 5
+            if (data & 0x00200000) rsp_sp_status &= ~SP_STATUS_SIGNAL6; // clear signal 6
+            if (data & 0x00400000) rsp_sp_status |= SP_STATUS_SIGNAL6; // set signal 6
+            if (data & 0x00800000) rsp_sp_status &= ~SP_STATUS_SIGNAL7; // clear signal 7
+            if (data & 0x01000000) rsp_sp_status |= SP_STATUS_SIGNAL7; // set signal 7
+
+            if(InterruptPending==1) // deliver only after every status bit has been applied
+            {
+                *z64_rspinfo.MI_INTR_REG |= R4300i_SP_Intr; // the bit that "clear interrupt" clears
+                z64_rspinfo.CheckInterrupts();
+                InterruptPending=0;
+            }
+            break;
+        }
+
+        case 0x1c/4: // SP_SEMAPHORE_REG
+            sp_semaphore = data;
+            // mame_printf_debug("sp_semaphore = %08X\n", sp_semaphore);
+            break;
+
+        default:
+            log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
+            break;
+        }
+    }
+    else
+    {
+        switch (offset & 0xffff)
+        {
+        case 0x00/4: // SP_PC_REG
+            //cpunum_set_info_int(1, CPUINFO_INT_PC, 0x04001000 | (data & 0xfff));
+            //break;
+            /* fallthrough: SP_PC writes are not handled here, only logged */
+        default:
+            log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
+            break;
+        }
+    }
+}
+
+UINT32 sp_read_reg(UINT32 reg) // thin wrapper: every register goes to n64_sp_reg_r
+{
+    switch (reg)
+    {
+    //case 4: return rsp_sp_status;
+    default: return n64_sp_reg_r(reg, 0x00000000);
+    }
+}
+
+
+void sp_write_reg(UINT32 reg, UINT32 data) // thin wrapper: every register goes to n64_sp_reg_w
+{
+    switch (reg)
+    {
+    default: n64_sp_reg_w(reg, data, 0x00000000); break;
+    }
+}
diff --git a/source/mupen64plus-rsp-z64/src/rsp.h b/source/mupen64plus-rsp-z64/src/rsp.h
new file mode 100644
index 0000000..6c1cf73
--- /dev/null
+++ b/source/mupen64plus-rsp-z64/src/rsp.h
@@ -0,0 +1,449 @@
+/*
+ * z64
+ *
+ * Copyright (C) 2007 ziggy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it
under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#ifndef _RSP_H_ +#define _RSP_H_ + +#define M64P_PLUGIN_PROTOTYPES 1 +#include "m64p_types.h" +#include "m64p_plugin.h" +#include "z64.h" +#include // sqrt +#include +#include +#include // memset + +#define INLINE inline + +extern void log(m64p_msg_level level, const char *msg, ...); + +/* defined in systems/n64.c */ +#define rdram ((UINT32*)z64_rspinfo.RDRAM) +//extern UINT32 *rdram; +#define rsp_imem ((UINT32*)z64_rspinfo.IMEM) +//extern UINT32 *rsp_imem; +#define rsp_dmem ((UINT32*)z64_rspinfo.DMEM) +//extern UINT32 *rsp_dmem; +//extern void dp_full_sync(void); + +#define vi_origin (*(UINT32*)z64_rspinfo.VI_ORIGIN_REG) +//extern UINT32 vi_origin; +#define vi_width (*(UINT32*)z64_rspinfo.VI_WIDTH_REG) +//extern UINT32 vi_width; +#define vi_control (*(UINT32*)z64_rspinfo.VI_STATUS_REG) +//extern UINT32 vi_control; + +#define dp_start (*(UINT32*)z64_rspinfo.DPC_START_REG) +//extern UINT32 dp_start; +#define dp_end (*(UINT32*)z64_rspinfo.DPC_END_REG) +//extern UINT32 dp_end; +#define dp_current (*(UINT32*)z64_rspinfo.DPC_CURRENT_REG) +//extern UINT32 dp_current; +#define dp_status (*(UINT32*)z64_rspinfo.DPC_STATUS_REG) +//extern UINT32 dp_status; + +#define sp_pc (*(UINT32*)z64_rspinfo.SP_PC_REG) + +typedef union +{ + UINT64 d[2]; + UINT32 l[4]; + INT16 s[8]; + UINT8 b[16]; +} VECTOR_REG; + +typedef union +{ + INT64 
q; + INT32 l[2]; + INT16 w[4]; +} ACCUMULATOR_REG; + +typedef struct +{ + // vectors first , need to be memory aligned for sse + VECTOR_REG v[32]; + ACCUMULATOR_REG accum[8]; + + //UINT32 pc; + UINT32 r[32]; + UINT16 flag[4]; + + INT32 square_root_res; + INT32 square_root_high; + INT32 reciprocal_res; + INT32 reciprocal_high; + + UINT32 ppc; + UINT32 nextpc; + + UINT32 step_count; + + int inval_gen; + + RSP_INFO ext; +} RSP_REGS; + +#define z64_rspinfo (rsp.ext) + +int rsp_execute(int cycles); +void rsp_reset(void); +void rsp_init(RSP_INFO info); +offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); + +extern UINT32 sp_read_reg(UINT32 reg); +extern void sp_write_reg(UINT32 reg, UINT32 data); +// extern READ32_HANDLER( n64_dp_reg_r ); +// extern WRITE32_HANDLER( n64_dp_reg_w ); + +#define RSREG ((op >> 21) & 0x1f) +#define RTREG ((op >> 16) & 0x1f) +#define RDREG ((op >> 11) & 0x1f) +#define SHIFT ((op >> 6) & 0x1f) + +#define RSVAL (rsp.r[RSREG]) +#define RTVAL (rsp.r[RTREG]) +#define RDVAL (rsp.r[RDREG]) + +#define _RSREG(op) ((op >> 21) & 0x1f) +#define _RTREG(op) ((op >> 16) & 0x1f) +#define _RDREG(op) ((op >> 11) & 0x1f) +#define _SHIFT(op) ((op >> 6) & 0x1f) + +#define _RSVAL(op) (rsp.r[_RSREG(op)]) +#define _RTVAL(op) (rsp.r[_RTREG(op)]) +#define _RDVAL(op) (rsp.r[_RDREG(op)]) + +#define SIMM16 ((INT32)(INT16)(op)) +#define UIMM16 ((UINT16)(op)) +#define UIMM26 (op & 0x03ffffff) + +#define _SIMM16(op) ((INT32)(INT16)(op)) +#define _UIMM16(op) ((UINT16)(op)) +#define _UIMM26(op) (op & 0x03ffffff) + + +/*#define _JUMP(pc) \ +if ((GENTRACE("_JUMP %x\n", rsp.nextpc), 1) && rsp_jump(rsp.nextpc)) return 1; \ +if (rsp.inval_gen || sp_pc != pc+8) return 0; +*/ + +#define CARRY_FLAG(x) ((rsp.flag[0] & (1 << ((x)))) ? 1 : 0) +#define CLEAR_CARRY_FLAGS() { rsp.flag[0] &= ~0xff; } +#define SET_CARRY_FLAG(x) { rsp.flag[0] |= (1 << ((x))); } +#define CLEAR_CARRY_FLAG(x) { rsp.flag[0] &= ~(1 << ((x))); } + +#define COMPARE_FLAG(x) ((rsp.flag[1] & (1 << ((x)))) ? 
1 : 0)
+#define CLEAR_COMPARE_FLAGS() { rsp.flag[1] &= ~0xff; }
+#define SET_COMPARE_FLAG(x) { rsp.flag[1] |= (1 << ((x))); }
+#define CLEAR_COMPARE_FLAG(x) { rsp.flag[1] &= ~(1 << ((x))); }
+
+#define ZERO_FLAG(x) ((rsp.flag[0] & (1 << (8+(x)))) ? 1 : 0)
+#define CLEAR_ZERO_FLAGS() { rsp.flag[0] &= ~0xff00; }
+#define SET_ZERO_FLAG(x) { rsp.flag[0] |= (1 << (8+(x))); }
+#define CLEAR_ZERO_FLAG(x) { rsp.flag[0] &= ~(1 << (8+(x))); }
+
+//#define rsp z64_rsp // to avoid namespace collision with other libs
+extern RSP_REGS rsp __attribute__((aligned(16)));
+
+
+// Opcode fetch: reads through the DMEM pointer at offset 0x1000 and up (presumably where IMEM is mapped).
+#define ROPCODE(pc) program_read_dword_32be(pc | 0x1000)
+
+INLINE UINT8 program_read_byte_32be(UINT32 address)
+{
+    return ((UINT8*)z64_rspinfo.DMEM)[(address&0x1fff)^3];
+}
+
+INLINE UINT16 program_read_word_32be(UINT32 address)
+{
+    return ((UINT16*)z64_rspinfo.DMEM)[((address&0x1fff)>>1)^1];
+}
+
+INLINE UINT32 program_read_dword_32be(UINT32 address)
+{
+    return ((UINT32*)z64_rspinfo.DMEM)[(address&0x1fff)>>2];
+}
+
+INLINE void program_write_byte_32be(UINT32 address, UINT8 data)
+{
+    ((UINT8*)z64_rspinfo.DMEM)[(address&0x1fff)^3] = data;
+}
+
+INLINE void program_write_word_32be(UINT32 address, UINT16 data)
+{
+    ((UINT16*)z64_rspinfo.DMEM)[((address&0x1fff)>>1)^1] = data;
+}
+
+INLINE void program_write_dword_32be(UINT32 address, UINT32 data)
+{
+    ((UINT32*)z64_rspinfo.DMEM)[(address&0x1fff)>>2] = data;
+}
+
+INLINE UINT8 READ8(UINT32 address)
+{
+    address = 0x04000000 | (address & 0xfff);
+    return program_read_byte_32be(address);
+}
+
+INLINE UINT16 READ16(UINT32 address)
+{
+    address = 0x04000000 | (address & 0xfff);
+
+    if (address & 1)
+    {
+        // Unaligned: read byte by byte; the second byte wraps inside the 4KB window (0xfff -> 0x000).
+        return ((program_read_byte_32be(address+0) & 0xff) << 8) | (program_read_byte_32be((address+1) & 0xfff) & 0xff);
+    }
+
+    return program_read_word_32be(address);
+}
+
+INLINE UINT32 READ32(UINT32 address)
+{
+    address = 0x04000000 | (address & 0xfff);
+
+    if (address & 3)
+    {
+        // Unaligned: each following byte wraps inside the 4KB window; the top byte is widened before << 24.
+        return ((UINT32)(program_read_byte_32be(address + 0) & 0xff) << 24) |
+               ((program_read_byte_32be((address + 1) & 0xfff) & 0xff) << 16) |
+               ((program_read_byte_32be((address + 2) & 0xfff) & 0xff) << 8) |
+               ((program_read_byte_32be((address + 3) & 0xfff) & 0xff) << 0);
+    }
+
+    return program_read_dword_32be(address);
+}
+
+
+INLINE void WRITE8(UINT32 address, UINT8 data)
+{
+    address = 0x04000000 | (address & 0xfff);
+    program_write_byte_32be(address, data);
+}
+
+INLINE void WRITE16(UINT32 address, UINT16 data)
+{
+    address = 0x04000000 | (address & 0xfff);
+
+    if (address & 1)
+    {
+        // Unaligned: write byte by byte; the second byte wraps inside the 4KB window (0xfff -> 0x000).
+        program_write_byte_32be(address + 0, (data >> 8) & 0xff);
+        program_write_byte_32be((address + 1) & 0xfff, (data >> 0) & 0xff);
+        return;
+    }
+
+    program_write_word_32be(address, data);
+}
+
+INLINE void WRITE32(UINT32 address, UINT32 data)
+{
+    address = 0x04000000 | (address & 0xfff);
+
+    if (address & 3)
+    {
+        // Unaligned: write byte by byte; each following byte wraps inside the 4KB window.
+        program_write_byte_32be(address + 0, (data >> 24) & 0xff);
+        program_write_byte_32be((address + 1) & 0xfff, (data >> 16) & 0xff);
+        program_write_byte_32be((address + 2) & 0xfff, (data >> 8) & 0xff);
+        program_write_byte_32be((address + 3) & 0xfff, (data >> 0) & 0xff);
+        return;
+    }
+
+    program_write_dword_32be(address, data);
+}
+
+int rsp_jump(int pc);
+void rsp_invalidate(int begin, int len);
+void rsp_execute_one(UINT32 op);
+
+
+
+#define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); }
+#define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; }
+#define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); }
+#define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; }
+#define JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) &
0xfff); } +#define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; } +#define LINK(l) rsp.r[l] = sp_pc + 4 + +#define VDREG ((op >> 6) & 0x1f) +#define VS1REG ((op >> 11) & 0x1f) +#define VS2REG ((op >> 16) & 0x1f) +#define EL ((op >> 21) & 0xf) + +#define VREG_B(reg, offset) rsp.v[(reg)].b[((offset)^1)] +#define VREG_S(reg, offset) rsp.v[(reg)].s[((offset))] +#define VREG_L(reg, offset) rsp.v[(reg)].l[((offset))] + +#define VEC_EL_1(x,z) (z) //(vector_elements_1[(x)][(z)]) +#define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) + +#define ACCUM(x) rsp.accum[((x))].q +#define ACCUM_H(x) rsp.accum[((x))].w[3] +#define ACCUM_M(x) rsp.accum[((x))].w[2] +#define ACCUM_L(x) rsp.accum[((x))].w[1] + +void unimplemented_opcode(UINT32 op); +void handle_vector_ops(UINT32 op); +UINT32 get_cop0_reg(int reg); +void set_cop0_reg(int reg, UINT32 data); +void handle_lwc2(UINT32 op); +void handle_swc2(UINT32 op); + +INLINE UINT32 n64_dp_reg_r(UINT32 offset, UINT32 dummy) +{ + switch (offset) + { + case 0x00/4: // DP_START_REG + return dp_start; + + case 0x04/4: // DP_END_REG + return dp_end; + + case 0x08/4: // DP_CURRENT_REG + return dp_current; + + case 0x0c/4: // DP_STATUS_REG + return dp_status; + + case 0x10/4: // DP_CLOCK_REG + return *z64_rspinfo.DPC_CLOCK_REG; + + default: + log(M64MSG_WARNING, "dp_reg_r: %08X\n", offset); + break; + } + + return 0; +} +INLINE void n64_dp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy) +{ + switch (offset) + { + case 0x00/4: // DP_START_REG + dp_start = data; + dp_current = dp_start; + break; + + case 0x04/4: // DP_END_REG + dp_end = data; + //rdp_process_list(); + if (dp_end= 2 +static __inline__ unsigned long long RDTSC(void) +{ + unsigned long long int x; + __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); + return x; +} +// inline volatile uint64_t RDTSC() { +// register uint64_t TSC asm("eax"); +// asm volatile (".byte 15, 49" : : : "eax", "edx"); +// return TSC; +// } +// #define RDTSC1(n) __asm__ 
__volatile__("rdtsc" : "=a" (n): ) +// #define RDTSC2(n) __asm__ __volatile__ ("rdtsc\nmov %%edx,%%eax" : "=a" (n): ) +// inline void RDTSC(uint64_t& a) { uint32_t b, c; RDTSC1(b); RDTSC2(c); +// a = (((uint64_t)c)<<32) | b; } +#elif defined(INTEL86) && defined WIN32 +#define rdtsc __asm __emit 0fh __asm __emit 031h +#define cpuid __asm __emit 0fh __asm __emit 0a2h +inline uint64_t RDTSC() { + static uint32_t temp; + __asm { + push edx + push eax + rdtsc + mov temp, eax + pop eax + pop edx + } + return temp; +} +#else +#define RDTSC(n) n=0 +#endif + +#ifdef GENTRACE +#undef GENTRACE +#include +inline void GENTRACE(const char * s, ...) { + va_list ap; + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + int i; + for (i=0; i<32; i++) + fprintf(stderr, "r%d=%x ", i, rsp.r[i]); + fprintf(stderr, "\n"); + for (i=0; i<32; i++) + fprintf(stderr, "v%d=%x %x %x %x %x %x %x %x ", i, + (UINT16)rsp.v[i].s[0], + (UINT16)rsp.v[i].s[1], + (UINT16)rsp.v[i].s[2], + (UINT16)rsp.v[i].s[3], + (UINT16)rsp.v[i].s[4], + (UINT16)rsp.v[i].s[5], + (UINT16)rsp.v[i].s[6], + (UINT16)rsp.v[i].s[7] + ); + fprintf(stderr, "\n"); + + fprintf(stderr, "f0=%x f1=%x f2=%x f3=%x\n", rsp.flag[0], + rsp.flag[1],rsp.flag[2],rsp.flag[3]); +} +#endif +//#define GENTRACE printf +//#define GENTRACE + +#ifdef RSPTIMING +extern uint64_t rsptimings[512]; +extern int rspcounts[512]; +#endif + +#endif // ifndef _RSP_H_ diff --git a/source/mupen64plus-rsp-z64/src/rsp_api_export.ver b/source/mupen64plus-rsp-z64/src/rsp_api_export.ver new file mode 100644 index 0000000..27e8138 --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp_api_export.ver @@ -0,0 +1,8 @@ +{ global: +PluginStartup; +PluginShutdown; +PluginGetVersion; +DoRspCycles; +InitiateRSP; +RomClosed; +local: *; }; diff --git a/source/mupen64plus-rsp-z64/src/rsp_dasm.cpp b/source/mupen64plus-rsp-z64/src/rsp_dasm.cpp new file mode 100644 index 0000000..40f557c --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp_dasm.cpp @@ -0,0 +1,377 @@ +/* + 
* z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +/* + Nintendo/SGI RSP Disassembler + + Written by Ville Linde +*/ + +//#include "z64.h" +#include +#include +#include "rsp.h" + +#define DASMFLAG_SUPPORTED 0x80000000 /* are disassembly flags supported? */ +#define DASMFLAG_STEP_OUT 0x40000000 /* this instruction should be the end of a step out sequence */ +#define DASMFLAG_STEP_OVER 0x20000000 /* this instruction should be stepped over by setting a breakpoint afterwards */ +#define DASMFLAG_OVERINSTMASK 0x18000000 /* number of extra instructions to skip when stepping over */ +#define DASMFLAG_OVERINSTSHIFT 27 /* bits to shift after masking to get the value */ +#define DASMFLAG_LENGTHMASK 0x0000ffff /* the low 16-bits contain the actual length */ +#define DASMFLAG_STEP_OVER_EXTRA(x) ((x) << DASMFLAG_OVERINSTSHIFT) + +static const char *reg[32] = +{ + "0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" +}; + +static const char *vreg[32] = +{ + " v0", " v1", " v2", " v3", " v4", " v5", " v6", " v7", + " v8", " v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", 
"v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" +}; + +static const char *cop0_regs[32] = +{ + "SP_MEM_ADDR", "SP_DRAM_ADDR", "SP_RD_LEN", "SP_WR_LEN", + "SP_STATUS", "SP_DMA_FULL", "SP_DMA_BUSY", "SP_SEMAPHORE", + "DPC_START", "DPC_END", "DPC_CURRENT", "DPC_STATUS", + "DPC_CLOCK", "DPC_BUFBUSY", "DPC_PIPEBUSY", "DPC_TMEM", + "???", "???", "???", "???", + "???", "???", "???", "???", + "???", "???", "???", "???", + "???", "???", "???", "???" +}; + +static const char *element[16] = +{ + "", "[???]", "[00224466]", "[11335577]", "[00004444]", "[11115555]", "[22226666]", "[33337777]", + "[00000000]", "[11111111]", "[22222222]", "[33333333]", "[44444444]", "[55555555]", "[66666666]", "[77777777]" +}; + +static const char *element2[16] = +{ + "01234567", "????????", "00224466", "11335577", "00004444", "11115555", "22226666", "33337777", + "00000000", "11111111", "22222222", "33333333", "44444444", "55555555", "66666666", "77777777" +}; + +#define INLINE inline +INLINE char *signed_imm16(UINT32 op) +{ + static char temp[10]; + INT16 value = op & 0xffff; + + if (value < 0) + { + sprintf(temp, "-$%04x", -value); + } + else + { + sprintf(temp, "$%04x", value); + } + return temp; +} + + +static char *output; + +static void print(const char *fmt, ...) +{ + va_list vl; + + va_start(vl, fmt); + output += vsprintf(output, fmt, vl); + va_end(vl); +} + +static void disasm_cop0(UINT32 op) +{ + int rt = (op >> 16) & 31; + int rd = (op >> 11) & 31; + + switch ((op >> 21) & 0x1f) + { + case 0x00: print("mfc0 %s, %s", reg[rt], cop0_regs[rd]); break; + case 0x04: print("mtc0 %s, %s", reg[rt], cop0_regs[rd]); break; + + default: print("??? 
(COP0)"); break; + } +} + +static void disasm_cop2(UINT32 op) +{ + int rt = (op >> 16) & 31; + int rd = (op >> 11) & 31; + int el = (op >> 21) & 0xf; + int dest = (op >> 6) & 0x1f; + int s1 = rd; + int s2 = rt; + + switch ((op >> 21) & 0x1f) + { + case 0x00: print("mfc2 %s, %s[%d]", reg[rt], vreg[rd], dest); break; + case 0x02: print("cfc2 %s, FLAG%d", reg[rt], rd); break; + case 0x04: print("mtc2 %s, %s[%d]", reg[rt], vreg[rd], dest); break; + case 0x06: print("ctc2 %s, FLAG%d", reg[rt], rd); break; + + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: + { + switch (op & 0x3f) + { + case 0x00: print("vmulf %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x01: print("vmulu %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x02: print("vrndp %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x03: print("vmulq %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x04: print("vmudl %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x05: print("vmudm %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x06: print("vmudn %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x07: print("vmudh %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x08: print("vmacf %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x09: print("vmacu %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x0a: print("vrndn %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x0b: print("vmacq %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x0c: print("vmadl %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x0d: print("vmadm %s, %s, %s%s", vreg[dest], 
vreg[s1], vreg[s2], element[el]); break; + case 0x0e: print("vmadn %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x0f: print("vmadh %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x10: print("vadd %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x11: print("vsub %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x12: print("vsut???"); break; + case 0x13: print("vabs %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x14: print("vaddc %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x15: print("vsubc %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + + case 0x1d: + { + switch (el) + { + case 8: print("vsaw %s, ACCUM_H", vreg[dest]); break; + case 9: print("vsaw %s, ACCUM_M", vreg[dest]); break; + case 10: print("vsaw %s, ACCUM_L", vreg[dest]); break; + default: print("vsaw %s, ???", vreg[dest]); break; + } + break; + } + + case 0x20: print("vlt %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x21: print("veq %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x22: print("vne %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x23: print("vge %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x24: print("vcl %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x25: print("vch %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x26: print("vcr %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x27: print("vmrg %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x28: print("vand %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x29: print("vnand %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x2a: print("vor %s, %s, %s%s", vreg[dest], vreg[s1], 
vreg[s2], element[el]); break; + case 0x2b: print("vnor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x2c: print("vxor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x2d: print("vnxor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break; + case 0x30: print("vrcp %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x31: print("vrcpl %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x32: print("vrcph %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x33: print("vmov %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x34: print("vrsq %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x35: print("vrsql %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x36: print("vrsqh %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break; + case 0x37: print("vnop"); break; + default: print("??? (VECTOR OP)"); break; + } + break; + } + + default: print("??? 
(COP2)"); break;
+    }
+}
+
+/* One LWC2/SWC2 sub-opcode: its mnemonic and the factor by which the 7-bit
+ * offset field is multiplied before it is printed. */
+struct vec_mem_op_t
+{
+    const char *mnemonic;
+    int scale;
+};
+
+/*
+ * Shared printer for the vector load/store groups.
+ *
+ * Decodes the vector register (bits 16-20), the base register (bits 21-25),
+ * the element index (bits 7-10) and the 7-bit offset (bits 0-6, sign bit
+ * 0x40) and prints "<mnemonic> <vreg>[<element>], <offset>(<base>)".
+ * The sub-opcode is taken from bits 11-15; values at or beyond 'count'
+ * print the 'unknown' text instead.
+ */
+static void disasm_vector_mem(UINT32 op, const vec_mem_op_t *table, int count, const char *unknown)
+{
+    int dest = (op >> 16) & 0x1f;
+    int base = (op >> 21) & 0x1f;
+    int del = (op >> 7) & 0xf;
+    int sub = (op >> 11) & 0x1f;
+    int offset = (op & 0x7f);
+
+    /* sign-extend the 7-bit offset */
+    if (offset & 0x40)
+        offset |= 0xffffff80;
+
+    if (sub >= count)
+    {
+        print("%s", unknown);
+        return;
+    }
+
+    print("%s %s[%d], %s(%s)", table[sub].mnemonic, vreg[dest], del,
+          signed_imm16(offset * table[sub].scale), reg[base]);
+}
+
+/* Disassemble one LWC2 (vector load) instruction into the output buffer. */
+static void disasm_lwc2(UINT32 op)
+{
+    static const vec_mem_op_t loads[] =
+    {
+        { "lbv", 1 },  { "lsv", 2 },  { "llv", 4 },  { "ldv", 8 },
+        { "lqv", 16 }, { "lrv", 16 }, { "lpv", 8 },  { "luv", 8 },
+        { "lhv", 16 }, { "lfv", 16 }, { "lwv", 16 }, { "ltv", 16 },
+    };
+
+    disasm_vector_mem(op, loads, sizeof(loads) / sizeof(loads[0]), "??? (LWC2)");
+}
+
+/* Disassemble one SWC2 (vector store) instruction into the output buffer. */
+static void disasm_swc2(UINT32 op)
+{
+    static const vec_mem_op_t stores[] =
+    {
+        { "sbv", 1 },  { "ssv", 2 },  { "slv", 4 },  { "sdv", 8 },
+        { "sqv", 16 }, { "srv", 16 }, { "spv", 8 },  { "suv", 8 },
+        { "shv", 16 }, { "sfv", 16 }, { "swv", 16 }, { "stv", 16 },
+    };
+
+    disasm_vector_mem(op, stores, sizeof(stores) / sizeof(stores[0]), "??? (SWC2)");
+}
+
+/*
+ * Disassemble a single RSP instruction.
+ *
+ * buffer  destination for the text (installed as the 'output' target that
+ *         print() writes to)
+ * pc      address of the instruction, used to resolve branch targets
+ * op      the 32-bit instruction word
+ *
+ * Returns 4 combined with DASMFLAG_SUPPORTED and any step-over / step-out
+ * flags set for jr $31, jalr and break.
+ */
+offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op)
+{
+    int rs = (op >> 21) & 31;
+    int rt = (op >> 16) & 31;
+    int rd = (op >> 11) & 31;
+    int shift = (op >> 6) & 31;
+    UINT32 flags = 0;
+
+    /* Target of a pc-relative branch: the signed 16-bit immediate counts
+     * instructions from the slot after the branch.  Multiplying instead of
+     * shifting avoids a left shift of a negative value. */
+    const offs_t branch_target = pc + 4 + ((INT16)op * 4);
+
+    output = buffer;
+
+    switch (op >> 26)
+    {
+        case 0x00: // SPECIAL
+        {
+            switch (op & 0x3f)
+            {
+                case 0x00:
+                {
+                    // an all-zero word is "sll $0, $0, 0"; show it as nop
+                    if (op == 0)
+                    {
+                        print("nop");
+                    }
+                    else
+                    {
+                        print("sll %s, %s, %d", reg[rd], reg[rt], shift);
+                    }
+                    break;
+                }
+                case 0x02: print("srl %s, %s, %d", reg[rd], reg[rt], shift); break;
+                case 0x03: print("sra %s, %s, %d", reg[rd], reg[rt], shift); break;
+                case 0x04: print("sllv %s, %s, %s", reg[rd], reg[rt], reg[rs]); break;
+                case 0x06: print("srlv %s, %s, %s", reg[rd], reg[rt], reg[rs]); break;
+                case 0x07: print("srav %s, %s, %s", reg[rd], reg[rt], reg[rs]); break;
+                case 0x08:
+                {
+                    print("jr %s", reg[rs]);
+                    // a jump through register 31 is flagged as step-out
+                    if (rs == 31)
+                        flags = DASMFLAG_STEP_OUT;
+                    break;
+                }
+                case 0x09:
+                {
+                    // the link register is only printed when it is not register 31
+                    if (rd == 31)
+                    {
+                        print("jalr %s", reg[rs]);
+                    }
+                    else
+                    {
+                        print("jalr %s, %s", reg[rs], reg[rd]);
+                    }
+                    flags = DASMFLAG_STEP_OVER | DASMFLAG_STEP_OVER_EXTRA(1);
+                    break;
+                }
+                case 0x0d: print("break"); flags = DASMFLAG_STEP_OVER; break;
+                case 0x20: print("add %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x21: print("addu %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x22: print("sub %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x23: print("subu %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x24: print("and %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x25: print("or %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x26: print("xor %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x27: print("nor %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x2a: print("slt %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+                case 0x2b: print("sltu %s, %s, %s", reg[rd], reg[rs], reg[rt]); break;
+
+                default: print("???"); break;
+            }
+            break;
+        }
+
+        case 0x01: // REGIMM
+        {
+            switch ((op >> 16) & 0x1f)
+            {
+                case 0x00: print("bltz %s, $%08X", reg[rs], branch_target); break;
+                case 0x01: print("bgez %s, $%08X", reg[rs], branch_target); break;
+                case 0x10: print("bltzal %s, $%08X", reg[rs], branch_target); break;
+                case 0x11: print("bgezal %s, $%08X", reg[rs], branch_target); break;
+
+                default: print("???"); break;
+            }
+            break;
+        }
+
+        case 0x02: print("j $%08X", (op & 0x03ffffff) << 2); break;
+        case 0x03: print("jal $%08X", (op & 0x03ffffff) << 2); break;
+        case 0x04: print("beq %s, %s, $%08X", reg[rs], reg[rt], branch_target); break;
+        case 0x05: print("bne %s, %s, $%08X", reg[rs], reg[rt], branch_target); break;
+        case 0x06: print("blez %s, $%08X", reg[rs], branch_target); break;
+        case 0x07: print("bgtz %s, $%08X", reg[rs], branch_target); break;
+        case 0x08: print("addi %s, %s, %s", reg[rt], reg[rs], signed_imm16(op)); break;
+        case 0x09: print("addiu %s, %s, %s", reg[rt], reg[rs], signed_imm16(op)); break;
+        case 0x0a: print("slti %s, %s, %s", reg[rt], reg[rs], signed_imm16(op)); break;
+        case 0x0b: print("sltiu %s, %s, %s", reg[rt], reg[rs], signed_imm16(op)); break;
+        case 0x0c: print("andi %s, %s, $%04X", reg[rt], reg[rs], (UINT16)(op)); break;
+        case 0x0d: print("ori %s, %s, $%04X", reg[rt], reg[rs], (UINT16)(op)); break;
+        case 0x0e: print("xori %s, %s, $%04X", reg[rt], reg[rs], (UINT16)(op)); break;
+        // LUI has no source register operand: only the target and the immediate
+        case 0x0f: print("lui %s, $%04X", reg[rt], (UINT16)(op)); break;
+
+        case 0x10: disasm_cop0(op); break;
+        case 0x12: disasm_cop2(op); break;
+
+        case 0x20: print("lb %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x21: print("lh %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x23: print("lw %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x24: print("lbu %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x25: print("lhu %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x28: print("sb %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x29: print("sh %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+        case 0x2b: print("sw %s, %s(%s)", reg[rt], signed_imm16(op), reg[rs]); break;
+
+        case 0x32: disasm_lwc2(op); break;
+        case 0x3a: disasm_swc2(op); break;
+
+        default: print("???"); break;
+    }
+
+    return 4 | flags | DASMFLAG_SUPPORTED;
+}
diff --git a/source/mupen64plus-rsp-z64/src/rsp_gen.cpp b/source/mupen64plus-rsp-z64/src/rsp_gen.cpp
new file mode 100644
index 0000000..a7577f7
--- /dev/null
+++ b/source/mupen64plus-rsp-z64/src/rsp_gen.cpp
@@ -0,0 +1,693 @@
+/*
+ * z64
+ *
+ * Copyright (C) 2007 ziggy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+**/
+
+#include "rsp.h"
+#include
+#include
+#include
+#include
+#include
+
+#define GENDEBUG
+
+/* Entry point of one generated code fragment. */
+typedef int (* gen_f)(RSP_REGS & rsp);
+
+/* One generated fragment: the checksum of the code it was built from, the
+ * handle of the shared object that holds it and its entry point. */
+struct gen_t {
+    UINT32 crc;
+    void * lib;
+    gen_f f;
+#ifdef GENDEBUG
+    char name[32];
+#endif
+};
+
+/* Per-instruction-slot bookkeeping.  'visit' and 'labeled' hold the value
+ * of curvisit at which the slot was last walked / given a label, 'label' is
+ * the slot's index into labels[]. */
+struct opinfo_t {
+    int visit, labeled;
+    int label;
+
+    int nbgen;
+    int szgen;
+    gen_t * gentable;
+    gen_t * curgen;
+};
+
+/* Address range [start, end) of one straight-line run found by prep_gen(). */
+struct branch_t {
+    int start, end;
+};
+
+static int curvisit;
+static opinfo_t opinfo[0x1000/4];
+static int nb_branches;
+static branch_t branches[256];
+static int nb_labels;
+static int labels[256];
+
+#define OPI(pc) opinfo[(pc)>>2]
+
+/* Give the instruction at 'pc' a label for the current visit, unless it
+ * already has one.  'pc' must lie inside the 4 KB instruction memory. */
+/*inline*/ void SETLABEL(int pc) {
+    assert(pc >= 0 && pc < 0x1000);
+
+    if (OPI(pc).labeled == curvisit)
+        return;
+
+    labels[nb_labels] = pc;
+    OPI(pc).label = nb_labels;
+    nb_labels++;
+    assert(nb_labels < sizeof(labels)/sizeof(labels[0]));
+    OPI(pc).labeled = curvisit;
+}
+
+#define ABS(addr) (((addr) << 2) & 0xfff)
+#define REL(offset) ((pc + ((offset) << 2)) & 0xfff)
+
+/*
+ * Walk the instructions starting at 'pc' until a JR, BREAK or J is met or an
+ * instruction already seen during this visit is reached.  Every walked
+ * instruction is folded into 'crc' and counted in 'len'; branch targets get
+ * a label and the walked range is appended to branches[].
+ * Returns the updated checksum.
+ */
+static UINT32 prep_gen(int pc, UINT32 crc, int & len)
+{
+    UINT32 op;
+    bool stop = false;
+
+    branches[nb_branches].start = pc;
+
+    do
+    {
+        if (OPI(pc).visit == curvisit) {
+            /* ran into code walked earlier in this visit: label the join
+             * point and the instruction after it, then end the run */
+            SETLABEL((pc)&0xfff);
+            SETLABEL((pc+4)&0xfff);
+            break;
+        }
+
+        OPI(pc).visit = curvisit;
+
+        op = ROPCODE(pc);
+        crc = ((crc<<1)|(crc>>31))^op^pc;
+        /* from here on pc is the address of the next instruction, which is
+         * what REL() is evaluated against */
+        pc = (pc+4)&0xfff;
+        len++;
+
+        const UINT32 major = op >> 26;
+
+        if (major == 0x00) /* SPECIAL */
+        {
+            const UINT32 funct = op & 0x3f;
+            /* JR (0x08) and BREAK (0x0d) end the run; JALR (0x09) does not */
+            if (funct == 0x08 || funct == 0x0d)
+                stop = true;
+        }
+        else if (major == 0x01) /* REGIMM */
+        {
+            switch (RTREG)
+            {
+                case 0x00: /* BLTZ */
+                case 0x01: /* BGEZ */
+                    SETLABEL(REL(SIMM16));
+                    break;
+                default: /* BGEZAL (0x11) and the rest: nothing recorded */
+                    break;
+            }
+        }
+        else if (major == 0x02) /* J */
+        {
+            SETLABEL(ABS(UIMM26));
+            stop = true;
+        }
+        else if (major >= 0x04 && major <= 0x07) /* BEQ, BNE, BLEZ, BGTZ */
+        {
+            SETLABEL(REL(SIMM16));
+        }
+        /* JAL (0x03) and every other opcode: nothing recorded */
+    }
+    while (!stop);
+
+    branches[nb_branches].end = pc;
+    nb_branches++;
+    assert(nb_branches < sizeof(branches)/sizeof(branches[0]));
+
+    return crc;
+}
+
+static char tmps[1024];
+static char * delayed;
+static int has_cond;
+
+#define COND \
+    has_cond = 1, fprintf
+
+#define NOCOND() \
+    if (cont && OPI((pc+4)&0xfff).labeled == curvisit) { \
+        COND(fp, "cond = 1; \n"); \
+    } else \
+        has_cond = 0
+
+
+/* Queue a goto to the label of absolute address 'addr' as the delayed
+ * statement; it is guarded by "if (cond)" when a condition is pending. */
+static void D_JUMP_ABS(UINT32 addr)
+{
+    const int target = addr&0xfff;
+    const char * guard = has_cond? "if (cond)":"";
+    const char * reset = has_cond? "cond=0; ":"";
+
+    sprintf(tmps, "%s { /*if (rsp.inval_gen) { rsp.nextpc=0x%x; return 0; }*/ %s goto L%d; }", guard, target, reset, OPI(target).label);
+    delayed = tmps;
+}
+
+/* Same as D_JUMP_ABS for a target 'offset' instructions past pc+4. */
+static void D_JUMP_REL(int pc, int offset)
+{
+    const int target = pc+4 + ((offset) << 2);
+    D_JUMP_ABS(target);
+}
+
+/* Queue a "return 0" as the delayed statement, guarded by "if (cond)" when
+ * a condition is pending. */
+static void D_JUMP()
+{
+    const char * guard = has_cond? "if (cond)":"";
+
+    sprintf(tmps, "%s { return 0; }", guard);
+    delayed = tmps;
+}
+
+static void D_JUMPL(int pc)
+{
+    sprintf(tmps,
+        "%s { \n"
+        "%s"
+        " int res;\n"
+        " if (res = rsp_jump(rsp.nextpc)) return res; \n"
+        " if (/*rsp.inval_gen || */sp_pc != 0x%x) return 0; \n"
+        "}", has_cond? "if (cond)":"", has_cond?"
cond=0; \n":"", (pc+8)&0xfff); + delayed = tmps; + has_cond = 1; +} + +static void dogen(const char * s, UINT32 op, FILE * fp) +{ + fprintf(fp, "#define op 0x%x\n%s\n#undef op\n", op, s); +} + +#define GEN(s) dogen(s, op, fp) + +static void rsp_gen(int pc) +{ + int i; + const char * old_delayed; + int oldbr, br; + + curvisit++; + if (!curvisit) { + // we looped, reset all visit counters + for (i=0; i<0x1000/4; i++) { + opinfo[i].visit = 0; + opinfo[i].labeled = 0; + } + curvisit++; + } + + nb_branches = 0; + nb_labels = 0; + + int len = 0; + UINT32 crc = prep_gen(pc, 0, len); + + for (i=0; igentable) { + for (i=0; inbgen; i++) + if (opi->gentable[i].crc == crc) { + opi->curgen = opi->gentable + i; + return; + } + } + if (opi->nbgen >= opi->szgen) { + if (opi->szgen) + opi->szgen *= 2; + else + opi->szgen = 4; + opi->gentable = (gen_t *) realloc(opi->gentable, sizeof(gen_t)*(opi->szgen)); + } + gen_t * gen; + gen = opi->gentable + opi->nbgen++; + opi->curgen = gen; + +#ifdef GENDEBUG + strcpy(gen->name, lib); +#endif + + gen->crc = crc; + gen->lib = dlopen(lib, RTLD_NOW); + if (gen->lib) { + gen->f = (gen_f) dlsym(gen->lib, sym); + assert(gen->f); + fprintf(stderr, "reloaded %s\n", lib); + return; + } + // else + // printf("%s\n", dlerror()); + + sprintf(src, "z64/rspgen/%x-%x-%x.cpp", crc, pc, len); + FILE * fp = fopen(src, "w"); + + fprintf(fp, + "#include \"rsp.h\"\n" + "\n" + "extern \"C\" {\n" + "int %s() {\n" + "int cond=0;\n", + sym); + + for (i=0; i %x\n", branches[i].start, branches[i].end-4); + for (pc=branches[i].start; cont || delayed; pc = (pc+4)&0xfff) { + UINT32 op = ROPCODE(pc); + char s[128]; + rsp_dasm_one(s, pc, op); + if (cont && OPI(pc).labeled == curvisit) + fprintf(fp, "L%d: ;\n", OPI(pc).label); + //fprintf(fp, "/* %3x\t%s */\n", pc, s); + fprintf(fp, "GENTRACE(\"%3x\t%s\\n\");\n", pc, s); + oldbr = br; + br = 0; + old_delayed = delayed; + delayed = 0; + + if (((pc+4)&0xfff)==branches[i].end) + cont = 0; + + switch (op >> 26) + { + case 0x00: 
/* SPECIAL */ + { + switch (op & 0x3f) + { + case 0x08: /* JR */ + if (!old_delayed) { + br = 1|8|16; + NOCOND(); + D_JUMP(); + } + break; + case 0x09: /* JALR */ + if (!old_delayed) { + br = 1; + NOCOND(); + D_JUMPL(pc); + } + break; + case 0x0d: /* BREAK */ + br = 2|8; + //delayed = "return 1;"; + has_cond = 0; + break; + } + break; + } + + case 0x01: /* REGIMM */ + { + switch (RTREG) + { + case 0x00: /* BLTZ */ + if (!old_delayed) { + COND(fp, " cond=(INT32)(_RSVAL(0x%x)) < 0;\n", op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x01: /* BGEZ */ + if (!old_delayed) { + COND(fp, " cond=(INT32)(_RSVAL(0x%x)) >= 0;\n", op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x11: /* BGEZAL */ + br = 1; + COND(fp, "cond=(INT32)(_RSVAL(0x%x)) >= 0;\n", op); + D_JUMPL(pc); + break; + } + break; + } + + case 0x02: /* J */ + if (!old_delayed) { + NOCOND(); + D_JUMP_ABS(_UIMM26(op) <<2); + br = 4|8|16; + } + break; + case 0x04: /* BEQ */ + if (!old_delayed) { + COND(fp, " cond=_RSVAL(0x%0x) == _RTVAL(0x%0x);\n", op, op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x05: /* BNE */ + if (!old_delayed) { + COND(fp, " cond=_RSVAL(0x%0x) != _RTVAL(0x%0x);\n", op, op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x06: /* BLEZ */ + if (!old_delayed) { + COND(fp, " cond=(INT32)_RSVAL(0x%0x) <= 0;\n", op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x07: /* BGTZ */ + if (!old_delayed) { + COND(fp, " cond=(INT32)_RSVAL(0x%0x) > 0;\n", op); + D_JUMP_REL(pc, _SIMM16(op)); + br = 4; + } + break; + case 0x03: /* JAL */ + if (!old_delayed) { + br = 1; + NOCOND(); + D_JUMPL(pc); + } + break; + } + + if (!(br&4) && (!old_delayed || !br)) { + if (br && !(br&16)) { + fprintf(fp, "sp_pc = 0x%x;\n", (pc + 4)&0xfff); + } + //fprintf(fp, "rsp_execute_one(0x%x);\n", op); + + + + + + switch (op >> 26) + { + case 0x00: /* SPECIAL */ + { + switch (op & 0x3f) + { + case 0x00: /* SLL */ if (RDREG) GEN("RDVAL = 
(UINT32)RTVAL << SHIFT;"); break; + case 0x02: /* SRL */ if (RDREG) GEN("RDVAL = (UINT32)RTVAL >> SHIFT; "); break; + case 0x03: /* SRA */ if (RDREG) GEN("RDVAL = (INT32)RTVAL >> SHIFT; "); break; + case 0x04: /* SLLV */ if (RDREG) GEN("RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); "); break; + case 0x06: /* SRLV */ if (RDREG) GEN("RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); "); break; + case 0x07: /* SRAV */ if (RDREG) GEN("RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); "); break; + case 0x08: /* JR */ GEN("JUMP_PC(RSVAL); "); break; + case 0x09: /* JALR */ GEN("JUMP_PC_L(RSVAL, RDREG); "); break; + case 0x0d: /* BREAK */ + { + GEN( + " \ + *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE ); \ + if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { \ + *z64_rspinfo.MI_INTR_REG |= 1; \ + z64_rspinfo.CheckInterrupts(); \ + } \ + rsp.nextpc = ~0; \ + return 1; \ + "); + break; + } + case 0x20: /* ADD */ if (RDREG) GEN("RDVAL = (INT32)(RSVAL + RTVAL); "); break; + case 0x21: /* ADDU */ if (RDREG) GEN("RDVAL = (INT32)(RSVAL + RTVAL); "); break; + case 0x22: /* SUB */ if (RDREG) GEN("RDVAL = (INT32)(RSVAL - RTVAL); "); break; + case 0x23: /* SUBU */ if (RDREG) GEN("RDVAL = (INT32)(RSVAL - RTVAL); "); break; + case 0x24: /* AND */ if (RDREG) GEN("RDVAL = RSVAL & RTVAL; "); break; + case 0x25: /* OR */ if (RDREG) GEN("RDVAL = RSVAL | RTVAL; "); break; + case 0x26: /* XOR */ if (RDREG) GEN("RDVAL = RSVAL ^ RTVAL; "); break; + case 0x27: /* NOR */ if (RDREG) GEN("RDVAL = ~(RSVAL | RTVAL); "); break; + case 0x2a: /* SLT */ if (RDREG) GEN("RDVAL = (INT32)RSVAL < (INT32)RTVAL; "); break; + case 0x2b: /* SLTU */ if (RDREG) GEN("RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; "); break; + default: GEN("unimplemented_opcode(op); "); break; + } + break; + } + + case 0x01: /* REGIMM */ + { + switch (RTREG) + { + case 0x00: /* BLTZ */ GEN("if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); "); break; + case 0x01: /* BGEZ */ GEN("if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); "); break; + // VP 
according to the doc, link is performed even when condition fails + case 0x11: /* BGEZAL */ GEN("LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); "); break; + //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break; + default: GEN("unimplemented_opcode(op); "); break; + } + break; + } + + case 0x02: /* J */ GEN("JUMP_ABS(UIMM26); "); break; + case 0x03: /* JAL */ GEN("JUMP_ABS_L(UIMM26, 31); "); break; + case 0x04: /* BEQ */ GEN("if (RSVAL == RTVAL) JUMP_REL(SIMM16); "); break; + case 0x05: /* BNE */ GEN("if (RSVAL != RTVAL) JUMP_REL(SIMM16); "); break; + case 0x06: /* BLEZ */ GEN("if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); "); break; + case 0x07: /* BGTZ */ GEN("if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); "); break; + case 0x08: /* ADDI */ if (RTREG) GEN("RTVAL = (INT32)(RSVAL + SIMM16); "); break; + case 0x09: /* ADDIU */ if (RTREG) GEN("RTVAL = (INT32)(RSVAL + SIMM16); "); break; + case 0x0a: /* SLTI */ if (RTREG) GEN("RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); "); break; + case 0x0b: /* SLTIU */ if (RTREG) GEN("RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); "); break; + case 0x0c: /* ANDI */ if (RTREG) GEN("RTVAL = RSVAL & UIMM16; "); break; + case 0x0d: /* ORI */ if (RTREG) GEN("RTVAL = RSVAL | UIMM16; "); break; + case 0x0e: /* XORI */ if (RTREG) GEN("RTVAL = RSVAL ^ UIMM16; "); break; + case 0x0f: /* LUI */ if (RTREG) GEN("RTVAL = UIMM16 << 16; "); break; + + case 0x10: /* COP0 */ + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC0 */ if (RTREG) GEN("RTVAL = get_cop0_reg(rsp, RDREG); "); break; + case 0x04: /* MTC0 */ + { + GEN("set_cop0_reg(rsp, RDREG, RTVAL); \n"); + if (RDREG == 0x08/4) { + fprintf(fp, + "if (rsp.inval_gen) {\n" + " rsp.inval_gen = 0;\n" + " sp_pc = 0x%x; \n" + " return 2; \n" + "}\n" + , (pc + 4)&0xfff); + } + break; + } + default: + log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op); + break; + } + break; + } + + case 0x12: /* COP2 */ + { + switch ((op >> 21) & 0x1f) + { + case 
0x00: /* MFC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + if (RTREG) GEN("\ + {int el = (op >> 7) & 0xf;\ + UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);\ + UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);\ + RTVAL = (INT32)(INT16)((b1 << 8) | (b2));}\ + "); + break; + } + case 0x02: /* CFC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + // VP to sign extend or to not sign extend ? + //if (RTREG) RTVAL = (INT16)rsp.flag[RDREG]; + if (RTREG) GEN("RTVAL = rsp.flag[RDREG];"); + break; + } + case 0x04: /* MTC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + GEN("\ + {int el = (op >> 7) & 0xf;\ + VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;\ + VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;}\ + "); + break; + } + case 0x06: /* CTC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + GEN("rsp.flag[RDREG] = RTVAL & 0xffff;"); + break; + } + + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: + { + GEN("handle_vector_ops(rsp, op);"); + break; + } + + default: GEN("unimplemented_opcode(op); "); break; + } + break; + } + + case 0x20: /* LB */ if (RTREG) GEN("RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); "); break; + case 0x21: /* LH */ if (RTREG) GEN("RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); "); break; + case 0x23: /* LW */ if 
(RTREG) GEN("RTVAL = READ32(RSVAL + SIMM16); "); break;
+                case 0x24: /* LBU */ if (RTREG) GEN("RTVAL = (UINT8)READ8(RSVAL + SIMM16); "); break;
+                case 0x25: /* LHU */ if (RTREG) GEN("RTVAL = (UINT16)READ16(RSVAL + SIMM16); "); break;
+                case 0x28: /* SB */ GEN("WRITE8(RSVAL + SIMM16, RTVAL); "); break;
+                case 0x29: /* SH */ GEN("WRITE16(RSVAL + SIMM16, RTVAL); "); break;
+                case 0x2b: /* SW */ GEN("WRITE32(RSVAL + SIMM16, RTVAL); "); break;
+                case 0x32: /* LWC2 */ GEN("handle_lwc2(rsp, op); "); break;
+                case 0x3a: /* SWC2 */ GEN("handle_swc2(rsp, op); "); break;
+
+                default:
+                {
+                    GEN("unimplemented_opcode(op);");
+                    break;
+                }
+                }
+
+
+
+
+                // if (br) {
+                // if (br & 2)
+                // fprintf(fp, "return 1;\n");
+                // else
+                // fprintf(fp, "return 0;\n");
+                // }
+            }
+            if (old_delayed)
+                fprintf(fp, "%s\n", old_delayed);
+        }
+        if (!((/*br|*/oldbr)&8) && ((!oldbr && !(br&2)) || has_cond)) {
+            fprintf(fp, "/* jumping back to %x */\ngoto L%d;\n", pc, OPI(pc).label);
+            assert(OPI(pc).labeled == curvisit);
+        }
+    }
+
+    fprintf(fp, "}}\n");
+
+    fclose(fp);
+
+    pid_t pid = fork();
+    // SDL redirect these signals, but we need them untouched for waitpid call
+    signal(17, 0);
+    signal(11, 0);
+    if (!pid) {
+        // char s[128];
+        // atexit(0);
+        // sprintf(s, "gcc -Iz64 -g -shared -O2 %s -o %s", src, lib);
+        // system(s);
+        // exit(0);
+
+        //setsid();
+        //execl("/usr/bin/gcc", "/usr/bin/gcc", "-Iz64", "-shared", "-g", "-O3", "-fomit-frame-pointer", src, "-o", lib, "-finline-limit=10000", 0);
+        //execl("/usr/bin/gcc", "/usr/bin/gcc", "-Iz64", "-shared", "-O3", src, "-o", lib, "-fomit-frame-pointer", "-ffast-math", "-funroll-loops", "-fforce-addr", "-finline-limit=10000", 0);
+        //execl("/usr/bin/gcc", "/usr/bin/gcc", "-Iz64", "-shared", "-O3", src, "-o", lib, "-fomit-frame-pointer", "-ffast-math", "-funroll-loops", "-fforce-addr", "-finline-limit=10000", "-m3dnow", "-mmmx", "-msse", "-msse2", "-mfpmath=sse", 0);
+        execl("/usr/bin/gcc", "/usr/bin/gcc", "-Iz64", "-shared", "-O6", src, "-o", lib, "-fomit-frame-pointer", "-ffast-math", "-funroll-loops", "-fforce-addr", "-finline-limit=10000", "-m3dnow", "-mmmx", "-msse", "-msse2", 0);
+        printf("gnii ??\n");
+        exit(0);
+    }
+    waitpid(pid, 0, __WALL);
+
+    gen->lib = dlopen(lib, RTLD_NOW);
+    if (!gen->lib)
+        log(M64MSG_WARNING, "%s\n", dlerror());
+    assert(gen->lib);
+    log(M64MSG_VERBOSE, "created and loaded %s\n", lib);
+    gen->f = (gen_f) dlsym(gen->lib, sym);
+    assert(gen->f);
+}
+
+/*
+ * Drop the currently selected generated fragment of every instruction slot
+ * and raise rsp.inval_gen.
+ *
+ * The requested range is ignored: both arguments are overwritten so that
+ * the whole 0x1000-byte instruction memory is always invalidated.
+ */
+void rsp_invalidate(int begin, int len)
+{
+    begin = 0;
+    len = 0x1000;
+    assert(begin+len<=0x1000);
+
+    /* one opinfo slot per 4-byte instruction */
+    for (; len > 0; begin += 4, len -= 4)
+        OPI(begin).curgen = 0;
+
+    rsp.inval_gen = 1;
+}
+
+/*
+ * Run the generated fragment that starts at 'pc', generating it first when
+ * the slot has none selected.  On return sp_pc holds the address the
+ * fragment asked to continue at (rsp.nextpc) when it set one.
+ * Returns the fragment's own return value.
+ */
+int rsp_jump(int pc)
+{
+    pc &= 0xfff;
+    sp_pc = pc;
+    rsp.nextpc = ~0;
+
+    opinfo_t * slot = &OPI(pc);
+    if (!slot->curgen)
+        rsp_gen(pc);
+    gen_t * gen = slot->curgen;
+
+    GENTRACE("rsp_jump %x (%s)\n", pc, gen->name);
+    int res = gen->f(rsp);
+    GENTRACE("r31 %x from %x nextpc %x pc %x res %d (%s)\n", rsp.r[31], pc, rsp.nextpc, sp_pc, res, gen->name);
+
+    /* ~0 means the fragment requested no jump: sp_pc stays as it left it */
+    if (rsp.nextpc != ~0)
+    {
+        sp_pc = (rsp.nextpc & 0xfff);
+        rsp.nextpc = ~0;
+    }
+
+    return res;
+}
diff --git a/source/mupen64plus-rsp-z64/src/rsp_gen.h b/source/mupen64plus-rsp-z64/src/rsp_gen.h
new file mode 100644
index 0000000..2bcf820
--- /dev/null
+++ b/source/mupen64plus-rsp-z64/src/rsp_gen.h
@@ -0,0 +1,349 @@
+/*
+ * z64
+ *
+ * Copyright (C) 2007 ziggy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+**/
+
+#include "rsp.h"
+
+/*
+ * Read coprocessor-0 register 'reg'.
+ *
+ * Registers 0-7 are read through sp_read_reg(), registers 8-15 through
+ * n64_dp_reg_r() with the index rebased to 0-7.  Any other index is logged
+ * as an error and reads as 0, so the function always returns a defined
+ * value.
+ */
+inline UINT32 get_cop0_reg(RSP_REGS & rsp, int reg)
+{
+    if (reg >= 0 && reg < 8)
+    {
+        return sp_read_reg(rsp ,reg);
+    }
+    if (reg >= 8 && reg < 16)
+    {
+        return n64_dp_reg_r(rsp, reg - 8, 0x00000000);
+    }
+
+    log(M64MSG_ERROR, "RSP: get_cop0_reg: %d", reg);
+    return 0;
+}
+
+/*
+ * Write 'data' to coprocessor-0 register 'reg'.
+ *
+ * Registers 0-7 go through sp_write_reg(), registers 8-15 through
+ * n64_dp_reg_w() with the index rebased to 0-7.  Any other index is logged
+ * as an error and the write is dropped.
+ */
+inline void set_cop0_reg(RSP_REGS & rsp, int reg, UINT32 data)
+{
+    if (reg >= 0 && reg < 8)
+    {
+        sp_write_reg(rsp, reg, data);
+    }
+    else if (reg >= 8 && reg < 16)
+    {
+        n64_dp_reg_w(rsp, reg - 8, data, 0x00000000);
+    }
+    else
+    {
+        log(M64MSG_ERROR, "RSP: set_cop0_reg: %d, %08X\n", reg, data);
+    }
+}
+
+/* Source lane selected for each of the 8 result lanes, indexed by the
+ * 4-bit element specifier of a vector instruction. */
+static const int vector_elements_2[16][8] =
+{
+    { 0, 1, 2, 3, 4, 5, 6, 7 }, // none
+    { 0, 1, 2, 3, 4, 5, 6, 7 }, // ???
+    { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q
+    { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q
+    { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h
+    { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h
+    { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h
+    { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h
+    { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0
+    { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1
+    { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2
+    { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3
+    { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4
+    { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5
+    { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6
+    { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7
+};
+
+/*
+ * Clamp accumulator lane 'accum' to a 16-bit result.
+ *
+ * Returns 'negative' or 'positive' when the high and middle words show that
+ * the value does not fit, otherwise the low word (slice 0) or the middle
+ * word of the accumulator.  On the negative in-range path only slice 0 and
+ * slice 1 are handled; any other slice yields 0.
+ */
+INLINE UINT16 SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive)
+{
+    if ((INT16)ACCUM_H(accum) < 0)
+    {
+        /* in range only when the high word is all ones and the middle word
+         * is negative as well */
+        if ((UINT16)(ACCUM_H(accum)) != 0xffff || (INT16)ACCUM_M(accum) >= 0)
+        {
+            return negative;
+        }
+        if (slice == 0)
+        {
+            return ACCUM_L(accum);
+        }
+        if (slice == 1)
+        {
+            return ACCUM_M(accum);
+        }
+        return 0;
+    }
+
+    /* in range only when the high word is zero and the middle word is
+     * non-negative */
+    if ((UINT16)(ACCUM_H(accum)) != 0 || (INT16)ACCUM_M(accum) < 0)
+    {
+        return positive;
+    }
+    if (slice == 0)
+    {
+        return ACCUM_L(accum);
+    }
+    return ACCUM_M(accum);
+}
+
+/*
+ * Variant of SATURATE_ACCUM that always yields the middle word when the
+ * accumulator lane is in range, and 'negative' / 'positive' otherwise.
+ */
+INLINE UINT16 SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive)
+{
+    if ((INT16)ACCUM_H(accum) < 0)
+    {
+        if ((UINT16)(ACCUM_H(accum)) != 0xffff || (INT16)ACCUM_M(accum) >= 0)
+            return negative;
+        return ACCUM_M(accum);
+    }
+
+    if ((UINT16)(ACCUM_H(accum)) != 0 || (INT16)ACCUM_M(accum) < 0)
+        return positive;
+    return ACCUM_M(accum);
+}
+
+/* Copy the eight lanes of the local 'vres' array into the destination
+ * vector register. */
+#define WRITEBACK_RESULT() \
+    do { \
+        VREG_S(VDREG, 0) = vres[0]; \
+        VREG_S(VDREG, 1) = vres[1]; \
+        VREG_S(VDREG, 2) = vres[2]; \
+        VREG_S(VDREG, 3) = vres[3]; \
+        VREG_S(VDREG, 4) = vres[4]; \
+        VREG_S(VDREG, 5) = vres[5]; \
+        VREG_S(VDREG, 6) = vres[6]; \
+        VREG_S(VDREG, 7) = vres[7]; \
+    } while(0)
+
+#if 0
+inline void rsp_execute_one(UINT32 op)
+{
+    switch (op >> 26)
+    {
+        case 0x00: /* SPECIAL */
+        {
+            switch (op & 0x3f)
+            {
+                case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break;
+                case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break;
+                case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break;
+                case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break;
+                case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break;
+                case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break;
+                case 0x08: /* JR */ JUMP_PC(RSVAL); break;
+                case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break;
+                case 0x0d: /* BREAK */
+                {
+                    *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE );
+                    if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) {
+                        *z64_rspinfo.MI_INTR_REG |= 1;
+                        z64_rspinfo.CheckInterrupts();
+                    }
+                    //sp_set_status(0x3);
+
+#if LOG_INSTRUCTION_EXECUTION
+                    fprintf(exec_output, "\n---------- break ----------\n\n");
+#endif
+                    break;
+                }
case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break; + case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break; + case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break; + case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break; + case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break; + case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break; + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x01: /* REGIMM */ + { + switch (RTREG) + { + case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break; + case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; + // VP according to the doc, link is performed even when condition fails, + // this sound pretty stupid but let's try it that way + case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; + //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break; + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x02: /* J */ JUMP_ABS(UIMM26); break; + case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break; + case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break; + case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break; + case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break; + case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break; + case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break; + case 0x0b: /* SLTIU */ if (RTREG) RTVAL = 
(UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break; + case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break; + case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break; + case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break; + case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break; + + case 0x10: /* COP0 */ + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break; + case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break; + default: + printf("unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op); + break; + } + break; + } + + case 0x12: /* COP2 */ + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); + UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); + if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); + break; + } + case 0x02: /* CFC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + // VP to sign extend or to not sign extend ? 
+ //if (RTREG) RTVAL = (INT16)rsp.flag[RDREG]; + if (RTREG) RTVAL = rsp.flag[RDREG]; + break; + } + case 0x04: /* MTC2 */ + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; + VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; + break; + } + case 0x06: /* CTC2 */ + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + rsp.flag[RDREG] = RTVAL & 0xffff; + break; + } + + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: + { + handle_vector_ops(op); + break; + } + + default: unimplemented_opcode(op); break; + } + break; + } + + case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break; + case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break; + case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break; + case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break; + case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break; + case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break; + case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break; + case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break; + case 0x32: /* LWC2 */ handle_lwc2(op); break; + case 0x3a: /* SWC2 */ handle_swc2(op); break; + + default: + { + unimplemented_opcode(op); + break; + } + } +} + +#endif diff --git a/source/mupen64plus-rsp-z64/src/rsp_opinfo.cpp b/source/mupen64plus-rsp-z64/src/rsp_opinfo.cpp new file mode 100644 index 0000000..d02b561 --- /dev/null +++ 
b/source/mupen64plus-rsp-z64/src/rsp_opinfo.cpp @@ -0,0 +1,560 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#include "rsp_opinfo.h" + +static const int vector_elements_2[16][8] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7 }, // none + { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? 
+ { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q + { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q + { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h + { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h + { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h + { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h + { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 + { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 + { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 + { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 + { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 + { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 + { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 + { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 +}; + +void rsp_get_opinfo(UINT32 op, rsp_opinfo_t * info) +{ + int op2; + int i; + info->op = op; + switch (op>>26) { + case 0: /* SPECIAL */ + op2 = RSP_SPECIAL_OFFS + (op&0x3f); + break; + case 0x12: /* COP2 */ + if (((op>>21)&0x1f) >= 0x10) + op2 = RSP_COP2_2_OFFS + (op & 0x3f); + else + op2 = RSP_COP2_1_OFFS + ((op >> 21) & 0x1f); + break; + case 0x32: /* LWC2 */ + op2 = RSP_LWC2_OFFS + ((op>>11)&0x1f); + break; + case 0x3a: /* SWC2 */ + op2 = RSP_SWC2_OFFS + ((op>>11)&0x1f); + break; + default: + op2 = RSP_BASIC_OFFS + (op>>26); + if (op2 == RSP_REGIMM) { + switch (RTREG) + { + case 0x00: /* BLTZ */ + op2 = RSP_BLTZ; + break; + case 0x01: /* BGEZ */ + op2 = RSP_BGEZ; + break; + case 0x11: /* BGEZAL */ + op2 = RSP_BGEZAL; + break; + } + } + } + info->op2 = op2; + + memset(&info->used, 0, sizeof(info->used)); + memset(&info->set, 0, sizeof(info->set)); + info->used.accu = info->used.flag = 0; + info->set.accu = info->set.flag = 0; + info->flags = 0; + + int dest = (op >> 16) & 0x1f; + int index = (op >> 7) & 0xf; + int offset = (op & 0x7f); + if (offset & 0x40) + offset |= 0xffffffc0; + + switch(op2) { + case RSP_SPECIAL: + case RSP_BLTZ: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_COND | RSP_OPINFO_USEPC; + break; + case RSP_BGEZ: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_COND | RSP_OPINFO_USEPC; + break; + case RSP_BGEZAL: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_COND | RSP_OPINFO_LINK | RSP_OPINFO_USEPC; + break; + case RSP_J: + info->flags = RSP_OPINFO_JUMP; + 
break; + case RSP_JAL: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_LINK | RSP_OPINFO_USEPC; + break; + case RSP_BEQ: + case RSP_BNE: + case RSP_BLEZ: + case RSP_BGTZ: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_COND | RSP_OPINFO_USEPC; + break; + case RSP_ADDI: + case RSP_ADDIU: + case RSP_SLTI: + case RSP_SLTIU: + case RSP_ANDI: + case RSP_ORI: + case RSP_XORI: + case RSP_LUI: + case RSP_COP0: + case RSP_LB: + case RSP_LH: + case RSP_LW: + case RSP_LBU: + case RSP_LHU: + case RSP_SB: + case RSP_SH: + case RSP_SW: + break; + + case RSP_SLL: + case RSP_SRL: + case RSP_SRA: + case RSP_SLLV: + case RSP_SRLV: + case RSP_SRAV: + break; + + case RSP_JR: + info->flags = RSP_OPINFO_JUMP; + break; + case RSP_JALR: + info->flags = RSP_OPINFO_JUMP | RSP_OPINFO_LINK | RSP_OPINFO_USEPC; + break; + case RSP_BREAK: + info->flags = RSP_OPINFO_BREAK; + break; + + case RSP_ADD: + case RSP_ADDU: + case RSP_SUB: + case RSP_SUBU: + case RSP_AND: + case RSP_OR: + case RSP_XOR: + case RSP_NOR: + case RSP_SLT: + case RSP_SLTU: + break; + + case RSP_MFC2: + { + int el = op >> 7; + RSP_SET_VEC_I(info->used, VS1REG, ((el+0)&0xf)>>1); + RSP_SET_VEC_I(info->used, VS1REG, ((el+1)&0xf)>>1); + break; + } + case RSP_CFC2: + RSP_SET_FLAG_I(info->used, RDREG & 3); + break; + case RSP_MTC2: + { + int el = op >> 7; + RSP_SET_VEC_I(info->set, VS1REG, ((el+0)&0xf)>>1); + RSP_SET_VEC_I(info->set, VS1REG, ((el+1)&0xf)>>1); + break; + } + case RSP_CTC2: + RSP_SET_FLAG_I(info->set, RDREG & 3); + break; + + + case RSP_LBV: + RSP_SET_VEC_I(info->set, dest, index>>1); + break; + case RSP_LSV: + for (i=index; iset, dest, (i>>1)&7); + break; + case RSP_LLV: + for (i=index; iset, dest, (i>>1)&7); + break; + case RSP_LDV: + for (i=index; iset, dest, (i>>1)&7); + break; + case RSP_LQV: + case RSP_LRV: + // WARNING WARNING WARNING + // we assume this instruction always used to load the full vector + // i.e. 
the address is always 16 bytes aligned + // for (i=0; i<8; i++) + // RSP_SET_VEC_I(info->set, dest, i); + break; + case RSP_LPV: + case RSP_LUV: + case RSP_LHV: + case RSP_LWV: + for (i=0; i<8; i++) + RSP_SET_VEC_I(info->set, dest, i); + break; + case RSP_LFV: + for (i=(index>>1); i<(index>>1)+4; i++) + RSP_SET_VEC_I(info->set, dest, i); + break; + case RSP_LTV: + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | + // -------------------------------------------------- + // + // Loads one element to maximum of 8 vectors, while incrementing element index + + // FIXME: has a small problem with odd indices + + int element; + int vs = dest; + int ve = dest + 8; + if (ve > 32) + ve = 32; + + element = 7 - (index >> 1); + + if (index & 1) + log(M64MSG_ERROR, "RSP: LTV: index = %d\n", index); + + for (i=vs; i < ve; i++) + { + element = ((8 - (index >> 1) + (i-vs)) << 1); + RSP_SET_VEC_I(info->set, i, (element & 0xf)>>1); + RSP_SET_VEC_I(info->set, i, ((element+1) & 0xf)>>1); + } + break; + } + + case RSP_SBV: + RSP_SET_VEC_I(info->used, dest, index>>1); + break; + case RSP_SSV: + for (i=index; iused, dest, (i>>1)&7); + break; + case RSP_SLV: + for (i=index; iused, dest, (i>>1)&7); + break; + case RSP_SDV: + for (i=index; iused, dest, (i>>1)&7); + break; + case RSP_SQV: + case RSP_SRV: + // WARNING WARNING WARNING + // we assume this instruction always used to store the full vector + // i.e. 
the address is always 16 bytes aligned + for (i=0; i<8; i++) + RSP_SET_VEC_I(info->used, dest, i); + break; + case RSP_SPV: + case RSP_SUV: + case RSP_SHV: + case RSP_SWV: + for (i=0; i<8; i++) + RSP_SET_VEC_I(info->used, dest, i); + break; + case RSP_SFV: + for (i=(index>>1); i<(index>>1)+4; i++) + RSP_SET_VEC_I(info->used, dest, i); + break; + case RSP_STV: + { + // 31 25 20 15 10 6 0 + // -------------------------------------------------- + // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | + // -------------------------------------------------- + // + // Stores one element from maximum of 8 vectors, while incrementing element index + + int element; + int vs = dest; + int ve = dest + 8; + if (ve > 32) + ve = 32; + + element = 8 - (index >> 1); + if (index & 0x1) + log(M64MSG_ERROR, "RSP: STV: index = %d at %08X\n", index, rsp.ppc); + + for (i=vs; i < ve; i++) + { + RSP_SET_VEC_I(info->used, i, element & 0x7); + element++; + } + break; + } + + case RSP_VMULF: + case RSP_VMULU: + case RSP_VMUDL: + case RSP_VMUDM: + case RSP_VMUDN: + case RSP_VMUDH: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 14); + } + break; + } + case RSP_VMACF: + case RSP_VMACU: + case RSP_VMADL: + case RSP_VMADM: + case RSP_VMADN: + case RSP_VMADH: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->used, i, 14); + RSP_SET_ACCU_I(info->set, i, 14); + } + break; + } + case RSP_VADD: + case RSP_VSUB: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->used, 0); + RSP_SET_FLAG_I(info->set, 
0); + break; + } + case RSP_VABS: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + break; + } + case RSP_VADDC: + case RSP_VSUBC: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->set, 0); + break; + } + case RSP_VSAW: + switch (EL) + { + case 0x08: // VSAWH + { + for (i=0; i < 8; i++) + { + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->used, i, 8); + } + break; + } + case 0x09: // VSAWM + { + for (i=0; i < 8; i++) + { + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->used, i, 4); + } + break; + } + case 0x0a: // VSAWL + { + for (i=0; i < 8; i++) + { + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->used, i, 2); + } + break; + } + default: + log(M64MSG_ERROR, "RSP: VSAW: el = %d\n", EL); + } + break; + case RSP_VLT: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->set, 0); + RSP_SET_FLAG_I(info->set, 1); + break; + } + case RSP_VEQ: + case RSP_VNE: + case RSP_VGE: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->used, 0); + RSP_SET_FLAG_I(info->set, 0); + RSP_SET_FLAG_I(info->set, 1); + break; + } + case RSP_VCL: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); 
+ RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->used, 0); + RSP_SET_FLAG_I(info->used, 1); + RSP_SET_FLAG_I(info->set, 0); + RSP_SET_FLAG_I(info->set, 1); + RSP_SET_FLAG_I(info->set, 2); + break; + } + case RSP_VCH: + case RSP_VCR: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->set, 0); + RSP_SET_FLAG_I(info->set, 1); + RSP_SET_FLAG_I(info->set, 2); + break; + } + case RSP_VMRG: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + RSP_SET_FLAG_I(info->used, 1); + break; + } + case RSP_VAND: + case RSP_VNAND: + case RSP_VOR: + case RSP_VNOR: + case RSP_VXOR: + case RSP_VNXOR: + { + for (i=0; i < 8; i++) + { + int sel = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS1REG, i); + RSP_SET_VEC_I(info->used, VS2REG, sel); + RSP_SET_VEC_I(info->set, VDREG, i); + RSP_SET_ACCU_I(info->set, i, 2); + } + break; + } + case RSP_VRCP: + case RSP_VRCPL: + case RSP_VRCPH: + case RSP_VRSQL: + case RSP_VRSQH: + { + int del = (VS1REG & 7); + int sel = VEC_EL_2(EL, del); + + RSP_SET_VEC_I(info->used, VS2REG, sel); + + for (i=0; i < 8; i++) + { + int element = VEC_EL_2(EL, i); + RSP_SET_VEC_I(info->used, VS2REG, element); + RSP_SET_ACCU_I(info->set, i, 2); + } + + RSP_SET_VEC_I(info->set, VDREG, del); + break; + } + case RSP_VMOV: + { + int element = VS1REG & 7; + RSP_SET_VEC_I(info->used, VS2REG, VEC_EL_2(EL, 7-element)); + RSP_SET_VEC_I(info->set, VDREG, element); + break; + } + + default: + { + // char string[200]; + // rsp_dasm_one(string, 0x800, op); + // if (strcmp(string, "???")) { + // printf("%s\n", string); + // printf("unimplemented opcode\n"); + // } + break; + } + } +} diff --git 
a/source/mupen64plus-rsp-z64/src/rsp_opinfo.h b/source/mupen64plus-rsp-z64/src/rsp_opinfo.h new file mode 100644 index 0000000..ac6f75a --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp_opinfo.h @@ -0,0 +1,200 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#include "rsp.h" + +struct rsp_regmask_t { + //UINT32 r; + UINT8 v[32]; + UINT32 accu; + UINT8 flag; +}; + +#define RSP_GET_REG_I(i, R) ( (i).r & (1<<(R)) ) +#define RSP_SET_REG_I(i, R) (i).r |= (1<<(R)) +#define RSP_CLEAR_REG_I(i, R) (i).r &= ~(1<<(R)) + +#define RSP_GET_VEC_I(i, R, I) ( (i).v[R] & (1<<(I)) ) +#define RSP_SET_VEC_I(i, R, I) (i).v[R] |= (1<<(I)) +#define RSP_CLEAR_VEC_I(i, R, I) (i).v[R] &= ~(1<<(I)) + +#define RSP_GET_ACCU_I(i, I, P) ( (i).accu & ((P)<<(I)*4) ) +#define RSP_SET_ACCU_I(i, I, P) (i).accu |= ((P)<<(I)*4) +#define RSP_CLEAR_ACCU_I(i, I, P) (i).accu &= ~((P)<<(I)*4) + +#define RSP_GET_FLAG_I(i, R) ( (i).flag & (1<<(R)) ) +#define RSP_SET_FLAG_I(i, R) (i).flag |= (1<<(R)) +#define RSP_CLEAR_FLAG_I(i, R) (i).flag &= ~(1<<(R)) + +#define RSP_OPINFO_JUMP 1 +#define RSP_OPINFO_BREAK 2 +#define RSP_OPINFO_COND 4 +#define RSP_OPINFO_LINK 8 +#define RSP_OPINFO_USEPC 16 +struct rsp_opinfo_t { + UINT32 op; // original opcode + int op2; // simplified opcode 
+ rsp_regmask_t used; + rsp_regmask_t set; + int flags; +}; + +void rsp_get_opinfo(UINT32 op, rsp_opinfo_t * info); + +#define RSP_BASIC_OFFS 0x000 +#define RSP_SPECIAL_OFFS 0x040 +#define RSP_LWC2_OFFS 0x0a0 +#define RSP_SWC2_OFFS 0x0c0 +#define RSP_COP2_1_OFFS 0x080 +#define RSP_COP2_2_OFFS 0x100 +#define RSP_CONTROL_OFFS 0x140 + + +#define RSP_STOP (RSP_CONTROL_OFFS + 0x00) +#define RSP_LOOP (RSP_CONTROL_OFFS + 0x01) +#define RSP_JUMP (RSP_CONTROL_OFFS + 0x02) +#define RSP_CONDJUMP (RSP_CONTROL_OFFS + 0x03) +#define RSP_JUMPLOCAL (RSP_CONTROL_OFFS + 0x04) +#define RSP_CONDJUMPLOCAL (RSP_CONTROL_OFFS + 0x05) + + +#define RSP_SPECIAL (RSP_BASIC_OFFS + 0x00) +#define RSP_REGIMM (RSP_BASIC_OFFS + 0x01) +#define RSP_J (RSP_BASIC_OFFS + 0x02) +#define RSP_JAL (RSP_BASIC_OFFS + 0x03) +#define RSP_BEQ (RSP_BASIC_OFFS + 0x04) +#define RSP_BNE (RSP_BASIC_OFFS + 0x05) +#define RSP_BLEZ (RSP_BASIC_OFFS + 0x06) +#define RSP_BGTZ (RSP_BASIC_OFFS + 0x07) +#define RSP_ADDI (RSP_BASIC_OFFS + 0x08) +#define RSP_ADDIU (RSP_BASIC_OFFS + 0x09) +#define RSP_SLTI (RSP_BASIC_OFFS + 0x0a) +#define RSP_SLTIU (RSP_BASIC_OFFS + 0x0b) +#define RSP_ANDI (RSP_BASIC_OFFS + 0x0c) +#define RSP_ORI (RSP_BASIC_OFFS + 0x0d) +#define RSP_XORI (RSP_BASIC_OFFS + 0x0e) +#define RSP_LUI (RSP_BASIC_OFFS + 0x0f) +#define RSP_COP0 (RSP_BASIC_OFFS + 0x10) +#define RSP_COP2 (RSP_BASIC_OFFS + 0x12) +#define RSP_LB (RSP_BASIC_OFFS + 0x20) +#define RSP_LH (RSP_BASIC_OFFS + 0x21) +#define RSP_LW (RSP_BASIC_OFFS + 0x23) +#define RSP_LBU (RSP_BASIC_OFFS + 0x24) +#define RSP_LHU (RSP_BASIC_OFFS + 0x25) +#define RSP_SB (RSP_BASIC_OFFS + 0x28) +#define RSP_SH (RSP_BASIC_OFFS + 0x29) +#define RSP_SW (RSP_BASIC_OFFS + 0x2b) +#define RSP_LWC2 (RSP_BASIC_OFFS + 0x32) +#define RSP_SWC2 (RSP_BASIC_OFFS + 0x3a) +#define RSP_BLTZ (RSP_BASIC_OFFS + 0x3b) +#define RSP_BGEZ (RSP_BASIC_OFFS + 0x3c) +#define RSP_BGEZAL (RSP_BASIC_OFFS + 0x3d) + +#define RSP_SLL (RSP_SPECIAL_OFFS + 0x00) +#define RSP_SRL (RSP_SPECIAL_OFFS + 0x02) 
+#define RSP_SRA (RSP_SPECIAL_OFFS + 0x03) +#define RSP_SLLV (RSP_SPECIAL_OFFS + 0x04) +#define RSP_SRLV (RSP_SPECIAL_OFFS + 0x06) +#define RSP_SRAV (RSP_SPECIAL_OFFS + 0x07) +#define RSP_JR (RSP_SPECIAL_OFFS + 0x08) +#define RSP_JALR (RSP_SPECIAL_OFFS + 0x09) +#define RSP_BREAK (RSP_SPECIAL_OFFS + 0x0d) +#define RSP_ADD (RSP_SPECIAL_OFFS + 0x20) +#define RSP_ADDU (RSP_SPECIAL_OFFS + 0x21) +#define RSP_SUB (RSP_SPECIAL_OFFS + 0x22) +#define RSP_SUBU (RSP_SPECIAL_OFFS + 0x23) +#define RSP_AND (RSP_SPECIAL_OFFS + 0x24) +#define RSP_OR (RSP_SPECIAL_OFFS + 0x25) +#define RSP_XOR (RSP_SPECIAL_OFFS + 0x26) +#define RSP_NOR (RSP_SPECIAL_OFFS + 0x27) +#define RSP_SLT (RSP_SPECIAL_OFFS + 0x2a) +#define RSP_SLTU (RSP_SPECIAL_OFFS + 0x2b) + +#define RSP_MFC2 (RSP_COP2_1_OFFS + 0x00) +#define RSP_CFC2 (RSP_COP2_1_OFFS + 0x02) +#define RSP_MTC2 (RSP_COP2_1_OFFS + 0x04) +#define RSP_CTC2 (RSP_COP2_1_OFFS + 0x06) + + +#define RSP_LBV (RSP_LWC2_OFFS + 0x00) +#define RSP_LSV (RSP_LWC2_OFFS + 0x01) +#define RSP_LLV (RSP_LWC2_OFFS + 0x02) +#define RSP_LDV (RSP_LWC2_OFFS + 0x03) +#define RSP_LQV (RSP_LWC2_OFFS + 0x04) +#define RSP_LRV (RSP_LWC2_OFFS + 0x05) +#define RSP_LPV (RSP_LWC2_OFFS + 0x06) +#define RSP_LUV (RSP_LWC2_OFFS + 0x07) +#define RSP_LHV (RSP_LWC2_OFFS + 0x08) +#define RSP_LFV (RSP_LWC2_OFFS + 0x09) +#define RSP_LWV (RSP_LWC2_OFFS + 0x0a) +#define RSP_LTV (RSP_LWC2_OFFS + 0x0b) + +#define RSP_SBV (RSP_SWC2_OFFS + 0x00) +#define RSP_SSV (RSP_SWC2_OFFS + 0x01) +#define RSP_SLV (RSP_SWC2_OFFS + 0x02) +#define RSP_SDV (RSP_SWC2_OFFS + 0x03) +#define RSP_SQV (RSP_SWC2_OFFS + 0x04) +#define RSP_SRV (RSP_SWC2_OFFS + 0x05) +#define RSP_SPV (RSP_SWC2_OFFS + 0x06) +#define RSP_SUV (RSP_SWC2_OFFS + 0x07) +#define RSP_SHV (RSP_SWC2_OFFS + 0x08) +#define RSP_SFV (RSP_SWC2_OFFS + 0x09) +#define RSP_SWV (RSP_SWC2_OFFS + 0x0a) +#define RSP_STV (RSP_SWC2_OFFS + 0x0b) + +#define RSP_VMULF (RSP_COP2_2_OFFS + 0x00) +#define RSP_VMULU (RSP_COP2_2_OFFS + 0x01) +#define RSP_VMUDL 
(RSP_COP2_2_OFFS + 0x04) +#define RSP_VMUDM (RSP_COP2_2_OFFS + 0x05) +#define RSP_VMUDN (RSP_COP2_2_OFFS + 0x06) +#define RSP_VMUDH (RSP_COP2_2_OFFS + 0x07) +#define RSP_VMACF (RSP_COP2_2_OFFS + 0x08) +#define RSP_VMACU (RSP_COP2_2_OFFS + 0x09) +#define RSP_VMADL (RSP_COP2_2_OFFS + 0x0c) +#define RSP_VMADM (RSP_COP2_2_OFFS + 0x0d) +#define RSP_VMADN (RSP_COP2_2_OFFS + 0x0e) +#define RSP_VMADH (RSP_COP2_2_OFFS + 0x0f) +#define RSP_VADD (RSP_COP2_2_OFFS + 0x10) +#define RSP_VSUB (RSP_COP2_2_OFFS + 0x11) +#define RSP_VABS (RSP_COP2_2_OFFS + 0x13) +#define RSP_VADDC (RSP_COP2_2_OFFS + 0x14) +#define RSP_VSUBC (RSP_COP2_2_OFFS + 0x15) +#define RSP_VSAW (RSP_COP2_2_OFFS + 0x1d) +#define RSP_VLT (RSP_COP2_2_OFFS + 0x20) +#define RSP_VEQ (RSP_COP2_2_OFFS + 0x21) +#define RSP_VNE (RSP_COP2_2_OFFS + 0x22) +#define RSP_VGE (RSP_COP2_2_OFFS + 0x23) +#define RSP_VCL (RSP_COP2_2_OFFS + 0x24) +#define RSP_VCH (RSP_COP2_2_OFFS + 0x25) +#define RSP_VCR (RSP_COP2_2_OFFS + 0x26) +#define RSP_VMRG (RSP_COP2_2_OFFS + 0x27) +#define RSP_VAND (RSP_COP2_2_OFFS + 0x28) +#define RSP_VNAND (RSP_COP2_2_OFFS + 0x29) +#define RSP_VOR (RSP_COP2_2_OFFS + 0x2a) +#define RSP_VNOR (RSP_COP2_2_OFFS + 0x2b) +#define RSP_VXOR (RSP_COP2_2_OFFS + 0x2c) +#define RSP_VNXOR (RSP_COP2_2_OFFS + 0x2d) +#define RSP_VRCP (RSP_COP2_2_OFFS + 0x30) +#define RSP_VRCPL (RSP_COP2_2_OFFS + 0x31) +#define RSP_VRCPH (RSP_COP2_2_OFFS + 0x32) +#define RSP_VMOV (RSP_COP2_2_OFFS + 0x33) +#define RSP_VRSQL (RSP_COP2_2_OFFS + 0x35) +#define RSP_VRSQH (RSP_COP2_2_OFFS + 0x36) diff --git a/source/mupen64plus-rsp-z64/src/rsp_recomp.cpp b/source/mupen64plus-rsp-z64/src/rsp_recomp.cpp new file mode 100644 index 0000000..f771955 --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp_recomp.cpp @@ -0,0 +1,574 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#include "rsp_recomp.h" +#include + +#define GENDEBUG + +struct gen_t { + UINT32 crc; + int lbc; + rsp_bc_t * bc; +#ifdef GENDEBUG + char name[32]; +#endif +}; + +struct opinfo_t { + int visit, labeled; + int label; + + unsigned int nbgen; + unsigned int szgen; + gen_t * gentable; + gen_t * curgen; +}; + +struct branch_t { + int start, end; +}; + +static int curvisit; +static opinfo_t opinfo[0x1000/4]; +static int jumps[0x1000]; +static unsigned int nb_branches; +static branch_t branches[256]; +static unsigned int nb_labels; +static int labels[256]; + +#define OPI(pc) opinfo[(pc)>>2] +/*inline*/ void SETLABEL(int pc) { + //printf("%x\n", pc); + //pc &= 0xfff; + assert(pc >= 0 && pc < 0x1000); + if (OPI(pc).labeled != curvisit) { + labels[nb_labels] = pc; + OPI(pc).label = nb_labels++; + assert(nb_labels < sizeof(labels)/sizeof(labels[0])); + OPI(pc).labeled = curvisit; + } +} + +#define ABS(addr) (((addr) << 2) & 0xfff) +#define REL(offset) ((pc + ((offset) << 2)) & 0xfff) + +static UINT32 prep_gen(int pc, UINT32 crc, int & len) +{ + UINT32 op; + int br = 0; + + branches[nb_branches].start = pc; + + while ( !br ) + { + if (OPI(pc).visit == curvisit) { + SETLABEL((pc)&0xfff); + SETLABEL((pc+4)&0xfff); + break; + } + + OPI(pc).visit = curvisit; + + op = ROPCODE(pc); + crc = ((crc<<1)|(crc>>31))^op^pc; + pc = (pc+4)&0xfff; + len++; + + switch (op >> 26) + { + case 0x00: /* SPECIAL */ + { + switch (op & 
0x3f) + { + case 0x08: /* JR */ + br = 1; + break; + case 0x09: /* JALR */ + //br = 1; + break; + case 0x0d: /* BREAK */ + br = 1; + break; + } + break; + } + + case 0x01: /* REGIMM */ + { + switch (RTREG) + { + case 0x00: /* BLTZ */ + case 0x01: /* BGEZ */ + SETLABEL(REL(SIMM16)); + break; + case 0x11: /* BGEZAL */ + //br = 1; + break; + } + break; + } + + case 0x02: /* J */ + SETLABEL(ABS(UIMM26)); + br = 1; + break; + case 0x04: /* BEQ */ + case 0x05: /* BNE */ + case 0x06: /* BLEZ */ + case 0x07: /* BGTZ */ + SETLABEL(REL(SIMM16)); + break; + case 0x03: /* JAL */ + //SETLABEL(ABS(UIMM26)); + //br = 1; + break; + } + + } + + branches[nb_branches++].end = pc; + assert(nb_branches < sizeof(branches)/sizeof(branches[0])); + + return crc; +} + +static void rsp_gen(int pc) +{ + unsigned int i; + + curvisit++; + if (!curvisit) { + // we looped, reset all visit counters + for (i=0; i<0x1000/4; i++) { + opinfo[i].visit = 0; + opinfo[i].labeled = 0; + } + curvisit++; + } + + nb_branches = 0; + nb_labels = 0; + + int len = 0; + UINT32 crc = prep_gen(pc, 0, len); + + for (i=0; igentable) { + for (i=0; inbgen; i++) + if (opi->gentable[i].crc == crc) { + opi->curgen = opi->gentable + i; + return; + } + } + if (opi->nbgen >= opi->szgen) { + if (opi->szgen) + opi->szgen *= 2; + else + opi->szgen = 4; + opi->gentable = (gen_t *) realloc(opi->gentable, sizeof(gen_t)*(opi->szgen)); + } + gen_t * gen; + gen = opi->gentable + opi->nbgen++; + gen->crc = crc; + opi->curgen = gen; + + // convert to bytecode + unsigned int lbc = 0; + static rsp_bc_t bc[0x1000*2+10]; + for (i=0; i>3)&0xffc; + // char s[128]; + // rsp_dasm_one(s, realpc, bc[i].op); + // printf("%3x\t%s\n", realpc, s); + // } + switch (bc[i].op2) { + case RSP_JUMPLOCAL: + case RSP_CONDJUMPLOCAL: + case RSP_LOOP: + { + // int pc; + // for (pc = 0; pc>5)<<2) == bc[i].flags) + // break; + // assert(pc < lbc); + // bc[i].flags = pc<<5; + bc[i].flags = jumps[bc[i].flags]<<5; + break; + } + } + } + + gen->lbc = lbc; + gen->bc = 
(rsp_bc_t *) malloc(sizeof(rsp_bc_t)*lbc); + memcpy(gen->bc, bc, sizeof(rsp_bc_t)*lbc); +} + +void rsp_invalidate(int begin, int len) +{ + //printf("invalidate %x %x\n", begin, len); + begin = 0; len = 0x1000; + assert(begin+len<=0x1000); + while (len > 0) { + OPI(begin).curgen = 0; + begin += 4; + len -= 4; + } + rsp.inval_gen = 1; +} + +inline void rsp_execute_one(RSP_REGS & rsp, const UINT32 op) +{ + switch (op >> 26) + { + case 0x12: /* COP2 */ + { + handle_vector_ops(op); + break; + } + + case 0x32: /* LWC2 */ handle_lwc2(op); break; + case 0x3a: /* SWC2 */ handle_swc2(op); break; + + default: + { + unimplemented_opcode(op); + break; + } + } +} + +static int cond; +static int run(RSP_REGS & rsp, gen_t * gen) +{ + int pc = 0; + + cond = 0; + for ( ; ; ) { + const rsp_bc_t & bc = gen->bc[pc]; + const UINT32 op = bc.op; + const int op2 = bc.op2; + + // if (op2 < RSP_CONTROL_OFFS) { + // int realpc = (bc.flags>>3)&0xffc; + // char s[128]; + // rsp_dasm_one(s, realpc, op); + // fprintf(stderr, "%3x\t%s\n", realpc, s); + // } + + pc++; + switch (op2) { + case RSP_LOOP: + pc = bc.flags>>5; + break; + case RSP_JUMPLOCAL: + case RSP_CONDJUMPLOCAL: + if (cond) { + pc = bc.flags>>5; + cond = 0; + } + break; + case RSP_JUMP: + case RSP_CONDJUMP: + if (cond) { + return 0; + } + break; + + #define _LINK(l) rsp.r[l] = ((bc.flags >>3)+8)&0xffc + #define _JUMP_PC(a) { cond=1; rsp.nextpc = ((a) & 0xfff); } + #define _JUMP_PC_L(a, l) { _LINK(l); _JUMP_PC(a); } + #define _JUMP_REL(a) _JUMP_PC(((bc.flags >>3)+4+(a<<2))&0xffc) + #define _JUMP_REL_L(a, l) _JUMP_PC_L(((bc.flags >>3)+4+(a<<2))&0xffc, l) + + case RSP_SLL: if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break; + case RSP_SRL: if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break; + case RSP_SRA: if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break; + case RSP_SLLV: if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break; + case RSP_SRLV: if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break; + case RSP_SRAV: if (RDREG) RDVAL = 
(INT32)RTVAL >> (RSVAL & 0x1f); break; + case RSP_JR: _JUMP_PC(RSVAL); break; + case RSP_JALR: _JUMP_PC_L(RSVAL, RDREG); break; + case RSP_BREAK: + { + *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE ); + if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { + *z64_rspinfo.MI_INTR_REG |= 1; + z64_rspinfo.CheckInterrupts(); + } + return 1; + } + case RSP_ADD: if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case RSP_ADDU: if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; + case RSP_SUB: if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case RSP_SUBU: if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; + case RSP_AND: if (RDREG) RDVAL = RSVAL & RTVAL; break; + case RSP_OR: if (RDREG) RDVAL = RSVAL | RTVAL; break; + case RSP_XOR: if (RDREG) RDVAL = RSVAL ^ RTVAL; break; + case RSP_NOR: if (RDREG) RDVAL = ~(RSVAL | RTVAL); break; + case RSP_SLT: if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break; + case RSP_SLTU: if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break; + case RSP_BLTZ: if ((INT32)(RSVAL) < 0) cond = 1; break; + case RSP_BGEZ: if ((INT32)(RSVAL) >= 0) cond = 1; break; + case RSP_BGEZAL: _LINK(31); if ((INT32)(RSVAL) >= 0) _JUMP_REL(SIMM16); break; + case RSP_J: cond = 1; break; + case RSP_JAL: _JUMP_PC_L(UIMM26<<2, 31); break; + case RSP_BEQ: if (RSVAL == RTVAL) cond = 1; break; + case RSP_BNE: if (RSVAL != RTVAL) cond = 1; break; + case RSP_BLEZ: if ((INT32)RSVAL <= 0) cond = 1; break; + case RSP_BGTZ: if ((INT32)RSVAL > 0) cond = 1; break; + case RSP_ADDI: if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case RSP_ADDIU: if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; + case RSP_SLTI: if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break; + case RSP_SLTIU: if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break; + case RSP_ANDI: if (RTREG) RTVAL = RSVAL & UIMM16; break; + case RSP_ORI: if (RTREG) RTVAL = RSVAL | UIMM16; break; + case RSP_XORI: if (RTREG) RTVAL = RSVAL ^ UIMM16; break; + case 
RSP_LUI: if (RTREG) RTVAL = UIMM16 << 16; break; + + case RSP_COP0: + { + switch ((op >> 21) & 0x1f) + { + case 0x00: /* MFC0 */ + if (RTREG) + RTVAL = get_cop0_reg(RDREG); + break; + case 0x04: /* MTC0 */ + set_cop0_reg(RDREG, RTVAL); + if (rsp.inval_gen) { + rsp.inval_gen = 0; + sp_pc = ((bc.flags >>3) + 4)&0xffc; + return 2; + } + break; + default: + log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op); + break; + } + break; + } + + case RSP_MFC2: + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); + UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); + if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); + break; + } + case RSP_CFC2: + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + // VP to sign extend or to not sign extend ? 
+ //if (RTREG) RTVAL = (INT16)rsp.flag[RDREG]; + if (RTREG) RTVAL = rsp.flag[RDREG]; + break; + } + case RSP_MTC2: + { + // 31 25 20 15 10 6 0 + // --------------------------------------------------- + // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | + // --------------------------------------------------- + // + + int el = (op >> 7) & 0xf; + VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; + VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; + break; + } + case RSP_CTC2: + { + // 31 25 20 15 10 0 + // ------------------------------------------------ + // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | + // ------------------------------------------------ + // + + rsp.flag[RDREG] = RTVAL & 0xffff; + break; + } + case RSP_LB: if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break; + case RSP_LH: if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break; + case RSP_LW: if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break; + case RSP_LBU: if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break; + case RSP_LHU: if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break; + case RSP_SB: WRITE8(RSVAL + SIMM16, RTVAL); break; + case RSP_SH: WRITE16(RSVAL + SIMM16, RTVAL); break; + case RSP_SW: WRITE32(RSVAL + SIMM16, RTVAL); break; + + default: + switch (op >> 26) + { + case 0x12: /* COP2 */ + handle_vector_ops(op); + break; + case 0x32: /* LWC2 */ + handle_lwc2(op); + break; + case 0x3a: /* SWC2 */ + handle_swc2(op); + break; + } + } + } +} + +int rsp_gen_cache_hit; +int rsp_gen_cache_miss; +int rsp_jump(int pc) +{ + pc &= 0xfff; + sp_pc = pc; + rsp.nextpc = ~0; + opinfo_t * opi = &OPI(pc); + gen_t * gen = opi->curgen; + rsp_gen_cache_hit++; + if (!gen) { + rsp_gen_cache_miss++; + rsp_gen(pc); + } + gen = opi->curgen; + //fprintf(stderr, "rsp_jump %x (%s)\n", pc, gen->name); + + int res = run(rsp, gen); + + //fprintf(stderr, "r31 %x from %x nextpc %x pc %x res %d (%s)\n", rsp.r[31], pc, rsp.nextpc, sp_pc, res, gen->name); + if (rsp.nextpc != ~0U) + { + 
sp_pc = (rsp.nextpc & 0xfff); + rsp.nextpc = ~0U; + } + else + { + //sp_pc = ((sp_pc+4)&0xfff); + } + return res; +} diff --git a/source/mupen64plus-rsp-z64/src/rsp_recomp.h b/source/mupen64plus-rsp-z64/src/rsp_recomp.h new file mode 100644 index 0000000..1e8d0a3 --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/rsp_recomp.h @@ -0,0 +1,29 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#include "rsp_opinfo.h" + +struct rsp_bc_t { + UINT32 op; // original opcode + short op2; // simplified opcode + short flags; + rsp_regmask_t need; +}; diff --git a/source/mupen64plus-rsp-z64/src/z64.h b/source/mupen64plus-rsp-z64/src/z64.h new file mode 100644 index 0000000..0a57679 --- /dev/null +++ b/source/mupen64plus-rsp-z64/src/z64.h @@ -0,0 +1,90 @@ +/* + * z64 + * + * Copyright (C) 2007 ziggy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +**/ + +#ifndef _Z64_H_ +#define _Z64_H_ +#include <stdint.h> + +#define DACRATE_NTSC (48681812) +#define DACRATE_PAL (49656530) +#define DACRATE_MPAL (48628316) + +#define SP_INTERRUPT 0x1 +#define SI_INTERRUPT 0x2 +#define AI_INTERRUPT 0x4 +#define VI_INTERRUPT 0x8 +#define PI_INTERRUPT 0x10 +#define DP_INTERRUPT 0x20 + +#define SP_STATUS_HALT 0x0001 +#define SP_STATUS_BROKE 0x0002 +#define SP_STATUS_DMABUSY 0x0004 +#define SP_STATUS_DMAFULL 0x0008 +#define SP_STATUS_IOFULL 0x0010 +#define SP_STATUS_SSTEP 0x0020 +#define SP_STATUS_INTR_BREAK 0x0040 +#define SP_STATUS_SIGNAL0 0x0080 +#define SP_STATUS_SIGNAL1 0x0100 +#define SP_STATUS_SIGNAL2 0x0200 +#define SP_STATUS_SIGNAL3 0x0400 +#define SP_STATUS_SIGNAL4 0x0800 +#define SP_STATUS_SIGNAL5 0x1000 +#define SP_STATUS_SIGNAL6 0x2000 +#define SP_STATUS_SIGNAL7 0x4000 + +#define DP_STATUS_XBUS_DMA 0x01 +#define DP_STATUS_FREEZE 0x02 +#define DP_STATUS_FLUSH 0x04 +#define DP_STATUS_START_GCLK 0x008 +#define DP_STATUS_TMEM_BUSY 0x010 +#define DP_STATUS_PIPE_BUSY 0x020 +#define DP_STATUS_CMD_BUSY 0x040 +#define DP_STATUS_CBUF_READY 0x080 +#define DP_STATUS_DMA_BUSY 0x100 +#define DP_STATUS_END_VALID 0x200 +#define DP_STATUS_START_VALID 0x400 + +#define R4300i_SP_Intr 1 + + +#define LSB_FIRST 1 // TODO : check for platform +#ifdef LSB_FIRST + #define BYTE_ADDR_XOR 3 + #define WORD_ADDR_XOR 1 + #define BYTE4_XOR_BE(a) ((a) ^ 3) /* read/write a byte to a 32-bit space */ +#else + #define BYTE_ADDR_XOR 0 + #define WORD_ADDR_XOR 0 + #define BYTE4_XOR_BE(a) (a) +#endif + + +typedef uint64_t UINT64; +typedef int64_t INT64; +typedef uint32_t UINT32; +typedef int32_t INT32; +typedef uint16_t UINT16; +typedef int16_t INT16; +typedef uint8_t UINT8; +typedef int8_t
INT8; +typedef unsigned int offs_t; +#endif + -- 2.39.2