From d9e74a6f3c9f96acfb0d64f4bda43f9b9c14984e Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 24 Sep 2013 21:39:44 +0200 Subject: [PATCH] RSP HLE plugin. Compile and run on the OpenPandora --- source/mupen64plus-rsp-hle/INSTALL | 26 + source/mupen64plus-rsp-hle/LICENSES | 369 +++++++ source/mupen64plus-rsp-hle/RELEASE | 44 + .../msvc11/mupen64plus-rsp-hle.vcxproj | 114 +++ .../projects/msvc8/mupen64plus-rsp-hle.vcproj | 243 +++++ .../projects/unix/Makefile | 314 ++++++ source/mupen64plus-rsp-hle/src/alist.c | 82 ++ source/mupen64plus-rsp-hle/src/alist.h | 33 + .../mupen64plus-rsp-hle/src/alist_internal.h | 50 + source/mupen64plus-rsp-hle/src/cicx105.c | 55 + source/mupen64plus-rsp-hle/src/cicx105.h | 28 + source/mupen64plus-rsp-hle/src/hle.h | 89 ++ source/mupen64plus-rsp-hle/src/jpeg.c | 683 +++++++++++++ source/mupen64plus-rsp-hle/src/jpeg.h | 30 + source/mupen64plus-rsp-hle/src/main.c | 476 +++++++++ .../src/rsp_api_export.ver | 8 + source/mupen64plus-rsp-hle/src/ucode1.cpp | 951 ++++++++++++++++++ source/mupen64plus-rsp-hle/src/ucode2.cpp | 930 +++++++++++++++++ source/mupen64plus-rsp-hle/src/ucode3.cpp | 834 +++++++++++++++ source/mupen64plus-rsp-hle/src/ucode3mp3.cpp | 604 +++++++++++ 20 files changed, 5963 insertions(+) create mode 100644 source/mupen64plus-rsp-hle/INSTALL create mode 100644 source/mupen64plus-rsp-hle/LICENSES create mode 100644 source/mupen64plus-rsp-hle/RELEASE create mode 100644 source/mupen64plus-rsp-hle/projects/msvc11/mupen64plus-rsp-hle.vcxproj create mode 100644 source/mupen64plus-rsp-hle/projects/msvc8/mupen64plus-rsp-hle.vcproj create mode 100755 source/mupen64plus-rsp-hle/projects/unix/Makefile create mode 100644 source/mupen64plus-rsp-hle/src/alist.c create mode 100644 source/mupen64plus-rsp-hle/src/alist.h create mode 100644 source/mupen64plus-rsp-hle/src/alist_internal.h create mode 100644 source/mupen64plus-rsp-hle/src/cicx105.c create mode 100644 source/mupen64plus-rsp-hle/src/cicx105.h create mode 100644 
source/mupen64plus-rsp-hle/src/hle.h create mode 100755 source/mupen64plus-rsp-hle/src/jpeg.c create mode 100644 source/mupen64plus-rsp-hle/src/jpeg.h create mode 100644 source/mupen64plus-rsp-hle/src/main.c create mode 100644 source/mupen64plus-rsp-hle/src/rsp_api_export.ver create mode 100644 source/mupen64plus-rsp-hle/src/ucode1.cpp create mode 100644 source/mupen64plus-rsp-hle/src/ucode2.cpp create mode 100644 source/mupen64plus-rsp-hle/src/ucode3.cpp create mode 100644 source/mupen64plus-rsp-hle/src/ucode3mp3.cpp diff --git a/source/mupen64plus-rsp-hle/INSTALL b/source/mupen64plus-rsp-hle/INSTALL new file mode 100644 index 0000000..746f9bd --- /dev/null +++ b/source/mupen64plus-rsp-hle/INSTALL @@ -0,0 +1,26 @@ +Mupen64Plus-RSP-HLE INSTALL +--------------------------- + +This text file was written to explain the installation process of the +Mupen64Plus-RSP-HLE plugin. + +If this module is part of a Mupen64Plus source code bundle, the user should run +the "m64p_install.sh" script in the root of the unzipped bundle to install all +of the included modules in the bundle. + +If this module is a standalone source code release, you should build the library +from source code and install it via the makefile, like this: + +$ cd projects/unix +$ make all +$ sudo make install + +If you want to build the Mupen64Plus-RSP-HLE module for installation in a home +folder for a single user, you may build it like this (replacing <my-folder> +with your desired local installation path): + +$ cd projects/unix +$ make all +$ make install LIBDIR=<my-folder> + + diff --git a/source/mupen64plus-rsp-hle/LICENSES b/source/mupen64plus-rsp-hle/LICENSES new file mode 100644 index 0000000..e1403bd --- /dev/null +++ b/source/mupen64plus-rsp-hle/LICENSES @@ -0,0 +1,369 @@ +Mupen64Plus-rsp-hle LICENSE +--------------------------- + +Mupen64Plus-rsp-hle is licensed under the GNU General Public License version 2. 
+ +The authors of Mupen64Plus-rsp-hle are: + * Richard Goedeken (Richard42) + * Bobby Smiles + * John Chadwick (NMN) + * James Hood (Ebenblues) + * Scott Gorman (okaygo) + * Scott Knauert (Tillin9) + * Jesse Dean (DarkJezter) + * Louai Al-Khanji (slougi) + * Bob Forder (orbitaldecay) + * Jason Espinosa (hasone) + * HyperHacker + * and others. + +Mupen64Plus is based on GPL-licensed source code from Mupen64 v0.5, originally written by: + * Hacktarux + * Dave2001 + * Zilmar + * Gregor Anich (Blight) + * Juha Luotio (JttL) + * and others. + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
+ diff --git a/source/mupen64plus-rsp-hle/RELEASE b/source/mupen64plus-rsp-hle/RELEASE new file mode 100644 index 0000000..7a66111 --- /dev/null +++ b/source/mupen64plus-rsp-hle/RELEASE @@ -0,0 +1,44 @@ +RSP High-Level Emulation plugin for Mupen64Plus +----------------------------------------------- + +Mupen64Plus-rsp-hle v2.0 - July 4, 2013 +------------------------------------------------- + - Add support for MusyX ucode detection + - support JPEG decoding used in Pokemon Stadium Japan + - lots of refactoring to clean up code + - Project files for Visual Studio 2012 + - Makefile changes + - add support for PowerPC and MinGW32 builds + - add cross-compiling support to build Win32 executables (MXE) under Linux + +Mupen64Plus-rsp-hle v1.99.5 - March 10, 2012 +------------------------------------------------- + - Handle JPEG decompression, used in Ogre Battle 64 and Pokemon Stadium + - updated RSP plugin for new Mupen64plus 2.0 API versioning scheme + - bugfix: #102 - Missing backgrounds in Ogre Battle 64 + - many makefile fixes and improvements + +Mupen64Plus-rsp-hle v1.99.4 - November 22, 2010 +------------------------------------------------- + - merged all big-endian fixes from mupen64gc project + - makefile fixes and improvements + +Mupen64Plus-rsp-hle v1.99.3 - February 13, 2010 +------------------------------------------------- + - Makefile improvement: added OS type GNU/kFreeBSD + +Mupen64Plus-rsp-hle v1.99.2 - January 6, 2010 +------------------------------------------------- + - new feature: added MSVC8 project file for RSP-HLE plugin, fixed a few minor incompatibilities + - Makefile improvements: + - throw error if OS/CPU not supported + - use DESTDIR in install/uninstall paths + - Allow user-specified CC/CXX/LD paths + - use C++ compiler to link instead of LD, because the compiler knows where the standard C++ libs are + +Mupen64Plus-rsp-hle v1.99.1 - December 14, 2009 +------------------------------------------------- + - Converted to new Mupen64Plus 2.0 
API + - Refactored build system to separate source and object files + - Refactored all code to remove win32-specific things, unnecessary functions, and clean up + diff --git a/source/mupen64plus-rsp-hle/projects/msvc11/mupen64plus-rsp-hle.vcxproj b/source/mupen64plus-rsp-hle/projects/msvc11/mupen64plus-rsp-hle.vcxproj new file mode 100644 index 0000000..936d879 --- /dev/null +++ b/source/mupen64plus-rsp-hle/projects/msvc11/mupen64plus-rsp-hle.vcxproj @@ -0,0 +1,114 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {2EC7CEE3-C7A7-4F2E-B2C8-4DF6AFEC3E9A} + mupen64plusrsphle + Win32Proj + + + + DynamicLibrary + MultiByte + true + v110 + + + DynamicLibrary + MultiByte + v110 + + + + + + + + + + + + + <_ProjectFileVersion>10.0.40219.1 + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + AllRules.ruleset + + + AllRules.ruleset + + + + + + Disabled + ..\..\..\mupen64plus-core\src\api;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;inline=__inline;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + Default + + + true + Windows + MachineX86 + + + + + ..\..\..\mupen64plus-core\src\api;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;inline=__inline;%(PreprocessorDefinitions) + MultiThreadedDLL + + + Level3 + ProgramDatabase + Default + + + true + Windows + true + true + MachineX86 + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/source/mupen64plus-rsp-hle/projects/msvc8/mupen64plus-rsp-hle.vcproj b/source/mupen64plus-rsp-hle/projects/msvc8/mupen64plus-rsp-hle.vcproj new file mode 100644 index 0000000..c70872c --- /dev/null +++ b/source/mupen64plus-rsp-hle/projects/msvc8/mupen64plus-rsp-hle.vcproj @@ -0,0 +1,243 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/source/mupen64plus-rsp-hle/projects/unix/Makefile b/source/mupen64plus-rsp-hle/projects/unix/Makefile new file mode 100755 index 0000000..cafd807 --- /dev/null +++ b/source/mupen64plus-rsp-hle/projects/unix/Makefile @@ -0,0 +1,314 @@ +#/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +# * mupen64plus-rsp-hle - Makefile * +# * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * +# * Copyright (C) 2008-2009 Richard Goedeken * +# * Copyright (C) 2007-2008 DarkJeztr Tillin9 * +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of the GNU General Public License as published by * +# * the Free Software Foundation; either version 2 of the License, or * +# * (at your option) any later version. * +# * * +# * This program is distributed in the hope that it will be useful, * +# * but WITHOUT ANY WARRANTY; without even the implied warranty of * +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +# * GNU General Public License for more details. * +# * * +# * You should have received a copy of the GNU General Public License * +# * along with this program; if not, write to the * +# * Free Software Foundation, Inc., * +# * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * +# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +# Makefile for Mupen64 HLE RSP plugin in Mupen64plus. 
+ +# detect operating system +UNAME ?= $(shell uname -s) +OS := NONE +ifeq ("$(UNAME)","Linux") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","linux") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifneq ("$(filter GNU hurd,$(UNAME))","") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","Darwin") + OS = OSX + SO_EXTENSION = dylib + SHARED = -bundle +endif +ifeq ("$(UNAME)","FreeBSD") + OS = FREEBSD + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(UNAME)","OpenBSD") + OS = FREEBSD + SO_EXTENSION = so + SHARED = -shared + $(warning OS type "$(UNAME)" not officially supported.') +endif +ifneq ("$(filter GNU/kFreeBSD kfreebsd,$(UNAME))","") + OS = LINUX + SO_EXTENSION = so + SHARED = -shared +endif +ifeq ("$(patsubst MINGW%,MINGW,$(UNAME))","MINGW") + OS = MINGW + SO_EXTENSION = dll + SHARED = -shared + PIC = 0 +endif +ifeq ("$(OS)","NONE") + $(error OS type "$(UNAME)" not supported. Please file bug report at 'http://code.google.com/p/mupen64plus/issues') +endif + +# detect system architecture +HOST_CPU ?= $(shell uname -m) +NO_ASM ?= 1 +CPU := NONE +ifneq ("$(filter x86_64 amd64,$(HOST_CPU))","") + CPU := X86 + ifeq ("$(BITS)", "32") + ARCH_DETECTED := 64BITS_32 + PIC ?= 0 + else + ARCH_DETECTED := 64BITS + PIC ?= 1 + endif +endif +ifneq ("$(filter pentium i%86,$(HOST_CPU))","") + CPU := X86 + ARCH_DETECTED := 32BITS + PIC ?= 0 +endif +ifneq ("$(filter ppc macppc socppc powerpc,$(HOST_CPU))","") + CPU := PPC + ARCH_DETECTED := 32BITS + BIG_ENDIAN := 1 + PIC ?= 1 + $(warning Architecture "$(HOST_CPU)" not officially supported.') +endif +ifneq ("$(filter ppc64 powerpc64,$(HOST_CPU))","") + CPU := PPC + ARCH_DETECTED := 64BITS + BIG_ENDIAN := 1 + PIC ?= 1 + $(warning Architecture "$(HOST_CPU)" not officially supported.') +endif +ifneq ("$(filter arm%,$(HOST_CPU))","") + ifeq ("$(filter arm%b,$(HOST_CPU))","") + CPU := ARM + ARCH_DETECTED := 32BITS + PIC ?= 1 + CFLAGS += -mcpu=cortex-a8 
-mfpu=neon -mfloat-abi=softfp -mtune=cortex-a8 -fsigned-char + $(warning Architecture "$(HOST_CPU)" not officially supported.') + endif +endif +ifeq ("$(CPU)","NONE") + $(error CPU type "$(HOST_CPU)" not supported. Please file bug report at 'http://code.google.com/p/mupen64plus/issues') +endif + +# base CFLAGS, LDLIBS, and LDFLAGS +OPTFLAGS ?= -O3 +WARNFLAGS ?= -Wall +CFLAGS += $(OPTFLAGS) $(WARNFLAGS) -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src +CXXFLAGS += -fvisibility-inlines-hidden +LDFLAGS += $(SHARED) + +# Since we are building a shared library, we must compile with -fPIC on some architectures +# On 32-bit x86 systems we do not want to use -fPIC because we don't have to and it has a big performance penalty on this arch +ifeq ($(PIC), 1) + CFLAGS += -fPIC +else + CFLAGS += -fno-PIC +endif + +ifeq ($(BIG_ENDIAN), 1) + CFLAGS += -DM64P_BIG_ENDIAN +endif + +# tweak flags for 32-bit build on 64-bit system +ifeq ($(ARCH_DETECTED), 64BITS_32) + ifeq ($(OS), FREEBSD) + $(error Do not use the BITS=32 option with FreeBSD, use -m32 and -m elf_i386) + endif + CFLAGS += -m32 + LDFLAGS += -Wl,-m,elf_i386 +endif + +# set special flags per-system +ifeq ($(OS), LINUX) + # only export api symbols + LDFLAGS += -Wl,-version-script,$(SRCDIR)/rsp_api_export.ver +endif +ifeq ($(OS), OSX) + # Select the proper SDK + # Also, SDKs are stored in a different location since XCode 4.3 + OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .) + OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .) + OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .) 
+ OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`") + ifeq ($(OSX_XCODEGE43), 11) + OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs + else + OSX_SYSROOT := /Developer/SDKs + endif + + ifeq ($(CPU), X86) + ifeq ($(ARCH_DETECTED), 64BITS) + CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk + else + CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk + LDFLAGS += -read_only_relocs suppress + endif + endif +endif + +# set mupen64plus core API header path +ifneq ("$(APIDIR)","") + CFLAGS += "-I$(APIDIR)" +else + TRYDIR = ../../../mupen64plus-core/src/api + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + TRYDIR = /usr/local/include/mupen64plus + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + TRYDIR = /usr/include/mupen64plus + ifneq ("$(wildcard $(TRYDIR)/m64p_types.h)","") + CFLAGS += -I$(TRYDIR) + else + $(error Mupen64Plus API header files not found! Use makefile parameter APIDIR to force a location.) 
+ endif + endif + endif +endif + +# reduced compile output when running make without V=1 +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + Q_CC = @echo ' CC '$@; + Q_CXX = @echo ' CXX '$@; + Q_LD = @echo ' LD '$@; +endif +endif + +# set base program pointers and flags +CC = $(CROSS_COMPILE)gcc +CXX = $(CROSS_COMPILE)g++ +RM ?= rm -f +INSTALL ?= install +MKDIR ?= mkdir -p +COMPILE.c = $(Q_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +COMPILE.cc = $(Q_CXX)$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +LINK.o = $(Q_LD)$(CXX) $(CXXFLAGS) $(LDFLAGS) $(TARGET_ARCH) + +# set special flags for given Makefile parameters +ifeq ($(DEBUG),1) + CFLAGS += -g + INSTALL_STRIP_FLAG ?= +else + ifneq ($(OS),OSX) + INSTALL_STRIP_FLAG ?= -s + endif +endif + +# set installation options +ifeq ($(PREFIX),) + PREFIX := /usr/local +endif +ifeq ($(LIBDIR),) + LIBDIR := $(PREFIX)/lib +endif +ifeq ($(PLUGINDIR),) + PLUGINDIR := $(LIBDIR)/mupen64plus +endif + +SRCDIR = ../../src +OBJDIR = _obj$(POSTFIX) + +# list of source files to compile +SOURCE = \ + $(SRCDIR)/main.c \ + $(SRCDIR)/alist.c \ + $(SRCDIR)/cicx105.c \ + $(SRCDIR)/jpeg.c \ + $(SRCDIR)/ucode3.cpp \ + $(SRCDIR)/ucode2.cpp \ + $(SRCDIR)/ucode1.cpp \ + $(SRCDIR)/ucode3mp3.cpp + +# generate a list of object files build, make a temporary directory for them +OBJECTS := $(patsubst $(SRCDIR)/%.c, $(OBJDIR)/%.o, $(filter %.c, $(SOURCE))) +OBJECTS += $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(filter %.cpp, $(SOURCE))) +OBJDIRS = $(dir $(OBJECTS)) +$(shell $(MKDIR) $(OBJDIRS)) + +# build targets +TARGET = mupen64plus-rsp-hle$(POSTFIX).$(SO_EXTENSION) + +targets: + @echo "Mupen64Plus-rsp-hle makefile. 
" + @echo " Targets:" + @echo " all == Build Mupen64Plus rsp-hle plugin" + @echo " clean == remove object files" + @echo " rebuild == clean and re-build all" + @echo " install == Install Mupen64Plus rsp-hle plugin" + @echo " uninstall == Uninstall Mupen64Plus rsp-hle plugin" + @echo " Options:" + @echo " BITS=32 == build 32-bit binaries on 64-bit machine" + @echo " APIDIR=path == path to find Mupen64Plus Core headers" + @echo " OPTFLAGS=flag == compiler optimization (default: -O3 -flto)" + @echo " WARNFLAGS=flag == compiler warning levels (default: -Wall)" + @echo " PIC=(1|0) == Force enable/disable of position independent code" + @echo " POSTFIX=name == String added to the name of the the build (default: '')" + @echo " Install Options:" + @echo " PREFIX=path == install/uninstall prefix (default: /usr/local)" + @echo " LIBDIR=path == library prefix (default: PREFIX/lib)" + @echo " PLUGINDIR=path == path to install plugin libraries (default: LIBDIR/mupen64plus)" + @echo " DESTDIR=path == path to prepend to all installation paths (only for packagers)" + @echo " Debugging Options:" + @echo " DEBUG=1 == add debugging symbols" + @echo " V=1 == show verbose compiler output" + +all: $(TARGET) + +install: $(TARGET) + $(INSTALL) -d "$(DESTDIR)$(PLUGINDIR)" + $(INSTALL) -m 0644 $(INSTALL_STRIP_FLAG) $(TARGET) "$(DESTDIR)$(PLUGINDIR)" + +uninstall: + $(RM) "$(DESTDIR)$(PLUGINDIR)/$(TARGET)" + +clean: + $(RM) -r $(OBJDIR) $(TARGET) + +rebuild: clean all + +# build dependency files +CFLAGS += -MD +-include $(OBJECTS:.o=.d) + +CXXFLAGS += $(CFLAGS) + +# standard build rules +$(OBJDIR)/%.o: $(SRCDIR)/%.c + $(COMPILE.c) -o $@ $< + +$(OBJDIR)/%.o: $(SRCDIR)/%.cpp + $(COMPILE.cc) -o $@ $< + +$(TARGET): $(OBJECTS) + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + +.PHONY: all clean install uninstall targets diff --git a/source/mupen64plus-rsp-hle/src/alist.c b/source/mupen64plus-rsp-hle/src/alist.c new file mode 100644 index 0000000..c141893 --- /dev/null +++ 
b/source/mupen64plus-rsp-hle/src/alist.c @@ -0,0 +1,82 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - alist.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2012 Bobby Smiles * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include "hle.h" +#include "alist_internal.h" + +// FIXME: this decomposition into 3 ABI is not accurate, +// there are a least 9 or 10 different ABI, each with one or a few revisions +// for a total of almost 16 differents audio ucode. +// +// ABI2 in fact is a mix of at least 7 differents ABI which are mostly compatible +// but not totally, that's why there is a isZeldaABI/isMKABI workaround. 
+//
+extern const acmd_callback_t ABI1[0x10];
+extern const acmd_callback_t ABI2[0x20];
+extern const acmd_callback_t ABI3[0x10];
+
+/* local functions */
+
+/**
+ * Run every command of the current task's audio list through a dispatch table.
+ *
+ * The list is read from RDRAM at task->data_ptr and is task->data_size bytes
+ * long. A command is a pair of 32-bit words; the top 8 bits of the first word
+ * select the handler in abi[], which receives both words. Command numbers
+ * that are not below abi_size are reported with a warning and skipped.
+ *
+ * abi       table of command handlers
+ * abi_size  number of entries in abi
+ **/
+static void alist_process(const acmd_callback_t abi[], unsigned int abi_size)
+{
+    u32 inst1, inst2;
+    unsigned int acmd;
+    const OSTask_t * const task = get_task();
+
+    const unsigned int *alist = (const unsigned int*)(rsp.RDRAM + task->data_ptr);
+
+    /* Stop on a whole-command boundary: data_size >> 3 is the number of
+     * complete 8-byte commands. With an odd number of words the loop below
+     * would step over the end pointer and never terminate. */
+    const unsigned int * const alist_end = alist + ((task->data_size >> 3) << 1);
+
+    while (alist != alist_end)
+    {
+        inst1 = *(alist++);
+        inst2 = *(alist++);
+
+        acmd = inst1 >> 24;
+
+        if (acmd < abi_size)
+        {
+            (*abi[acmd])(inst1, inst2);
+        }
+        else
+        {
+            DebugMessage(M64MSG_WARNING, "Invalid ABI command %u", acmd);
+        }
+    }
+}
+
+/* global functions */
+
+/* process the current audio list with the ABI1 table (0x10 entries) */
+void alist_process_ABI1()
+{
+    alist_process(ABI1, 0x10);
+}
+
+/* process the current audio list with the ABI2 table (0x20 entries) */
+void alist_process_ABI2()
+{
+    alist_process(ABI2, 0x20);
+}
+
+/* process the current audio list with the ABI3 table (0x10 entries) */
+void alist_process_ABI3()
+{
+    alist_process(ABI3, 0x10);
+}
+
+
diff --git a/source/mupen64plus-rsp-hle/src/alist.h b/source/mupen64plus-rsp-hle/src/alist.h
new file mode 100644
index 0000000..43f9f04
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/alist.h
@@ -0,0 +1,33 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Mupen64plus-rsp-hle - alist.h *
+ * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
+ * Copyright (C) 2002 Hacktarux *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details.
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef ALIST_H +#define ALIST_H + +void alist_process_ABI1(); +void alist_process_ABI2(); +void alist_process_ABI3(); + +// FIXME: to remove when isZeldaABI/isMKABI workaround is gone +void init_ucode2(); + +#endif + diff --git a/source/mupen64plus-rsp-hle/src/alist_internal.h b/source/mupen64plus-rsp-hle/src/alist_internal.h new file mode 100644 index 0000000..ae39883 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/alist_internal.h @@ -0,0 +1,50 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - alist_internal.h * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef ALIST_INTERNAL_H +#define ALIST_INTERNAL_H + +#include "hle.h" + +typedef void (*acmd_callback_t)(u32 inst1, u32 inst2); + +/* + * Audio flags + */ + +#define A_INIT 0x01 +#define A_CONTINUE 0x00 +#define A_LOOP 0x02 +#define A_OUT 0x02 +#define A_LEFT 0x02 +#define A_RIGHT 0x00 +#define A_VOL 0x04 +#define A_RATE 0x00 +#define A_AUX 0x08 +#define A_NOAUX 0x00 +#define A_MAIN 0x00 +#define A_MIX 0x10 + +extern u16 AudioInBuffer, AudioOutBuffer, AudioCount; +extern u16 AudioAuxA, AudioAuxC, AudioAuxE; +extern u32 loopval; // Value set by A_SETLOOP : Possible conflict with SETVOLUME??? + +#endif diff --git a/source/mupen64plus-rsp-hle/src/cicx105.c b/source/mupen64plus-rsp-hle/src/cicx105.c new file mode 100644 index 0000000..1e89056 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/cicx105.c @@ -0,0 +1,55 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - cicx105.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2012 Bobby Smiles * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <string.h>
+
+#include "hle.h"
+
+/**
+ * During IPL3 stage of CIC x105 games, the RSP performs some checks and transactions
+ * necessary for booting the game.
+ *
+ * We only implement the needed DMA transactions for booting:
+ *  - 0x1f0 bytes are copied from RDRAM offset 0x1e8 to IMEM offset 0x120,
+ *  - the first 24 * 8 bytes of that copy are then written back to RDRAM
+ *    starting at offset 0x2fb1f0, one 8-byte row every 0xff0 bytes.
+ *
+ * (Found in Banjo-Tooie, Zelda, Perfect Dark, ...)
+ **/
+void cicx105_ucode()
+{
+    // memcpy is okay to use because access constraints are met (alignment, size)
+    unsigned int i;
+    unsigned char * dst = rsp.RDRAM + 0x2fb1f0;
+    unsigned char * src = rsp.IMEM + 0x120;
+
+    /* dma_read(0x1120, 0x1e8, 0x1e8) */
+    memcpy(rsp.IMEM + 0x120, rsp.RDRAM + 0x1e8, 0x1f0);
+
+    /* dma_write(0x1120, 0x2fb1f0, 0xfe817000) */
+    for (i = 0; i < 24; ++i)
+    {
+        memcpy(dst, src, 8);
+        dst += 0xff0;
+        src += 0x8;
+    }
+}
+
diff --git a/source/mupen64plus-rsp-hle/src/cicx105.h b/source/mupen64plus-rsp-hle/src/cicx105.h
new file mode 100644
index 0000000..fbfcfad
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/cicx105.h
@@ -0,0 +1,28 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Mupen64plus-rsp-hle - cicx105.h *
+ * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
+ * Copyright (C) 2002 Hacktarux *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details.
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef CICX105_H +#define CICX105_H + +void cicx105_ucode(); + +#endif + diff --git a/source/mupen64plus-rsp-hle/src/hle.h b/source/mupen64plus-rsp-hle/src/hle.h new file mode 100644 index 0000000..2aea0a4 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/hle.h @@ -0,0 +1,89 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - hle.h * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#ifndef HLE_H
+#define HLE_H
+
+#define M64P_PLUGIN_PROTOTYPES 1
+#include "m64p_plugin.h"
+
+#define RSP_HLE_VERSION 0x020000
+#define RSP_PLUGIN_API_VERSION 0x020000
+
+/* Index XOR masks; all of them are zero when M64P_BIG_ENDIAN is defined.
+ * S8 is XORed into RDRAM byte addresses (see the rdram_* helpers in jpeg.c).
+ * S and S16 are presumably the matching masks for other access sizes
+ * -- TODO confirm against their users. */
+#ifdef M64P_BIG_ENDIAN
+#define S 0
+#define S16 0
+#define S8 0
+#else
+#define S 1
+#define S16 2
+#define S8 3
+#endif
+
+// types
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+typedef signed char s8;
+typedef signed short s16;
+typedef signed int s32;
+typedef signed long long s64;
+
+extern RSP_INFO rsp;
+
+/* task descriptor as read from DMEM by get_task(): pairs of (location, size) fields */
+typedef struct
+{
+    unsigned int type;
+    unsigned int flags;
+
+    unsigned int ucode_boot;
+    unsigned int ucode_boot_size;
+
+    unsigned int ucode;
+    unsigned int ucode_size;
+
+    unsigned int ucode_data;
+    unsigned int ucode_data_size;
+
+    unsigned int dram_stack;
+    unsigned int dram_stack_size;
+
+    unsigned int output_buff;
+    unsigned int output_buff_size;
+
+    unsigned int data_ptr;
+    unsigned int data_size;
+
+    unsigned int yield_data_ptr;
+    unsigned int yield_data_size;
+} OSTask_t;
+
+/**
+ * Return a read-only view of the task descriptor stored at DMEM offset 0xfc0.
+ * (A "const" qualifier on the returned pointer value itself has no effect,
+ * so only the pointed-to type is qualified.)
+ **/
+static inline const OSTask_t * get_task()
+{
+    return (const OSTask_t*)(rsp.DMEM + 0xfc0);
+}
+
+/* printf-style logging; level is one of the M64MSG_* values */
+void DebugMessage(int level, const char *message, ...);
+
+#endif
+
diff --git a/source/mupen64plus-rsp-hle/src/jpeg.c b/source/mupen64plus-rsp-hle/src/jpeg.c
new file mode 100755
index 0000000..28fcc8b
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/jpeg.c
@@ -0,0 +1,683 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Mupen64plus-rsp-hle - jpeg.c *
+ * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
+ * Copyright (C) 2012 Bobby Smiles *
+ * Copyright (C) 2009 Richard Goedeken *
+ * Copyright (C) 2002 Hacktarux *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation;
either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define M64P_PLUGIN_PROTOTYPES 1
+#include "m64p_types.h"
+#include "m64p_plugin.h"
+#include "hle.h"
+
+/* number of coefficients in one 8x8 subblock */
+#define SUBBLOCK_SIZE 64
+
+/* callback writing one line of pixels to RDRAM at address, from luma samples y and chroma samples u */
+typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
+/* callback decoding, in place, the subblock_count subblocks of a macroblock with the given quantization tables */
+typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
+
+/* rdram operations */
+// FIXME: these functions deserve their own module
+static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
+static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
+static uint32_t rdram_read_u32(uint32_t address);
+static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
+
+/* standard jpeg ucode decoder */
+static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line);
+
+/* helper functions */
+static uint8_t clamp_u8(int16_t x);
+static int16_t clamp_s12(int16_t x);
+static int16_t clamp_s16(int32_t x);
+static uint16_t clamp_RGBA_component(int16_t x);
+
+/* pixel conversion & formatting */
+static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
+static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);
+
+/* tile
line emitters */ +static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address); +static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address); + +/* macroblocks operations */ +static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); +static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); +static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); +static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); +static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); + +/* subblocks operations */ +static void TransposeSubBlock(int16_t *dst, const int16_t *src); +static void ZigZagSubBlock(int16_t *dst, const int16_t *src); +static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table); +static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); +static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); +static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); +static void InverseDCT1D(const float * const x, float *dst, unsigned int stride); +static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); +static void RescaleYSubBlock(int16_t *dst, const int16_t *src); +static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); + +/* transposed dequantization table */ +static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = +{ + 16, 12, 14, 14, 18, 24, 49, 72, + 11, 12, 13, 17, 22, 35, 64, 92, + 10, 14, 16, 22, 37, 55, 78, 95, + 16, 19, 24, 29, 56, 64, 87, 98, + 24, 26, 40, 51, 68, 81, 103, 112, + 40, 58, 57, 87, 109, 104, 121, 100, + 51, 60, 69, 80, 103, 113, 120, 103, + 61, 55, 56, 62, 77, 92, 101, 99 +}; + +/* zig-zag 
indices */ +static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = +{ + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +}; + +/* transposition indices */ +static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = +{ + 0, 8, 16, 24, 32, 40, 48, 56, + 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, + 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, + 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, + 7, 15, 23, 31, 39, 47, 55, 63 +}; + + + +/* IDCT related constants + * Cn = alpha * cos(n * PI / 16) (alpha is chosen such as C4 = 1) */ +static const float IDCT_C3 = 1.175875602f; +static const float IDCT_C6 = 0.541196100f; +static const float IDCT_K[10] = +{ + 0.765366865f, /* C2-C6 */ + -1.847759065f, /* -C2-C6 */ + -0.390180644f, /* C5-C3 */ + -1.961570561f, /* -C5-C3 */ + 1.501321110f, /* C1+C3-C5-C7 */ + 2.053119869f, /* C1+C3-C5+C7 */ + 3.072711027f, /* C1+C3+C5-C7 */ + 0.298631336f, /* -C1+C3+C5-C7 */ + -0.899976223f, /* C7-C3 */ + -2.562915448f /* -C1-C3 */ +}; + + +/* global functions */ + +/*************************************************************************** + * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. + **************************************************************************/ +void jpeg_decode_PS0() +{ + jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine); +} + +/*************************************************************************** + * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and + * Pokemon Stadium 2. 
+ **************************************************************************/ +void jpeg_decode_PS() +{ + jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine); +} + +/*************************************************************************** + * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. + **************************************************************************/ +void jpeg_decode_OB() +{ + int16_t qtable[SUBBLOCK_SIZE]; + unsigned int mb; + + int32_t y_dc = 0; + int32_t u_dc = 0; + int32_t v_dc = 0; + + const OSTask_t * const task = get_task(); + + uint32_t address = task->data_ptr; + const unsigned int macroblock_count = task->data_size; + const int qscale = task->yield_data_size; + + DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", + address, + macroblock_count, + qscale); + + if (qscale != 0) + { + if (qscale > 0) + { + ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); + } + else + { + RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); + } + } + + for (mb = 0; mb < macroblock_count; ++mb) + { + int16_t macroblock[6*SUBBLOCK_SIZE]; + + rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE); + DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? 
qtable : NULL); + EmitTilesMode2(EmitYUVTileLine, macroblock, address); + + address += (2*6*SUBBLOCK_SIZE); + } +} + + +/* local functions */ +static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line) +{ + int16_t qtables[3][SUBBLOCK_SIZE]; + unsigned int mb; + uint32_t address; + uint32_t macroblock_count; + uint32_t mode; + uint32_t qtableY_ptr; + uint32_t qtableU_ptr; + uint32_t qtableV_ptr; + unsigned int subblock_count; + unsigned int macroblock_size; + int16_t *macroblock; + const OSTask_t * const task = get_task(); + + if (task->flags & 0x1) + { + DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); + return; + } + + address = rdram_read_u32(task->data_ptr); + macroblock_count = rdram_read_u32(task->data_ptr + 4); + mode = rdram_read_u32(task->data_ptr + 8); + qtableY_ptr = rdram_read_u32(task->data_ptr + 12); + qtableU_ptr = rdram_read_u32(task->data_ptr + 16); + qtableV_ptr = rdram_read_u32(task->data_ptr + 20); + + DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", + version, + address, + macroblock_count, + mode, + qtableY_ptr, + qtableU_ptr, + qtableV_ptr); + + if (mode != 0 && mode != 2) + { + DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); + return; + } + + subblock_count = mode + 4; + macroblock_size = 2*subblock_count*SUBBLOCK_SIZE; + + rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); + rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); + rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); + + macroblock = malloc(sizeof(*macroblock) * macroblock_size); + if (!macroblock) + { + DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version); + return; + } + + for (mb = 0; mb < macroblock_count; ++mb) + { + rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 
1); + decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); + + if (mode == 0) + { + EmitTilesMode0(emit_line, macroblock, address); + } + else + { + EmitTilesMode2(emit_line, macroblock, address); + } + + address += macroblock_size; + } + free(macroblock); +} + +static uint8_t clamp_u8(int16_t x) +{ + return (x & (0xff00)) ? ((-x) >> 15) & 0xff : x; +} + +static int16_t clamp_s12(int16_t x) +{ + if (x < -0x800) { x = -0x800; } else if (x > 0x7f0) { x = 0x7f0; } + return x; +} + +static int16_t clamp_s16(int32_t x) +{ + if (x > 32767) { x = 32767; } else if (x < -32768) { x = -32768; } + return x; +} + +static uint16_t clamp_RGBA_component(int16_t x) +{ + if (x > 0xff0) { x = 0xff0; } else if (x < 0) { x = 0; } + return (x & 0xf80); +} + +static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v) +{ + return (uint32_t)clamp_u8(u) << 24 + | (uint32_t)clamp_u8(y1) << 16 + | (uint32_t)clamp_u8(v) << 8 + | (uint32_t)clamp_u8(y2); +} + +static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v) +{ + const float fY = (float)y + 2048.0f; + const float fU = (float)u; + const float fV = (float)v; + + const uint16_t r = clamp_RGBA_component((int16_t)(fY + 1.4025*fV)); + const uint16_t g = clamp_RGBA_component((int16_t)(fY - 0.3443*fU - 0.7144*fV)); + const uint16_t b = clamp_RGBA_component((int16_t)(fY + 1.7729*fU )); + + return (r << 4) | (g >> 1) | (b >> 6) | 1; +} + +static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address) +{ + uint32_t uyvy[8]; + + const int16_t * const v = u + SUBBLOCK_SIZE; + const int16_t * const y2 = y + SUBBLOCK_SIZE; + + uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); + uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); + uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); + uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); + uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); + uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); + uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); + uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); + + 
rdram_write_many_u32(uyvy, address, 8);
+}
+
+/**
+ * Convert one line of 16 pixels to 16-bit RGBA and write it to RDRAM.
+ *
+ * Pixels 0-7 take their luma from y[0..7], pixels 8-15 from the subblock that
+ * follows (y + SUBBLOCK_SIZE). Each chroma sample u[k] / v[k] is shared by two
+ * horizontally adjacent pixels; v is located SUBBLOCK_SIZE entries after u.
+ *
+ * y        luma samples of the line
+ * u        chroma samples of the line
+ * address  RDRAM byte address receiving the 16 pixels
+ **/
+static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
+{
+    uint16_t rgba[16];
+
+    const int16_t * const v = u + SUBBLOCK_SIZE;
+    const int16_t * const y2 = y + SUBBLOCK_SIZE;
+
+    rgba[0] = GetRGBA(y[0], u[0], v[0]);
+    rgba[1] = GetRGBA(y[1], u[0], v[0]);
+    rgba[2] = GetRGBA(y[2], u[1], v[1]);
+    rgba[3] = GetRGBA(y[3], u[1], v[1]);
+    rgba[4] = GetRGBA(y[4], u[2], v[2]);
+    rgba[5] = GetRGBA(y[5], u[2], v[2]);
+    rgba[6] = GetRGBA(y[6], u[3], v[3]);
+    rgba[7] = GetRGBA(y[7], u[3], v[3]);
+    rgba[8] = GetRGBA(y2[0], u[4], v[4]);
+    rgba[9] = GetRGBA(y2[1], u[4], v[4]);
+    rgba[10] = GetRGBA(y2[2], u[5], v[5]);
+    rgba[11] = GetRGBA(y2[3], u[5], v[5]);
+    rgba[12] = GetRGBA(y2[4], u[6], v[6]);
+    rgba[13] = GetRGBA(y2[5], u[6], v[6]);
+    rgba[14] = GetRGBA(y2[6], u[7], v[7]);
+    rgba[15] = GetRGBA(y2[7], u[7], v[7]);
+
+    rdram_write_many_u16(rgba, address, 16);
+}
+
+/**
+ * Emit the 8 lines of a macroblock whose chroma starts after 2 luma subblocks.
+ * Every line advances the luma and chroma offsets by one 8-sample row and the
+ * output address by 32 bytes.
+ **/
+static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
+{
+    unsigned int i;
+
+    unsigned int y_offset = 0;
+    unsigned int u_offset = 2*SUBBLOCK_SIZE;
+
+    for (i = 0; i < 8; ++i)
+    {
+        emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
+
+        y_offset += 8;
+        u_offset += 8;
+        address += 32;
+    }
+}
+
+/**
+ * Emit the 16 lines of a macroblock whose chroma starts after 4 luma subblocks.
+ * Each iteration emits two consecutive luma rows that share one chroma row.
+ **/
+static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
+{
+    unsigned int i;
+
+    unsigned int y_offset = 0;
+    unsigned int u_offset = 4*SUBBLOCK_SIZE;
+
+    for (i = 0; i < 8; ++i)
+    {
+        emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
+        emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
+
+        y_offset += (i == 3) ?
SUBBLOCK_SIZE+16 : 16; + u_offset += 8; + address += 64; + } +} + +static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) +{ + int sb; + + for (sb = 0; sb < 6; ++sb) + { + int16_t tmp_sb[SUBBLOCK_SIZE]; + + /* update DC */ + int32_t dc = (int32_t)macroblock[0]; + switch(sb) + { + case 0: case 1: case 2: case 3: + *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break; + case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break; + case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break; + } + + ZigZagSubBlock(tmp_sb, macroblock); + if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); } + TransposeSubBlock(macroblock, tmp_sb); + InverseDCTSubBlock(macroblock, macroblock); + + macroblock += SUBBLOCK_SIZE; + } +} + +static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) +{ + unsigned int sb; + unsigned int q = 0; + + for (sb = 0; sb < subblock_count; ++sb) + { + int16_t tmp_sb[SUBBLOCK_SIZE]; + const int isChromaSubBlock = (subblock_count - sb <= 2); + + if (isChromaSubBlock) { ++q; } + + MultSubBlocks(macroblock, macroblock, qtables[q], 4); + ZigZagSubBlock(tmp_sb, macroblock); + InverseDCTSubBlock(macroblock, tmp_sb); + + macroblock += SUBBLOCK_SIZE; + } + +} + +static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) +{ + unsigned int sb; + unsigned int q = 0; + + for (sb = 0; sb < subblock_count; ++sb) + { + int16_t tmp_sb[SUBBLOCK_SIZE]; + const int isChromaSubBlock = (subblock_count - sb <= 2); + + if (isChromaSubBlock) { ++q; } + + MultSubBlocks(macroblock, macroblock, qtables[q], 4); + ZigZagSubBlock(tmp_sb, macroblock); + InverseDCTSubBlock(macroblock, tmp_sb); + + if (isChromaSubBlock) + { + RescaleUVSubBlock(macroblock, macroblock); + } + else + { + RescaleYSubBlock(macroblock, macroblock); + } + + macroblock += SUBBLOCK_SIZE; + } +} + +static void 
TransposeSubBlock(int16_t *dst, const int16_t *src)
+{
+    ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
+}
+
+/* dst[i] = src[ZIGZAG_TABLE[i]]; dst and src must not overlap */
+static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
+{
+    ReorderSubBlock(dst, src, ZIGZAG_TABLE);
+}
+
+/**
+ * Permute a subblock: dst[i] = src[table[i]] for all SUBBLOCK_SIZE entries.
+ *
+ * dst    destination subblock
+ * src    source subblock, must not overlap dst
+ * table  SUBBLOCK_SIZE source indices, one per destination entry
+ **/
+static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
+{
+    unsigned int i;
+
+    /* Source and destination subblocks cannot overlap: they must be at least
+     * SUBBLOCK_SIZE entries apart (adjacent subblocks are fine). The distance
+     * is compared at full pointer-difference width; passing it through abs()
+     * would truncate it to int on 64-bit hosts. */
+    assert((dst - src) >= SUBBLOCK_SIZE || (src - dst) >= SUBBLOCK_SIZE);
+
+    for (i = 0; i < SUBBLOCK_SIZE; ++i)
+    {
+        dst[i] = src[table[i]];
+    }
+}
+
+/* dst[i] = clamp_s16(src1[i] * src2[i]) << shift; dst may alias src1 or src2.
+ * NOTE(review): the clamp is applied before the shift, so the shifted value
+ * can still leave the 16-bit range -- presumably intentional, TODO confirm. */
+static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
+{
+    unsigned int i;
+
+    for (i = 0; i < SUBBLOCK_SIZE; ++i)
+    {
+        int32_t v = src1[i] * src2[i];
+        dst[i] = clamp_s16(v) << shift;
+    }
+}
+
+/* dst[i] = clamp_s16(src[i] * scale) */
+static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
+{
+    unsigned int i;
+
+    for (i = 0; i < SUBBLOCK_SIZE; ++i)
+    {
+        int32_t v = src[i] * scale;
+        dst[i] = clamp_s16(v);
+    }
+}
+
+/* dst[i] = src[i] >> shift */
+static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
+{
+    unsigned int i;
+
+    for (i = 0; i < SUBBLOCK_SIZE; ++i)
+    {
+        dst[i] = src[i] >> shift;
+    }
+}
+
+/***************************************************************************
+ * Fast 2D IDCT using separable formulation and normalization
+ * Computations use single precision floats
+ * Implementation based on Wikipedia :
+ * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
+ **************************************************************************/
+static void InverseDCT1D(const float * const x, float *dst, unsigned int stride)
+{
+    float e[4];
+    float f[4];
+    float x26, x1357, x15, x37, x17, x35;
+
+    x15 = IDCT_K[2] * (x[1] + x[5]);
+    x37 = IDCT_K[3] * (x[3] + x[7]);
+    x17 = IDCT_K[8] * (x[1] + x[7]);
+    x35 = IDCT_K[9] * (x[3] + x[5]);
+    x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]);
+    x26 = IDCT_C6 * (x[2] + x[6]);
+
+    f[0] = x[0] + x[4];
+    f[1] = x[0] - x[4];
+    f[2] = x26 + IDCT_K[0]*x[2];
+    f[3] = x26 +
IDCT_K[1]*x[6]; + + e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17; + e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35; + e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35; + e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17; + + *dst = f[0] + f[2] + e[0]; dst += stride; + *dst = f[1] + f[3] + e[1]; dst += stride; + *dst = f[1] - f[3] + e[2]; dst += stride; + *dst = f[0] - f[2] + e[3]; dst += stride; + *dst = f[0] - f[2] - e[3]; dst += stride; + *dst = f[1] - f[3] - e[2]; dst += stride; + *dst = f[1] + f[3] - e[1]; dst += stride; + *dst = f[0] + f[2] - e[0]; dst += stride; +} + +static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) +{ + float x[8]; + float block[SUBBLOCK_SIZE]; + unsigned int i, j; + + /* idct 1d on rows (+transposition) */ + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + x[j] = (float)src[i*8+j]; + } + + InverseDCT1D(x, &block[i], 8); + } + + /* idct 1d on columns (thanks to previous transposition) */ + for (i = 0; i < 8; ++i) + { + InverseDCT1D(&block[i*8], x, 1); + + /* C4 = 1 normalization implies a division by 8 */ + for (j = 0; j < 8; ++j) + { + dst[i+j*8] = (int16_t)x[j] >> 3; + } + } +} + +static void RescaleYSubBlock(int16_t *dst, const int16_t *src) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + { + dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; + } +} + +static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + { + dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; + } +} + + + +/* FIXME: assume presence of expansion pack */ +#define MEMMASK 0x7fffff + +static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) +{ + while (count != 0) + { + uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; + s <<= 8; + s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; + + *(dst++) = s; + + --count; + } +} + +static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) +{ + while (count != 
0) + { + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); + + --count; + } +} + +static uint32_t rdram_read_u32(uint32_t address) +{ + uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; + r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; + r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; + r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; + + return r; +} + +static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) +{ + while (count != 0) + { + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); + rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); + + --count; + } +} + diff --git a/source/mupen64plus-rsp-hle/src/jpeg.h b/source/mupen64plus-rsp-hle/src/jpeg.h new file mode 100644 index 0000000..b7deaf6 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/jpeg.h @@ -0,0 +1,30 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - jpeg.h * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef JPEG_H +#define JPEG_H + +void jpeg_decode_PS0(); +void jpeg_decode_PS(); +void jpeg_decode_OB(); + +#endif + diff --git a/source/mupen64plus-rsp-hle/src/main.c b/source/mupen64plus-rsp-hle/src/main.c new file mode 100644 index 0000000..ff6525a --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/main.c @@ -0,0 +1,476 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - main.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2012 Bobby Smiles * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include + +#define M64P_PLUGIN_PROTOTYPES 1 +#include "m64p_types.h" +#include "m64p_common.h" +#include "m64p_plugin.h" +#include "hle.h" +#include "alist.h" +#include "cicx105.h" +#include "jpeg.h" + +#define min(a,b) (((a) < (b)) ? (a) : (b)) + +/* some rsp status flags */ +#define RSP_STATUS_HALT 0x1 +#define RSP_STATUS_BROKE 0x2 +#define RSP_STATUS_INTR_ON_BREAK 0x40 +#define RSP_STATUS_TASKDONE 0x200 + +/* some rdp status flags */ +#define DP_STATUS_FREEZE 0x2 + +/* some mips interface interrupt flags */ +#define MI_INTR_SP 0x1 + + +/* helper functions prototypes */ +static unsigned int sum_bytes(const unsigned char *bytes, unsigned int size); +static void dump_binary(const char * const filename, const unsigned char * const bytes, + unsigned int size); +static void dump_task(const char * const filename, const OSTask_t * const task); + +static void handle_unknown_task(unsigned int sum); +static void handle_unknown_non_task(unsigned int sum); + +/* global variables */ +RSP_INFO rsp; + +/* local variables */ +static const int FORWARD_AUDIO = 0, FORWARD_GFX = 1; +static void (*l_DebugCallback)(void *, int, const char *) = NULL; +static void *l_DebugCallContext = NULL; +static int l_PluginInit = 0; + +/* local functions */ + + +/** + * Try to figure if the RSP was launched using osSpTask* functions + * and not run directly (in which case DMEM[0xfc0-0xfff] is meaningless). + * + * Previously, the ucode_size field was used to determine this, + * but it is not robust enough (hi Pokemon Stadium !) because games could write anything + * in this field : most ucode_boot discard the value and just use 0xf7f anyway. + * + * Using ucode_boot_size should be more robust in this regard. 
+ **/ +static int is_task() +{ + return (get_task()->ucode_boot_size <= 0x1000); +} + +static void rsp_break(unsigned int setbits) +{ + *rsp.SP_STATUS_REG |= setbits | RSP_STATUS_BROKE | RSP_STATUS_HALT; + + if ((*rsp.SP_STATUS_REG & RSP_STATUS_INTR_ON_BREAK)) + { + *rsp.MI_INTR_REG |= MI_INTR_SP; + rsp.CheckInterrupts(); + } +} + +static void forward_gfx_task() +{ + if (rsp.ProcessDlistList != NULL) + { + rsp.ProcessDlistList(); + *rsp.DPC_STATUS_REG &= ~DP_STATUS_FREEZE; + } +} + +static void forward_audio_task() +{ + if (rsp.ProcessAlistList != NULL) + { + rsp.ProcessAlistList(); + } +} + +static void show_cfb() +{ + if (rsp.ShowCFB != NULL) + { + rsp.ShowCFB(); + } +} + +static int try_fast_audio_dispatching() +{ + /* identify audio ucode by using the content of ucode_data */ + const OSTask_t * const task = get_task(); + const unsigned char * const udata_ptr = rsp.RDRAM + task->ucode_data; + + if (*(unsigned int*)(udata_ptr + 0) == 0x00000001) + { + if (*(unsigned int*)(udata_ptr + 0x30) == 0xf0000f00) + { + /** + * Many games including: + * Super Mario 64, Diddy Kong Racing, BlastCorp, GoldenEye, ... (most common) + **/ + alist_process_ABI1(); return 1; + } + else + { + /** + * Mario Kart / Wave Race, + * LylatWars, + * FZeroX, + * Yoshi Story, + * 1080 Snowboarding, + * Zelda Ocarina of Time, + * Zelda Majoras Mask / Pokemon Stadium 2, + * Animal Crossing + * + * FIXME: in fact, all these games do not share the same ABI. 
+ * That's the reason of the workaround in ucode2.cpp with isZeldaABI and isMKABI + **/ + alist_process_ABI2(); return 1; + } + } + else + { + if (*(unsigned int*)(udata_ptr + 0x10) == 0x00000001) + { + /** + * Musyx ucode found in following games: + * RogueSquadron, ResidentEvil2, SnowCrossPolaris, TheWorldIsNotEnough, + * RugratsInParis, NBAShowTime, HydroThunder, Tarzan, + * GauntletLegend, Rush2049, IndianaJones, BattleForNaboo + * TODO: implement ucode + **/ + DebugMessage(M64MSG_WARNING, "MusyX ucode not implemented."); + /* return 1; */ + } + else + { + /** + * Many games including: + * Pokemon Stadium, Banjo Kazooie, Donkey Kong, Banjo Tooie, Jet Force Gemini, + * Mickey SpeedWay USA, Perfect Dark, Conker Bad Fur Day ... + **/ + alist_process_ABI3(); return 1; + } + } + + return 0; +} + +static int try_fast_task_dispatching() +{ + /* identify task ucode by its type */ + const OSTask_t * const task = get_task(); + + switch (task->type) + { + case 1: if (FORWARD_GFX) { forward_gfx_task(); return 1; } break; + + case 2: + if (FORWARD_AUDIO) { forward_audio_task(); return 1; } + else if (try_fast_audio_dispatching()) { return 1; } + break; + + case 7: show_cfb(); return 1; + } + + return 0; +} + +static void normal_task_dispatching() +{ + const OSTask_t * const task = get_task(); + const unsigned int sum = + sum_bytes(rsp.RDRAM + task->ucode, min(task->ucode_size, 0xf80) >> 1); + + switch (sum) + { + /* StoreVe12: found in Zelda Ocarina of Time [misleading task->type == 4] */ + case 0x278: /* Nothing to emulate */ return; + + /* GFX: Twintris [misleading task->type == 0] */ + case 0x212ee: + if (FORWARD_GFX) { forward_gfx_task(); return; } + break; + + /* JPEG: found in Pokemon Stadium J */ + case 0x2c85a: jpeg_decode_PS0(); return; + + /* JPEG: found in Zelda Ocarina of Time, Pokemon Stadium 1, Pokemon Stadium 2 */ + case 0x2caa6: jpeg_decode_PS(); return; + + /* JPEG: found in Ogre Battle, Bottom of the 9th */ + case 0x130de: jpeg_decode_OB(); return; + } + + 
handle_unknown_task(sum); +} + +static void non_task_dispatching() +{ + const unsigned int sum = sum_bytes(rsp.IMEM, 0x1000 >> 1); + + switch(sum) + { + /* CIC x105 ucode (used during boot of CIC x105 games) */ + case 0x9e2: /* CIC 6105 */ + case 0x9f2: /* CIC 7105 */ + cicx105_ucode(); return; + } + + handle_unknown_non_task(sum); +} + +static void handle_unknown_task(unsigned int sum) +{ + char filename[256]; + const OSTask_t * const task = get_task(); + + DebugMessage(M64MSG_WARNING, "unknown OSTask: sum %x PC:%x", sum, *rsp.SP_PC_REG); + + sprintf(&filename[0], "task_%x.log", sum); + dump_task(filename, task); + + // dump ucode_boot + sprintf(&filename[0], "ucode_boot_%x.bin", sum); + dump_binary(filename, rsp.RDRAM + (task->ucode_boot & 0x7fffff), task->ucode_boot_size); + + // dump ucode + if (task->ucode != 0) + { + sprintf(&filename[0], "ucode_%x.bin", sum); + dump_binary(filename, rsp.RDRAM + (task->ucode & 0x7fffff), 0xf80); + } + + // dump ucode_data + if (task->ucode_data != 0) + { + sprintf(&filename[0], "ucode_data_%x.bin", sum); + dump_binary(filename, rsp.RDRAM + (task->ucode_data & 0x7fffff), task->ucode_data_size); + } + + // dump data + if (task->data_ptr != 0) + { + sprintf(&filename[0], "data_%x.bin", sum); + dump_binary(filename, rsp.RDRAM + (task->data_ptr & 0x7fffff), task->data_size); + } +} + +static void handle_unknown_non_task(unsigned int sum) +{ + char filename[256]; + + DebugMessage(M64MSG_WARNING, "unknown RSP code: sum: %x PC:%x", sum, *rsp.SP_PC_REG); + + // dump IMEM & DMEM for further analysis + sprintf(&filename[0], "imem_%x.bin", sum); + dump_binary(filename, rsp.IMEM, 0x1000); + + sprintf(&filename[0], "dmem_%x.bin", sum); + dump_binary(filename, rsp.DMEM, 0x1000); +} + + +/* Global functions */ +void DebugMessage(int level, const char *message, ...) 
+{
+    char msgbuf[1024];
+    va_list args;
+
+    /* no listener registered: nothing to format or deliver */
+    if (l_DebugCallback == NULL)
+        return;
+
+    /* Bounded formatting: an over-long message is truncated instead of
+     * overrunning msgbuf. The explicit terminator is a safeguard for
+     * runtimes whose vsnprintf does not terminate on truncation. */
+    va_start(args, message);
+    vsnprintf(msgbuf, sizeof(msgbuf), message, args);
+    va_end(args);
+    msgbuf[sizeof(msgbuf) - 1] = '\0';
+
+    (*l_DebugCallback)(l_DebugCallContext, level, msgbuf);
+}
+
+/* DLL-exported functions */
+
+/* Record the debug callback and its context, then mark the plugin as
+ * initialized. Returns M64ERR_ALREADY_INIT if the plugin is already
+ * initialized, M64ERR_SUCCESS otherwise. CoreLibHandle is not used. */
+EXPORT m64p_error CALL PluginStartup(m64p_dynlib_handle CoreLibHandle, void *Context,
+                                   void (*DebugCallback)(void *, int, const char *))
+{
+    if (l_PluginInit)
+        return M64ERR_ALREADY_INIT;
+
+    /* first thing is to set the callback function for debug info */
+    l_DebugCallback = DebugCallback;
+    l_DebugCallContext = Context;
+
+    /* this plugin doesn't use any Core library functions (ex for Configuration), so no need to keep the CoreLibHandle */
+
+    l_PluginInit = 1;
+    return M64ERR_SUCCESS;
+}
+
+/* Forget the debug callback and mark the plugin as not initialized.
+ * Returns M64ERR_NOT_INIT if PluginStartup has not been called. */
+EXPORT m64p_error CALL PluginShutdown(void)
+{
+    if (!l_PluginInit)
+        return M64ERR_NOT_INIT;
+
+    /* reset some local variable */
+    l_DebugCallback = NULL;
+    l_DebugCallContext = NULL;
+
+    l_PluginInit = 0;
+    return M64ERR_SUCCESS;
+}
+
+/* Report plugin type, version, API version, name and capabilities.
+ * Every output pointer is optional: NULL pointers are skipped.
+ * Always returns M64ERR_SUCCESS. */
+EXPORT m64p_error CALL PluginGetVersion(m64p_plugin_type *PluginType, int *PluginVersion, int *APIVersion, const char **PluginNamePtr, int *Capabilities)
+{
+    /* set version info */
+    if (PluginType != NULL)
+        *PluginType = M64PLUGIN_RSP;
+
+    if (PluginVersion != NULL)
+        *PluginVersion = RSP_HLE_VERSION;
+
+    if (APIVersion != NULL)
+        *APIVersion = RSP_PLUGIN_API_VERSION;
+
+    if (PluginNamePtr != NULL)
+        *PluginNamePtr = "Hacktarux/Azimer High-Level Emulation RSP Plugin";
+
+    if (Capabilities != NULL)
+    {
+        *Capabilities = 0;
+    }
+
+    return M64ERR_SUCCESS;
+}
+
+/* Dispatch the pending RSP work through the HLE handlers, then signal
+ * break/halt in the status register. A recognised OSTask additionally
+ * sets RSP_STATUS_TASKDONE. Returns Cycles unchanged. */
+EXPORT unsigned int CALL DoRspCycles(unsigned int Cycles)
+{
+    if (is_task())
+    {
+        if (!try_fast_task_dispatching()) { normal_task_dispatching(); }
+        rsp_break(RSP_STATUS_TASKDONE);
+    }
+    else
+    {
+        non_task_dispatching();
+        rsp_break(0);
+    }
+
+    return Cycles;
+}
+
+/* Copy the RSP_INFO structure into the global `rsp`. CycleCount is not used. */
+EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, unsigned int *CycleCount)
+{
+    rsp = Rsp_Info;
+}
+
+EXPORT void CALL RomClosed(void)
+{
+    memset(rsp.DMEM, 0,
0x1000);
+    memset(rsp.IMEM, 0, 0x1000);
+
+    init_ucode2();
+}
+
+
+/* local helper functions */
+
+/* Return the arithmetic sum of the `size` bytes starting at `bytes`.
+ * The dispatchers switch on this value to recognise known ucodes. */
+static unsigned int sum_bytes(const unsigned char *bytes, unsigned int size)
+{
+    unsigned int sum = 0;
+    const unsigned char * const bytes_end = bytes + size;
+
+    while (bytes != bytes_end)
+        sum += *bytes++;
+
+    return sum;
+}
+
+
+/* Write `size` bytes from `bytes` to `filename`, unless a file of that
+ * name can already be opened for reading (an existing dump is never
+ * overwritten). Failures are reported through DebugMessage. */
+static void dump_binary(const char * const filename, const unsigned char * const bytes,
+                        unsigned int size)
+{
+    FILE *f;
+
+    // if file already exists, do nothing
+    f = fopen(filename, "r");
+    if (f == NULL)
+    {
+        // else we write bytes to the file
+        f= fopen(filename, "wb");
+        if (f != NULL) {
+            if (fwrite(bytes, 1, size, f) != size)
+            {
+                DebugMessage(M64MSG_ERROR, "Writing error on %s", filename);
+            }
+            fclose(f);
+        }
+        else
+        {
+            DebugMessage(M64MSG_ERROR, "Couldn't open %s for writing !", filename);
+        }
+    }
+    else
+    {
+        fclose(f);
+    }
+}
+
+/* Write a textual description of the OSTask fields to `filename`,
+ * unless a file of that name can already be opened for reading.
+ * If the file cannot be created the failure is reported through
+ * DebugMessage and nothing is written. */
+static void dump_task(const char * const filename, const OSTask_t * const task)
+{
+    FILE *f;
+
+    /* if file already exists, do nothing */
+    f = fopen(filename, "r");
+    if (f != NULL)
+    {
+        fclose(f);
+        return;
+    }
+
+    /* fopen may fail (read-only directory, disk full, ...): never hand
+     * a NULL stream to fprintf/fclose */
+    f = fopen(filename, "w");
+    if (f == NULL)
+    {
+        DebugMessage(M64MSG_ERROR, "Couldn't open %s for writing !", filename);
+        return;
+    }
+
+    fprintf(f,
+        "type = %d\n"
+        "flags = %d\n"
+        "ucode_boot = %#08x size = %#x\n"
+        "ucode = %#08x size = %#x\n"
+        "ucode_data = %#08x size = %#x\n"
+        "dram_stack = %#08x size = %#x\n"
+        "output_buff = %#08x *size = %#x\n"
+        "data = %#08x size = %#x\n"
+        "yield_data = %#08x size = %#x\n",
+        task->type, task->flags,
+        task->ucode_boot, task->ucode_boot_size,
+        task->ucode, task->ucode_size,
+        task->ucode_data, task->ucode_data_size,
+        task->dram_stack, task->dram_stack_size,
+        task->output_buff, task->output_buff_size,
+        task->data_ptr, task->data_size,
+        task->yield_data_ptr, task->yield_data_size);
+    fclose(f);
+}
+
diff --git a/source/mupen64plus-rsp-hle/src/rsp_api_export.ver b/source/mupen64plus-rsp-hle/src/rsp_api_export.ver
new file mode 100644
index 0000000..27e8138
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/rsp_api_export.ver
@@ -0,0 +1,8 @@
+{ global:
+PluginStartup;
+PluginShutdown; +PluginGetVersion; +DoRspCycles; +InitiateRSP; +RomClosed; +local: *; }; diff --git a/source/mupen64plus-rsp-hle/src/ucode1.cpp b/source/mupen64plus-rsp-hle/src/ucode1.cpp new file mode 100644 index 0000000..eb869cb --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/ucode1.cpp @@ -0,0 +1,951 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - ucode1.cpp * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include + +extern "C" { + #include "hle.h" + #include "alist_internal.h" +} + +//#include "rsp.h" +//#define SAFE_MEMORY +/* +#ifndef SAFE_MEMORY +# define wr8 (src , address); +# define rd8 (dest, address); +# define wr16 (src, address); +# define rd16 (dest, address); +# define wr32 (src, address); +# define rd32 (dest, address); +# define wr64 (src, address); +# define rd64 (dest, address); +# define dmamem (dest, src, size) memcpy (dest, src, size); +# define clrmem (dest, size) memset (dest, 0, size); +#else + void wr8 (u8 src, void *address); + void rd8 (u8 dest, void *address); + void wr16 (u16 src, void *address); + void rd16 (u16 dest, void *address); + void wr32 (u16 src, void *address); + void rd32 (u16 dest, void *address); + void wr64 (u16 src, void *address); + void rd64 (u16 dest, void *address); + void dmamem (void *dest, void *src, int size); + void clrmem (void *dest, int size); +#endif +*/ +/******** DMEM Memory Map for ABI 1 *************** +Address/Range Description +------------- ------------------------------- +0x000..0x2BF UCodeData + 0x000-0x00F Constants - 0000 0001 0002 FFFF 0020 0800 7FFF 4000 + 0x010-0x02F Function Jump Table (16 Functions * 2 bytes each = 32) 0x20 + 0x030-0x03F Constants - F000 0F00 00F0 000F 0001 0010 0100 1000 + 0x040-0x03F Used by the Envelope Mixer (But what for?) + 0x070-0x07F Used by the Envelope Mixer (But what for?) +0x2C0..0x31F +0x320..0x35F Segments +0x360 Audio In Buffer (Location) +0x362 Audio Out Buffer (Location) +0x364 Audio Buffer Size (Location) +0x366 Initial Volume for Left Channel +0x368 Initial Volume for Right Channel +0x36A Auxillary Buffer #1 (Location) +0x36C Auxillary Buffer #2 (Location) +0x36E Auxillary Buffer #3 (Location) +0x370 Loop Value (shared location) +0x370 Target Volume (Left) +0x372 Ramp?? (Left) +0x374 Rate?? (Left) +0x376 Target Volume (Right) +0x378 Ramp?? (Right) +0x37A Rate?? 
(Right) +0x37C Dry?? +0x37E Wet?? +0x380..0x4BF Alist data +0x4C0..0x4FF ADPCM CodeBook +0x500..0x5BF +0x5C0..0xF7F Buffers... +0xF80..0xFFF +***************************************************/ +#ifdef USE_EXPANSION + #define MEMMASK 0x7FFFFF +#else + #define MEMMASK 0x3FFFFF +#endif + +static void SPNOOP (u32 inst1, u32 inst2) { + //MessageBox (NULL, "Unknown Audio Command in ABI 1", "Audio HLE Error", MB_OK); +} + +u32 SEGMENTS[0x10]; // 0x0320 +// T8 = 0x360 +u16 AudioInBuffer; // 0x0000(T8) +u16 AudioOutBuffer; // 0x0002(T8) +u16 AudioCount; // 0x0004(T8) +s16 Vol_Left; // 0x0006(T8) +s16 Vol_Right; // 0x0008(T8) +u16 AudioAuxA; // 0x000A(T8) +u16 AudioAuxC; // 0x000C(T8) +u16 AudioAuxE; // 0x000E(T8) +u32 loopval; // 0x0010(T8) // Value set by A_SETLOOP : Possible conflict with SETVOLUME??? +s16 VolTrg_Left; // 0x0010(T8) +s32 VolRamp_Left; // m_LeftVolTarget +//u16 VolRate_Left; // m_LeftVolRate +s16 VolTrg_Right; // m_RightVol +s32 VolRamp_Right; // m_RightVolTarget +//u16 VolRate_Right; // m_RightVolRate +s16 Env_Dry; // 0x001C(T8) +s16 Env_Wet; // 0x001E(T8) + +u8 BufferSpace[0x10000]; + +short hleMixerWorkArea[256]; +u16 adpcmtable[0x88]; + +extern const u16 ResampleLUT [0x200] = { + 0x0C39, 0x66AD, 0x0D46, 0xFFDF, 0x0B39, 0x6696, 0x0E5F, 0xFFD8, + 0x0A44, 0x6669, 0x0F83, 0xFFD0, 0x095A, 0x6626, 0x10B4, 0xFFC8, + 0x087D, 0x65CD, 0x11F0, 0xFFBF, 0x07AB, 0x655E, 0x1338, 0xFFB6, + 0x06E4, 0x64D9, 0x148C, 0xFFAC, 0x0628, 0x643F, 0x15EB, 0xFFA1, + 0x0577, 0x638F, 0x1756, 0xFF96, 0x04D1, 0x62CB, 0x18CB, 0xFF8A, + 0x0435, 0x61F3, 0x1A4C, 0xFF7E, 0x03A4, 0x6106, 0x1BD7, 0xFF71, + 0x031C, 0x6007, 0x1D6C, 0xFF64, 0x029F, 0x5EF5, 0x1F0B, 0xFF56, + 0x022A, 0x5DD0, 0x20B3, 0xFF48, 0x01BE, 0x5C9A, 0x2264, 0xFF3A, + 0x015B, 0x5B53, 0x241E, 0xFF2C, 0x0101, 0x59FC, 0x25E0, 0xFF1E, + 0x00AE, 0x5896, 0x27A9, 0xFF10, 0x0063, 0x5720, 0x297A, 0xFF02, + 0x001F, 0x559D, 0x2B50, 0xFEF4, 0xFFE2, 0x540D, 0x2D2C, 0xFEE8, + 0xFFAC, 0x5270, 0x2F0D, 0xFEDB, 0xFF7C, 0x50C7, 0x30F3, 
0xFED0, + 0xFF53, 0x4F14, 0x32DC, 0xFEC6, 0xFF2E, 0x4D57, 0x34C8, 0xFEBD, + 0xFF0F, 0x4B91, 0x36B6, 0xFEB6, 0xFEF5, 0x49C2, 0x38A5, 0xFEB0, + 0xFEDF, 0x47ED, 0x3A95, 0xFEAC, 0xFECE, 0x4611, 0x3C85, 0xFEAB, + 0xFEC0, 0x4430, 0x3E74, 0xFEAC, 0xFEB6, 0x424A, 0x4060, 0xFEAF, + 0xFEAF, 0x4060, 0x424A, 0xFEB6, 0xFEAC, 0x3E74, 0x4430, 0xFEC0, + 0xFEAB, 0x3C85, 0x4611, 0xFECE, 0xFEAC, 0x3A95, 0x47ED, 0xFEDF, + 0xFEB0, 0x38A5, 0x49C2, 0xFEF5, 0xFEB6, 0x36B6, 0x4B91, 0xFF0F, + 0xFEBD, 0x34C8, 0x4D57, 0xFF2E, 0xFEC6, 0x32DC, 0x4F14, 0xFF53, + 0xFED0, 0x30F3, 0x50C7, 0xFF7C, 0xFEDB, 0x2F0D, 0x5270, 0xFFAC, + 0xFEE8, 0x2D2C, 0x540D, 0xFFE2, 0xFEF4, 0x2B50, 0x559D, 0x001F, + 0xFF02, 0x297A, 0x5720, 0x0063, 0xFF10, 0x27A9, 0x5896, 0x00AE, + 0xFF1E, 0x25E0, 0x59FC, 0x0101, 0xFF2C, 0x241E, 0x5B53, 0x015B, + 0xFF3A, 0x2264, 0x5C9A, 0x01BE, 0xFF48, 0x20B3, 0x5DD0, 0x022A, + 0xFF56, 0x1F0B, 0x5EF5, 0x029F, 0xFF64, 0x1D6C, 0x6007, 0x031C, + 0xFF71, 0x1BD7, 0x6106, 0x03A4, 0xFF7E, 0x1A4C, 0x61F3, 0x0435, + 0xFF8A, 0x18CB, 0x62CB, 0x04D1, 0xFF96, 0x1756, 0x638F, 0x0577, + 0xFFA1, 0x15EB, 0x643F, 0x0628, 0xFFAC, 0x148C, 0x64D9, 0x06E4, + 0xFFB6, 0x1338, 0x655E, 0x07AB, 0xFFBF, 0x11F0, 0x65CD, 0x087D, + 0xFFC8, 0x10B4, 0x6626, 0x095A, 0xFFD0, 0x0F83, 0x6669, 0x0A44, + 0xFFD8, 0x0E5F, 0x6696, 0x0B39, 0xFFDF, 0x0D46, 0x66AD, 0x0C39 +}; + +static void CLEARBUFF (u32 inst1, u32 inst2) { + u32 addr = (u32)(inst1 & 0xffff); + u32 count = (u32)(inst2 & 0xffff); + addr &= 0xFFFC; + memset(BufferSpace+addr, 0, (count+3)&0xFFFC); +} + +//FILE *dfile = fopen ("d:\\envmix.txt", "wt"); + +static void ENVMIXER (u32 inst1, u32 inst2) { + //static int envmixcnt = 0; + u8 flags = (u8)((inst1 >> 16) & 0xff); + u32 addy = (inst2 & 0xFFFFFF);// + SEGMENTS[(inst2>>24)&0xf]; + //static +// ********* Make sure these conditions are met... *********** + /*if ((AudioInBuffer | AudioOutBuffer | AudioAuxA | AudioAuxC | AudioAuxE | AudioCount) & 0x3) { + MessageBox (NULL, "Unaligned EnvMixer... 
please report this to Azimer with the following information: RomTitle, Place in the rom it occurred, and any save state just before the error", "AudioHLE Error", MB_OK); + }*/ +// ------------------------------------------------------------ + short *inp=(short *)(BufferSpace+AudioInBuffer); + short *out=(short *)(BufferSpace+AudioOutBuffer); + short *aux1=(short *)(BufferSpace+AudioAuxA); + short *aux2=(short *)(BufferSpace+AudioAuxC); + short *aux3=(short *)(BufferSpace+AudioAuxE); + s32 MainR; + s32 MainL; + s32 AuxR; + s32 AuxL; + int i1,o1,a1,a2=0,a3=0; + unsigned short AuxIncRate=1; + short zero[8]; + memset(zero,0,16); + s32 LVol, RVol; + s32 LAcc, RAcc; + s32 LTrg, RTrg; + s16 Wet, Dry; + u32 ptr = 0; + s32 RRamp, LRamp; + s32 LAdderStart, RAdderStart, LAdderEnd, RAdderEnd; + s32 oMainR, oMainL, oAuxR, oAuxL; + + //envmixcnt++; + + //fprintf (dfile, "\n----------------------------------------------------\n"); + if (flags & A_INIT) { + LVol = ((Vol_Left * (s32)VolRamp_Left)); + RVol = ((Vol_Right * (s32)VolRamp_Right)); + Wet = (s16)Env_Wet; Dry = (s16)Env_Dry; // Save Wet/Dry values + LTrg = (VolTrg_Left << 16); RTrg = (VolTrg_Right << 16); // Save Current Left/Right Targets + LAdderStart = Vol_Left << 16; + RAdderStart = Vol_Right << 16; + LAdderEnd = LVol; + RAdderEnd = RVol; + RRamp = VolRamp_Right; + LRamp = VolRamp_Left; + } else { + // Load LVol, RVol, LAcc, and RAcc (all 32bit) + // Load Wet, Dry, LTrg, RTrg + memcpy((u8 *)hleMixerWorkArea, (rsp.RDRAM+addy), 80); + Wet = *(s16 *)(hleMixerWorkArea + 0); // 0-1 + Dry = *(s16 *)(hleMixerWorkArea + 2); // 2-3 + LTrg = *(s32 *)(hleMixerWorkArea + 4); // 4-5 + RTrg = *(s32 *)(hleMixerWorkArea + 6); // 6-7 + LRamp= *(s32 *)(hleMixerWorkArea + 8); // 8-9 (hleMixerWorkArea is a 16bit pointer) + RRamp= *(s32 *)(hleMixerWorkArea + 10); // 10-11 + LAdderEnd = *(s32 *)(hleMixerWorkArea + 12); // 12-13 + RAdderEnd = *(s32 *)(hleMixerWorkArea + 14); // 14-15 + LAdderStart = *(s32 *)(hleMixerWorkArea + 16); // 12-13 
+ RAdderStart = *(s32 *)(hleMixerWorkArea + 18); // 14-15 + } + + if(!(flags&A_AUX)) { + AuxIncRate=0; + aux2=aux3=zero; + } + + oMainL = (Dry * (LTrg>>16) + 0x4000) >> 15; + oAuxL = (Wet * (LTrg>>16) + 0x4000) >> 15; + oMainR = (Dry * (RTrg>>16) + 0x4000) >> 15; + oAuxR = (Wet * (RTrg>>16) + 0x4000) >> 15; + + for (int y = 0; y < AudioCount; y += 0x10) { + + if (LAdderStart != LTrg) { + LAcc = LAdderStart; + LVol = (LAdderEnd - LAdderStart) >> 3; + LAdderEnd = (s32) (((s64)LAdderEnd * (s64)LRamp) >> 16); + LAdderStart = (s32) (((s64)LAcc * (s64)LRamp) >> 16); + } else { + LAcc = LTrg; + LVol = 0; + } + + if (RAdderStart != RTrg) { + RAcc = RAdderStart; + RVol = (RAdderEnd - RAdderStart) >> 3; + RAdderEnd = (s32) (((s64)RAdderEnd * (s64)RRamp) >> 16); + RAdderStart = (s32) (((s64)RAcc * (s64)RRamp) >> 16); + } else { + RAcc = RTrg; + RVol = 0; + } + + for (int x = 0; x < 8; x++) { + i1=(int)inp[ptr^S]; + o1=(int)out[ptr^S]; + a1=(int)aux1[ptr^S]; + if (AuxIncRate) { + a2=(int)aux2[ptr^S]; + a3=(int)aux3[ptr^S]; + } + // TODO: here... 
+ //LAcc = LTrg; + //RAcc = RTrg; + + LAcc += LVol; + RAcc += RVol; + + if (LVol <= 0) { // Decrementing + if (LAcc < LTrg) { + LAcc = LTrg; + LAdderStart = LTrg; + MainL = oMainL; + AuxL = oAuxL; + } else { + MainL = (Dry * ((s32)LAcc>>16) + 0x4000) >> 15; + AuxL = (Wet * ((s32)LAcc>>16) + 0x4000) >> 15; + } + } else { + if (LAcc > LTrg) { + LAcc = LTrg; + LAdderStart = LTrg; + MainL = oMainL; + AuxL = oAuxL; + } else { + MainL = (Dry * ((s32)LAcc>>16) + 0x4000) >> 15; + AuxL = (Wet * ((s32)LAcc>>16) + 0x4000) >> 15; + } + } + + if (RVol <= 0) { // Decrementing + if (RAcc < RTrg) { + RAcc = RTrg; + RAdderStart = RTrg; + MainR = oMainR; + AuxR = oAuxR; + } else { + MainR = (Dry * ((s32)RAcc>>16) + 0x4000) >> 15; + AuxR = (Wet * ((s32)RAcc>>16) + 0x4000) >> 15; + } + } else { + if (RAcc > RTrg) { + RAcc = RTrg; + RAdderStart = RTrg; + MainR = oMainR; + AuxR = oAuxR; + } else { + MainR = (Dry * ((s32)RAcc>>16) + 0x4000) >> 15; + AuxR = (Wet * ((s32)RAcc>>16) + 0x4000) >> 15; + } + } + + //fprintf (dfile, "%04X ", (LAcc>>16)); + + /*MainL = (((s64)Dry*2 * (s64)(LAcc>>16)) + 0x8000) >> 16; + MainR = (((s64)Dry*2 * (s64)(RAcc>>16)) + 0x8000) >> 16; + AuxL = (((s64)Wet*2 * (s64)(LAcc>>16)) + 0x8000) >> 16; + AuxR = (((s64)Wet*2 * (s64)(RAcc>>16)) + 0x8000) >> 16;*/ +/* + if (MainL>32767) MainL = 32767; + else if (MainL<-32768) MainL = -32768; + if (MainR>32767) MainR = 32767; + else if (MainR<-32768) MainR = -32768; + if (AuxL>32767) AuxL = 32767; + else if (AuxL<-32768) AuxR = -32768; + if (AuxR>32767) AuxR = 32767; + else if (AuxR<-32768) AuxR = -32768;*/ + /* + MainR = (Dry * RTrg + 0x10000) >> 15; + MainL = (Dry * LTrg + 0x10000) >> 15; + AuxR = (Wet * RTrg + 0x8000) >> 16; + AuxL = (Wet * LTrg + 0x8000) >> 16;*/ + + o1+=(/*(o1*0x7fff)+*/(i1*MainR)+0x4000)>>15; + a1+=(/*(a1*0x7fff)+*/(i1*MainL)+0x4000)>>15; + +/* o1=((s64)(((s64)o1*0xfffe)+((s64)i1*MainR*2)+0x8000)>>16); + + a1=((s64)(((s64)a1*0xfffe)+((s64)i1*MainL*2)+0x8000)>>16);*/ + + if(o1>32767) o1=32767; + 
else if(o1<-32768) o1=-32768; + + if(a1>32767) a1=32767; + else if(a1<-32768) a1=-32768; + + out[ptr^S]=o1; + aux1[ptr^S]=a1; + if (AuxIncRate) { + //a2=((s64)(((s64)a2*0xfffe)+((s64)i1*AuxR*2)+0x8000)>>16); + + //a3=((s64)(((s64)a3*0xfffe)+((s64)i1*AuxL*2)+0x8000)>>16); + a2+=(/*(a2*0x7fff)+*/(i1*AuxR)+0x4000)>>15; + a3+=(/*(a3*0x7fff)+*/(i1*AuxL)+0x4000)>>15; + + if(a2>32767) a2=32767; + else if(a2<-32768) a2=-32768; + + if(a3>32767) a3=32767; + else if(a3<-32768) a3=-32768; + + aux2[ptr^S]=a2; + aux3[ptr^S]=a3; + } + ptr++; + } + } + + /*LAcc = LAdderEnd; + RAcc = RAdderEnd;*/ + + *(s16 *)(hleMixerWorkArea + 0) = Wet; // 0-1 + *(s16 *)(hleMixerWorkArea + 2) = Dry; // 2-3 + *(s32 *)(hleMixerWorkArea + 4) = LTrg; // 4-5 + *(s32 *)(hleMixerWorkArea + 6) = RTrg; // 6-7 + *(s32 *)(hleMixerWorkArea + 8) = LRamp; // 8-9 (hleMixerWorkArea is a 16bit pointer) + *(s32 *)(hleMixerWorkArea + 10) = RRamp; // 10-11 + *(s32 *)(hleMixerWorkArea + 12) = LAdderEnd; // 12-13 + *(s32 *)(hleMixerWorkArea + 14) = RAdderEnd; // 14-15 + *(s32 *)(hleMixerWorkArea + 16) = LAdderStart; // 12-13 + *(s32 *)(hleMixerWorkArea + 18) = RAdderStart; // 14-15 + memcpy(rsp.RDRAM+addy, (u8 *)hleMixerWorkArea,80); +} + +static void RESAMPLE (u32 inst1, u32 inst2) { + unsigned char Flags=(u8)((inst1>>16)&0xff); + unsigned int Pitch=((inst1&0xffff))<<1; + u32 addy = (inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + unsigned int Accum=0; + unsigned int location; + s16 *lut/*, *lut2*/; + short *dst; + s16 *src; + dst=(short *)(BufferSpace); + src=(s16 *)(BufferSpace); + u32 srcPtr=(AudioInBuffer/2); + u32 dstPtr=(AudioOutBuffer/2); + s32 temp; + s32 accum; +/* + if (addy > (1024*1024*8)) + addy = (inst2 & 0xffffff); +*/ + srcPtr -= 4; + + if ((Flags & 0x1) == 0) { + //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8); + for (int x=0; x < 4; x++) + src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S]; + Accum = *(u16 *)(rsp.RDRAM+addy+10); + } else { + for (int x=0; x < 4; x++) + src[(srcPtr+x)^S] = 
0;//*(u16 *)(rsp.RDRAM+((addy+x)^2)); + } + + for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++) { + //location = (((Accum * 0x40) >> 0x10) * 8); + // location is the fractional position between two samples + location = (Accum >> 0xa) * 4; + lut = (s16*)ResampleLUT + location; + + // mov eax, dword ptr [src+srcPtr]; + // movsx edx, word ptr [lut]; + // shl edx, 1 + // imul edx + // test eax, 08000h + // setz ecx + // shl ecx, 16 + // xor eax, 08000h + // add eax, ecx + // and edx, 0f000h + + // imul + temp = ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0]))); + accum = (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3]))); + accum += (s32)(temp >> 15); + + if (accum > 32767) accum = 32767; + if (accum < -32768) accum = -32768; + + dst[dstPtr^S] = (accum); + dstPtr++; + Accum += Pitch; + srcPtr += (Accum>>16); + Accum&=0xffff; + } + for (int x=0; x < 4; x++) + ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S]; + //memcpy (RSWORK, src+srcPtr, 0x8); + *(u16 *)(rsp.RDRAM+addy+10) = Accum; +} + +static void SETVOL (u32 inst1, u32 inst2) { +// Might be better to unpack these depending on the flags... 
+ u8 flags = (u8)((inst1 >> 16) & 0xff); + u16 vol = (s16)(inst1 & 0xffff); + //u16 voltarg =(u16)((inst2 >> 16)&0xffff); + u16 volrate = (u16)((inst2 & 0xffff)); + + if (flags & A_AUX) { + Env_Dry = (s16)vol; // m_MainVol + Env_Wet = (s16)volrate; // m_AuxVol + return; + } + + if(flags & A_VOL) { // Set the Source(start) Volumes + if(flags & A_LEFT) { + Vol_Left = (s16)vol; // m_LeftVolume + } else { // A_RIGHT + Vol_Right = (s16)vol; // m_RightVolume + } + return; + } + +//0x370 Loop Value (shared location) +//0x370 Target Volume (Left) +//u16 VolRamp_Left; // 0x0012(T8) + if(flags & A_LEFT) { // Set the Ramping values Target, Ramp + //loopval = (((u32)vol << 0x10) | (u32)voltarg); + VolTrg_Left = (s16)inst1; // m_LeftVol + //VolRamp_Left = (s32)inst2; + VolRamp_Left = (s32)inst2;//(u16)(inst2) | (s32)(s16)(inst2 << 0x10); + //fprintf (dfile, "Ramp Left: %f\n", (float)VolRamp_Left/65536.0); + //fprintf (dfile, "Ramp Left: %08X\n", inst2); + //VolRamp_Left = (s16)voltarg; // m_LeftVolTarget + //VolRate_Left = (s16)volrate; // m_LeftVolRate + } else { // A_RIGHT + VolTrg_Right = (s16)inst1; // m_RightVol + //VolRamp_Right = (s32)inst2; + VolRamp_Right = (s32)inst2;//(u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10); + //fprintf (dfile, "Ramp Right: %f\n", (float)VolRamp_Right/65536.0); + //fprintf (dfile, "Ramp Right: %08X\n", inst2); + //VolRamp_Right = (s16)voltarg; // m_RightVolTarget + //VolRate_Right = (s16)volrate; // m_RightVolRate + } +} + +static void UNKNOWN (u32 inst1, u32 inst2) {} + +static void SETLOOP (u32 inst1, u32 inst2) { + loopval = (inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + //VolTrg_Left = (s16)(loopval>>16); // m_LeftVol + //VolRamp_Left = (s16)(loopval); // m_LeftVolTarget +} + +static void ADPCM (u32 inst1, u32 inst2) { // Work in progress! 
:) + unsigned char Flags=(u8)(inst1>>16)&0xff; + //unsigned short Gain=(u16)(inst1&0xffff); + unsigned int Address=(inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + unsigned short inPtr=0; + //short *out=(s16 *)(testbuff+(AudioOutBuffer>>2)); + short *out=(short *)(BufferSpace+AudioOutBuffer); + //unsigned char *in=(unsigned char *)(BufferSpace+AudioInBuffer); + short count=(short)AudioCount; + unsigned char icode; + unsigned char code; + int vscale; + unsigned short index; + unsigned short j; + int a[8]; + short *book1,*book2; +/* + if (Address > (1024*1024*8)) + Address = (inst2 & 0xffffff); +*/ + memset(out,0,32); + + if(!(Flags&0x1)) + { + if(Flags&0x2) { + memcpy(out,&rsp.RDRAM[loopval&MEMMASK],32); + } else { + memcpy(out,&rsp.RDRAM[Address],32); + } + } + + int l1=out[14^S]; + int l2=out[15^S]; + int inp1[8]; + int inp2[8]; + out+=16; + while(count>0) + { + // the first interation through, these values are + // either 0 in the case of A_INIT, from a special + // area of memory in the case of A_LOOP or just + // the values we calculated the last time + + code=BufferSpace[(AudioInBuffer+inPtr)^S8]; + index=code&0xf; + index<<=4; // index into the adpcm code table + book1=(short *)&adpcmtable[index]; + book2=book1+8; + code>>=4; // upper nibble is scale + vscale=(0x8000>>((12-code)-1)); // very strange. 0x8000 would be .5 in 16:16 format + // so this appears to be a fractional scale based + // on the 12 based inverse of the scale value. note + // that this could be negative, in which case we do + // not use the calculated vscale value... 
see the + // if(code>12) check below + + inPtr++; // coded adpcm data lies next + j=0; + while(j<8) // loop of 8, for 8 coded nibbles from 4 bytes + // which yields 8 short pcm values + { + icode=BufferSpace[(AudioInBuffer+inPtr)^S8]; + inPtr++; + + inp1[j]=(s16)((icode&0xf0)<<8); // this will in effect be signed + if(code<12) + inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + + inp1[j]=(s16)((icode&0xf)<<12); + if(code<12) + inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + } + j=0; + while(j<8) + { + icode=BufferSpace[(AudioInBuffer+inPtr)^S8]; + inPtr++; + + inp2[j]=(short)((icode&0xf0)<<8); // this will in effect be signed + if(code<12) + inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + + inp2[j]=(short)((icode&0xf)<<12); + if(code<12) + inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + } + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp1[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp1[0]; + a[1]+=(int)inp1[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp1[0]; + a[2]+=(int)book2[0]*inp1[1]; + a[2]+=(int)inp1[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp1[0]; + a[3]+=(int)book2[1]*inp1[1]; + a[3]+=(int)book2[0]*inp1[2]; + a[3]+=(int)inp1[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp1[0]; + a[4]+=(int)book2[2]*inp1[1]; + a[4]+=(int)book2[1]*inp1[2]; + a[4]+=(int)book2[0]*inp1[3]; + a[4]+=(int)inp1[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp1[0]; + a[5]+=(int)book2[3]*inp1[1]; + a[5]+=(int)book2[2]*inp1[2]; + a[5]+=(int)book2[1]*inp1[3]; + a[5]+=(int)book2[0]*inp1[4]; + a[5]+=(int)inp1[5]*(int)2048; + + a[6] 
=(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + a[6]+=(int)book2[5]*inp1[0]; + a[6]+=(int)book2[4]*inp1[1]; + a[6]+=(int)book2[3]*inp1[2]; + a[6]+=(int)book2[2]*inp1[3]; + a[6]+=(int)book2[1]*inp1[4]; + a[6]+=(int)book2[0]*inp1[5]; + a[6]+=(int)inp1[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp1[0]; + a[7]+=(int)book2[5]*inp1[1]; + a[7]+=(int)book2[4]*inp1[2]; + a[7]+=(int)book2[3]*inp1[3]; + a[7]+=(int)book2[2]*inp1[4]; + a[7]+=(int)book2[1]*inp1[5]; + a[7]+=(int)book2[0]*inp1[6]; + a[7]+=(int)inp1[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + } + l1=a[6]; + l2=a[7]; + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp2[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp2[0]; + a[1]+=(int)inp2[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp2[0]; + a[2]+=(int)book2[0]*inp2[1]; + a[2]+=(int)inp2[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp2[0]; + a[3]+=(int)book2[1]*inp2[1]; + a[3]+=(int)book2[0]*inp2[2]; + a[3]+=(int)inp2[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp2[0]; + a[4]+=(int)book2[2]*inp2[1]; + a[4]+=(int)book2[1]*inp2[2]; + a[4]+=(int)book2[0]*inp2[3]; + a[4]+=(int)inp2[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp2[0]; + a[5]+=(int)book2[3]*inp2[1]; + a[5]+=(int)book2[2]*inp2[2]; + a[5]+=(int)book2[1]*inp2[3]; + a[5]+=(int)book2[0]*inp2[4]; + a[5]+=(int)inp2[5]*(int)2048; + + a[6] =(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + a[6]+=(int)book2[5]*inp2[0]; + a[6]+=(int)book2[4]*inp2[1]; + a[6]+=(int)book2[3]*inp2[2]; + a[6]+=(int)book2[2]*inp2[3]; + 
a[6]+=(int)book2[1]*inp2[4]; + a[6]+=(int)book2[0]*inp2[5]; + a[6]+=(int)inp2[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp2[0]; + a[7]+=(int)book2[5]*inp2[1]; + a[7]+=(int)book2[4]*inp2[2]; + a[7]+=(int)book2[3]*inp2[3]; + a[7]+=(int)book2[2]*inp2[4]; + a[7]+=(int)book2[1]*inp2[5]; + a[7]+=(int)book2[0]*inp2[6]; + a[7]+=(int)inp2[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + } + l1=a[6]; + l2=a[7]; + + count-=32; + } + out-=16; + memcpy(&rsp.RDRAM[Address],out,32); +} + +static void LOADBUFF (u32 inst1, u32 inst2) { // memcpy causes static... endianess issue :( + u32 v0; + //u32 cnt; + if (AudioCount == 0) + return; + v0 = (inst2 & 0xfffffc);// + SEGMENTS[(inst2>>24)&0xf]; + memcpy (BufferSpace+(AudioInBuffer&0xFFFC), rsp.RDRAM+v0, (AudioCount+3)&0xFFFC); +} + +static void SAVEBUFF (u32 inst1, u32 inst2) { // memcpy causes static... endianess issue :( + u32 v0; + //u32 cnt; + if (AudioCount == 0) + return; + v0 = (inst2 & 0xfffffc);// + SEGMENTS[(inst2>>24)&0xf]; + memcpy (rsp.RDRAM+v0, BufferSpace+(AudioOutBuffer&0xFFFC), (AudioCount+3)&0xFFFC); +} + +static void SETBUFF (u32 inst1, u32 inst2) { // Should work ;-) + if ((inst1 >> 0x10) & 0x8) { // A_AUX - Auxillary Sound Buffer Settings + AudioAuxA = u16(inst1); + AudioAuxC = u16((inst2 >> 0x10)); + AudioAuxE = u16(inst2); + } else { // A_MAIN - Main Sound Buffer Settings + AudioInBuffer = u16(inst1); // 0x00 + AudioOutBuffer = u16((inst2 >> 0x10)); // 0x02 + AudioCount = u16(inst2); // 0x04 + } +} + +static void DMEMMOVE (u32 inst1, u32 inst2) { // Doesn't sound just right?... 
will fix when HLE is ready - 03-11-01 + u32 v0, v1; + u32 cnt; + if ((inst2 & 0xffff)==0) + return; + v0 = (inst1 & 0xFFFF); + v1 = (inst2 >> 0x10); + //assert ((v1 & 0x3) == 0); + //assert ((v0 & 0x3) == 0); + u32 count = ((inst2+3) & 0xfffc); + //v0 = (v0) & 0xfffc; + //v1 = (v1) & 0xfffc; + + //memcpy (BufferSpace+v1, BufferSpace+v0, count-1); + for (cnt = 0; cnt < count; cnt++) { + *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8)); + } +} + +static void LOADADPCM (u32 inst1, u32 inst2) { // Loads an ADPCM table - Works 100% Now 03-13-01 + u32 v0; + v0 = (inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; +/* if (v0 > (1024*1024*8)) + v0 = (inst2 & 0xffffff);*/ + //memcpy (dmem+0x4c0, rsp.RDRAM+v0, inst1&0xffff); // Could prolly get away with not putting this in dmem + //assert ((inst1&0xffff) <= 0x80); + u16 *table = (u16 *)(rsp.RDRAM+v0); + for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) { + adpcmtable[(0x0+(x<<3))^S] = table[0]; + adpcmtable[(0x1+(x<<3))^S] = table[1]; + + adpcmtable[(0x2+(x<<3))^S] = table[2]; + adpcmtable[(0x3+(x<<3))^S] = table[3]; + + adpcmtable[(0x4+(x<<3))^S] = table[4]; + adpcmtable[(0x5+(x<<3))^S] = table[5]; + + adpcmtable[(0x6+(x<<3))^S] = table[6]; + adpcmtable[(0x7+(x<<3))^S] = table[7]; + table += 8; + } +} + + +static void INTERLEAVE (u32 inst1, u32 inst2) { // Works... 
- 3-11-01 + u32 inL, inR; + u16 *outbuff = (u16 *)(AudioOutBuffer+BufferSpace); + u16 *inSrcR; + u16 *inSrcL; + u16 Left, Right, Left2, Right2; + + inL = inst2 & 0xFFFF; + inR = (inst2 >> 16) & 0xFFFF; + + inSrcR = (u16 *)(BufferSpace+inR); + inSrcL = (u16 *)(BufferSpace+inL); + + for (int x = 0; x < (AudioCount/4); x++) { + Left=*(inSrcL++); + Right=*(inSrcR++); + Left2=*(inSrcL++); + Right2=*(inSrcR++); + +#ifdef M64P_BIG_ENDIAN + *(outbuff++)=Right; + *(outbuff++)=Left; + *(outbuff++)=Right2; + *(outbuff++)=Left2; +#else + *(outbuff++)=Right2; + *(outbuff++)=Left2; + *(outbuff++)=Right; + *(outbuff++)=Left; +#endif + } +} + + +static void MIXER (u32 inst1, u32 inst2) { // Fixed a sign issue... 03-14-01 + u32 dmemin = (u16)(inst2 >> 0x10); + u32 dmemout = (u16)(inst2 & 0xFFFF); + //u8 flags = (u8)((inst1 >> 16) & 0xff); + s32 gain = (s16)(inst1 & 0xFFFF); + s32 temp; + + if (AudioCount == 0) + return; + + for (int x=0; x < AudioCount; x+=2) { // I think I can do this a lot easier + temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15; + temp += *(s16 *)(BufferSpace+dmemout+x); + + if ((s32)temp > 32767) + temp = 32767; + if ((s32)temp < -32768) + temp = -32768; + + *(u16 *)(BufferSpace+dmemout+x) = (u16)(temp & 0xFFFF); + } +} + +// TOP Performance Hogs: +//Command: ADPCM - Calls: 48 - Total Time: 331226 - Avg Time: 6900.54 - Percent: 31.53% +//Command: ENVMIXER - Calls: 48 - Total Time: 408563 - Avg Time: 8511.73 - Percent: 38.90% +//Command: LOADBUFF - Calls: 56 - Total Time: 21551 - Avg Time: 384.84 - Percent: 2.05% +//Command: RESAMPLE - Calls: 48 - Total Time: 225922 - Avg Time: 4706.71 - Percent: 21.51% + +//Command: ADPCM - Calls: 48 - Total Time: 391600 - Avg Time: 8158.33 - Percent: 32.52% +//Command: ENVMIXER - Calls: 48 - Total Time: 444091 - Avg Time: 9251.90 - Percent: 36.88% +//Command: LOADBUFF - Calls: 58 - Total Time: 29945 - Avg Time: 516.29 - Percent: 2.49% +//Command: RESAMPLE - Calls: 48 - Total Time: 276354 - Avg Time: 5757.38 - Percent: 
// Command table for audio ABI 1: sixteen handlers, exported with C linkage.
// NOTE(review): presumably indexed by the command number carried in the top
// bits of inst1 - confirm against the dispatcher that consumes ABI1.
// Slots 0x7 and 0xE map to UNKNOWN, which ignores its arguments.
extern "C" const acmd_callback_t ABI1[0x10] = { // TOP Performance Hogs: MIXER, RESAMPLE, ENVMIXER
    SPNOOP , ADPCM , CLEARBUFF, ENVMIXER , LOADBUFF, RESAMPLE , SAVEBUFF, UNKNOWN,
    SETBUFF, SETVOL, DMEMMOVE , LOADADPCM , MIXER , INTERLEAVE, UNKNOWN , SETLOOP
};
* + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include +# include + +extern "C" { + #include "m64p_types.h" + #include "hle.h" + #include "alist_internal.h" +} + +extern u8 BufferSpace[0x10000]; + +static void SPNOOP (u32 inst1, u32 inst2) { + DebugMessage(M64MSG_ERROR, "Unknown/Unimplemented Audio Command %i in ABI 2", (int)(inst1 >> 24)); +} +extern u16 AudioInBuffer; // 0x0000(T8) +extern u16 AudioOutBuffer; // 0x0002(T8) +extern u16 AudioCount; // 0x0004(T8) +extern u32 loopval; // 0x0010(T8) +extern u32 SEGMENTS[0x10]; + +extern u16 adpcmtable[0x88]; + +extern const u16 ResampleLUT [0x200]; + +bool isMKABI = false; +bool isZeldaABI = false; + +extern "C" void init_ucode2() { isMKABI = isZeldaABI = false; } + +static void LOADADPCM2 (u32 inst1, u32 inst2) { // Loads an ADPCM table - Works 100% Now 03-13-01 + u32 v0; + v0 = (inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + u16 *table = (u16 *)(rsp.RDRAM+v0); // Zelda2 Specific... + + for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) { + adpcmtable[(0x0+(x<<3))^S] = table[0]; + adpcmtable[(0x1+(x<<3))^S] = table[1]; + + adpcmtable[(0x2+(x<<3))^S] = table[2]; + adpcmtable[(0x3+(x<<3))^S] = table[3]; + + adpcmtable[(0x4+(x<<3))^S] = table[4]; + adpcmtable[(0x5+(x<<3))^S] = table[5]; + + adpcmtable[(0x6+(x<<3))^S] = table[6]; + adpcmtable[(0x7+(x<<3))^S] = table[7]; + table += 8; + } +} + +static void SETLOOP2 (u32 inst1, u32 inst2) { + loopval = inst2 & 0xffffff; // No segment? 
+} + +static void SETBUFF2 (u32 inst1, u32 inst2) { + AudioInBuffer = u16(inst1); // 0x00 + AudioOutBuffer = u16((inst2 >> 0x10)); // 0x02 + AudioCount = u16(inst2); // 0x04 +} + +static void ADPCM2 (u32 inst1, u32 inst2) { // Verified to be 100% Accurate... + unsigned char Flags=(u8)(inst1>>16)&0xff; + //unsigned short Gain=(u16)(inst1&0xffff); + unsigned int Address=(inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + unsigned short inPtr=0; + //short *out=(s16 *)(testbuff+(AudioOutBuffer>>2)); + short *out=(short *)(BufferSpace+AudioOutBuffer); + //unsigned char *in=(unsigned char *)(BufferSpace+AudioInBuffer); + short count=(short)AudioCount; + unsigned char icode; + unsigned char code; + int vscale; + unsigned short index; + unsigned short j; + int a[8]; + short *book1,*book2; + + u8 srange; + u8 mask1; + u8 mask2; + u8 shifter; + + memset(out,0,32); + + if (Flags & 0x4) { // Tricky lil Zelda MM and ABI2!!! hahaha I know your secrets! :DDD + srange = 0xE; + mask1 = 0xC0; + mask2 = 0x30; + shifter = 10; + } else { + srange = 0xC; + mask1 = 0xf0; + mask2 = 0x0f; + shifter = 12; + } + + if(!(Flags&0x1)) + { + if(Flags&0x2) + {/* + for(int i=0;i<16;i++) + { + out[i]=*(short *)&rsp.RDRAM[(loopval+i*2)^2]; + }*/ + memcpy(out,&rsp.RDRAM[loopval],32); + } + else + {/* + for(int i=0;i<16;i++) + { + out[i]=*(short *)&rsp.RDRAM[(Address+i*2)^2]; + }*/ + memcpy(out,&rsp.RDRAM[Address],32); + } + } + + int l1=out[14^S]; + int l2=out[15^S]; + int inp1[8]; + int inp2[8]; + out+=16; + while(count>0) { + code=BufferSpace[(AudioInBuffer+inPtr)^S8]; + index=code&0xf; + index<<=4; + book1=(short *)&adpcmtable[index]; + book2=book1+8; + code>>=4; + vscale=(0x8000>>((srange-code)-1)); + + inPtr++; + j=0; + + while(j<8) { + icode=BufferSpace[(AudioInBuffer+inPtr)^S8]; + inPtr++; + + inp1[j]=(s16)((icode&mask1) << 8); // this will in effect be signed + if(code>16); + //else int catchme=1; + j++; + + inp1[j]=(s16)((icode&mask2)<>16); + //else int catchme=1; + j++; + + if (Flags & 4) { 
+ inp1[j]=(s16)((icode&0xC) << 12); // this will in effect be signed + if(code < 0xE) inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + //else int catchme=1; + j++; + + inp1[j]=(s16)((icode&0x3) << 14); + if(code < 0xE) inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + //else int catchme=1; + j++; + } // end flags + } // end while + + + + j=0; + while(j<8) { + icode=BufferSpace[(AudioInBuffer+inPtr)^S8]; + inPtr++; + + inp2[j]=(s16)((icode&mask1) << 8); + if(code>16); + //else int catchme=1; + j++; + + inp2[j]=(s16)((icode&mask2)<>16); + //else int catchme=1; + j++; + + if (Flags & 4) { + inp2[j]=(s16)((icode&0xC) << 12); + if(code < 0xE) inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + //else int catchme=1; + j++; + + inp2[j]=(s16)((icode&0x3) << 14); + if(code < 0xE) inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + //else int catchme=1; + j++; + } // end flags + } + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp1[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp1[0]; + a[1]+=(int)inp1[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp1[0]; + a[2]+=(int)book2[0]*inp1[1]; + a[2]+=(int)inp1[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp1[0]; + a[3]+=(int)book2[1]*inp1[1]; + a[3]+=(int)book2[0]*inp1[2]; + a[3]+=(int)inp1[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp1[0]; + a[4]+=(int)book2[2]*inp1[1]; + a[4]+=(int)book2[1]*inp1[2]; + a[4]+=(int)book2[0]*inp1[3]; + a[4]+=(int)inp1[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp1[0]; + a[5]+=(int)book2[3]*inp1[1]; + a[5]+=(int)book2[2]*inp1[2]; + a[5]+=(int)book2[1]*inp1[3]; + a[5]+=(int)book2[0]*inp1[4]; + a[5]+=(int)inp1[5]*(int)2048; + + a[6] =(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + 
a[6]+=(int)book2[5]*inp1[0]; + a[6]+=(int)book2[4]*inp1[1]; + a[6]+=(int)book2[3]*inp1[2]; + a[6]+=(int)book2[2]*inp1[3]; + a[6]+=(int)book2[1]*inp1[4]; + a[6]+=(int)book2[0]*inp1[5]; + a[6]+=(int)inp1[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp1[0]; + a[7]+=(int)book2[5]*inp1[1]; + a[7]+=(int)book2[4]*inp1[2]; + a[7]+=(int)book2[3]*inp1[3]; + a[7]+=(int)book2[2]*inp1[4]; + a[7]+=(int)book2[1]*inp1[5]; + a[7]+=(int)book2[0]*inp1[6]; + a[7]+=(int)inp1[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + } + l1=a[6]; + l2=a[7]; + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp2[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp2[0]; + a[1]+=(int)inp2[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp2[0]; + a[2]+=(int)book2[0]*inp2[1]; + a[2]+=(int)inp2[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp2[0]; + a[3]+=(int)book2[1]*inp2[1]; + a[3]+=(int)book2[0]*inp2[2]; + a[3]+=(int)inp2[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp2[0]; + a[4]+=(int)book2[2]*inp2[1]; + a[4]+=(int)book2[1]*inp2[2]; + a[4]+=(int)book2[0]*inp2[3]; + a[4]+=(int)inp2[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp2[0]; + a[5]+=(int)book2[3]*inp2[1]; + a[5]+=(int)book2[2]*inp2[2]; + a[5]+=(int)book2[1]*inp2[3]; + a[5]+=(int)book2[0]*inp2[4]; + a[5]+=(int)inp2[5]*(int)2048; + + a[6] =(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + a[6]+=(int)book2[5]*inp2[0]; + a[6]+=(int)book2[4]*inp2[1]; + a[6]+=(int)book2[3]*inp2[2]; + a[6]+=(int)book2[2]*inp2[3]; + a[6]+=(int)book2[1]*inp2[4]; + a[6]+=(int)book2[0]*inp2[5]; + 
a[6]+=(int)inp2[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp2[0]; + a[7]+=(int)book2[5]*inp2[1]; + a[7]+=(int)book2[4]*inp2[2]; + a[7]+=(int)book2[3]*inp2[3]; + a[7]+=(int)book2[2]*inp2[4]; + a[7]+=(int)book2[1]*inp2[5]; + a[7]+=(int)book2[0]*inp2[6]; + a[7]+=(int)inp2[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + } + l1=a[6]; + l2=a[7]; + + count-=32; + } + out-=16; + memcpy(&rsp.RDRAM[Address],out,32); +} + +static void CLEARBUFF2 (u32 inst1, u32 inst2) { + u16 addr = (u16)(inst1 & 0xffff); + u16 count = (u16)(inst2 & 0xffff); + if (count > 0) + memset(BufferSpace+addr, 0, count); +} + +static void LOADBUFF2 (u32 inst1, u32 inst2) { // Needs accuracy verification... + u32 v0; + u32 cnt = (((inst1 >> 0xC)+3)&0xFFC); + v0 = (inst2 & 0xfffffc);// + SEGMENTS[(inst2>>24)&0xf]; + memcpy (BufferSpace+(inst1&0xfffc), rsp.RDRAM+v0, (cnt+3)&0xFFFC); +} + +static void SAVEBUFF2 (u32 inst1, u32 inst2) { // Needs accuracy verification... + u32 v0; + u32 cnt = (((inst1 >> 0xC)+3)&0xFFC); + v0 = (inst2 & 0xfffffc);// + SEGMENTS[(inst2>>24)&0xf]; + memcpy (rsp.RDRAM+v0, BufferSpace+(inst1&0xfffc), (cnt+3)&0xFFFC); +} + + +static void MIXER2 (u32 inst1, u32 inst2) { // Needs accuracy verification... 
+ u16 dmemin = (u16)(inst2 >> 0x10); + u16 dmemout = (u16)(inst2 & 0xFFFF); + u32 count = ((inst1 >> 12) & 0xFF0); + s32 gain = (s16)(inst1 & 0xFFFF); + s32 temp; + + for (unsigned int x=0; x < count; x+=2) { // I think I can do this a lot easier + + temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15; + temp += *(s16 *)(BufferSpace+dmemout+x); + + if ((s32)temp > 32767) + temp = 32767; + if ((s32)temp < -32768) + temp = -32768; + + *(u16 *)(BufferSpace+dmemout+x) = (u16)(temp & 0xFFFF); + } +} + + +static void RESAMPLE2 (u32 inst1, u32 inst2) { + unsigned char Flags=(u8)((inst1>>16)&0xff); + unsigned int Pitch=((inst1&0xffff))<<1; + u32 addy = (inst2 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + unsigned int Accum=0; + unsigned int location; + s16 *lut; + short *dst; + s16 *src; + dst=(short *)(BufferSpace); + src=(s16 *)(BufferSpace); + u32 srcPtr=(AudioInBuffer/2); + u32 dstPtr=(AudioOutBuffer/2); + s32 temp; + s32 accum; + + if (addy > (1024*1024*8)) + addy = (inst2 & 0xffffff); + + srcPtr -= 4; + + if ((Flags & 0x1) == 0) { + for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8); + src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S]; + Accum = *(u16 *)(rsp.RDRAM+addy+10); + } else { + for (int x=0; x < 4; x++) + src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2)); + } + + for(int i=0;i < ((AudioCount+0xf)&0xFFF0)/2;i++) { + location = (((Accum * 0x40) >> 0x10) * 8); + //location = (Accum >> 0xa) << 0x3; + lut = (s16 *)(((u8 *)ResampleLUT) + location); + + temp = ((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0]))); + accum = (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3]))); + accum += (s32)(temp >> 15); + + if (accum > 32767) accum = 32767; + if (accum < -32768) accum = -32768; + + dst[dstPtr^S] = 
(s16)(accum); + dstPtr++; + Accum += Pitch; + srcPtr += (Accum>>16); + Accum&=0xffff; + } + for (int x=0; x < 4; x++) + ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S]; + *(u16 *)(rsp.RDRAM+addy+10) = (u16)Accum; + //memcpy (RSWORK, src+srcPtr, 0x8); +} + +static void DMEMMOVE2 (u32 inst1, u32 inst2) { // Needs accuracy verification... + u32 v0, v1; + u32 cnt; + if ((inst2 & 0xffff)==0) + return; + v0 = (inst1 & 0xFFFF); + v1 = (inst2 >> 0x10); + //assert ((v1 & 0x3) == 0); + //assert ((v0 & 0x3) == 0); + u32 count = ((inst2+3) & 0xfffc); + //v0 = (v0) & 0xfffc; + //v1 = (v1) & 0xfffc; + + //memcpy (dmem+v1, dmem+v0, count-1); + for (cnt = 0; cnt < count; cnt++) { + *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8)); + } +} + +static u32 t3, s5, s6; +static u16 env[8]; + +static void ENVSETUP1 (u32 inst1, u32 inst2) { + u32 tmp; + + //fprintf (dfile, "ENVSETUP1: inst1 = %08X, inst2 = %08X\n", inst1, inst2); + t3 = inst1 & 0xFFFF; + tmp = (inst1 >> 0x8) & 0xFF00; + env[4] = (u16)tmp; + tmp += t3; + env[5] = (u16)tmp; + s5 = inst2 >> 0x10; + s6 = inst2 & 0xFFFF; + //fprintf (dfile, " t3 = %X / s5 = %X / s6 = %X / env[4] = %X / env[5] = %X\n", t3, s5, s6, env[4], env[5]); +} + +static void ENVSETUP2 (u32 inst1, u32 inst2) { + u32 tmp; + + //fprintf (dfile, "ENVSETUP2: inst1 = %08X, inst2 = %08X\n", inst1, inst2); + tmp = (inst2 >> 0x10); + env[0] = (u16)tmp; + tmp += s5; + env[1] = (u16)tmp; + tmp = inst2 & 0xffff; + env[2] = (u16)tmp; + tmp += s6; + env[3] = (u16)tmp; + //fprintf (dfile, " env[0] = %X / env[1] = %X / env[2] = %X / env[3] = %X\n", env[0], env[1], env[2], env[3]); +} + +static void ENVMIXER2 (u32 inst1, u32 inst2) { + //fprintf (dfile, "ENVMIXER: inst1 = %08X, inst2 = %08X\n", inst1, inst2); + + s16 *bufft6, *bufft7, *buffs0, *buffs1; + s16 *buffs3; + s32 count; + u32 adder; + + s16 vec9, vec10; + + s16 v2[8]; + + buffs3 = (s16 *)(BufferSpace + ((inst1 >> 0x0c)&0x0ff0)); + bufft6 = (s16 *)(BufferSpace + ((inst2 >> 
0x14)&0x0ff0)); + bufft7 = (s16 *)(BufferSpace + ((inst2 >> 0x0c)&0x0ff0)); + buffs0 = (s16 *)(BufferSpace + ((inst2 >> 0x04)&0x0ff0)); + buffs1 = (s16 *)(BufferSpace + ((inst2 << 0x04)&0x0ff0)); + + + v2[0] = 0 - (s16)((inst1 & 0x2) >> 1); + v2[1] = 0 - (s16)((inst1 & 0x1)); + v2[2] = 0 - (s16)((inst1 & 0x8) >> 1); + v2[3] = 0 - (s16)((inst1 & 0x4) >> 1); + + count = (inst1 >> 8) & 0xff; + + if (!isMKABI) { + s5 *= 2; s6 *= 2; t3 *= 2; + adder = 0x10; + } else { + inst1 = 0; + adder = 0x8; + t3 = 0; + } + + + while (count > 0) { + int temp, x; + for (x=0; x < 0x8; x++) { + vec9 = (s16)(((s32)buffs3[x^S] * (u32)env[0]) >> 0x10) ^ v2[0]; + vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[2]) >> 0x10) ^ v2[1]; + temp = bufft6[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + bufft6[x^S] = temp; + temp = bufft7[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + bufft7[x^S] = temp; + vec9 = (s16)(((s32)vec9 * (u32)env[4]) >> 0x10) ^ v2[2]; + vec10 = (s16)(((s32)vec10 * (u32)env[4]) >> 0x10) ^ v2[3]; + if (inst1 & 0x10) { + temp = buffs0[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs0[x^S] = temp; + temp = buffs1[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs1[x^S] = temp; + } else { + temp = buffs0[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs0[x^S] = temp; + temp = buffs1[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs1[x^S] = temp; + } + } + + if (!isMKABI) + for (x=0x8; x < 0x10; x++) { + vec9 = (s16)(((s32)buffs3[x^S] * (u32)env[1]) >> 0x10) ^ v2[0]; + vec10 = (s16)(((s32)buffs3[x^S] * (u32)env[3]) >> 0x10) ^ v2[1]; + temp = bufft6[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + bufft6[x^S] = temp; + temp = bufft7[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + 
bufft7[x^S] = temp; + vec9 = (s16)(((s32)vec9 * (u32)env[5]) >> 0x10) ^ v2[2]; + vec10 = (s16)(((s32)vec10 * (u32)env[5]) >> 0x10) ^ v2[3]; + if (inst1 & 0x10) { + temp = buffs0[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs0[x^S] = temp; + temp = buffs1[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs1[x^S] = temp; + } else { + temp = buffs0[x^S] + vec9; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs0[x^S] = temp; + temp = buffs1[x^S] + vec10; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + buffs1[x^S] = temp; + } + } + bufft6 += adder; bufft7 += adder; + buffs0 += adder; buffs1 += adder; + buffs3 += adder; count -= adder; + env[0] += (u16)s5; env[1] += (u16)s5; + env[2] += (u16)s6; env[3] += (u16)s6; + env[4] += (u16)t3; env[5] += (u16)t3; + } +} + +static void DUPLICATE2(u32 inst1, u32 inst2) { + unsigned short Count = (inst1 >> 16) & 0xff; + unsigned short In = inst1&0xffff; + unsigned short Out = (inst2>>16); + + unsigned short buff[64]; + + memcpy(buff,BufferSpace+In,128); + + while(Count) { + memcpy(BufferSpace+Out,buff,128); + Out+=128; + Count--; + } +} +/* +static void INTERL2 (u32 inst1, u32 inst2) { // Make your own... 
+ short Count = inst1 & 0xffff; + unsigned short Out = inst2 & 0xffff; + unsigned short In = (inst2 >> 16); + + short *src,*dst,tmp; + src=(short *)&BufferSpace[In]; + dst=(short *)&BufferSpace[Out]; + while(Count) + { + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + *(dst++)=*(src++); + src++; + Count-=8; + } +} +*/ + +static void INTERL2 (u32 inst1, u32 inst2) { + short Count = inst1 & 0xffff; + unsigned short Out = inst2 & 0xffff; + unsigned short In = (inst2 >> 16); + + unsigned char *src,*dst/*,tmp*/; + src=(unsigned char *)(BufferSpace);//[In]; + dst=(unsigned char *)(BufferSpace);//[Out]; + while(Count) { + *(short *)(dst+(Out^S8)) = *(short *)(src+(In^S8)); + Out += 2; + In += 4; + Count--; + } +} + +static void INTERLEAVE2 (u32 inst1, u32 inst2) { // Needs accuracy verification... + u32 inL, inR; + u16 *outbuff; + u16 *inSrcR; + u16 *inSrcL; + u16 Left, Right, Left2, Right2; + u32 count; + count = ((inst1 >> 12) & 0xFF0); + if (count == 0) { + outbuff = (u16 *)(AudioOutBuffer+BufferSpace); + count = AudioCount; + } else { + outbuff = (u16 *)((inst1&0xFFFF)+BufferSpace); + } + + inR = inst2 & 0xFFFF; + inL = (inst2 >> 16) & 0xFFFF; + + inSrcR = (u16 *)(BufferSpace+inR); + inSrcL = (u16 *)(BufferSpace+inL); + + for (u32 x = 0; x < (count/4); x++) { + Left=*(inSrcL++); + Right=*(inSrcR++); + Left2=*(inSrcL++); + Right2=*(inSrcR++); + +#ifdef M64P_BIG_ENDIAN + *(outbuff++)=Right; + *(outbuff++)=Left; + *(outbuff++)=Right2; + *(outbuff++)=Left2; +#else + *(outbuff++)=Right2; + *(outbuff++)=Left2; + *(outbuff++)=Right; + *(outbuff++)=Left; +#endif + } +} + +static void ADDMIXER (u32 inst1, u32 inst2) { + short Count = (inst1 >> 12) & 0x00ff0; + u16 InBuffer = (inst2 >> 16); + u16 OutBuffer = inst2 & 0xffff; + + s16 *inp, *outp; + s32 temp; + inp = (s16 *)(BufferSpace + InBuffer); + outp = (s16 
*)(BufferSpace + OutBuffer); + for (int cntr = 0; cntr < Count; cntr+=2) { + temp = *outp + *inp; + if (temp > 32767) temp = 32767; if (temp < -32768) temp = -32768; + *(outp++) = temp; + inp++; + } +} + +static void HILOGAIN (u32 inst1, u32 inst2) { + u16 cnt = inst1 & 0xffff; + u16 out = (inst2 >> 16) & 0xffff; + s16 hi = (s16)((inst1 >> 4) & 0xf000); + u16 lo = (inst1 >> 20) & 0xf; + s16 *src; + + src = (s16 *)(BufferSpace+out); + s32 tmp, val; + + while(cnt) { + val = (s32)*src; + //tmp = ((val * (s32)hi) + ((u64)(val * lo) << 16) >> 16); + tmp = ((val * (s32)hi) >> 16) + (u32)(val * lo); + if ((s32)tmp > 32767) tmp = 32767; + else if ((s32)tmp < -32768) tmp = -32768; + *src = tmp; + src++; + cnt -= 2; + } +} + +static void FILTER2 (u32 inst1, u32 inst2) { + static int cnt = 0; + static s16 *lutt6; + static s16 *lutt5; + u8 *save = (rsp.RDRAM+(inst2&0xFFFFFF)); + u8 t4 = (u8)((inst1 >> 0x10) & 0xFF); + int x; + + if (t4 > 1) { // Then set the cnt variable + cnt = (inst1 & 0xFFFF); + lutt6 = (s16 *)save; +// memcpy (dmem+0xFE0, rsp.RDRAM+(inst2&0xFFFFFF), 0x10); + return; + } + + if (t4 == 0) { +// memcpy (dmem+0xFB0, rsp.RDRAM+(inst2&0xFFFFFF), 0x20); + lutt5 = (short *)(save+0x10); + } + + lutt5 = (short *)(save+0x10); + +// lutt5 = (short *)(dmem + 0xFC0); +// lutt6 = (short *)(dmem + 0xFE0); + for (x = 0; x < 8; x++) { + s32 a; + a = (lutt5[x] + lutt6[x]) >> 1; + lutt5[x] = lutt6[x] = (short)a; + } + short *inp1, *inp2; + s32 out1[8]; + s16 outbuff[0x3c0], *outp; + u32 inPtr = (u32)(inst1&0xffff); + inp1 = (short *)(save); + outp = outbuff; + inp2 = (short *)(BufferSpace+inPtr); + for (x = 0; x < cnt; x+=0x10) { + out1[1] = inp1[0]*lutt6[6]; + out1[1] += inp1[3]*lutt6[7]; + out1[1] += inp1[2]*lutt6[4]; + out1[1] += inp1[5]*lutt6[5]; + out1[1] += inp1[4]*lutt6[2]; + out1[1] += inp1[7]*lutt6[3]; + out1[1] += inp1[6]*lutt6[0]; + out1[1] += inp2[1]*lutt6[1]; // 1 + + out1[0] = inp1[3]*lutt6[6]; + out1[0] += inp1[2]*lutt6[7]; + out1[0] += inp1[5]*lutt6[4]; + 
out1[0] += inp1[4]*lutt6[5]; + out1[0] += inp1[7]*lutt6[2]; + out1[0] += inp1[6]*lutt6[3]; + out1[0] += inp2[1]*lutt6[0]; + out1[0] += inp2[0]*lutt6[1]; + + out1[3] = inp1[2]*lutt6[6]; + out1[3] += inp1[5]*lutt6[7]; + out1[3] += inp1[4]*lutt6[4]; + out1[3] += inp1[7]*lutt6[5]; + out1[3] += inp1[6]*lutt6[2]; + out1[3] += inp2[1]*lutt6[3]; + out1[3] += inp2[0]*lutt6[0]; + out1[3] += inp2[3]*lutt6[1]; + + out1[2] = inp1[5]*lutt6[6]; + out1[2] += inp1[4]*lutt6[7]; + out1[2] += inp1[7]*lutt6[4]; + out1[2] += inp1[6]*lutt6[5]; + out1[2] += inp2[1]*lutt6[2]; + out1[2] += inp2[0]*lutt6[3]; + out1[2] += inp2[3]*lutt6[0]; + out1[2] += inp2[2]*lutt6[1]; + + out1[5] = inp1[4]*lutt6[6]; + out1[5] += inp1[7]*lutt6[7]; + out1[5] += inp1[6]*lutt6[4]; + out1[5] += inp2[1]*lutt6[5]; + out1[5] += inp2[0]*lutt6[2]; + out1[5] += inp2[3]*lutt6[3]; + out1[5] += inp2[2]*lutt6[0]; + out1[5] += inp2[5]*lutt6[1]; + + out1[4] = inp1[7]*lutt6[6]; + out1[4] += inp1[6]*lutt6[7]; + out1[4] += inp2[1]*lutt6[4]; + out1[4] += inp2[0]*lutt6[5]; + out1[4] += inp2[3]*lutt6[2]; + out1[4] += inp2[2]*lutt6[3]; + out1[4] += inp2[5]*lutt6[0]; + out1[4] += inp2[4]*lutt6[1]; + + out1[7] = inp1[6]*lutt6[6]; + out1[7] += inp2[1]*lutt6[7]; + out1[7] += inp2[0]*lutt6[4]; + out1[7] += inp2[3]*lutt6[5]; + out1[7] += inp2[2]*lutt6[2]; + out1[7] += inp2[5]*lutt6[3]; + out1[7] += inp2[4]*lutt6[0]; + out1[7] += inp2[7]*lutt6[1]; + + out1[6] = inp2[1]*lutt6[6]; + out1[6] += inp2[0]*lutt6[7]; + out1[6] += inp2[3]*lutt6[4]; + out1[6] += inp2[2]*lutt6[5]; + out1[6] += inp2[5]*lutt6[2]; + out1[6] += inp2[4]*lutt6[3]; + out1[6] += inp2[7]*lutt6[0]; + out1[6] += inp2[6]*lutt6[1]; + outp[1] = /*CLAMP*/((out1[1]+0x4000) >> 0xF); + outp[0] = /*CLAMP*/((out1[0]+0x4000) >> 0xF); + outp[3] = /*CLAMP*/((out1[3]+0x4000) >> 0xF); + outp[2] = /*CLAMP*/((out1[2]+0x4000) >> 0xF); + outp[5] = /*CLAMP*/((out1[5]+0x4000) >> 0xF); + outp[4] = /*CLAMP*/((out1[4]+0x4000) >> 0xF); + outp[7] = /*CLAMP*/((out1[7]+0x4000) >> 0xF); + outp[6] = 
/*CLAMP*/((out1[6]+0x4000) >> 0xF); + inp1 = inp2; + inp2 += 8; + outp += 8; + } +// memcpy (rsp.RDRAM+(inst2&0xFFFFFF), dmem+0xFB0, 0x20); + memcpy (save, inp2-8, 0x10); + memcpy (BufferSpace+(inst1&0xffff), outbuff, cnt); +} + +static void SEGMENT2 (u32 inst1, u32 inst2) { + if (isZeldaABI) { + FILTER2 (inst1, inst2); + return; + } + if ((inst1 & 0xffffff) == 0) { + isMKABI = true; + //SEGMENTS[(inst2>>24)&0xf] = (inst2 & 0xffffff); + } else { + isMKABI = false; + isZeldaABI = true; + FILTER2 (inst1, inst2); + } +} + +static void UNKNOWN (u32 inst1, u32 inst2) { +} +/* +void (*ABI2[0x20])(void) = { + SPNOOP, ADPCM2, CLEARBUFF2, SPNOOP, SPNOOP, RESAMPLE2, SPNOOP, SEGMENT2, + SETBUFF2, SPNOOP, DMEMMOVE2, LOADADPCM2, MIXER2, INTERLEAVE2, HILOGAIN, SETLOOP2, + SPNOOP, INTERL2, ENVSETUP1, ENVMIXER2, LOADBUFF2, SAVEBUFF2, ENVSETUP2, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP, SPNOOP, SPNOOP, SPNOOP, SPNOOP +};*/ + +extern "C" const acmd_callback_t ABI2[0x20] = { + SPNOOP , ADPCM2, CLEARBUFF2, UNKNOWN, ADDMIXER, RESAMPLE2, UNKNOWN, SEGMENT2, + SETBUFF2 , DUPLICATE2, DMEMMOVE2, LOADADPCM2, MIXER2, INTERLEAVE2, HILOGAIN, SETLOOP2, + SPNOOP, INTERL2 , ENVSETUP1, ENVMIXER2, LOADBUFF2, SAVEBUFF2, ENVSETUP2, SPNOOP, + HILOGAIN , SPNOOP, DUPLICATE2 , UNKNOWN , SPNOOP , SPNOOP , SPNOOP , SPNOOP +}; +/* +void (*ABI2[0x20])(void) = { + SPNOOP , ADPCM2, CLEARBUFF2, SPNOOP, SPNOOP, RESAMPLE2 , SPNOOP , SEGMENT2, + SETBUFF2 , DUPLICATE2, DMEMMOVE2, LOADADPCM2, MIXER2, INTERLEAVE2, SPNOOP, SETLOOP2, + SPNOOP, INTERL2 , ENVSETUP1, ENVMIXER2, LOADBUFF2, SAVEBUFF2, ENVSETUP2, SPNOOP, + SPNOOP , SPNOOP, SPNOOP , SPNOOP , SPNOOP , SPNOOP , SPNOOP , SPNOOP +};*/ +/* NOTES: + + FILTER/SEGMENT - Still needs to be finished up... add FILTER? + UNKNOWWN #27 - Is this worth doing? 
Looks like a pain in the ass just for WaveRace64 +*/ + diff --git a/source/mupen64plus-rsp-hle/src/ucode3.cpp b/source/mupen64plus-rsp-hle/src/ucode3.cpp new file mode 100644 index 0000000..b63fbe2 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/ucode3.cpp @@ -0,0 +1,834 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - ucode3.cpp * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include +# include + +extern "C" { + #include "m64p_types.h" + #include "hle.h" + #include "alist_internal.h" +} + +/* +static void SPNOOP (u32 inst1, u32 inst2) { + DebugMessage(M64MSG_ERROR, "Unknown/Unimplemented Audio Command %i in ABI 3", (int)(inst1 >> 24)); +} +*/ + +extern const u16 ResampleLUT [0x200]; + +extern u32 loopval; + +extern s16 Env_Dry; +extern s16 Env_Wet; +extern s16 Vol_Left; +extern s16 Vol_Right; +extern s16 VolTrg_Left; +extern s32 VolRamp_Left; +//extern u16 VolRate_Left; +extern s16 VolTrg_Right; +extern s32 VolRamp_Right; +//extern u16 VolRate_Right; + + +extern short hleMixerWorkArea[256]; +extern u16 adpcmtable[0x88]; + +extern u8 BufferSpace[0x10000]; + +/* +static void SETVOL3 (u32 inst1, u32 inst2) { // Swapped Rate_Left and Vol + u8 Flags = (u8)(inst1 >> 0x10); + if (Flags & 0x4) { // 288 + if (Flags & 0x2) { // 290 + VolTrg_Left = *(s16*)&inst1; + VolRamp_Left = *(s32*)&inst2; + } else { + VolTrg_Right = *(s16*)&inst1; + VolRamp_Right = *(s32*)&inst2; + } + } else { + Vol_Left = *(s16*)&inst1; + Env_Dry = (s16)(*(s32*)&inst2 >> 0x10); + Env_Wet = *(s16*)&inst2; + } +} +*/ +static void SETVOL3 (u32 inst1, u32 inst2) { + u8 Flags = (u8)(inst1 >> 0x10); + if (Flags & 0x4) { // 288 + if (Flags & 0x2) { // 290 + Vol_Left = (s16)inst1; // 0x50 + Env_Dry = (s16)(inst2 >> 0x10); // 0x4E + Env_Wet = (s16)inst2; // 0x4C + } else { + VolTrg_Right = (s16)inst1; // 0x46 + //VolRamp_Right = (u16)(inst2 >> 0x10) | (s32)(s16)(inst2 << 0x10); + VolRamp_Right = (s32)inst2; // 0x48/0x4A + } + } else { + VolTrg_Left = (s16)inst1; // 0x40 + VolRamp_Left = (s32)inst2; // 0x42/0x44 + } +} + +static void ENVMIXER3 (u32 inst1, u32 inst2) { + u8 flags = (u8)((inst1 >> 16) & 0xff); + u32 addy = (inst2 & 0xFFFFFF); + + short *inp=(short *)(BufferSpace+0x4F0); + short *out=(short *)(BufferSpace+0x9D0); + short *aux1=(short *)(BufferSpace+0xB40); + short *aux2=(short 
*)(BufferSpace+0xCB0); + short *aux3=(short *)(BufferSpace+0xE20); + s32 MainR; + s32 MainL; + s32 AuxR; + s32 AuxL; + int i1,o1,a1,a2,a3; + //unsigned short AuxIncRate=1; + short zero[8]; + memset(zero,0,16); + + s32 LAdder, LAcc, LVol; + s32 RAdder, RAcc, RVol; + s16 RSig, LSig; // Most significant part of the Ramp Value + s16 Wet, Dry; + s16 LTrg, RTrg; + + Vol_Right = (s16)inst1; + + if (flags & A_INIT) { + LAdder = VolRamp_Left / 8; + LAcc = 0; + LVol = Vol_Left; + LSig = (s16)(VolRamp_Left >> 16); + + RAdder = VolRamp_Right / 8; + RAcc = 0; + RVol = Vol_Right; + RSig = (s16)(VolRamp_Right >> 16); + + Wet = (s16)Env_Wet; Dry = (s16)Env_Dry; // Save Wet/Dry values + LTrg = VolTrg_Left; RTrg = VolTrg_Right; // Save Current Left/Right Targets + } else { + memcpy((u8 *)hleMixerWorkArea, rsp.RDRAM+addy, 80); + Wet = *(s16 *)(hleMixerWorkArea + 0); // 0-1 + Dry = *(s16 *)(hleMixerWorkArea + 2); // 2-3 + LTrg = *(s16 *)(hleMixerWorkArea + 4); // 4-5 + RTrg = *(s16 *)(hleMixerWorkArea + 6); // 6-7 + LAdder = *(s32 *)(hleMixerWorkArea + 8); // 8-9 (hleMixerWorkArea is a 16bit pointer) + RAdder = *(s32 *)(hleMixerWorkArea + 10); // 10-11 + LAcc = *(s32 *)(hleMixerWorkArea + 12); // 12-13 + RAcc = *(s32 *)(hleMixerWorkArea + 14); // 14-15 + LVol = *(s32 *)(hleMixerWorkArea + 16); // 16-17 + RVol = *(s32 *)(hleMixerWorkArea + 18); // 18-19 + LSig = *(s16 *)(hleMixerWorkArea + 20); // 20-21 + RSig = *(s16 *)(hleMixerWorkArea + 22); // 22-23 + //u32 test = *(s32 *)(hleMixerWorkArea + 24); // 22-23 + //if (test != 0x13371337) + } + + + //if(!(flags&A_AUX)) { + // AuxIncRate=0; + // aux2=aux3=zero; + //} + + for (int y = 0; y < (0x170/2); y++) { + + // Left + LAcc += LAdder; + LVol += (LAcc >> 16); + LAcc &= 0xFFFF; + + // Right + RAcc += RAdder; + RVol += (RAcc >> 16); + RAcc &= 0xFFFF; +// **************************************************************** + // Clamp Left + if (LSig >= 0) { // VLT + if (LVol > LTrg) { + LVol = LTrg; + } + } else { // VGE + if (LVol < LTrg) { + 
LVol = LTrg; + } + } + + // Clamp Right + if (RSig >= 0) { // VLT + if (RVol > RTrg) { + RVol = RTrg; + } + } else { // VGE + if (RVol < RTrg) { + RVol = RTrg; + } + } +// **************************************************************** + MainL = ((Dry * LVol) + 0x4000) >> 15; + MainR = ((Dry * RVol) + 0x4000) >> 15; + + o1 = out [y^S]; + a1 = aux1[y^S]; + i1 = inp [y^S]; + + o1+=((i1*MainL)+0x4000)>>15; + a1+=((i1*MainR)+0x4000)>>15; + +// **************************************************************** + + if(o1>32767) o1=32767; + else if(o1<-32768) o1=-32768; + + if(a1>32767) a1=32767; + else if(a1<-32768) a1=-32768; + +// **************************************************************** + + out[y^S]=o1; + aux1[y^S]=a1; + +// **************************************************************** + //if (!(flags&A_AUX)) { + a2 = aux2[y^S]; + a3 = aux3[y^S]; + + AuxL = ((Wet * LVol) + 0x4000) >> 15; + AuxR = ((Wet * RVol) + 0x4000) >> 15; + + a2+=((i1*AuxL)+0x4000)>>15; + a3+=((i1*AuxR)+0x4000)>>15; + + if(a2>32767) a2=32767; + else if(a2<-32768) a2=-32768; + + if(a3>32767) a3=32767; + else if(a3<-32768) a3=-32768; + + aux2[y^S]=a2; + aux3[y^S]=a3; + } + //} + + *(s16 *)(hleMixerWorkArea + 0) = Wet; // 0-1 + *(s16 *)(hleMixerWorkArea + 2) = Dry; // 2-3 + *(s16 *)(hleMixerWorkArea + 4) = LTrg; // 4-5 + *(s16 *)(hleMixerWorkArea + 6) = RTrg; // 6-7 + *(s32 *)(hleMixerWorkArea + 8) = LAdder; // 8-9 (hleMixerWorkArea is a 16bit pointer) + *(s32 *)(hleMixerWorkArea + 10) = RAdder; // 10-11 + *(s32 *)(hleMixerWorkArea + 12) = LAcc; // 12-13 + *(s32 *)(hleMixerWorkArea + 14) = RAcc; // 14-15 + *(s32 *)(hleMixerWorkArea + 16) = LVol; // 16-17 + *(s32 *)(hleMixerWorkArea + 18) = RVol; // 18-19 + *(s16 *)(hleMixerWorkArea + 20) = LSig; // 20-21 + *(s16 *)(hleMixerWorkArea + 22) = RSig; // 22-23 + //*(u32 *)(hleMixerWorkArea + 24) = 0x13371337; // 22-23 + memcpy(rsp.RDRAM+addy, (u8 *)hleMixerWorkArea,80); +} + +static void CLEARBUFF3 (u32 inst1, u32 inst2) { + u16 addr = 
(u16)(inst1 & 0xffff); + u16 count = (u16)(inst2 & 0xffff); + memset(BufferSpace+addr+0x4f0, 0, count); +} + +static void MIXER3 (u32 inst1, u32 inst2) { // Needs accuracy verification... + u16 dmemin = (u16)(inst2 >> 0x10) + 0x4f0; + u16 dmemout = (u16)(inst2 & 0xFFFF) + 0x4f0; + //u8 flags = (u8)((inst1 >> 16) & 0xff); + s32 gain = (s16)(inst1 & 0xFFFF); + s32 temp; + + for (int x=0; x < 0x170; x+=2) { // I think I can do this a lot easier + temp = (*(s16 *)(BufferSpace+dmemin+x) * gain) >> 15; + temp += *(s16 *)(BufferSpace+dmemout+x); + + if ((s32)temp > 32767) + temp = 32767; + if ((s32)temp < -32768) + temp = -32768; + + *(u16 *)(BufferSpace+dmemout+x) = (u16)(temp & 0xFFFF); + } +} + +static void LOADBUFF3 (u32 inst1, u32 inst2) { + u32 v0; + u32 cnt = (((inst1 >> 0xC)+3)&0xFFC); + v0 = (inst2 & 0xfffffc); + u32 src = (inst1&0xffc)+0x4f0; + memcpy (BufferSpace+src, rsp.RDRAM+v0, cnt); +} + +static void SAVEBUFF3 (u32 inst1, u32 inst2) { + u32 v0; + u32 cnt = (((inst1 >> 0xC)+3)&0xFFC); + v0 = (inst2 & 0xfffffc); + u32 src = (inst1&0xffc)+0x4f0; + memcpy (rsp.RDRAM+v0, BufferSpace+src, cnt); +} + +static void LOADADPCM3 (u32 inst1, u32 inst2) { // Loads an ADPCM table - Works 100% Now 03-13-01 + u32 v0; + v0 = (inst2 & 0xffffff); + //memcpy (dmem+0x3f0, rsp.RDRAM+v0, inst1&0xffff); + //assert ((inst1&0xffff) <= 0x80); + u16 *table = (u16 *)(rsp.RDRAM+v0); + for (u32 x = 0; x < ((inst1&0xffff)>>0x4); x++) { + adpcmtable[(0x0+(x<<3))^S] = table[0]; + adpcmtable[(0x1+(x<<3))^S] = table[1]; + + adpcmtable[(0x2+(x<<3))^S] = table[2]; + adpcmtable[(0x3+(x<<3))^S] = table[3]; + + adpcmtable[(0x4+(x<<3))^S] = table[4]; + adpcmtable[(0x5+(x<<3))^S] = table[5]; + + adpcmtable[(0x6+(x<<3))^S] = table[6]; + adpcmtable[(0x7+(x<<3))^S] = table[7]; + table += 8; + } +} + +static void DMEMMOVE3 (u32 inst1, u32 inst2) { // Needs accuracy verification... 
+ u32 v0, v1; + u32 cnt; + v0 = (inst1 & 0xFFFF) + 0x4f0; + v1 = (inst2 >> 0x10) + 0x4f0; + u32 count = ((inst2+3) & 0xfffc); + + //memcpy (dmem+v1, dmem+v0, count-1); + for (cnt = 0; cnt < count; cnt++) { + *(u8 *)(BufferSpace+((cnt+v1)^S8)) = *(u8 *)(BufferSpace+((cnt+v0)^S8)); + } +} + +static void SETLOOP3 (u32 inst1, u32 inst2) { + loopval = (inst2 & 0xffffff); +} + +static void ADPCM3 (u32 inst1, u32 inst2) { // Verified to be 100% Accurate... + unsigned char Flags=(u8)(inst2>>0x1c)&0xff; + //unsigned short Gain=(u16)(inst1&0xffff); + unsigned int Address=(inst1 & 0xffffff);// + SEGMENTS[(inst2>>24)&0xf]; + unsigned short inPtr=(inst2>>12)&0xf; + //short *out=(s16 *)(testbuff+(AudioOutBuffer>>2)); + short *out=(short *)(BufferSpace+(inst2&0xfff)+0x4f0); + //unsigned char *in=(unsigned char *)(BufferSpace+((inst2>>12)&0xf)+0x4f0); + short count=(short)((inst2 >> 16)&0xfff); + unsigned char icode; + unsigned char code; + int vscale; + unsigned short index; + unsigned short j; + int a[8]; + short *book1,*book2; + + memset(out,0,32); + + if(!(Flags&0x1)) + { + if(Flags&0x2) + {/* + for(int i=0;i<16;i++) + { + out[i]=*(short *)&rsp.RDRAM[(loopval+i*2)^2]; + }*/ + memcpy(out,&rsp.RDRAM[loopval],32); + } + else + {/* + for(int i=0;i<16;i++) + { + out[i]=*(short *)&rsp.RDRAM[(Address+i*2)^2]; + }*/ + memcpy(out,&rsp.RDRAM[Address],32); + } + } + + int l1=out[14^S]; + int l2=out[15^S]; + int inp1[8]; + int inp2[8]; + out+=16; + while(count>0) + { + // the first interation through, these values are + // either 0 in the case of A_INIT, from a special + // area of memory in the case of A_LOOP or just + // the values we calculated the last time + + code=BufferSpace[(0x4f0+inPtr)^S8]; + index=code&0xf; + index<<=4; // index into the adpcm code table + book1=(short *)&adpcmtable[index]; + book2=book1+8; + code>>=4; // upper nibble is scale + vscale=(0x8000>>((12-code)-1)); // very strange. 
0x8000 would be .5 in 16:16 format + // so this appears to be a fractional scale based + // on the 12 based inverse of the scale value. note + // that this could be negative, in which case we do + // not use the calculated vscale value... see the + // if(code>12) check below + + inPtr++; // coded adpcm data lies next + j=0; + while(j<8) // loop of 8, for 8 coded nibbles from 4 bytes + // which yields 8 short pcm values + { + icode=BufferSpace[(0x4f0+inPtr)^S8]; + inPtr++; + + inp1[j]=(s16)((icode&0xf0)<<8); // this will in effect be signed + if(code<12) + inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + + inp1[j]=(s16)((icode&0xf)<<12); + if(code<12) + inp1[j]=((int)((int)inp1[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + } + j=0; + while(j<8) + { + icode=BufferSpace[(0x4f0+inPtr)^S8]; + inPtr++; + + inp2[j]=(short)((icode&0xf0)<<8); // this will in effect be signed + if(code<12) + inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + + inp2[j]=(short)((icode&0xf)<<12); + if(code<12) + inp2[j]=((int)((int)inp2[j]*(int)vscale)>>16); + /*else + int catchme=1;*/ + j++; + } + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp1[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp1[0]; + a[1]+=(int)inp1[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp1[0]; + a[2]+=(int)book2[0]*inp1[1]; + a[2]+=(int)inp1[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp1[0]; + a[3]+=(int)book2[1]*inp1[1]; + a[3]+=(int)book2[0]*inp1[2]; + a[3]+=(int)inp1[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp1[0]; + a[4]+=(int)book2[2]*inp1[1]; + a[4]+=(int)book2[1]*inp1[2]; + a[4]+=(int)book2[0]*inp1[3]; + a[4]+=(int)inp1[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + 
a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp1[0]; + a[5]+=(int)book2[3]*inp1[1]; + a[5]+=(int)book2[2]*inp1[2]; + a[5]+=(int)book2[1]*inp1[3]; + a[5]+=(int)book2[0]*inp1[4]; + a[5]+=(int)inp1[5]*(int)2048; + + a[6] =(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + a[6]+=(int)book2[5]*inp1[0]; + a[6]+=(int)book2[4]*inp1[1]; + a[6]+=(int)book2[3]*inp1[2]; + a[6]+=(int)book2[2]*inp1[3]; + a[6]+=(int)book2[1]*inp1[4]; + a[6]+=(int)book2[0]*inp1[5]; + a[6]+=(int)inp1[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp1[0]; + a[7]+=(int)book2[5]*inp1[1]; + a[7]+=(int)book2[4]*inp1[2]; + a[7]+=(int)book2[3]*inp1[3]; + a[7]+=(int)book2[2]*inp1[4]; + a[7]+=(int)book2[1]*inp1[5]; + a[7]+=(int)book2[0]*inp1[6]; + a[7]+=(int)inp1[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + //*(out+j)=a[j^S]; + } + //out += 0x10; + l1=a[6]; + l2=a[7]; + + a[0]= (int)book1[0]*(int)l1; + a[0]+=(int)book2[0]*(int)l2; + a[0]+=(int)inp2[0]*(int)2048; + + a[1] =(int)book1[1]*(int)l1; + a[1]+=(int)book2[1]*(int)l2; + a[1]+=(int)book2[0]*inp2[0]; + a[1]+=(int)inp2[1]*(int)2048; + + a[2] =(int)book1[2]*(int)l1; + a[2]+=(int)book2[2]*(int)l2; + a[2]+=(int)book2[1]*inp2[0]; + a[2]+=(int)book2[0]*inp2[1]; + a[2]+=(int)inp2[2]*(int)2048; + + a[3] =(int)book1[3]*(int)l1; + a[3]+=(int)book2[3]*(int)l2; + a[3]+=(int)book2[2]*inp2[0]; + a[3]+=(int)book2[1]*inp2[1]; + a[3]+=(int)book2[0]*inp2[2]; + a[3]+=(int)inp2[3]*(int)2048; + + a[4] =(int)book1[4]*(int)l1; + a[4]+=(int)book2[4]*(int)l2; + a[4]+=(int)book2[3]*inp2[0]; + a[4]+=(int)book2[2]*inp2[1]; + a[4]+=(int)book2[1]*inp2[2]; + a[4]+=(int)book2[0]*inp2[3]; + a[4]+=(int)inp2[4]*(int)2048; + + a[5] =(int)book1[5]*(int)l1; + a[5]+=(int)book2[5]*(int)l2; + a[5]+=(int)book2[4]*inp2[0]; + a[5]+=(int)book2[3]*inp2[1]; + a[5]+=(int)book2[2]*inp2[2]; + a[5]+=(int)book2[1]*inp2[3]; + 
a[5]+=(int)book2[0]*inp2[4]; + a[5]+=(int)inp2[5]*(int)2048; + + a[6] =(int)book1[6]*(int)l1; + a[6]+=(int)book2[6]*(int)l2; + a[6]+=(int)book2[5]*inp2[0]; + a[6]+=(int)book2[4]*inp2[1]; + a[6]+=(int)book2[3]*inp2[2]; + a[6]+=(int)book2[2]*inp2[3]; + a[6]+=(int)book2[1]*inp2[4]; + a[6]+=(int)book2[0]*inp2[5]; + a[6]+=(int)inp2[6]*(int)2048; + + a[7] =(int)book1[7]*(int)l1; + a[7]+=(int)book2[7]*(int)l2; + a[7]+=(int)book2[6]*inp2[0]; + a[7]+=(int)book2[5]*inp2[1]; + a[7]+=(int)book2[4]*inp2[2]; + a[7]+=(int)book2[3]*inp2[3]; + a[7]+=(int)book2[2]*inp2[4]; + a[7]+=(int)book2[1]*inp2[5]; + a[7]+=(int)book2[0]*inp2[6]; + a[7]+=(int)inp2[7]*(int)2048; + + for(j=0;j<8;j++) + { + a[j^S]>>=11; + if(a[j^S]>32767) a[j^S]=32767; + else if(a[j^S]<-32768) a[j^S]=-32768; + *(out++)=a[j^S]; + //*(out+j+0x1f8)=a[j^S]; + } + l1=a[6]; + l2=a[7]; + + count-=32; + } + out-=16; + memcpy(&rsp.RDRAM[Address],out,32); +} + +static void RESAMPLE3 (u32 inst1, u32 inst2) { + unsigned char Flags=(u8)((inst2>>0x1e)); + unsigned int Pitch=((inst2>>0xe)&0xffff)<<1; + u32 addy = (inst1 & 0xffffff); + unsigned int Accum=0; + unsigned int location; + s16 *lut; + short *dst; + s16 *src; + dst=(short *)(BufferSpace); + src=(s16 *)(BufferSpace); + u32 srcPtr=((((inst2>>2)&0xfff)+0x4f0)/2); + u32 dstPtr;//=(AudioOutBuffer/2); + s32 temp; + s32 accum; + + //if (addy > (1024*1024*8)) + // addy = (inst2 & 0xffffff); + + srcPtr -= 4; + + if (inst2 & 0x3) { + dstPtr = 0x660/2; + } else { + dstPtr = 0x4f0/2; + } + + if ((Flags & 0x1) == 0) { + for (int x=0; x < 4; x++) //memcpy (src+srcPtr, rsp.RDRAM+addy, 0x8); + src[(srcPtr+x)^S] = ((u16 *)rsp.RDRAM)[((addy/2)+x)^S]; + Accum = *(u16 *)(rsp.RDRAM+addy+10); + } else { + for (int x=0; x < 4; x++) + src[(srcPtr+x)^S] = 0;//*(u16 *)(rsp.RDRAM+((addy+x)^2)); + } + + for(int i=0;i < 0x170/2;i++) { + location = (((Accum * 0x40) >> 0x10) * 8); + //location = (Accum >> 0xa) << 0x3; + lut = (s16 *)(((u8 *)ResampleLUT) + location); + + temp = 
((s32)*(s16*)(src+((srcPtr+0)^S))*((s32)((s16)lut[0]))); + accum = (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+1)^S))*((s32)((s16)lut[1]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+2)^S))*((s32)((s16)lut[2]))); + accum += (s32)(temp >> 15); + + temp = ((s32)*(s16*)(src+((srcPtr+3)^S))*((s32)((s16)lut[3]))); + accum += (s32)(temp >> 15); +/* temp = ((s64)*(s16*)(src+((srcPtr+0)^S))*((s64)((s16)lut[0]<<1))); + if (temp & 0x8000) temp = (temp^0x8000) + 0x10000; + else temp = (temp^0x8000); + temp = (s32)(temp >> 16); + if ((s32)temp > 32767) temp = 32767; + if ((s32)temp < -32768) temp = -32768; + accum = (s32)(s16)temp; + + temp = ((s64)*(s16*)(src+((srcPtr+1)^S))*((s64)((s16)lut[1]<<1))); + if (temp & 0x8000) temp = (temp^0x8000) + 0x10000; + else temp = (temp^0x8000); + temp = (s32)(temp >> 16); + if ((s32)temp > 32767) temp = 32767; + if ((s32)temp < -32768) temp = -32768; + accum += (s32)(s16)temp; + + temp = ((s64)*(s16*)(src+((srcPtr+2)^S))*((s64)((s16)lut[2]<<1))); + if (temp & 0x8000) temp = (temp^0x8000) + 0x10000; + else temp = (temp^0x8000); + temp = (s32)(temp >> 16); + if ((s32)temp > 32767) temp = 32767; + if ((s32)temp < -32768) temp = -32768; + accum += (s32)(s16)temp; + + temp = ((s64)*(s16*)(src+((srcPtr+3)^S))*((s64)((s16)lut[3]<<1))); + if (temp & 0x8000) temp = (temp^0x8000) + 0x10000; + else temp = (temp^0x8000); + temp = (s32)(temp >> 16); + if ((s32)temp > 32767) temp = 32767; + if ((s32)temp < -32768) temp = -32768; + accum += (s32)(s16)temp;*/ + + if (accum > 32767) accum = 32767; + if (accum < -32768) accum = -32768; + + dst[dstPtr^S] = (accum); + dstPtr++; + Accum += Pitch; + srcPtr += (Accum>>16); + Accum&=0xffff; + } + for (int x=0; x < 4; x++) + ((u16 *)rsp.RDRAM)[((addy/2)+x)^S] = src[(srcPtr+x)^S]; + *(u16 *)(rsp.RDRAM+addy+10) = Accum; +} + +static void INTERLEAVE3 (u32 inst1, u32 inst2) { // Needs accuracy verification... 
+ //u32 inL, inR; + u16 *outbuff = (u16 *)(BufferSpace + 0x4f0);//(u16 *)(AudioOutBuffer+dmem); + u16 *inSrcR; + u16 *inSrcL; + u16 Left, Right, Left2, Right2; + + //inR = inst2 & 0xFFFF; + //inL = (inst2 >> 16) & 0xFFFF; + + inSrcR = (u16 *)(BufferSpace+0xb40); + inSrcL = (u16 *)(BufferSpace+0x9d0); + + for (int x = 0; x < (0x170/4); x++) { + Left=*(inSrcL++); + Right=*(inSrcR++); + Left2=*(inSrcL++); + Right2=*(inSrcR++); + +#ifdef M64P_BIG_ENDIAN + *(outbuff++)=Right; + *(outbuff++)=Left; + *(outbuff++)=Right2; + *(outbuff++)=Left2; +#else + *(outbuff++)=Right2; + *(outbuff++)=Left2; + *(outbuff++)=Right; + *(outbuff++)=Left; +#endif +/* + Left=*(inSrcL++); + Right=*(inSrcR++); + *(outbuff++)=(u16)Left; + Left >>= 16; + *(outbuff++)=(u16)Right; + Right >>= 16; + *(outbuff++)=(u16)Left; + *(outbuff++)=(u16)Right;*/ + } +} + +//static void UNKNOWN (u32 inst1, u32 inst2); +/* +typedef struct { + unsigned char sync; + + unsigned char error_protection : 1; // 0=yes, 1=no + unsigned char lay : 2; // 4-lay = layerI, II or III + unsigned char version : 1; // 3=mpeg 1.0, 2=mpeg 2.5 0=mpeg 2.0 + unsigned char sync2 : 4; + + unsigned char extension : 1; // Unknown + unsigned char padding : 1; // padding + unsigned char sampling_freq : 2; // see table below + unsigned char bitrate_index : 4; // see table below + + unsigned char emphasis : 2; //see table below + unsigned char original : 1; // 0=no 1=yes + unsigned char copyright : 1; // 0=no 1=yes + unsigned char mode_ext : 2; // used with "joint stereo" mode + unsigned char mode : 2; // Channel Mode +} mp3struct; + +mp3struct mp3; +FILE *mp3dat; +*/ + +static void WHATISTHIS (u32 inst1, u32 inst2) { +} + +//static FILE *fp = fopen ("d:\\mp3info.txt", "wt"); +u32 setaddr; +static void MP3ADDY (u32 inst1, u32 inst2) { + setaddr = (inst2 & 0xffffff); +} + +extern "C" { + void rsp_run(void); + void mp3setup (unsigned int inst1, unsigned int inst2, unsigned int t8); +} + +extern u32 base, dmembase; +extern "C" { + extern char 
*pDMEM; +} +void MP3 (u32 inst1, u32 inst2); +/* + { +// return; + // Setup Registers... + mp3setup (inst1, inst2, 0xFA0); + + // Setup Memory Locations... + //u32 base = ((u32*)dmem)[0xFD0/4]; // Should be 000291A0 + memcpy (BufferSpace, dmembase+rsp.RDRAM, 0x10); + ((u32*)BufferSpace)[0x0] = base; + ((u32*)BufferSpace)[0x008/4] += base; + ((u32*)BufferSpace)[0xFFC/4] = loopval; + ((u32*)BufferSpace)[0xFF8/4] = dmembase; + + memcpy (imem+0x238, rsp.RDRAM+((u32*)BufferSpace)[0x008/4], 0x9C0); + ((u32*)BufferSpace)[0xFF4/4] = setaddr; + pDMEM = (char *)BufferSpace; + rsp_run (void); + dmembase = ((u32*)BufferSpace)[0xFF8/4]; + loopval = ((u32*)BufferSpace)[0xFFC/4]; +//0x1A98 SW S1, 0x0FF4 (R0) +//0x1A9C SW S0, 0x0FF8 (R0) +//0x1AA0 SW T7, 0x0FFC (R0) +//0x1AA4 SW T3, 0x0FF0 (R0) + //fprintf (fp, "mp3: inst1: %08X, inst2: %08X\n", inst1, inst2); +}*/ +/* +FFT = Fast Fourier Transform +DCT = Discrete Cosine Transform +MPEG-1 Layer 3 retains Layer 2's 1152-sample window, as well as the FFT polyphase filter for +backward compatibility, but adds a modified DCT filter. DCT's advantages over DFTs (discrete +Fourier transforms) include half as many multiply-accumulate operations and half the +generated coefficients because the sinusoidal portion of the calculation is absent, and DCT +generally involves simpler math. The finite lengths of a conventional DCTs' bandpass impulse +responses, however, may result in block-boundary effects. MDCTs overlap the analysis blocks +and lowpass-filter the decoded audio to remove aliases, eliminating these effects. MDCTs also +have a higher transform coding gain than the standard DCT, and their basic functions +correspond to better bandpass response. + +MPEG-1 Layer 3's DCT sub-bands are unequally sized, and correspond to the human auditory +system's critical bands. In Layer 3 decoders must support both constant- and variable-bit-rate +bit streams. (However, many Layer 1 and 2 decoders also handle variable bit rates). 
Finally, +Layer 3 encoders Huffman-code the quantized coefficients before archiving or transmission for +additional lossless compression. Bit streams range from 32 to 320 kbps, and 128-kbps rates +achieve near-CD quality, an important specification to enable dual-channel ISDN +(integrated-services-digital-network) to be the future high-bandwidth pipe to the home. + +*/ +static void DISABLE (u32 inst1, u32 inst2) { + //MessageBox (NULL, "Help", "ABI 3 Command 0", MB_OK); + //ChangeABI (5); +} + + +extern "C" const acmd_callback_t ABI3[0x10] = { + DISABLE , ADPCM3 , CLEARBUFF3, ENVMIXER3 , LOADBUFF3, RESAMPLE3 , SAVEBUFF3, MP3, + MP3ADDY, SETVOL3, DMEMMOVE3 , LOADADPCM3 , MIXER3 , INTERLEAVE3, WHATISTHIS , SETLOOP3 +}; + + diff --git a/source/mupen64plus-rsp-hle/src/ucode3mp3.cpp b/source/mupen64plus-rsp-hle/src/ucode3mp3.cpp new file mode 100644 index 0000000..21caa33 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/ucode3mp3.cpp @@ -0,0 +1,604 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - ucode3mp3.h * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include +# include + +extern "C" { + #include "hle.h" + #include "alist_internal.h" +} + +static const u16 DeWindowLUT [0x420] = { + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, + 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, 
+ 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 
0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 
0x005B, 0xFFE0, 0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008, + 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C, + 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0008, + 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C, + 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0009, + 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4, + 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 
0x0000, 0x0009, + 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4, + 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x000A, + 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4, + 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000A, + 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4, + 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000B, + 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7, + 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000B, + 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7, + 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000D, + 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E, + 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x000D, + 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E, + 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x0000 +}; + +//static u16 myVector[32][8]; + +static u8 mp3data[0x1000]; + +static s32 v[32]; + +static void MP3AB0 () { + // Part 2 - 100% Accurate + const u16 LUT2[8] = { 0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4, + 0x1916, 0x4A50, 0xA268, 0x78AE }; + const u16 LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A }; + int i; + + for (i = 0; i < 8; i++) { + v[16+i] = v[0+i] + v[8+i]; + v[24+i] = ((v[0+i] - v[8+i]) * LUT2[i]) >> 0x10; + } + + // Part 3: 4-wide butterflies + + for (i=0; i < 4; i++) { + v[0+i] = v[16+i] + v[20+i]; + v[4+i] = ((v[16+i] - v[20+i]) * LUT3[i]) >> 0x10; + + v[8+i] = v[24+i] + v[28+i]; + v[12+i] = ((v[24+i] - v[28+i]) * LUT3[i]) >> 0x10; + } + + // Part 4: 2-wide butterflies - 100% Accurate + + for (i = 0; i < 16; i+=4) { + v[16+i] = v[0+i] + v[2+i]; + v[18+i] = ((v[0+i] - v[2+i]) * 0xEC84) >> 0x10; + + v[17+i] = v[1+i] + v[3+i]; + v[19+i] = ((v[1+i] - v[3+i]) * 0x61F8) >> 0x10; + } +} + +static void InnerLoop (); + +static u32 inPtr, outPtr; + +static u32 t6;// = 0x08A0; // I think these are temporary storage buffers +static u32 t5;// = 0x0AC0; 
+// Phase value shared by MP3() and InnerLoop().  Always even and in
+// 0..0x1E: MP3() seeds it from inst1 & 0x1E and steps it by -2 (mod 0x20)
+// after every InnerLoop() call.  InnerLoop() ORs it into t5/t6 and uses
+// t4>>1 to pick the starting DeWindowLUT offset.
+static u32 t4;// = (inst1 & 0x1E);
+
+// Audio-list command handler for the MP3 decode step.
+//   inst1: only bits 1..4 (mask 0x1E) are used; they seed t4.
+//   inst2: low 24 bits are a byte offset into rsp.RDRAM, used as both the
+//          read and the write address.
+// Flow: copy the 8 bytes at that address to mp3data+0xCE8, then process
+// three chunks of 0x180 bytes (0x480 in total).  Each chunk is copied to
+// mp3data+0xCF0, run through InnerLoop() six times (0x40 input bytes per
+// call), and 0x180 bytes from mp3data+0xE70 are copied back to RDRAM.
+// Reads start 8 bytes past the base address while writes start at the base
+// address itself, so the output overwrites the header and the input.
+// NOTE(review): nothing here checks that the offset plus 0x488 stays inside
+// rsp.RDRAM; the buffer's size is not visible in this file - confirm.
+void MP3 (u32 inst1, u32 inst2) {
+    // Initialization Code
+    u32 readPtr; // s5
+    u32 writePtr; // s6
+    //u32 Count = 0x0480; // s4
+    u32 tmp;
+    //u32 inPtr, outPtr;
+
+    t6 = 0x08A0; // I think these are temporary storage buffers
+    t5 = 0x0AC0;
+    t4 = (inst1 & 0x1E);
+
+    writePtr = inst2 & 0xFFFFFF;
+    readPtr = writePtr;
+    memcpy (mp3data+0xCE8, rsp.RDRAM+readPtr, 8); // Just do that for efficiency... may remove and use directly later anyway
+    readPtr += 8; // This must be a header byte or whatnot
+
+    for (int cnt = 0; cnt < 0x480; cnt += 0x180) {
+        memcpy (mp3data+0xCF0, rsp.RDRAM+readPtr, 0x180); // DMA: 0xCF0 <- RDRAM[s5] : 0x180
+        inPtr = 0xCF0; // s7
+        outPtr = 0xE70; // s3
+// --------------- Inner Loop Start --------------------
+        for (int cnt2 = 0; cnt2 < 0x180; cnt2 += 0x40) {
+            // Keep the 32-byte-aligned base of each buffer and put the
+            // current phase into the low five bits.
+            t6 &= 0xFFE0;
+            t5 &= 0xFFE0;
+            t6 |= t4;
+            t5 |= t4;
+            InnerLoop ();
+            t4 = (t4-2)&0x1E;
+            // Swap the two buffers for the next call.
+            tmp = t6;
+            t6 = t5;
+            t5 = tmp;
+            // outPtr is advanced inside InnerLoop() (0x40 bytes per call).
+            //outPtr += 0x40;
+            inPtr += 0x40;
+        }
+// --------------- Inner Loop End --------------------
+        memcpy (rsp.RDRAM+writePtr, mp3data+0xe70, 0x180);
+        writePtr += 0x180;
+        readPtr += 0x180;
+    }
+}
+
+
+
+// Processes one 0x40-byte input block (32 16-bit values at mp3data+inPtr)
+// and writes 0x40 bytes of output at mp3data+outPtr, advancing outPtr.
+// Everything is passed through file-scope state: inPtr, outPtr, t4, t5, t6,
+// the work array v[] and the scratch buffer mp3data.
+// Stages, following the original part numbering:
+//   Part 1    sums of paired input samples          -> v[0..15]
+//   Part 2-4  MP3AB0() butterflies                  -> v[16..31]
+//   Part 5    final combination, stored relative to t5/t6
+//   Part 6    differences of the same input pairs, scaled by LUT6
+//   Part 2-4  MP3AB0() again
+//   Part 7    final combination, stored relative to t5/t6
+//   Step 8    dewindowing with DeWindowLUT          -> output at outPtr
+//   finally   gain from the header at 0xCE8/0xCEC, with clamping
+// Input and output accesses XOR the byte offset with S16; the Part 5/7
+// stores and the Step 8 window reads do not.
+// NOTE(review): S16 comes from hle.h (not visible here); presumably it is
+// the 16-bit endian-swizzle mask - confirm.
+// NOTE(review): the 16-bit accesses go through casts of a u8 buffer, which
+// relies on even offsets and on the compiler tolerating this type punning.
+static void InnerLoop () {
+    // Part 1: 100% Accurate
+
+    // Each row loads a pair of input samples into a low (v[0..15]) and a
+    // high (v[16..31]) slot and accumulates the sum into the low slot.
+    // The byte offsets are deliberately permuted.
+    int i;
+    v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
+    v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
+    v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
+    v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
+
+    v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
+    v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
+    v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
+    v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
+
+    v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
+    v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
+    v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
+    v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
+
+    v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
+    v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
+    v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
+    v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
+
+    // Part 2-4
+
+    MP3AB0 ();
+
+    // Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!!
+
+    // Store base addresses for Parts 5 and 7.  The (short)0xFFxx offsets
+    // below are negative on the usual two's-complement targets; added to a
+    // u32 base the sum wraps to base - N (e.g. 0xFF80 -> base - 0x80).
+    u32 t0 = t6 + 0x100;
+    u32 t1 = t6 + 0x200;
+    u32 t2 = t5 + 0x100;
+    u32 t3 = t5 + 0x200;
+    /*RSP_GPR[0x8].W = t0;
+    RSP_GPR[0x9].W = t1;
+    RSP_GPR[0xA].W = t2;
+    RSP_GPR[0xB].W = t3;
+
+    RSP_Vect[0].DW[1] = 0xB504A57E00016A09;
+    RSP_Vect[0].DW[0] = 0x0002D4130005A827;
+*/
+
+    // Statement order matters from here on: v[] entries are reused as
+    // temporaries and each store captures the value held at that point.
+    // 0x13A8
+    v[1] = 0;
+    v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10;
+
+    v[16] = -v[16] -v[17];
+    v[2] = v[18] + v[19];
+    // ** Store v[11] -> (T6 + 0)**
+    *(s16 *)(mp3data+((t6+(short)0x0))) = (short)v[11];
+
+
+    v[11] = -v[11];
+    // ** Store v[16] -> (T3 + 0)**
+    *(s16 *)(mp3data+((t3+(short)0x0))) = (short)v[16];
+    // ** Store v[11] -> (T5 + 0)**
+    *(s16 *)(mp3data+((t5+(short)0x0))) = (short)v[11];
+    // 0x13E8 - Verified....
+    v[2] = -v[2];
+    // ** Store v[2] -> (T2 + 0)**
+    *(s16 *)(mp3data+((t2+(short)0x0))) = (short)v[2];
+    v[3] = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2];
+    // ** Store v[3] -> (T0 + 0)**
+    *(s16 *)(mp3data+((t0+(short)0x0))) = (short)v[3];
+    // 0x1400 - Verified
+    v[4] = -v[20] -v[21];
+    v[6] = v[22] + v[23];
+    v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10;
+    // ** Store v[4] -> (T3 + 0xFF80)
+    *(s16 *)(mp3data+((t3+(short)0xFF80))) = (short)v[4];
+    v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10;
+    v[5] = v[5] - v[4];
+    v[7] = v[7] - v[5];
+    v[6] = v[6] + v[6];
+    v[5] = v[5] - v[6];
+    v[4] = -v[4] - v[6];
+    // *** Store v[7] -> (T1 + 0xFF80)
+    *(s16 *)(mp3data+((t1+(short)0xFF80))) = (short)v[7];
+    // *** Store v[4] -> (T2 + 0xFF80)
+    *(s16 *)(mp3data+((t2+(short)0xFF80))) = (short)v[4];
+    // *** Store v[5] -> (T0 + 0xFF80)
+    *(s16 *)(mp3data+((t0+(short)0xFF80))) = (short)v[5];
+    v[8] = v[24] + v[25];
+
+
+    v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
+    v[2] = v[8] + v[9];
+    v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10;
+    v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10;
+
+    v[10] = v[26] + v[27]; v[10] = v[10] + v[10];
+    v[12] = v[28] + v[29]; v[12] = v[12] + v[12];
+    v[14] = v[30] + v[31];
+    v[3] = v[8] + v[10];
+    v[14] = v[14] + v[14];
+    v[13] = (v[13] - v[2]) + v[12];
+    v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]);
+    v[14] = -(v[14] + v[14]) + v[3];
+    v[17] = v[13] - v[10];
+    v[9] = v[9] + v[14];
+    // ** Store v[9] -> (T6 + 0x40)
+    *(s16 *)(mp3data+((t6+(short)0x40))) = (short)v[9];
+    v[11] = v[11] - v[13];
+    // ** Store v[17] -> (T0 + 0xFFC0)
+    *(s16 *)(mp3data+((t0+(short)0xFFC0))) = (short)v[17];
+    v[12] = v[8] - v[12];
+    // ** Store v[11] -> (T0 + 0x40)
+    *(s16 *)(mp3data+((t0+(short)0x40))) = (short)v[11];
+    v[8] = -v[8];
+    // ** Store v[15] -> (T1 + 0xFFC0)
+    *(s16 *)(mp3data+((t1+(short)0xFFC0))) = (short)v[15];
+    v[10] = -v[10] -v[12];
+    // ** Store v[12] -> (T2 + 0x40)
+    *(s16 *)(mp3data+((t2+(short)0x40))) = (short)v[12];
+    // ** Store v[8] -> (T3 + 0xFFC0)
+    *(s16 *)(mp3data+((t3+(short)0xFFC0))) = (short)v[8];
+    // ** Store v[14] -> (T5 + 0x40)
+    *(s16 *)(mp3data+((t5+(short)0x40))) = (short)v[14];
+    // ** Store v[10] -> (T2 + 0xFFC0)
+    *(s16 *)(mp3data+((t2+(short)0xFFC0))) = (short)v[10];
+    // 0x14FC - Verified...
+
+    // Part 6 - 100% Accurate
+
+    // Same loads and pairing as Part 1, but the low slot receives the
+    // difference of the pair instead of the sum.
+    v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
+    v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
+    v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
+    v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
+
+    v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
+    v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
+    v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
+    v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
+
+    v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
+    v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
+    v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
+    v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
+
+    v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
+    v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
+    v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
+    v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
+
+    //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
+    const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
+                           0xBDAE, 0xCDA0, 0xE76C, 0xDB94,
+                           0x1920, 0x4B20, 0xAC7C, 0x7C68,
+                           0xABEC, 0x9880, 0xDAE8, 0x839C };
+    for (i = 0; i < 16; i++) {
+        v[0+i] = (v[0+i] * LUT6[i]) >> 0x10;
+    }
+    // Doubles v[0..7], v[12], v[13] and v[15]; v[8..11] and v[14] are left
+    // as they are.
+    v[0] = v[0] + v[0]; v[1] = v[1] + v[1];
+    v[2] = v[2] + v[2]; v[3] = v[3] + v[3]; v[4] = v[4] + v[4];
+    v[5] = v[5] + v[5]; v[6] = v[6] + v[6]; v[7] = v[7] + v[7];
+    v[12] = v[12] + v[12]; v[13] = v[13] + v[13]; v[15] = v[15] + v[15];
+
+    MP3AB0 ();
+
+    // Part 7: - 100% Accurate + SSV - Unoptimized
+
+    v[0] = ( v[17] + v[16] ) >> 1;
+    v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10;
+    v[2] = -v[18] -v[19];
+    v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10;
+    v[4] = v[20] + v[21] + v[0];
+    v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1];
+    v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2];
+    v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3];
+    // 0x16A8
+    // Save v[0] -> (T3 + 0xFFE0)
+    // Note: the value stored is the negation of v[0].
+    *(s16 *)(mp3data+((t3+(short)0xFFE0))) = (short)-v[0];
+    v[8] = v[24] + v[25];
+    v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
+    v[10] = ((v[26] + v[27]) << 1) + v[8];
+    v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9];
+    v[12] = v[4] - ((v[28] + v[29]) << 1);
+    // ** Store v12 -> (T2 + 0x20)
+    *(s16 *)(mp3data+((t2+(short)0x20))) = (short)v[12];
+    v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5];
+    v[14] = v[30] + v[31];
+    v[14] = v[14] + v[14];
+    v[14] = v[14] + v[14];
+    v[14] = v[6] - v[14];
+    v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7];
+    // Store v14 -> (T5 + 0x20)
+    *(s16 *)(mp3data+((t5+(short)0x20))) = (short)v[14];
+    v[14] = v[14] + v[1];
+    // Store v[14] -> (T6 + 0x20)
+    *(s16 *)(mp3data+((t6+(short)0x20))) = (short)v[14];
+    // Store v[15] -> (T1 + 0xFFE0)
+    *(s16 *)(mp3data+((t1+(short)0xFFE0))) = (short)v[15];
+    v[9] = v[9] + v[10];
+    v[1] = v[1] + v[6];
+    v[6] = v[10] - v[6];
+    v[1] = v[9] - v[1];
+    // Store v[6] -> (T5 + 0x60)
+    *(s16 *)(mp3data+((t5+(short)0x60))) = (short)v[6];
+    v[10] = v[10] + v[2];
+    v[10] = v[4] - v[10];
+    // Store v[10] -> (T2 + 0xFFA0)
+    *(s16 *)(mp3data+((t2+(short)0xFFA0))) = (short)v[10];
+    v[12] = v[2] - v[12];
+    // Store v[12] -> (T2 + 0xFFE0)
+    *(s16 *)(mp3data+((t2+(short)0xFFE0))) = (short)v[12];
+    v[5] = v[4] + v[5];
+    v[4] = v[8] - v[4];
+    // Store v[4] -> (T2 + 0x60)
+    *(s16 *)(mp3data+((t2+(short)0x60))) = (short)v[4];
+    v[0] = v[0] - v[8];
+    // Store v[0] -> (T3 + 0xFFA0)
+    *(s16 *)(mp3data+((t3+(short)0xFFA0))) = (short)v[0];
+    v[7] = v[7] - v[11];
+    // Store v[7] -> (T1 + 0xFFA0)
+    *(s16 *)(mp3data+((t1+(short)0xFFA0))) = (short)v[7];
+    v[11] = v[11] - v[3];
+    // Store v[1] -> (T6 + 0x60)
+    *(s16 *)(mp3data+((t6+(short)0x60))) = (short)v[1];
+    v[11] = v[11] - v[5];
+    // Store v[11] -> (T0 + 0x60)
+    *(s16 *)(mp3data+((t0+(short)0x60))) = (short)v[11];
+    v[3] = v[3] - v[13];
+    // Store v[3] -> (T0 + 0x20)
+    *(s16 *)(mp3data+((t0+(short)0x20))) = (short)v[3];
+    v[13] = v[13] + v[2];
+    // Store v[13] -> (T0 + 0xFFE0)
+    *(s16 *)(mp3data+((t0+(short)0xFFE0))) = (short)v[13];
+    //v[2] = ;
+    v[2] = (v[5] - v[2]) - v[9];
+    // Store v[2] -> (T0 + 0xFFA0)
+    *(s16 *)(mp3data+((t0+(short)0xFFA0))) = (short)v[2];
+    // 0x7A8 - Verified...
+
+    // Step 8 - Dewindowing
+
+    // Every product below is computed as (sample * coefficient + 0x4000)
+    // >> 0xF, i.e. rounded to nearest before the low 15 bits are dropped.
+    // NOTE(review): offset is assigned the same value three times before it
+    // is first used; only the last assignment is needed.
+    //u64 *DW = (u64 *)&DeWindowLUT[0x10-(t4>>1)];
+    u32 offset = 0x10-(t4>>1);
+
+    u32 addptr = t6 & 0xFFE0;
+    offset = 0x10-(t4>>1);
+
+    s32 v2=0, v4=0, v6=0, v8=0;
+    //s32 z2=0, z4=0, z6=0, z8=0;
+
+    offset = 0x10-(t4>>1);// + x*0x40;
+    int x;
+    // First half: 8 iterations, each producing two output samples
+    // (outPtr advances by 4 bytes per iteration, 0x20 in total).
+    for (x = 0; x < 8; x++) {
+        v2 = v4 = v6 = v8 = 0;
+
+        //addptr = t1;
+
+        // Four 8-tap sums over consecutive 16-bit values at addptr+0x00,
+        // +0x10, +0x20 and +0x30.
+        for (i = 7; i >= 0; i--) {
+            v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
+            v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
+            v6 += ((int)*(s16 *)(mp3data+(addptr)+0x20) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
+            v8 += ((int)*(s16 *)(mp3data+(addptr)+0x30) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
+            addptr+=2; offset++;
+        }
+        s32 v0 = v2 + v4;
+        s32 v18 = v6 + v8;
+        //Clamp(v0);
+        //Clamp(v18);
+        // clamp???
+        // The sums are stored without clamping; assigning to s16 keeps only
+        // the low 16 bits.
+        *(s16 *)(mp3data+(outPtr^S16)) = v0;
+        *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
+        outPtr+=4;
+        // Together with the 0x10 / 8 steps taken in the inner loop this
+        // advances addptr by 0x40 bytes and offset by 0x40 entries.
+        addptr += 0x30;
+        offset += 0x38;
+    }
+
+    // Middle sample: v2 accumulates the even-numbered taps and v4 the
+    // odd-numbered ones; bit 1 of t4 selects which sum is written.
+    offset = 0x10-(t4>>1) + 8*0x40;
+    v2 = v4 = 0;
+    for (i = 0; i < 4; i++) {
+        v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
+        v2 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
+        addptr+=2; offset++;
+        v4 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
+        v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
+        addptr+=2; offset++;
+    }
+    // Gain words from the 8-byte header that MP3() copied to 0xCE8.
+    s32 mult6 = *(s32 *)(mp3data+0xCE8);
+    s32 mult4 = *(s32 *)(mp3data+0xCEC);
+    // NOTE(review): both branches scale by the word at 0xCE8, and the else
+    // branch also replaces mult4 with that word, so the word at 0xCEC is
+    // only used when t4 & 2 is set - confirm this is intended.
+    if (t4 & 0x2) {
+        v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+        *(s16 *)(mp3data+(outPtr^S16)) = v2;
+    } else {
+        v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
+        *(s16 *)(mp3data+(outPtr^S16)) = v4;
+        mult4 = *(u32 *)(mp3data+0xCE8);
+    }
+    addptr -= 0x50;
+
+    // Second half: walks backwards through the window buffer.  Each
+    // iteration moves addptr forward 0x10 in the inner loop and back 0x50
+    // afterwards (net -0x40) and recomputes offset from x.
+    for (x = 0; x < 8; x++) {
+        v2 = v4 = v6 = v8 = 0;
+
+        offset = (0x22F-(t4>>1) + x*0x40);
+
+        // Even-numbered taps are added, odd-numbered taps are subtracted.
+        for (i = 0; i < 4; i++) {
+            v2 += ((int)*(s16 *)(mp3data+(addptr )+0x20) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
+            v2 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x20) * (short)DeWindowLUT[offset+0x01] + 0x4000) >> 0xF;
+            v4 += ((int)*(s16 *)(mp3data+(addptr )+0x30) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
+            v4 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x30) * (short)DeWindowLUT[offset+0x09] + 0x4000) >> 0xF;
+            v6 += ((int)*(s16 *)(mp3data+(addptr )+0x00) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
+            v6 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x00) * (short)DeWindowLUT[offset+0x21] + 0x4000) >> 0xF;
+            v8 += ((int)*(s16 *)(mp3data+(addptr )+0x10) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
+            v8 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x10) * (short)DeWindowLUT[offset+0x29] + 0x4000) >> 0xF;
+            addptr+=4; offset+=2;
+        }
+        s32 v0 = v2 + v4;
+        s32 v18 = v6 + v8;
+        //Clamp(v0);
+        //Clamp(v18);
+        // clamp???
+        // Stores land at outPtr+2 and outPtr+4: the middle sample written
+        // above already occupies the slot at outPtr.
+        *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
+        *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
+        outPtr+=4;
+        addptr -= 0x50;
+    }
+
+    // Final gain pass over output written by this call: outPtr has advanced
+    // by 0x40, so tmp-0x40 is the first sample of this block.
+    // Note: the local 'v' below shadows the file-scope work array v[] for
+    // the rest of the function.
+    int tmp = outPtr;
+    s32 hi0 = mult6;
+    s32 hi1 = mult4;
+    s32 v;
+
+    // Only the upper 16 bits of each header word are used as the gain.
+    hi0 = (int)hi0 >> 0x10;
+    hi1 = (int)hi1 >> 0x10;
+    // Scales eight samples in each of four runs (starting at tmp-0x40,
+    // tmp-0x30, tmp-0x1E and tmp-0xE).
+    // NOTE(review): the clamp is symmetric (lower bound -32767, not
+    // -32768) - confirm this matches the intended saturation.
+    for (i = 0; i < 8; i++) {
+        // v0
+        v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
+        if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+        *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
+        // v17
+        v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
+        if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+        *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
+        // v2
+        v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
+        if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+        *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
+        // v4
+        v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
+        if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
+        *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;
+        tmp += 2;
+    }
+}
+
-- 
2.39.5
+ v2 += ((int)*(s16 *)(mp3data+(addptr )+0x20) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF; + v2 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x20) * (short)DeWindowLUT[offset+0x01] + 0x4000) >> 0xF; + v4 += ((int)*(s16 *)(mp3data+(addptr )+0x30) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF; + v4 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x30) * (short)DeWindowLUT[offset+0x09] + 0x4000) >> 0xF; + v6 += ((int)*(s16 *)(mp3data+(addptr )+0x00) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF; + v6 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x00) * (short)DeWindowLUT[offset+0x21] + 0x4000) >> 0xF; + v8 += ((int)*(s16 *)(mp3data+(addptr )+0x10) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF; + v8 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x10) * (short)DeWindowLUT[offset+0x29] + 0x4000) >> 0xF; + addptr+=4; offset+=2; + } + s32 v0 = v2 + v4; + s32 v18 = v6 + v8; + //Clamp(v0); + //Clamp(v18); + // clamp??? + *(s16 *)(mp3data+((outPtr+2)^S16)) = v0; + *(s16 *)(mp3data+((outPtr+4)^S16)) = v18; + outPtr+=4; + addptr -= 0x50; + } + + int tmp = outPtr; + s32 hi0 = mult6; + s32 hi1 = mult4; + s32 v; + + hi0 = (int)hi0 >> 0x10; + hi1 = (int)hi1 >> 0x10; + for (i = 0; i < 8; i++) { + // v0 + v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0); + if (v > 32767) v = 32767; else if (v < -32767) v = -32767; + *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v; + // v17 + v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0); + if (v > 32767) v = 32767; else if (v < -32767) v = -32767; + *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v; + // v2 + v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1); + if (v > 32767) v = 32767; else if (v < -32767) v = -32767; + *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v; + // v4 + v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1); + if (v > 32767) v = 32767; else if (v < -32767) v = -32767; + *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v; + tmp += 2; + } +} + -- 2.39.5