RSP HLE plugin. Compile and run on the OpenPandora
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / ucode3mp3.cpp
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus-rsp-hle - ucode3mp3.cpp                                   *
 *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
 *   Copyright (C) 2009 Richard Goedeken                                   *
 *   Copyright (C) 2002 Hacktarux                                          *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23 # include <string.h>
24 # include <stdio.h>
25
26 extern "C" {
27   #include "hle.h"
28   #include "alist_internal.h"
29 }
30
31 static const u16 DeWindowLUT [0x420] = {
32     0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
33     0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
34     0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
35     0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
36     0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
37     0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
38     0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
39     0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
40     0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
41     0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
42     0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
43     0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
44     0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
45     0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
46     0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
47     0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
48     0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
49     0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
50     0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
51     0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
52     0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
53     0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
54     0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
55     0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
56     0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
57     0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
58     0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
59     0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
60     0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
61     0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
62     0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
63     0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
64     0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
65     0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
66     0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
67     0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
68     0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
69     0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
70     0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
71     0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
72     0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
73     0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
74     0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
75     0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
76     0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
77     0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
78     0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
79     0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
80     0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
81     0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
82     0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
83     0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
84     0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
85     0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
86     0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
87     0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
88     0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
89     0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
90     0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
91     0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
92     0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
93     0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
94     0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
95     0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
96     0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
97     0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
98     0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
99     0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
100     0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
101     0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
102     0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
103     0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
104     0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
105     0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
106     0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
107     0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
108     0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
109     0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
110     0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
111     0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
112     0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
113     0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003,
114     0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
115     0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004,
116     0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
117     0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
118     0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
119     0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
120     0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
121     0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004,
122     0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
123     0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005,
124     0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
125     0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005,
126     0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
127     0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006,
128     0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
129     0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
130     0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
131     0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
132     0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
133     0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006,
134     0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
135     0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007,
136     0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
137     0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
138     0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
139     0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
140     0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
141     0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007,
142     0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
143     0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008,
144     0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
145     0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0008,
146     0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
147     0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0009,
148     0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
149     0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x0009,
150     0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
151     0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x000A,
152     0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
153     0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000A,
154     0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
155     0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000B,
156     0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
157     0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000B,
158     0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
159     0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000D,
160     0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
161     0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x000D,
162     0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
163     0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x0000
164 };
165
166 //static u16 myVector[32][8];
167
168 static u8 mp3data[0x1000];
169
170 static s32 v[32];
171
172 static void MP3AB0 () {
173     // Part 2 - 100% Accurate
174     const u16 LUT2[8] = { 0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4, 
175                           0x1916, 0x4A50, 0xA268, 0x78AE };
176     const u16 LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A };
177     int i;
178
179     for (i = 0; i < 8; i++) {
180         v[16+i] = v[0+i] + v[8+i];
181         v[24+i] = ((v[0+i] - v[8+i]) * LUT2[i]) >> 0x10;
182     }
183
184     // Part 3: 4-wide butterflies
185
186     for (i=0; i < 4; i++) {
187         v[0+i]  = v[16+i] + v[20+i];
188         v[4+i]  = ((v[16+i] - v[20+i]) * LUT3[i]) >> 0x10;
189
190         v[8+i]  = v[24+i] + v[28+i];
191         v[12+i] = ((v[24+i] - v[28+i]) * LUT3[i]) >> 0x10;
192     }
193                 
194     // Part 4: 2-wide butterflies - 100% Accurate
195
196     for (i = 0; i < 16; i+=4) {
197         v[16+i] = v[0+i] + v[2+i];
198         v[18+i] = ((v[0+i] - v[2+i]) * 0xEC84) >> 0x10;
199
200         v[17+i] = v[1+i] + v[3+i];
201         v[19+i] = ((v[1+i] - v[3+i]) * 0x61F8) >> 0x10;
202     }
203 }
204
205 static void InnerLoop ();
206
207 static u32 inPtr, outPtr;
208
209 static u32 t6;// = 0x08A0; // I think these are temporary storage buffers
210 static u32 t5;// = 0x0AC0;
211 static u32 t4;// = (inst1 & 0x1E);
212
213 void MP3 (u32 inst1, u32 inst2) {
214     // Initialization Code
215     u32 readPtr; // s5
216     u32 writePtr; // s6
217     //u32 Count = 0x0480; // s4
218     u32 tmp;
219     //u32 inPtr, outPtr;
220
221     t6 = 0x08A0; // I think these are temporary storage buffers
222     t5 = 0x0AC0;
223     t4 = (inst1 & 0x1E);
224
225     writePtr = inst2 & 0xFFFFFF;
226     readPtr  = writePtr;
227     memcpy (mp3data+0xCE8, rsp.RDRAM+readPtr, 8); // Just do that for efficiency... may remove and use directly later anyway
228     readPtr += 8; // This must be a header byte or whatnot
229
230     for (int cnt = 0; cnt < 0x480; cnt += 0x180) {
231         memcpy (mp3data+0xCF0, rsp.RDRAM+readPtr, 0x180); // DMA: 0xCF0 <- RDRAM[s5] : 0x180
232         inPtr  = 0xCF0; // s7
233         outPtr = 0xE70; // s3
234 // --------------- Inner Loop Start --------------------
235         for (int cnt2 = 0; cnt2 < 0x180; cnt2 += 0x40) {
236             t6 &= 0xFFE0;
237             t5 &= 0xFFE0;
238             t6 |= t4;
239             t5 |= t4;
240             InnerLoop ();
241             t4 = (t4-2)&0x1E;
242             tmp = t6;
243             t6 = t5;
244             t5 = tmp;
245             //outPtr += 0x40;
246             inPtr += 0x40;
247         }
248 // --------------- Inner Loop End --------------------
249         memcpy (rsp.RDRAM+writePtr, mp3data+0xe70, 0x180);
250         writePtr += 0x180;
251         readPtr  += 0x180;
252     }
253 }
254
255
256
257 static void InnerLoop () {
258                 // Part 1: 100% Accurate
259
260                 int i;
261                 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
262                 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
263                 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
264                 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
265
266                 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
267                 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
268                 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
269                 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
270
271                 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
272                 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
273                 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
274                 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
275
276                 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
277                 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
278                 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
279                 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
280
281                 // Part 2-4
282
283                 MP3AB0 ();
284
285                 // Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!!
286
287                 u32 t0 = t6 + 0x100;
288                 u32 t1 = t6 + 0x200;
289                 u32 t2 = t5 + 0x100;
290                 u32 t3 = t5 + 0x200;
291                 /*RSP_GPR[0x8].W = t0;
292                 RSP_GPR[0x9].W = t1;
293                 RSP_GPR[0xA].W = t2;
294                 RSP_GPR[0xB].W = t3;
295
296                 RSP_Vect[0].DW[1] = 0xB504A57E00016A09;
297                 RSP_Vect[0].DW[0] = 0x0002D4130005A827;
298 */
299
300                 // 0x13A8
301                 v[1] = 0;
302                 v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10;
303
304                 v[16] = -v[16] -v[17];
305                 v[2] = v[18] + v[19];
306                 // ** Store v[11] -> (T6 + 0)**
307                 *(s16 *)(mp3data+((t6+(short)0x0))) = (short)v[11];
308                 
309                 
310                 v[11] = -v[11];
311                 // ** Store v[16] -> (T3 + 0)**
312                 *(s16 *)(mp3data+((t3+(short)0x0))) = (short)v[16];
313                 // ** Store v[11] -> (T5 + 0)**
314                 *(s16 *)(mp3data+((t5+(short)0x0))) = (short)v[11];
315                 // 0x13E8 - Verified....
316                 v[2] = -v[2];
317                 // ** Store v[2] -> (T2 + 0)**
318                 *(s16 *)(mp3data+((t2+(short)0x0))) = (short)v[2];
319                 v[3]  = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2];
320                 // ** Store v[3] -> (T0 + 0)**
321                 *(s16 *)(mp3data+((t0+(short)0x0))) = (short)v[3];
322                 // 0x1400 - Verified
323                 v[4] = -v[20] -v[21];
324                 v[6] = v[22] + v[23];
325                 v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10;
326                 // ** Store v[4] -> (T3 + 0xFF80)
327                 *(s16 *)(mp3data+((t3+(short)0xFF80))) = (short)v[4];
328                 v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10;
329                 v[5] = v[5] - v[4];
330                 v[7] = v[7] - v[5];
331                 v[6] = v[6] + v[6];
332                 v[5] = v[5] - v[6];
333                 v[4] = -v[4] - v[6];
334                 // *** Store v[7] -> (T1 + 0xFF80)
335                 *(s16 *)(mp3data+((t1+(short)0xFF80))) = (short)v[7];
336                 // *** Store v[4] -> (T2 + 0xFF80)
337                 *(s16 *)(mp3data+((t2+(short)0xFF80))) = (short)v[4];
338                 // *** Store v[5] -> (T0 + 0xFF80)
339                 *(s16 *)(mp3data+((t0+(short)0xFF80))) = (short)v[5];
340                 v[8] = v[24] + v[25];
341
342
343                 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
344                 v[2] = v[8] + v[9];
345                 v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10;
346                 v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10;
347
348                 v[10] = v[26] + v[27]; v[10] = v[10] + v[10];
349                 v[12] = v[28] + v[29]; v[12] = v[12] + v[12];
350                 v[14] = v[30] + v[31];
351                 v[3] = v[8] + v[10];
352                 v[14] = v[14] + v[14];
353                 v[13] = (v[13] - v[2]) + v[12];
354                 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]);
355                 v[14] = -(v[14] + v[14]) + v[3];
356                 v[17] = v[13] - v[10];
357                 v[9] = v[9] + v[14];
358                 // ** Store v[9] -> (T6 + 0x40)
359                 *(s16 *)(mp3data+((t6+(short)0x40))) = (short)v[9];
360                 v[11] = v[11] - v[13];
361                 // ** Store v[17] -> (T0 + 0xFFC0)
362                 *(s16 *)(mp3data+((t0+(short)0xFFC0))) = (short)v[17];
363                 v[12] = v[8] - v[12];
364                 // ** Store v[11] -> (T0 + 0x40)
365                 *(s16 *)(mp3data+((t0+(short)0x40))) = (short)v[11];
366                 v[8] = -v[8];
367                 // ** Store v[15] -> (T1 + 0xFFC0)
368                 *(s16 *)(mp3data+((t1+(short)0xFFC0))) = (short)v[15];
369                 v[10] = -v[10] -v[12];
370                 // ** Store v[12] -> (T2 + 0x40)
371                 *(s16 *)(mp3data+((t2+(short)0x40))) = (short)v[12];
372                 // ** Store v[8] -> (T3 + 0xFFC0)
373                 *(s16 *)(mp3data+((t3+(short)0xFFC0))) = (short)v[8];
374                 // ** Store v[14] -> (T5 + 0x40)
375                 *(s16 *)(mp3data+((t5+(short)0x40))) = (short)v[14];
376                 // ** Store v[10] -> (T2 + 0xFFC0)
377                 *(s16 *)(mp3data+((t2+(short)0xFFC0))) = (short)v[10];
378                 // 0x14FC - Verified...
379
380                 // Part 6 - 100% Accurate
381
382                 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
383                 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
384                 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
385                 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
386
387                 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
388                 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
389                 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
390                 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
391
392                 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
393                 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
394                 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
395                 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
396
397                 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
398                 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
399                 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
400                 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
401
402                 //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
403                 const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
404                                        0xBDAE, 0xCDA0, 0xE76C, 0xDB94,
405                                        0x1920, 0x4B20, 0xAC7C, 0x7C68,
406                                        0xABEC, 0x9880, 0xDAE8, 0x839C };
407                 for (i = 0; i < 16; i++) {
408                     v[0+i] = (v[0+i] * LUT6[i]) >> 0x10;
409                 }
410                 v[0] = v[0] + v[0]; v[1] = v[1] + v[1];
411                 v[2] = v[2] + v[2]; v[3] = v[3] + v[3]; v[4] = v[4] + v[4];
412                 v[5] = v[5] + v[5]; v[6] = v[6] + v[6]; v[7] = v[7] + v[7];
413                 v[12] = v[12] + v[12]; v[13] = v[13] + v[13]; v[15] = v[15] + v[15];
414                 
415                 MP3AB0 ();
416
417                 // Part 7: - 100% Accurate + SSV - Unoptimized
418
419                 v[0] = ( v[17] + v[16] ) >> 1;
420                 v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10;
421                 v[2] = -v[18] -v[19];
422                 v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10;
423                 v[4] = v[20] + v[21] + v[0];
424                 v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1];
425                 v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2];
426                 v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3];
427                 // 0x16A8
428                 // Save v[0] -> (T3 + 0xFFE0)
429                 *(s16 *)(mp3data+((t3+(short)0xFFE0))) = (short)-v[0];
430                 v[8] = v[24] + v[25];
431                 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
432                 v[10] = ((v[26] + v[27]) << 1) + v[8];
433                 v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9];
434                 v[12] = v[4] - ((v[28] + v[29]) << 1);
435                 // ** Store v12 -> (T2 + 0x20)
436                 *(s16 *)(mp3data+((t2+(short)0x20))) = (short)v[12];
437                 v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5];
438                 v[14] = v[30] + v[31];
439                 v[14] = v[14] + v[14];
440                 v[14] = v[14] + v[14];
441                 v[14] = v[6] - v[14];
442                 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7];
443                 // Store v14 -> (T5 + 0x20)
444                 *(s16 *)(mp3data+((t5+(short)0x20))) = (short)v[14];
445                 v[14] = v[14] + v[1];
446                 // Store v[14] -> (T6 + 0x20)
447                 *(s16 *)(mp3data+((t6+(short)0x20))) = (short)v[14];
448                 // Store v[15] -> (T1 + 0xFFE0)
449                 *(s16 *)(mp3data+((t1+(short)0xFFE0))) = (short)v[15];
450                 v[9] = v[9] + v[10];
451                 v[1] = v[1] + v[6];
452                 v[6] = v[10] - v[6];
453                 v[1] = v[9] - v[1];
454                 // Store v[6] -> (T5 + 0x60)
455                 *(s16 *)(mp3data+((t5+(short)0x60))) = (short)v[6];
456                 v[10] = v[10] + v[2];
457                 v[10] = v[4] - v[10];
458                 // Store v[10] -> (T2 + 0xFFA0)
459                 *(s16 *)(mp3data+((t2+(short)0xFFA0))) = (short)v[10];
460                 v[12] = v[2] - v[12];
461                 // Store v[12] -> (T2 + 0xFFE0)
462                 *(s16 *)(mp3data+((t2+(short)0xFFE0))) = (short)v[12];
463                 v[5] = v[4] + v[5];
464                 v[4] = v[8] - v[4];
465                 // Store v[4] -> (T2 + 0x60)
466                 *(s16 *)(mp3data+((t2+(short)0x60))) = (short)v[4];
467                 v[0] = v[0] - v[8];
468                 // Store v[0] -> (T3 + 0xFFA0)
469                 *(s16 *)(mp3data+((t3+(short)0xFFA0))) = (short)v[0];
470                 v[7] = v[7] - v[11];
471                 // Store v[7] -> (T1 + 0xFFA0)
472                 *(s16 *)(mp3data+((t1+(short)0xFFA0))) = (short)v[7];
473                 v[11] = v[11] - v[3];
474                 // Store v[1] -> (T6 + 0x60)
475                 *(s16 *)(mp3data+((t6+(short)0x60))) = (short)v[1];
476                 v[11] = v[11] - v[5];
477                 // Store v[11] -> (T0 + 0x60)
478                 *(s16 *)(mp3data+((t0+(short)0x60))) = (short)v[11];
479                 v[3] = v[3] - v[13];
480                 // Store v[3] -> (T0 + 0x20)
481                 *(s16 *)(mp3data+((t0+(short)0x20))) = (short)v[3];
482                 v[13] = v[13] + v[2];
483                 // Store v[13] -> (T0 + 0xFFE0)
484                 *(s16 *)(mp3data+((t0+(short)0xFFE0))) = (short)v[13];
485                 //v[2] = ;
486                 v[2] = (v[5] - v[2]) - v[9];
487                 // Store v[2] -> (T0 + 0xFFA0)
488                 *(s16 *)(mp3data+((t0+(short)0xFFA0))) = (short)v[2];
489                 // 0x7A8 - Verified...
490
491                 // Step 8 - Dewindowing
492     
493                 //u64 *DW = (u64 *)&DeWindowLUT[0x10-(t4>>1)];
494                 u32 offset = 0x10-(t4>>1);
495
496                 u32 addptr = t6 & 0xFFE0;
497                 offset = 0x10-(t4>>1);
498
499                 s32 v2=0, v4=0, v6=0, v8=0;
500                 //s32 z2=0, z4=0, z6=0, z8=0;
501
502                 offset = 0x10-(t4>>1);// + x*0x40;
503                 int x;
504                 for (x = 0; x < 8; x++) {
505                     v2 = v4 = v6 = v8 = 0;
506
507                     //addptr = t1;
508                 
509                     for (i = 7; i >= 0; i--) {
510                         v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
511                         v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
512                         v6 += ((int)*(s16 *)(mp3data+(addptr)+0x20) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
513                         v8 += ((int)*(s16 *)(mp3data+(addptr)+0x30) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
514                         addptr+=2; offset++;
515                     }
516                     s32 v0  = v2 + v4;
517                     s32 v18 = v6 + v8;
518                     //Clamp(v0);
519                     //Clamp(v18);
520                     // clamp???
521                     *(s16 *)(mp3data+(outPtr^S16)) = v0;
522                     *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
523                     outPtr+=4;
524                     addptr += 0x30;
525                     offset += 0x38;
526                 }
527
528                 offset = 0x10-(t4>>1) + 8*0x40;
529                 v2 = v4 = 0;
530                 for (i = 0; i < 4; i++) {
531                     v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
532                     v2 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
533                     addptr+=2; offset++;
534                     v4 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
535                     v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
536                     addptr+=2; offset++;
537                 }
538                 s32 mult6 = *(s32 *)(mp3data+0xCE8);
539                 s32 mult4 = *(s32 *)(mp3data+0xCEC);
540                 if (t4 & 0x2) {
541                     v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
542                     *(s16 *)(mp3data+(outPtr^S16)) = v2;
543                 } else {
544                     v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
545                     *(s16 *)(mp3data+(outPtr^S16)) = v4;
546                     mult4 = *(u32 *)(mp3data+0xCE8);
547                 }
548                 addptr -= 0x50;
549
550                 for (x = 0; x < 8; x++) {
551                     v2 = v4 = v6 = v8 = 0;
552
553                     offset = (0x22F-(t4>>1) + x*0x40);
554                 
555                     for (i = 0; i < 4; i++) {
556                         v2 += ((int)*(s16 *)(mp3data+(addptr    )+0x20) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
557                         v2 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x20) * (short)DeWindowLUT[offset+0x01] + 0x4000) >> 0xF;
558                         v4 += ((int)*(s16 *)(mp3data+(addptr    )+0x30) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
559                         v4 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x30) * (short)DeWindowLUT[offset+0x09] + 0x4000) >> 0xF;
560                         v6 += ((int)*(s16 *)(mp3data+(addptr    )+0x00) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
561                         v6 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x00) * (short)DeWindowLUT[offset+0x21] + 0x4000) >> 0xF;
562                         v8 += ((int)*(s16 *)(mp3data+(addptr    )+0x10) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
563                         v8 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x10) * (short)DeWindowLUT[offset+0x29] + 0x4000) >> 0xF;
564                         addptr+=4; offset+=2;
565                     }
566                     s32 v0  = v2 + v4;
567                     s32 v18 = v6 + v8;
568                     //Clamp(v0);
569                     //Clamp(v18);
570                     // clamp???
571                     *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
572                     *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
573                     outPtr+=4;
574                     addptr -= 0x50;
575                 }
576
577                 int tmp = outPtr;
578                 s32 hi0 = mult6;
579                 s32 hi1 = mult4;
580                 s32 v;
581
582                 hi0 = (int)hi0 >> 0x10;
583                 hi1 = (int)hi1 >> 0x10;
584                 for (i = 0; i < 8; i++) {
585                     // v0
586                     v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
587                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
588                     *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
589                     // v17
590                     v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
591                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
592                     *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
593                     // v2
594                     v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
595                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
596                     *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
597                     // v4
598                     v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
599                     if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
600                     *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;
601                     tmp += 2;
602                 }
603 }
604