CORE: Few fixes from mupen64plus-ae
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / ucode3mp3.cpp
CommitLineData
d9e74a6f 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - ucode3mp3.cpp *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2009 Richard Goedeken *
5 * Copyright (C) 2002 Hacktarux *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23# include <string.h>
24# include <stdio.h>
25
26extern "C" {
27 #include "hle.h"
28 #include "alist_internal.h"
29}
30
// Dewindowing coefficient table: 0x420 16-bit entries.
// InnerLoop ("Step 8 - Dewindowing") indexes it with a running `offset` and
// reinterprets each entry as signed via (short)DeWindowLUT[offset + k].
31static const u16 DeWindowLUT [0x420] = {
32 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
33 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
34 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
35 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
36 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
37 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
38 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
39 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
40 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
41 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
42 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
43 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
44 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
45 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
46 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
47 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
48 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
49 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
50 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
51 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
52 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
53 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
54 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
55 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
56 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
57 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
58 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
59 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
60 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
61 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
62 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
63 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
64 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
65 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
66 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
67 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
68 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
69 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
70 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
71 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
72 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
73 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
74 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
75 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
76 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
77 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
78 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
79 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
80 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
81 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
82 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
83 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
84 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
85 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
86 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
87 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
88 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
89 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
90 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
91 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
92 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
93 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
94 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
95 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
96 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
97 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
98 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
99 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
100 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
101 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
102 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
103 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
104 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
105 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
106 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
107 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
108 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
109 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
110 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
111 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
112 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
113 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003,
114 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
115 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004,
116 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
117 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
118 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
119 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
120 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
121 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004,
122 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
123 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005,
124 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
125 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005,
126 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
127 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006,
128 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
129 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
130 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
131 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
132 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
133 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006,
134 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
135 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007,
136 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
137 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
138 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
139 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
140 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
141 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007,
142 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
143 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008,
144 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
145 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0008,
146 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
147 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0009,
148 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
149 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x0009,
150 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
151 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x000A,
152 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
153 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000A,
154 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
155 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000B,
156 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
157 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000B,
158 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
159 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000D,
160 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
161 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x000D,
162 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
163 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x0000
164};
165
166//static u16 myVector[32][8];
167
// Working buffer addressed by byte offset. In this file MP3() copies 8 bytes
// from RDRAM to +0xCE8 and each 0x180-byte input chunk to +0xCF0, and copies
// 0x180 bytes back out from +0xE70; InnerLoop() also stores intermediate
// 16-bit values at offsets derived from t5/t6.
168static u8 mp3data[0x1000];
169
// 32-entry scratch vector shared by InnerLoop() and MP3AB0(); both read and
// overwrite it in place.
170static s32 v[32];
171
172static void MP3AB0 () {
173 // Part 2 - 100% Accurate
174 const u16 LUT2[8] = { 0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4,
175 0x1916, 0x4A50, 0xA268, 0x78AE };
176 const u16 LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A };
177 int i;
178
179 for (i = 0; i < 8; i++) {
180 v[16+i] = v[0+i] + v[8+i];
181 v[24+i] = ((v[0+i] - v[8+i]) * LUT2[i]) >> 0x10;
182 }
183
184 // Part 3: 4-wide butterflies
185
186 for (i=0; i < 4; i++) {
187 v[0+i] = v[16+i] + v[20+i];
188 v[4+i] = ((v[16+i] - v[20+i]) * LUT3[i]) >> 0x10;
189
190 v[8+i] = v[24+i] + v[28+i];
191 v[12+i] = ((v[24+i] - v[28+i]) * LUT3[i]) >> 0x10;
192 }
193
194 // Part 4: 2-wide butterflies - 100% Accurate
195
196 for (i = 0; i < 16; i+=4) {
197 v[16+i] = v[0+i] + v[2+i];
198 v[18+i] = ((v[0+i] - v[2+i]) * 0xEC84) >> 0x10;
199
200 v[17+i] = v[1+i] + v[3+i];
201 v[19+i] = ((v[1+i] - v[3+i]) * 0x61F8) >> 0x10;
202 }
203}
204
// Forward declaration: MP3() below calls InnerLoop() before its definition.
205static void InnerLoop ();
206
// Byte offsets into mp3data. MP3() resets them to 0xCF0 / 0xE70 for every
// 0x180-byte chunk; MP3() advances inPtr by 0x40 per InnerLoop() call and
// InnerLoop() itself advances outPtr.
207static u32 inPtr, outPtr;
208
// State shared between MP3() and InnerLoop(). MP3() initialises t6/t5 to
// 0x08A0/0x0AC0, ORs t4 into their low bits, and swaps them after every
// InnerLoop() call; t4 starts as (inst1 & 0x1E) and is stepped with
// (t4-2)&0x1E after each call.
209static u32 t6;// = 0x08A0; // I think these are temporary storage buffers
210static u32 t5;// = 0x0AC0;
211static u32 t4;// = (inst1 & 0x1E);
212
213void MP3 (u32 inst1, u32 inst2) {
214 // Initialization Code
215 u32 readPtr; // s5
216 u32 writePtr; // s6
217 //u32 Count = 0x0480; // s4
218 u32 tmp;
219 //u32 inPtr, outPtr;
220
221 t6 = 0x08A0; // I think these are temporary storage buffers
222 t5 = 0x0AC0;
223 t4 = (inst1 & 0x1E);
224
225 writePtr = inst2 & 0xFFFFFF;
226 readPtr = writePtr;
227 memcpy (mp3data+0xCE8, rsp.RDRAM+readPtr, 8); // Just do that for efficiency... may remove and use directly later anyway
228 readPtr += 8; // This must be a header byte or whatnot
229
230 for (int cnt = 0; cnt < 0x480; cnt += 0x180) {
231 memcpy (mp3data+0xCF0, rsp.RDRAM+readPtr, 0x180); // DMA: 0xCF0 <- RDRAM[s5] : 0x180
232 inPtr = 0xCF0; // s7
233 outPtr = 0xE70; // s3
234// --------------- Inner Loop Start --------------------
235 for (int cnt2 = 0; cnt2 < 0x180; cnt2 += 0x40) {
236 t6 &= 0xFFE0;
237 t5 &= 0xFFE0;
238 t6 |= t4;
239 t5 |= t4;
240 InnerLoop ();
241 t4 = (t4-2)&0x1E;
242 tmp = t6;
243 t6 = t5;
244 t5 = tmp;
245 //outPtr += 0x40;
246 inPtr += 0x40;
247 }
248// --------------- Inner Loop End --------------------
249 memcpy (rsp.RDRAM+writePtr, mp3data+0xe70, 0x180);
250 writePtr += 0x180;
251 readPtr += 0x180;
252 }
253}
254
255
256
257static void InnerLoop () {
258 // Part 1: 100% Accurate
259
260 int i;
261 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
262 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
263 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
264 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
265
266 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
267 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
268 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
269 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
270
271 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
272 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
273 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
274 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
275
276 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
277 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
278 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
279 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
280
281 // Part 2-4
282
283 MP3AB0 ();
284
285 // Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!!
286
287 u32 t0 = t6 + 0x100;
288 u32 t1 = t6 + 0x200;
289 u32 t2 = t5 + 0x100;
290 u32 t3 = t5 + 0x200;
291 /*RSP_GPR[0x8].W = t0;
292 RSP_GPR[0x9].W = t1;
293 RSP_GPR[0xA].W = t2;
294 RSP_GPR[0xB].W = t3;
295
296 RSP_Vect[0].DW[1] = 0xB504A57E00016A09;
297 RSP_Vect[0].DW[0] = 0x0002D4130005A827;
298*/
299
300 // 0x13A8
301 v[1] = 0;
302 v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10;
303
304 v[16] = -v[16] -v[17];
305 v[2] = v[18] + v[19];
306 // ** Store v[11] -> (T6 + 0)**
307 *(s16 *)(mp3data+((t6+(short)0x0))) = (short)v[11];
308
309
310 v[11] = -v[11];
311 // ** Store v[16] -> (T3 + 0)**
312 *(s16 *)(mp3data+((t3+(short)0x0))) = (short)v[16];
313 // ** Store v[11] -> (T5 + 0)**
314 *(s16 *)(mp3data+((t5+(short)0x0))) = (short)v[11];
315 // 0x13E8 - Verified....
316 v[2] = -v[2];
317 // ** Store v[2] -> (T2 + 0)**
318 *(s16 *)(mp3data+((t2+(short)0x0))) = (short)v[2];
319 v[3] = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2];
320 // ** Store v[3] -> (T0 + 0)**
321 *(s16 *)(mp3data+((t0+(short)0x0))) = (short)v[3];
322 // 0x1400 - Verified
323 v[4] = -v[20] -v[21];
324 v[6] = v[22] + v[23];
325 v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10;
326 // ** Store v[4] -> (T3 + 0xFF80)
327 *(s16 *)(mp3data+((t3+(short)0xFF80))) = (short)v[4];
328 v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10;
329 v[5] = v[5] - v[4];
330 v[7] = v[7] - v[5];
331 v[6] = v[6] + v[6];
332 v[5] = v[5] - v[6];
333 v[4] = -v[4] - v[6];
334 // *** Store v[7] -> (T1 + 0xFF80)
335 *(s16 *)(mp3data+((t1+(short)0xFF80))) = (short)v[7];
336 // *** Store v[4] -> (T2 + 0xFF80)
337 *(s16 *)(mp3data+((t2+(short)0xFF80))) = (short)v[4];
338 // *** Store v[5] -> (T0 + 0xFF80)
339 *(s16 *)(mp3data+((t0+(short)0xFF80))) = (short)v[5];
340 v[8] = v[24] + v[25];
341
342
343 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
344 v[2] = v[8] + v[9];
345 v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10;
346 v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10;
347
348 v[10] = v[26] + v[27]; v[10] = v[10] + v[10];
349 v[12] = v[28] + v[29]; v[12] = v[12] + v[12];
350 v[14] = v[30] + v[31];
351 v[3] = v[8] + v[10];
352 v[14] = v[14] + v[14];
353 v[13] = (v[13] - v[2]) + v[12];
354 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]);
355 v[14] = -(v[14] + v[14]) + v[3];
356 v[17] = v[13] - v[10];
357 v[9] = v[9] + v[14];
358 // ** Store v[9] -> (T6 + 0x40)
359 *(s16 *)(mp3data+((t6+(short)0x40))) = (short)v[9];
360 v[11] = v[11] - v[13];
361 // ** Store v[17] -> (T0 + 0xFFC0)
362 *(s16 *)(mp3data+((t0+(short)0xFFC0))) = (short)v[17];
363 v[12] = v[8] - v[12];
364 // ** Store v[11] -> (T0 + 0x40)
365 *(s16 *)(mp3data+((t0+(short)0x40))) = (short)v[11];
366 v[8] = -v[8];
367 // ** Store v[15] -> (T1 + 0xFFC0)
368 *(s16 *)(mp3data+((t1+(short)0xFFC0))) = (short)v[15];
369 v[10] = -v[10] -v[12];
370 // ** Store v[12] -> (T2 + 0x40)
371 *(s16 *)(mp3data+((t2+(short)0x40))) = (short)v[12];
372 // ** Store v[8] -> (T3 + 0xFFC0)
373 *(s16 *)(mp3data+((t3+(short)0xFFC0))) = (short)v[8];
374 // ** Store v[14] -> (T5 + 0x40)
375 *(s16 *)(mp3data+((t5+(short)0x40))) = (short)v[14];
376 // ** Store v[10] -> (T2 + 0xFFC0)
377 *(s16 *)(mp3data+((t2+(short)0xFFC0))) = (short)v[10];
378 // 0x14FC - Verified...
379
380 // Part 6 - 100% Accurate
381
382 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
383 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
384 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
385 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
386
387 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
388 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
389 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
390 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
391
392 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
393 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
394 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
395 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
396
397 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
398 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
399 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
400 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
401
402 //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
403 const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
404 0xBDAE, 0xCDA0, 0xE76C, 0xDB94,
405 0x1920, 0x4B20, 0xAC7C, 0x7C68,
406 0xABEC, 0x9880, 0xDAE8, 0x839C };
407 for (i = 0; i < 16; i++) {
408 v[0+i] = (v[0+i] * LUT6[i]) >> 0x10;
409 }
410 v[0] = v[0] + v[0]; v[1] = v[1] + v[1];
411 v[2] = v[2] + v[2]; v[3] = v[3] + v[3]; v[4] = v[4] + v[4];
412 v[5] = v[5] + v[5]; v[6] = v[6] + v[6]; v[7] = v[7] + v[7];
413 v[12] = v[12] + v[12]; v[13] = v[13] + v[13]; v[15] = v[15] + v[15];
414
415 MP3AB0 ();
416
417 // Part 7: - 100% Accurate + SSV - Unoptimized
418
419 v[0] = ( v[17] + v[16] ) >> 1;
420 v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10;
421 v[2] = -v[18] -v[19];
422 v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10;
423 v[4] = v[20] + v[21] + v[0];
424 v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1];
425 v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2];
426 v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3];
427 // 0x16A8
428 // Save v[0] -> (T3 + 0xFFE0)
429 *(s16 *)(mp3data+((t3+(short)0xFFE0))) = (short)-v[0];
430 v[8] = v[24] + v[25];
431 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
432 v[10] = ((v[26] + v[27]) << 1) + v[8];
433 v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9];
434 v[12] = v[4] - ((v[28] + v[29]) << 1);
435 // ** Store v12 -> (T2 + 0x20)
436 *(s16 *)(mp3data+((t2+(short)0x20))) = (short)v[12];
437 v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5];
438 v[14] = v[30] + v[31];
439 v[14] = v[14] + v[14];
440 v[14] = v[14] + v[14];
441 v[14] = v[6] - v[14];
442 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7];
443 // Store v14 -> (T5 + 0x20)
444 *(s16 *)(mp3data+((t5+(short)0x20))) = (short)v[14];
445 v[14] = v[14] + v[1];
446 // Store v[14] -> (T6 + 0x20)
447 *(s16 *)(mp3data+((t6+(short)0x20))) = (short)v[14];
448 // Store v[15] -> (T1 + 0xFFE0)
449 *(s16 *)(mp3data+((t1+(short)0xFFE0))) = (short)v[15];
450 v[9] = v[9] + v[10];
451 v[1] = v[1] + v[6];
452 v[6] = v[10] - v[6];
453 v[1] = v[9] - v[1];
454 // Store v[6] -> (T5 + 0x60)
455 *(s16 *)(mp3data+((t5+(short)0x60))) = (short)v[6];
456 v[10] = v[10] + v[2];
457 v[10] = v[4] - v[10];
458 // Store v[10] -> (T2 + 0xFFA0)
459 *(s16 *)(mp3data+((t2+(short)0xFFA0))) = (short)v[10];
460 v[12] = v[2] - v[12];
461 // Store v[12] -> (T2 + 0xFFE0)
462 *(s16 *)(mp3data+((t2+(short)0xFFE0))) = (short)v[12];
463 v[5] = v[4] + v[5];
464 v[4] = v[8] - v[4];
465 // Store v[4] -> (T2 + 0x60)
466 *(s16 *)(mp3data+((t2+(short)0x60))) = (short)v[4];
467 v[0] = v[0] - v[8];
468 // Store v[0] -> (T3 + 0xFFA0)
469 *(s16 *)(mp3data+((t3+(short)0xFFA0))) = (short)v[0];
470 v[7] = v[7] - v[11];
471 // Store v[7] -> (T1 + 0xFFA0)
472 *(s16 *)(mp3data+((t1+(short)0xFFA0))) = (short)v[7];
473 v[11] = v[11] - v[3];
474 // Store v[1] -> (T6 + 0x60)
475 *(s16 *)(mp3data+((t6+(short)0x60))) = (short)v[1];
476 v[11] = v[11] - v[5];
477 // Store v[11] -> (T0 + 0x60)
478 *(s16 *)(mp3data+((t0+(short)0x60))) = (short)v[11];
479 v[3] = v[3] - v[13];
480 // Store v[3] -> (T0 + 0x20)
481 *(s16 *)(mp3data+((t0+(short)0x20))) = (short)v[3];
482 v[13] = v[13] + v[2];
483 // Store v[13] -> (T0 + 0xFFE0)
484 *(s16 *)(mp3data+((t0+(short)0xFFE0))) = (short)v[13];
485 //v[2] = ;
486 v[2] = (v[5] - v[2]) - v[9];
487 // Store v[2] -> (T0 + 0xFFA0)
488 *(s16 *)(mp3data+((t0+(short)0xFFA0))) = (short)v[2];
489 // 0x7A8 - Verified...
490
491 // Step 8 - Dewindowing
492
493 //u64 *DW = (u64 *)&DeWindowLUT[0x10-(t4>>1)];
494 u32 offset = 0x10-(t4>>1);
495
496 u32 addptr = t6 & 0xFFE0;
497 offset = 0x10-(t4>>1);
498
499 s32 v2=0, v4=0, v6=0, v8=0;
500 //s32 z2=0, z4=0, z6=0, z8=0;
501
502 offset = 0x10-(t4>>1);// + x*0x40;
503 int x;
504 for (x = 0; x < 8; x++) {
505 v2 = v4 = v6 = v8 = 0;
506
507 //addptr = t1;
508
509 for (i = 7; i >= 0; i--) {
510 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
511 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
512 v6 += ((int)*(s16 *)(mp3data+(addptr)+0x20) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
513 v8 += ((int)*(s16 *)(mp3data+(addptr)+0x30) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
514 addptr+=2; offset++;
515 }
516 s32 v0 = v2 + v4;
517 s32 v18 = v6 + v8;
518 //Clamp(v0);
519 //Clamp(v18);
520 // clamp???
521 *(s16 *)(mp3data+(outPtr^S16)) = v0;
522 *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
523 outPtr+=4;
524 addptr += 0x30;
525 offset += 0x38;
526 }
527
528 offset = 0x10-(t4>>1) + 8*0x40;
529 v2 = v4 = 0;
530 for (i = 0; i < 4; i++) {
531 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
532 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
533 addptr+=2; offset++;
534 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
535 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
536 addptr+=2; offset++;
537 }
538 s32 mult6 = *(s32 *)(mp3data+0xCE8);
539 s32 mult4 = *(s32 *)(mp3data+0xCEC);
540 if (t4 & 0x2) {
541 v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
542 *(s16 *)(mp3data+(outPtr^S16)) = v2;
543 } else {
544 v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
545 *(s16 *)(mp3data+(outPtr^S16)) = v4;
546 mult4 = *(u32 *)(mp3data+0xCE8);
547 }
548 addptr -= 0x50;
549
550 for (x = 0; x < 8; x++) {
551 v2 = v4 = v6 = v8 = 0;
552
553 offset = (0x22F-(t4>>1) + x*0x40);
554
555 for (i = 0; i < 4; i++) {
556 v2 += ((int)*(s16 *)(mp3data+(addptr )+0x20) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
557 v2 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x20) * (short)DeWindowLUT[offset+0x01] + 0x4000) >> 0xF;
558 v4 += ((int)*(s16 *)(mp3data+(addptr )+0x30) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
559 v4 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x30) * (short)DeWindowLUT[offset+0x09] + 0x4000) >> 0xF;
560 v6 += ((int)*(s16 *)(mp3data+(addptr )+0x00) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
561 v6 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x00) * (short)DeWindowLUT[offset+0x21] + 0x4000) >> 0xF;
562 v8 += ((int)*(s16 *)(mp3data+(addptr )+0x10) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
563 v8 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x10) * (short)DeWindowLUT[offset+0x29] + 0x4000) >> 0xF;
564 addptr+=4; offset+=2;
565 }
566 s32 v0 = v2 + v4;
567 s32 v18 = v6 + v8;
568 //Clamp(v0);
569 //Clamp(v18);
570 // clamp???
571 *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
572 *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
573 outPtr+=4;
574 addptr -= 0x50;
575 }
576
577 int tmp = outPtr;
578 s32 hi0 = mult6;
579 s32 hi1 = mult4;
580 s32 v;
581
582 hi0 = (int)hi0 >> 0x10;
583 hi1 = (int)hi1 >> 0x10;
584 for (i = 0; i < 8; i++) {
585 // v0
586 v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
587 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
588 *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
589 // v17
590 v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
591 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
592 *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
593 // v2
594 v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
595 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
596 *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
597 // v4
598 v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
599 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
600 *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;
601 tmp += 2;
602 }
603}
604