1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - ucode3mp3.h *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2009 Richard Goedeken *
5 * Copyright (C) 2002 Hacktarux *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
28 #include "alist_internal.h"
// DeWindowLUT: 0x420 (1056) 16-bit entries, laid out below as 132 rows of 8.
// InnerLoop() indexes it as DeWindowLUT[offset + k], casts each entry to
// (short), multiplies it by a 16-bit sample read from mp3data, adds 0x4000 and
// shifts right by 0xF, so the entries are consumed as signed 16-bit
// coefficients in its "Dewindowing" step.
// Most 16-entry groups appear twice in succession; in the second half of the
// table the final entry of the repeated group frequently differs from the
// first copy.
// NOTE(review): the closing brace/semicolon of this initializer is not visible
// in this excerpt.
31 static const u16 DeWindowLUT [0x420] = {
32 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
33 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
34 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E,
35 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D,
36 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
37 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
38 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7,
39 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B,
40 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
41 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
42 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4,
43 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A,
44 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
45 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
46 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4,
47 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009,
48 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
49 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
50 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C,
51 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008,
52 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
53 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
54 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C,
55 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007,
56 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
57 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
58 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75,
59 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007,
60 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
61 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
62 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B,
63 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006,
64 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
65 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
66 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D,
67 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006,
68 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
69 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
70 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF,
71 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005,
72 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
73 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
74 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1,
75 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004,
76 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
77 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
78 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27,
79 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004,
80 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
81 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
82 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80,
83 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003,
84 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
85 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
86 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE,
87 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003,
88 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
89 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
90 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775,
91 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003,
92 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
93 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
94 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514,
95 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003,
96 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
97 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
98 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD,
99 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002,
100 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
101 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
102 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514,
103 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003,
104 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
105 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
106 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775,
107 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003,
108 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
109 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
110 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE,
111 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003,
112 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
113 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003,
114 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80,
115 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004,
116 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
117 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
118 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27,
119 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004,
120 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
121 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004,
122 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1,
123 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005,
124 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
125 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005,
126 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF,
127 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006,
128 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
129 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
130 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D,
131 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006,
132 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
133 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006,
134 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B,
135 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007,
136 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
137 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
138 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75,
139 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007,
140 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
141 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007,
142 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C,
143 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008,
144 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
145 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0008,
146 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C,
147 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0009,
148 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
149 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x0009,
150 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4,
151 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x000A,
152 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
153 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000A,
154 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4,
155 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000B,
156 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
157 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000B,
158 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7,
159 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000D,
160 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
161 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x000D,
162 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E,
163 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x0000
166 //static u16 myVector[32][8];
// Scratch buffer of 0x1000 bytes, addressed by byte offset throughout this
// file. Offsets used by the visible code:
//   0xCE8 - 8 bytes copied in from RDRAM by MP3(); InnerLoop() reads them
//           back as 32-bit values at 0xCE8 and 0xCEC.
//   0xCF0 - each 0x180-byte input chunk copied in from RDRAM by MP3().
//   0xE70 - 0x180 bytes of output that MP3() copies back out to RDRAM.
168 static u8 mp3data[0x1000];
// MP3AB0: three butterfly passes over the working array v[] (declared outside
// the lines visible here). Each pass stores the sum of two elements and a
// scaled difference, ((a - b) * coeff) >> 0x10, alternating between the lower
// half v[0..15] and the upper half v[16..31]:
//   Part 2: reads v[0..15],  writes v[16..31]; pairs 8 apart, coeffs LUT2[i]
//   Part 3: reads v[16..31], writes v[0..15];  pairs 4 apart, coeffs LUT3[i]
//   Part 4: reads v[0..15],  writes v[16..31]; pairs 2 apart, 0xEC84 / 0x61F8
// Takes no arguments and returns nothing; the only effect visible here is the
// update of v[].
// NOTE(review): the declaration of `i` and the closing braces of the loops and
// of the function are not visible in this excerpt.
172 static void MP3AB0 () {
173 // Part 2 - 100% Accurate
174 const u16 LUT2[8] = { 0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4,
175 0x1916, 0x4A50, 0xA268, 0x78AE };
176 const u16 LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A };
// Sums go to v[16..23], LUT2-scaled differences to v[24..31].
179 for (i = 0; i < 8; i++) {
180 v[16+i] = v[0+i] + v[8+i];
181 v[24+i] = ((v[0+i] - v[8+i]) * LUT2[i]) >> 0x10;
184 // Part 3: 4-wide butterflies
// The same LUT3 coefficients are applied to both the v[16..23] group and the
// v[24..31] group.
186 for (i=0; i < 4; i++) {
187 v[0+i] = v[16+i] + v[20+i];
188 v[4+i] = ((v[16+i] - v[20+i]) * LUT3[i]) >> 0x10;
190 v[8+i] = v[24+i] + v[28+i];
191 v[12+i] = ((v[24+i] - v[28+i]) * LUT3[i]) >> 0x10;
194 // Part 4: 2-wide butterflies - 100% Accurate
// i steps by 4 (0, 4, 8, 12); each step handles one group of four inputs.
196 for (i = 0; i < 16; i+=4) {
197 v[16+i] = v[0+i] + v[2+i];
198 v[18+i] = ((v[0+i] - v[2+i]) * 0xEC84) >> 0x10;
200 v[17+i] = v[1+i] + v[3+i];
201 v[19+i] = ((v[1+i] - v[3+i]) * 0x61F8) >> 0x10;
// Forward declaration; InnerLoop() is defined after MP3() further down.
205 static void InnerLoop ();
// Byte offsets into mp3data. InnerLoop() reads its 16-bit input samples
// relative to inPtr and stores results relative to outPtr. MP3() sets outPtr
// to 0xE70 before its inner loop; where inPtr is assigned is not visible in
// this excerpt.
207 static u32 inPtr, outPtr;
// State shared between MP3() and InnerLoop(): InnerLoop() uses t6 and t5 as
// base offsets for 16-bit stores into mp3data, and t4 (shifted right by one)
// to derive its DeWindowLUT index. The trailing commented-out initializers are
// the original author's notes; of these, only the t6 = 0x08A0 assignment in
// MP3() is visible in this excerpt.
209 static u32 t6;// = 0x08A0; // I think these are temporary storage buffers
210 static u32 t5;// = 0x0AC0;
211 static u32 t4;// = (inst1 & 0x1E);
// MP3: processes one command described by the two command words inst1 and
// inst2. Flow that is visible in this excerpt:
//   - t6 is set to 0x08A0.
//   - writePtr takes the low 24 bits of inst2 and is used as an offset into
//     rsp.RDRAM.
//   - 8 bytes at rsp.RDRAM+readPtr are copied to mp3data+0xCE8 (InnerLoop()
//     later reads them as 32-bit values at 0xCE8 and 0xCEC), then readPtr
//     advances by 8.
//   - The outer loop runs three times (cnt = 0, 0x180, 0x300). Each pass
//     copies 0x180 bytes of input to mp3data+0xCF0, resets outPtr to 0xE70 and
//     runs the inner loop six times (cnt2 = 0 .. 0x140 in steps of 0x40).
//   - After the inner loop, 0x180 bytes starting at mp3data+0xE70 are copied
//     back to rsp.RDRAM+writePtr (presumably once per outer pass; the closing
//     braces are not visible, so confirm against the full file).
// NOTE(review): several lines of this function are missing from this excerpt,
// including the declarations and initial values of readPtr and writePtr, any
// use of inst1, the body of the inner loop, and any per-pass pointer advances.
213 void MP3 (u32 inst1, u32 inst2) {
214 // Initialization Code
217 //u32 Count = 0x0480; // s4
221 t6 = 0x08A0; // I think these are temporary storage buffers
225 writePtr = inst2 & 0xFFFFFF;
227 memcpy (mp3data+0xCE8, rsp.RDRAM+readPtr, 8); // Just do that for efficiency... may remove and use directly later anyway
228 readPtr += 8; // This must be a header byte or whatnot
230 for (int cnt = 0; cnt < 0x480; cnt += 0x180) {
231 memcpy (mp3data+0xCF0, rsp.RDRAM+readPtr, 0x180); // DMA: 0xCF0 <- RDRAM[s5] : 0x180
233 outPtr = 0xE70; // s3
234 // --------------- Inner Loop Start --------------------
235 for (int cnt2 = 0; cnt2 < 0x180; cnt2 += 0x40) {
248 // --------------- Inner Loop End --------------------
249 memcpy (rsp.RDRAM+writePtr, mp3data+0xe70, 0x180);
257 static void InnerLoop () {
258 // Part 1: 100% Accurate
261 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] += v[31];
262 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] += v[30];
263 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] += v[28];
264 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] += v[29];
266 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] += v[24];
267 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] += v[25];
268 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] += v[27];
269 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] += v[26];
271 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] += v[16];
272 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] += v[17];
273 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]+= v[19];
274 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]+= v[18];
276 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]+= v[23];
277 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]+= v[22];
278 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]+= v[20];
279 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]+= v[21];
285 // Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!!
291 /*RSP_GPR[0x8].W = t0;
296 RSP_Vect[0].DW[1] = 0xB504A57E00016A09;
297 RSP_Vect[0].DW[0] = 0x0002D4130005A827;
302 v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10;
304 v[16] = -v[16] -v[17];
305 v[2] = v[18] + v[19];
306 // ** Store v[11] -> (T6 + 0)**
307 *(s16 *)(mp3data+((t6+(short)0x0))) = (short)v[11];
311 // ** Store v[16] -> (T3 + 0)**
312 *(s16 *)(mp3data+((t3+(short)0x0))) = (short)v[16];
313 // ** Store v[11] -> (T5 + 0)**
314 *(s16 *)(mp3data+((t5+(short)0x0))) = (short)v[11];
315 // 0x13E8 - Verified....
317 // ** Store v[2] -> (T2 + 0)**
318 *(s16 *)(mp3data+((t2+(short)0x0))) = (short)v[2];
319 v[3] = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2];
320 // ** Store v[3] -> (T0 + 0)**
321 *(s16 *)(mp3data+((t0+(short)0x0))) = (short)v[3];
323 v[4] = -v[20] -v[21];
324 v[6] = v[22] + v[23];
325 v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10;
326 // ** Store v[4] -> (T3 + 0xFF80)
327 *(s16 *)(mp3data+((t3+(short)0xFF80))) = (short)v[4];
328 v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10;
334 // *** Store v[7] -> (T1 + 0xFF80)
335 *(s16 *)(mp3data+((t1+(short)0xFF80))) = (short)v[7];
336 // *** Store v[4] -> (T2 + 0xFF80)
337 *(s16 *)(mp3data+((t2+(short)0xFF80))) = (short)v[4];
338 // *** Store v[5] -> (T0 + 0xFF80)
339 *(s16 *)(mp3data+((t0+(short)0xFF80))) = (short)v[5];
340 v[8] = v[24] + v[25];
343 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
345 v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10;
346 v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10;
348 v[10] = v[26] + v[27]; v[10] = v[10] + v[10];
349 v[12] = v[28] + v[29]; v[12] = v[12] + v[12];
350 v[14] = v[30] + v[31];
352 v[14] = v[14] + v[14];
353 v[13] = (v[13] - v[2]) + v[12];
354 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]);
355 v[14] = -(v[14] + v[14]) + v[3];
356 v[17] = v[13] - v[10];
358 // ** Store v[9] -> (T6 + 0x40)
359 *(s16 *)(mp3data+((t6+(short)0x40))) = (short)v[9];
360 v[11] = v[11] - v[13];
361 // ** Store v[17] -> (T0 + 0xFFC0)
362 *(s16 *)(mp3data+((t0+(short)0xFFC0))) = (short)v[17];
363 v[12] = v[8] - v[12];
364 // ** Store v[11] -> (T0 + 0x40)
365 *(s16 *)(mp3data+((t0+(short)0x40))) = (short)v[11];
367 // ** Store v[15] -> (T1 + 0xFFC0)
368 *(s16 *)(mp3data+((t1+(short)0xFFC0))) = (short)v[15];
369 v[10] = -v[10] -v[12];
370 // ** Store v[12] -> (T2 + 0x40)
371 *(s16 *)(mp3data+((t2+(short)0x40))) = (short)v[12];
372 // ** Store v[8] -> (T3 + 0xFFC0)
373 *(s16 *)(mp3data+((t3+(short)0xFFC0))) = (short)v[8];
374 // ** Store v[14] -> (T5 + 0x40)
375 *(s16 *)(mp3data+((t5+(short)0x40))) = (short)v[14];
376 // ** Store v[10] -> (T2 + 0xFFC0)
377 *(s16 *)(mp3data+((t2+(short)0xFFC0))) = (short)v[10];
378 // 0x14FC - Verified...
380 // Part 6 - 100% Accurate
382 v[0] = *(s16 *)(mp3data+inPtr+(0x00^S16)); v[31] = *(s16 *)(mp3data+inPtr+(0x3E^S16)); v[0] -= v[31];
383 v[1] = *(s16 *)(mp3data+inPtr+(0x02^S16)); v[30] = *(s16 *)(mp3data+inPtr+(0x3C^S16)); v[1] -= v[30];
384 v[2] = *(s16 *)(mp3data+inPtr+(0x06^S16)); v[28] = *(s16 *)(mp3data+inPtr+(0x38^S16)); v[2] -= v[28];
385 v[3] = *(s16 *)(mp3data+inPtr+(0x04^S16)); v[29] = *(s16 *)(mp3data+inPtr+(0x3A^S16)); v[3] -= v[29];
387 v[4] = *(s16 *)(mp3data+inPtr+(0x0E^S16)); v[24] = *(s16 *)(mp3data+inPtr+(0x30^S16)); v[4] -= v[24];
388 v[5] = *(s16 *)(mp3data+inPtr+(0x0C^S16)); v[25] = *(s16 *)(mp3data+inPtr+(0x32^S16)); v[5] -= v[25];
389 v[6] = *(s16 *)(mp3data+inPtr+(0x08^S16)); v[27] = *(s16 *)(mp3data+inPtr+(0x36^S16)); v[6] -= v[27];
390 v[7] = *(s16 *)(mp3data+inPtr+(0x0A^S16)); v[26] = *(s16 *)(mp3data+inPtr+(0x34^S16)); v[7] -= v[26];
392 v[8] = *(s16 *)(mp3data+inPtr+(0x1E^S16)); v[16] = *(s16 *)(mp3data+inPtr+(0x20^S16)); v[8] -= v[16];
393 v[9] = *(s16 *)(mp3data+inPtr+(0x1C^S16)); v[17] = *(s16 *)(mp3data+inPtr+(0x22^S16)); v[9] -= v[17];
394 v[10]= *(s16 *)(mp3data+inPtr+(0x18^S16)); v[19] = *(s16 *)(mp3data+inPtr+(0x26^S16)); v[10]-= v[19];
395 v[11]= *(s16 *)(mp3data+inPtr+(0x1A^S16)); v[18] = *(s16 *)(mp3data+inPtr+(0x24^S16)); v[11]-= v[18];
397 v[12]= *(s16 *)(mp3data+inPtr+(0x10^S16)); v[23] = *(s16 *)(mp3data+inPtr+(0x2E^S16)); v[12]-= v[23];
398 v[13]= *(s16 *)(mp3data+inPtr+(0x12^S16)); v[22] = *(s16 *)(mp3data+inPtr+(0x2C^S16)); v[13]-= v[22];
399 v[14]= *(s16 *)(mp3data+inPtr+(0x16^S16)); v[20] = *(s16 *)(mp3data+inPtr+(0x28^S16)); v[14]-= v[20];
400 v[15]= *(s16 *)(mp3data+inPtr+(0x14^S16)); v[21] = *(s16 *)(mp3data+inPtr+(0x2A^S16)); v[15]-= v[21];
402 //0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2
403 const u16 LUT6[16] = { 0xFFB2, 0xFD3A, 0xF10A, 0xF854,
404 0xBDAE, 0xCDA0, 0xE76C, 0xDB94,
405 0x1920, 0x4B20, 0xAC7C, 0x7C68,
406 0xABEC, 0x9880, 0xDAE8, 0x839C };
407 for (i = 0; i < 16; i++) {
408 v[0+i] = (v[0+i] * LUT6[i]) >> 0x10;
410 v[0] = v[0] + v[0]; v[1] = v[1] + v[1];
411 v[2] = v[2] + v[2]; v[3] = v[3] + v[3]; v[4] = v[4] + v[4];
412 v[5] = v[5] + v[5]; v[6] = v[6] + v[6]; v[7] = v[7] + v[7];
413 v[12] = v[12] + v[12]; v[13] = v[13] + v[13]; v[15] = v[15] + v[15];
417 // Part 7: - 100% Accurate + SSV - Unoptimized
419 v[0] = ( v[17] + v[16] ) >> 1;
420 v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10;
421 v[2] = -v[18] -v[19];
422 v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10;
423 v[4] = v[20] + v[21] + v[0];
424 v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1];
425 v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2];
426 v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3];
428 // Save v[0] -> (T3 + 0xFFE0)
429 *(s16 *)(mp3data+((t3+(short)0xFFE0))) = (short)-v[0];
430 v[8] = v[24] + v[25];
431 v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10;
432 v[10] = ((v[26] + v[27]) << 1) + v[8];
433 v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9];
434 v[12] = v[4] - ((v[28] + v[29]) << 1);
435 // ** Store v12 -> (T2 + 0x20)
436 *(s16 *)(mp3data+((t2+(short)0x20))) = (short)v[12];
437 v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5];
438 v[14] = v[30] + v[31];
439 v[14] = v[14] + v[14];
440 v[14] = v[14] + v[14];
441 v[14] = v[6] - v[14];
442 v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7];
443 // Store v14 -> (T5 + 0x20)
444 *(s16 *)(mp3data+((t5+(short)0x20))) = (short)v[14];
445 v[14] = v[14] + v[1];
446 // Store v[14] -> (T6 + 0x20)
447 *(s16 *)(mp3data+((t6+(short)0x20))) = (short)v[14];
448 // Store v[15] -> (T1 + 0xFFE0)
449 *(s16 *)(mp3data+((t1+(short)0xFFE0))) = (short)v[15];
454 // Store v[6] -> (T5 + 0x60)
455 *(s16 *)(mp3data+((t5+(short)0x60))) = (short)v[6];
456 v[10] = v[10] + v[2];
457 v[10] = v[4] - v[10];
458 // Store v[10] -> (T2 + 0xFFA0)
459 *(s16 *)(mp3data+((t2+(short)0xFFA0))) = (short)v[10];
460 v[12] = v[2] - v[12];
461 // Store v[12] -> (T2 + 0xFFE0)
462 *(s16 *)(mp3data+((t2+(short)0xFFE0))) = (short)v[12];
465 // Store v[4] -> (T2 + 0x60)
466 *(s16 *)(mp3data+((t2+(short)0x60))) = (short)v[4];
468 // Store v[0] -> (T3 + 0xFFA0)
469 *(s16 *)(mp3data+((t3+(short)0xFFA0))) = (short)v[0];
471 // Store v[7] -> (T1 + 0xFFA0)
472 *(s16 *)(mp3data+((t1+(short)0xFFA0))) = (short)v[7];
473 v[11] = v[11] - v[3];
474 // Store v[1] -> (T6 + 0x60)
475 *(s16 *)(mp3data+((t6+(short)0x60))) = (short)v[1];
476 v[11] = v[11] - v[5];
477 // Store v[11] -> (T0 + 0x60)
478 *(s16 *)(mp3data+((t0+(short)0x60))) = (short)v[11];
480 // Store v[3] -> (T0 + 0x20)
481 *(s16 *)(mp3data+((t0+(short)0x20))) = (short)v[3];
482 v[13] = v[13] + v[2];
483 // Store v[13] -> (T0 + 0xFFE0)
484 *(s16 *)(mp3data+((t0+(short)0xFFE0))) = (short)v[13];
486 v[2] = (v[5] - v[2]) - v[9];
487 // Store v[2] -> (T0 + 0xFFA0)
488 *(s16 *)(mp3data+((t0+(short)0xFFA0))) = (short)v[2];
489 // 0x7A8 - Verified...
491 // Step 8 - Dewindowing
493 //u64 *DW = (u64 *)&DeWindowLUT[0x10-(t4>>1)];
494 u32 offset = 0x10-(t4>>1);
496 u32 addptr = t6 & 0xFFE0;
497 offset = 0x10-(t4>>1);
499 s32 v2=0, v4=0, v6=0, v8=0;
500 //s32 z2=0, z4=0, z6=0, z8=0;
502 offset = 0x10-(t4>>1);// + x*0x40;
504 for (x = 0; x < 8; x++) {
505 v2 = v4 = v6 = v8 = 0;
509 for (i = 7; i >= 0; i--) {
510 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
511 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
512 v6 += ((int)*(s16 *)(mp3data+(addptr)+0x20) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
513 v8 += ((int)*(s16 *)(mp3data+(addptr)+0x30) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
521 *(s16 *)(mp3data+(outPtr^S16)) = v0;
522 *(s16 *)(mp3data+((outPtr+2)^S16)) = v18;
528 offset = 0x10-(t4>>1) + 8*0x40;
530 for (i = 0; i < 4; i++) {
531 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
532 v2 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
534 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x00) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
535 v4 += ((int)*(s16 *)(mp3data+(addptr)+0x10) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
538 s32 mult6 = *(s32 *)(mp3data+0xCE8);
539 s32 mult4 = *(s32 *)(mp3data+0xCEC);
541 v2 = (v2 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
542 *(s16 *)(mp3data+(outPtr^S16)) = v2;
544 v4 = (v4 * *(u32 *)(mp3data+0xCE8)) >> 0x10;
545 *(s16 *)(mp3data+(outPtr^S16)) = v4;
546 mult4 = *(u32 *)(mp3data+0xCE8);
550 for (x = 0; x < 8; x++) {
551 v2 = v4 = v6 = v8 = 0;
553 offset = (0x22F-(t4>>1) + x*0x40);
555 for (i = 0; i < 4; i++) {
556 v2 += ((int)*(s16 *)(mp3data+(addptr )+0x20) * (short)DeWindowLUT[offset+0x00] + 0x4000) >> 0xF;
557 v2 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x20) * (short)DeWindowLUT[offset+0x01] + 0x4000) >> 0xF;
558 v4 += ((int)*(s16 *)(mp3data+(addptr )+0x30) * (short)DeWindowLUT[offset+0x08] + 0x4000) >> 0xF;
559 v4 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x30) * (short)DeWindowLUT[offset+0x09] + 0x4000) >> 0xF;
560 v6 += ((int)*(s16 *)(mp3data+(addptr )+0x00) * (short)DeWindowLUT[offset+0x20] + 0x4000) >> 0xF;
561 v6 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x00) * (short)DeWindowLUT[offset+0x21] + 0x4000) >> 0xF;
562 v8 += ((int)*(s16 *)(mp3data+(addptr )+0x10) * (short)DeWindowLUT[offset+0x28] + 0x4000) >> 0xF;
563 v8 -= ((int)*(s16 *)(mp3data+((addptr+2))+0x10) * (short)DeWindowLUT[offset+0x29] + 0x4000) >> 0xF;
564 addptr+=4; offset+=2;
571 *(s16 *)(mp3data+((outPtr+2)^S16)) = v0;
572 *(s16 *)(mp3data+((outPtr+4)^S16)) = v18;
582 hi0 = (int)hi0 >> 0x10;
583 hi1 = (int)hi1 >> 0x10;
584 for (i = 0; i < 8; i++) {
586 v = (*(s16 *)(mp3data+((tmp-0x40)^S16)) * hi0);
587 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
588 *(s16 *)((u8 *)mp3data+((tmp-0x40)^S16)) = (s16)v;
590 v = (*(s16 *)(mp3data+((tmp-0x30)^S16)) * hi0);
591 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
592 *(s16 *)((u8 *)mp3data+((tmp-0x30)^S16)) = v;
594 v = (*(s16 *)(mp3data+((tmp-0x1E)^S16)) * hi1);
595 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
596 *(s16 *)((u8 *)mp3data+((tmp-0x1E)^S16)) = v;
598 v = (*(s16 *)(mp3data+((tmp-0xE)^S16)) * hi1);
599 if (v > 32767) v = 32767; else if (v < -32767) v = -32767;
600 *(s16 *)((u8 *)mp3data+((tmp-0xE)^S16)) = v;