df00ea13 |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus-rsp-hle - ucode2.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2009 Richard Goedeken * |
5 | * Copyright (C) 2002 Hacktarux * |
6 | * * |
7 | * This program is free software; you can redistribute it and/or modify * |
8 | * it under the terms of the GNU General Public License as published by * |
9 | * the Free Software Foundation; either version 2 of the License, or * |
10 | * (at your option) any later version. * |
11 | * * |
12 | * This program is distributed in the hope that it will be useful, * |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
15 | * GNU General Public License for more details. * |
16 | * * |
17 | * You should have received a copy of the GNU General Public License * |
18 | * along with this program; if not, write to the * |
19 | * Free Software Foundation, Inc., * |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
21 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
22 | |
23 | #include <string.h> |
24 | #include <stdbool.h> |
25 | #include <stdint.h> |
26 | |
27 | #include "m64p_plugin.h" |
28 | #include "m64p_types.h" |
29 | #include "hle.h" |
30 | #include "alist_internal.h" |
31 | #include "alist.h" |
32 | |
33 | static void SPNOOP(uint32_t inst1, uint32_t inst2) |
34 | { |
35 | DebugMessage(M64MSG_ERROR, "Unknown/Unimplemented Audio Command %i in ABI 2", (int)(inst1 >> 24)); |
36 | } |
37 | |
38 | |
/* ABI variant flags. Both start out false and are cleared again by
 * init_ucode2().
 */
static bool isMKABI = false;
static bool isZeldaABI = false;

/* Resets both ABI variant flags to false. */
void init_ucode2(void)
{
    isZeldaABI = false;
    isMKABI = false;
}
46 | |
47 | /* Loads an ADPCM table |
48 | * NOTE Works 100% Now 03-13-01 |
49 | */ |
50 | static void LOADADPCM2(uint32_t inst1, uint32_t inst2) |
51 | { |
52 | uint32_t v0 = (inst2 & 0xffffff); |
53 | uint32_t x; |
54 | /* Zelda2 Specific... */ |
55 | uint16_t *table = (uint16_t *)(rsp.RDRAM + v0); |
56 | |
57 | for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) { |
58 | adpcmtable[(0x0 + (x << 3))^S] = table[0]; |
59 | adpcmtable[(0x1 + (x << 3))^S] = table[1]; |
60 | |
61 | adpcmtable[(0x2 + (x << 3))^S] = table[2]; |
62 | adpcmtable[(0x3 + (x << 3))^S] = table[3]; |
63 | |
64 | adpcmtable[(0x4 + (x << 3))^S] = table[4]; |
65 | adpcmtable[(0x5 + (x << 3))^S] = table[5]; |
66 | |
67 | adpcmtable[(0x6 + (x << 3))^S] = table[6]; |
68 | adpcmtable[(0x7 + (x << 3))^S] = table[7]; |
69 | table += 8; |
70 | } |
71 | } |
72 | |
73 | static void SETLOOP2(uint32_t inst1, uint32_t inst2) |
74 | { |
75 | loopval = inst2 & 0xffffff; /* No segment? */ |
76 | } |
77 | |
78 | static void SETBUFF2(uint32_t inst1, uint32_t inst2) |
79 | { |
80 | AudioInBuffer = (uint16_t)(inst1); /* 0x00 */ |
81 | AudioOutBuffer = (uint16_t)((inst2 >> 0x10)); /* 0x02 */ |
82 | AudioCount = (uint16_t)(inst2); /* 0x04 */ |
83 | } |
84 | |
85 | /* NOTE Verified to be 100% Accurate... */ |
86 | static void ADPCM2(uint32_t inst1, uint32_t inst2) |
87 | { |
88 | unsigned char Flags = (uint8_t)(inst1 >> 16) & 0xff; |
89 | unsigned int Address = (inst2 & 0xffffff); |
90 | unsigned short inPtr = 0; |
91 | short *out = (short *)(BufferSpace + AudioOutBuffer); |
92 | short count = (short)AudioCount; |
93 | unsigned char icode; |
94 | unsigned char code; |
95 | int vscale; |
96 | unsigned short index; |
97 | unsigned short j; |
98 | int a[8]; |
99 | short *book1, *book2; |
100 | |
101 | uint8_t srange; |
102 | uint8_t mask1; |
103 | uint8_t mask2; |
104 | uint8_t shifter; |
105 | |
106 | int l1; |
107 | int l2; |
108 | int inp1[8]; |
109 | int inp2[8]; |
110 | |
111 | memset(out, 0, 32); |
112 | |
113 | /* Tricky lil Zelda MM and ABI2!!! hahaha I know your secrets! :DDD */ |
114 | if (Flags & 0x4) { |
115 | srange = 0xE; |
116 | mask1 = 0xC0; |
117 | mask2 = 0x30; |
118 | shifter = 10; |
119 | } else { |
120 | srange = 0xC; |
121 | mask1 = 0xf0; |
122 | mask2 = 0x0f; |
123 | shifter = 12; |
124 | } |
125 | |
126 | if (!(Flags & 0x1)) { |
127 | if (Flags & 0x2) |
128 | memcpy(out, &rsp.RDRAM[loopval], 32); |
129 | else |
130 | memcpy(out, &rsp.RDRAM[Address], 32); |
131 | } |
132 | |
133 | l1 = out[14 ^ S]; |
134 | l2 = out[15 ^ S]; |
135 | out += 16; |
136 | while (count > 0) { |
137 | code = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
138 | index = code & 0xf; |
139 | index <<= 4; |
140 | book1 = (short *)&adpcmtable[index]; |
141 | book2 = book1 + 8; |
142 | code >>= 4; |
143 | vscale = (0x8000 >> ((srange - code) - 1)); |
144 | |
145 | inPtr++; |
146 | j = 0; |
147 | |
148 | while (j < 8) { |
149 | icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
150 | inPtr++; |
151 | |
152 | /* this will in effect be signed */ |
153 | inp1[j] = (int16_t)((icode & mask1) << 8); |
154 | if (code < srange) |
155 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
156 | j++; |
157 | |
158 | inp1[j] = (int16_t)((icode & mask2) << shifter); |
159 | if (code < srange) |
160 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
161 | j++; |
162 | |
163 | if (Flags & 4) { |
164 | /* this will in effect be signed */ |
165 | inp1[j] = (int16_t)((icode & 0xC) << 12); |
166 | if (code < 0xE) |
167 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
168 | j++; |
169 | |
170 | inp1[j] = (int16_t)((icode & 0x3) << 14); |
171 | if (code < 0xE) |
172 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
173 | j++; |
174 | } |
175 | } |
176 | |
177 | |
178 | |
179 | j = 0; |
180 | while (j < 8) { |
181 | icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
182 | inPtr++; |
183 | |
184 | inp2[j] = (int16_t)((icode & mask1) << 8); |
185 | if (code < srange) |
186 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
187 | j++; |
188 | |
189 | inp2[j] = (int16_t)((icode & mask2) << shifter); |
190 | if (code < srange) |
191 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
192 | j++; |
193 | |
194 | if (Flags & 4) { |
195 | inp2[j] = (int16_t)((icode & 0xC) << 12); |
196 | if (code < 0xE) |
197 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
198 | j++; |
199 | |
200 | inp2[j] = (int16_t)((icode & 0x3) << 14); |
201 | if (code < 0xE) |
202 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
203 | j++; |
204 | } |
205 | } |
206 | |
207 | a[0] = (int)book1[0] * (int)l1; |
208 | a[0] += (int)book2[0] * (int)l2; |
209 | a[0] += (int)inp1[0] * (int)2048; |
210 | |
211 | a[1] = (int)book1[1] * (int)l1; |
212 | a[1] += (int)book2[1] * (int)l2; |
213 | a[1] += (int)book2[0] * inp1[0]; |
214 | a[1] += (int)inp1[1] * (int)2048; |
215 | |
216 | a[2] = (int)book1[2] * (int)l1; |
217 | a[2] += (int)book2[2] * (int)l2; |
218 | a[2] += (int)book2[1] * inp1[0]; |
219 | a[2] += (int)book2[0] * inp1[1]; |
220 | a[2] += (int)inp1[2] * (int)2048; |
221 | |
222 | a[3] = (int)book1[3] * (int)l1; |
223 | a[3] += (int)book2[3] * (int)l2; |
224 | a[3] += (int)book2[2] * inp1[0]; |
225 | a[3] += (int)book2[1] * inp1[1]; |
226 | a[3] += (int)book2[0] * inp1[2]; |
227 | a[3] += (int)inp1[3] * (int)2048; |
228 | |
229 | a[4] = (int)book1[4] * (int)l1; |
230 | a[4] += (int)book2[4] * (int)l2; |
231 | a[4] += (int)book2[3] * inp1[0]; |
232 | a[4] += (int)book2[2] * inp1[1]; |
233 | a[4] += (int)book2[1] * inp1[2]; |
234 | a[4] += (int)book2[0] * inp1[3]; |
235 | a[4] += (int)inp1[4] * (int)2048; |
236 | |
237 | a[5] = (int)book1[5] * (int)l1; |
238 | a[5] += (int)book2[5] * (int)l2; |
239 | a[5] += (int)book2[4] * inp1[0]; |
240 | a[5] += (int)book2[3] * inp1[1]; |
241 | a[5] += (int)book2[2] * inp1[2]; |
242 | a[5] += (int)book2[1] * inp1[3]; |
243 | a[5] += (int)book2[0] * inp1[4]; |
244 | a[5] += (int)inp1[5] * (int)2048; |
245 | |
246 | a[6] = (int)book1[6] * (int)l1; |
247 | a[6] += (int)book2[6] * (int)l2; |
248 | a[6] += (int)book2[5] * inp1[0]; |
249 | a[6] += (int)book2[4] * inp1[1]; |
250 | a[6] += (int)book2[3] * inp1[2]; |
251 | a[6] += (int)book2[2] * inp1[3]; |
252 | a[6] += (int)book2[1] * inp1[4]; |
253 | a[6] += (int)book2[0] * inp1[5]; |
254 | a[6] += (int)inp1[6] * (int)2048; |
255 | |
256 | a[7] = (int)book1[7] * (int)l1; |
257 | a[7] += (int)book2[7] * (int)l2; |
258 | a[7] += (int)book2[6] * inp1[0]; |
259 | a[7] += (int)book2[5] * inp1[1]; |
260 | a[7] += (int)book2[4] * inp1[2]; |
261 | a[7] += (int)book2[3] * inp1[3]; |
262 | a[7] += (int)book2[2] * inp1[4]; |
263 | a[7] += (int)book2[1] * inp1[5]; |
264 | a[7] += (int)book2[0] * inp1[6]; |
265 | a[7] += (int)inp1[7] * (int)2048; |
266 | |
267 | for (j = 0; j < 8; j++) { |
268 | a[j ^ S] >>= 11; |
269 | a[j ^ S] = clamp_s16(a[j ^ S]); |
270 | *(out++) = a[j ^ S]; |
271 | } |
272 | l1 = a[6]; |
273 | l2 = a[7]; |
274 | |
275 | a[0] = (int)book1[0] * (int)l1; |
276 | a[0] += (int)book2[0] * (int)l2; |
277 | a[0] += (int)inp2[0] * (int)2048; |
278 | |
279 | a[1] = (int)book1[1] * (int)l1; |
280 | a[1] += (int)book2[1] * (int)l2; |
281 | a[1] += (int)book2[0] * inp2[0]; |
282 | a[1] += (int)inp2[1] * (int)2048; |
283 | |
284 | a[2] = (int)book1[2] * (int)l1; |
285 | a[2] += (int)book2[2] * (int)l2; |
286 | a[2] += (int)book2[1] * inp2[0]; |
287 | a[2] += (int)book2[0] * inp2[1]; |
288 | a[2] += (int)inp2[2] * (int)2048; |
289 | |
290 | a[3] = (int)book1[3] * (int)l1; |
291 | a[3] += (int)book2[3] * (int)l2; |
292 | a[3] += (int)book2[2] * inp2[0]; |
293 | a[3] += (int)book2[1] * inp2[1]; |
294 | a[3] += (int)book2[0] * inp2[2]; |
295 | a[3] += (int)inp2[3] * (int)2048; |
296 | |
297 | a[4] = (int)book1[4] * (int)l1; |
298 | a[4] += (int)book2[4] * (int)l2; |
299 | a[4] += (int)book2[3] * inp2[0]; |
300 | a[4] += (int)book2[2] * inp2[1]; |
301 | a[4] += (int)book2[1] * inp2[2]; |
302 | a[4] += (int)book2[0] * inp2[3]; |
303 | a[4] += (int)inp2[4] * (int)2048; |
304 | |
305 | a[5] = (int)book1[5] * (int)l1; |
306 | a[5] += (int)book2[5] * (int)l2; |
307 | a[5] += (int)book2[4] * inp2[0]; |
308 | a[5] += (int)book2[3] * inp2[1]; |
309 | a[5] += (int)book2[2] * inp2[2]; |
310 | a[5] += (int)book2[1] * inp2[3]; |
311 | a[5] += (int)book2[0] * inp2[4]; |
312 | a[5] += (int)inp2[5] * (int)2048; |
313 | |
314 | a[6] = (int)book1[6] * (int)l1; |
315 | a[6] += (int)book2[6] * (int)l2; |
316 | a[6] += (int)book2[5] * inp2[0]; |
317 | a[6] += (int)book2[4] * inp2[1]; |
318 | a[6] += (int)book2[3] * inp2[2]; |
319 | a[6] += (int)book2[2] * inp2[3]; |
320 | a[6] += (int)book2[1] * inp2[4]; |
321 | a[6] += (int)book2[0] * inp2[5]; |
322 | a[6] += (int)inp2[6] * (int)2048; |
323 | |
324 | a[7] = (int)book1[7] * (int)l1; |
325 | a[7] += (int)book2[7] * (int)l2; |
326 | a[7] += (int)book2[6] * inp2[0]; |
327 | a[7] += (int)book2[5] * inp2[1]; |
328 | a[7] += (int)book2[4] * inp2[2]; |
329 | a[7] += (int)book2[3] * inp2[3]; |
330 | a[7] += (int)book2[2] * inp2[4]; |
331 | a[7] += (int)book2[1] * inp2[5]; |
332 | a[7] += (int)book2[0] * inp2[6]; |
333 | a[7] += (int)inp2[7] * (int)2048; |
334 | |
335 | for (j = 0; j < 8; j++) { |
336 | a[j ^ S] >>= 11; |
337 | a[j ^ S] = clamp_s16(a[j ^ S]); |
338 | *(out++) = a[j ^ S]; |
339 | } |
340 | l1 = a[6]; |
341 | l2 = a[7]; |
342 | |
343 | count -= 32; |
344 | } |
345 | out -= 16; |
346 | memcpy(&rsp.RDRAM[Address], out, 32); |
347 | } |
348 | |
349 | static void CLEARBUFF2(uint32_t inst1, uint32_t inst2) |
350 | { |
351 | uint16_t addr = (uint16_t)(inst1 & 0xffff); |
352 | uint16_t count = (uint16_t)(inst2 & 0xffff); |
353 | if (count > 0) |
354 | memset(BufferSpace + addr, 0, count); |
355 | } |
356 | |
357 | /* TODO Needs accuracy verification... */ |
358 | static void LOADBUFF2(uint32_t inst1, uint32_t inst2) |
359 | { |
360 | uint32_t v0; |
361 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
362 | v0 = (inst2 & 0xfffffc); |
363 | memcpy(BufferSpace + (inst1 & 0xfffc), rsp.RDRAM + v0, (cnt + 3) & 0xFFFC); |
364 | } |
365 | |
366 | /* TODO Needs accuracy verification... */ |
367 | static void SAVEBUFF2(uint32_t inst1, uint32_t inst2) |
368 | { |
369 | uint32_t v0; |
370 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
371 | v0 = (inst2 & 0xfffffc); |
372 | memcpy(rsp.RDRAM + v0, BufferSpace + (inst1 & 0xfffc), (cnt + 3) & 0xFFFC); |
373 | } |
374 | |
375 | /* TODO Needs accuracy verification... */ |
376 | static void MIXER2(uint32_t inst1, uint32_t inst2) |
377 | { |
378 | uint16_t dmemin = (uint16_t)(inst2 >> 0x10); |
379 | uint16_t dmemout = (uint16_t)(inst2 & 0xFFFF); |
380 | uint32_t count = ((inst1 >> 12) & 0xFF0); |
381 | int32_t gain = (int16_t)(inst1 & 0xFFFF); |
382 | int32_t temp; |
383 | unsigned int x; |
384 | |
385 | for (x = 0; x < count; x += 2) { |
386 | /* TODO I think I can do this a lot easier */ |
387 | temp = (*(int16_t *)(BufferSpace + dmemin + x) * gain) >> 15; |
388 | temp += *(int16_t *)(BufferSpace + dmemout + x); |
389 | |
390 | temp = clamp_s16((int32_t)temp); |
391 | |
392 | *(uint16_t *)(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF); |
393 | } |
394 | } |
395 | |
396 | |
397 | static void RESAMPLE2(uint32_t inst1, uint32_t inst2) |
398 | { |
399 | unsigned char Flags = (uint8_t)((inst1 >> 16) & 0xff); |
400 | unsigned int Pitch = ((inst1 & 0xffff)) << 1; |
401 | uint32_t addy = (inst2 & 0xffffff); |
402 | unsigned int Accum = 0; |
403 | unsigned int location; |
404 | int16_t *lut; |
405 | short *dst; |
406 | int16_t *src; |
407 | uint32_t srcPtr = (AudioInBuffer / 2); |
408 | uint32_t dstPtr = (AudioOutBuffer / 2); |
409 | int32_t temp; |
410 | int32_t accum; |
411 | int x, i; |
412 | |
413 | dst = (short *)(BufferSpace); |
414 | src = (int16_t *)(BufferSpace); |
415 | |
416 | if (addy > (1024 * 1024 * 8)) |
417 | addy = (inst2 & 0xffffff); |
418 | |
419 | srcPtr -= 4; |
420 | |
421 | if ((Flags & 0x1) == 0) { |
422 | for (x = 0; x < 4; x++) |
423 | src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S]; |
424 | Accum = *(uint16_t *)(rsp.RDRAM + addy + 10); |
425 | } else { |
426 | for (x = 0; x < 4; x++) |
427 | src[(srcPtr + x)^S] = 0; |
428 | } |
429 | |
430 | for (i = 0; i < ((AudioCount + 0xf) & 0xFFF0) / 2; i++) { |
431 | location = (((Accum * 0x40) >> 0x10) * 8); |
432 | lut = (int16_t *)(((uint8_t *)ResampleLUT) + location); |
433 | |
434 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 0)^S)) * ((int32_t)((int16_t)lut[0]))); |
435 | accum = (int32_t)(temp >> 15); |
436 | |
437 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 1)^S)) * ((int32_t)((int16_t)lut[1]))); |
438 | accum += (int32_t)(temp >> 15); |
439 | |
440 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 2)^S)) * ((int32_t)((int16_t)lut[2]))); |
441 | accum += (int32_t)(temp >> 15); |
442 | |
443 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 3)^S)) * ((int32_t)((int16_t)lut[3]))); |
444 | accum += (int32_t)(temp >> 15); |
445 | |
446 | accum = clamp_s16(accum); |
447 | |
448 | dst[dstPtr ^ S] = (int16_t)(accum); |
449 | dstPtr++; |
450 | Accum += Pitch; |
451 | srcPtr += (Accum >> 16); |
452 | Accum &= 0xffff; |
453 | } |
454 | for (x = 0; x < 4; x++) |
455 | ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S]; |
456 | *(uint16_t *)(rsp.RDRAM + addy + 10) = (uint16_t)Accum; |
457 | } |
458 | |
459 | /* TODO Needs accuracy verification... */ |
460 | static void DMEMMOVE2(uint32_t inst1, uint32_t inst2) |
461 | { |
462 | uint32_t cnt; |
463 | uint32_t v0 = (inst1 & 0xFFFF); |
464 | uint32_t v1 = (inst2 >> 0x10); |
465 | uint32_t count = ((inst2 + 3) & 0xfffc); |
466 | |
467 | if ((inst2 & 0xffff) == 0) |
468 | return; |
469 | |
470 | for (cnt = 0; cnt < count; cnt++) |
471 | *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8)); |
472 | } |
473 | |
/* Envelope state written by ENVSETUP1 / ENVSETUP2 and read (and advanced)
 * by ENVMIXER2.
 */
static uint32_t t3, s5, s6;
static uint16_t env[8];

/* First envelope setup command.
 *
 *   t3     <- low 16 bits of inst1
 *   env[4] <- (inst1 >> 8) & 0xFF00
 *   env[5] <- env[4] + t3 (truncated to 16 bits)
 *   s5     <- high 16 bits of inst2
 *   s6     <- low 16 bits of inst2
 */
static void ENVSETUP1(uint32_t inst1, uint32_t inst2)
{
    const uint32_t base = (inst1 >> 0x8) & 0xFF00;

    t3 = inst1 & 0xFFFF;
    s5 = inst2 >> 0x10;
    s6 = inst2 & 0xFFFF;

    env[4] = (uint16_t)base;
    env[5] = (uint16_t)(base + t3);
}
489 | |
490 | static void ENVSETUP2(uint32_t inst1, uint32_t inst2) |
491 | { |
492 | uint32_t tmp; |
493 | |
494 | tmp = (inst2 >> 0x10); |
495 | env[0] = (uint16_t)tmp; |
496 | tmp += s5; |
497 | env[1] = (uint16_t)tmp; |
498 | tmp = inst2 & 0xffff; |
499 | env[2] = (uint16_t)tmp; |
500 | tmp += s6; |
501 | env[3] = (uint16_t)tmp; |
502 | } |
503 | |
504 | static void ENVMIXER2(uint32_t inst1, uint32_t inst2) |
505 | { |
506 | int16_t *bufft6, *bufft7, *buffs0, *buffs1; |
507 | int16_t *buffs3; |
508 | int32_t count; |
509 | uint32_t adder; |
510 | |
511 | int16_t vec9, vec10; |
512 | |
513 | int16_t v2[8]; |
514 | |
515 | buffs3 = (int16_t *)(BufferSpace + ((inst1 >> 0x0c) & 0x0ff0)); |
516 | bufft6 = (int16_t *)(BufferSpace + ((inst2 >> 0x14) & 0x0ff0)); |
517 | bufft7 = (int16_t *)(BufferSpace + ((inst2 >> 0x0c) & 0x0ff0)); |
518 | buffs0 = (int16_t *)(BufferSpace + ((inst2 >> 0x04) & 0x0ff0)); |
519 | buffs1 = (int16_t *)(BufferSpace + ((inst2 << 0x04) & 0x0ff0)); |
520 | |
521 | |
522 | v2[0] = 0 - (int16_t)((inst1 & 0x2) >> 1); |
523 | v2[1] = 0 - (int16_t)((inst1 & 0x1)); |
524 | v2[2] = 0 - (int16_t)((inst1 & 0x8) >> 1); |
525 | v2[3] = 0 - (int16_t)((inst1 & 0x4) >> 1); |
526 | |
527 | count = (inst1 >> 8) & 0xff; |
528 | |
529 | if (!isMKABI) { |
530 | s5 *= 2; |
531 | s6 *= 2; |
532 | t3 *= 2; |
533 | adder = 0x10; |
534 | } else { |
535 | inst1 = 0; |
536 | adder = 0x8; |
537 | t3 = 0; |
538 | } |
539 | |
540 | |
541 | while (count > 0) { |
542 | int temp, x; |
543 | for (x = 0; x < 0x8; x++) { |
544 | vec9 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[0]) >> 0x10) ^ v2[0]; |
545 | vec10 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[2]) >> 0x10) ^ v2[1]; |
546 | temp = bufft6[x ^ S] + vec9; |
547 | temp = clamp_s16(temp); |
548 | bufft6[x ^ S] = temp; |
549 | temp = bufft7[x ^ S] + vec10; |
550 | temp = clamp_s16(temp); |
551 | bufft7[x ^ S] = temp; |
552 | vec9 = (int16_t)(((int32_t)vec9 * (uint32_t)env[4]) >> 0x10) ^ v2[2]; |
553 | vec10 = (int16_t)(((int32_t)vec10 * (uint32_t)env[4]) >> 0x10) ^ v2[3]; |
554 | if (inst1 & 0x10) { |
555 | temp = buffs0[x ^ S] + vec10; |
556 | temp = clamp_s16(temp); |
557 | buffs0[x ^ S] = temp; |
558 | temp = buffs1[x ^ S] + vec9; |
559 | temp = clamp_s16(temp); |
560 | buffs1[x ^ S] = temp; |
561 | } else { |
562 | temp = buffs0[x ^ S] + vec9; |
563 | temp = clamp_s16(temp); |
564 | buffs0[x ^ S] = temp; |
565 | temp = buffs1[x ^ S] + vec10; |
566 | temp = clamp_s16(temp); |
567 | buffs1[x ^ S] = temp; |
568 | } |
569 | } |
570 | |
571 | if (!isMKABI) |
572 | for (x = 0x8; x < 0x10; x++) { |
573 | vec9 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[1]) >> 0x10) ^ v2[0]; |
574 | vec10 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[3]) >> 0x10) ^ v2[1]; |
575 | temp = bufft6[x ^ S] + vec9; |
576 | temp = clamp_s16(temp); |
577 | bufft6[x ^ S] = temp; |
578 | temp = bufft7[x ^ S] + vec10; |
579 | temp = clamp_s16(temp); |
580 | bufft7[x ^ S] = temp; |
581 | vec9 = (int16_t)(((int32_t)vec9 * (uint32_t)env[5]) >> 0x10) ^ v2[2]; |
582 | vec10 = (int16_t)(((int32_t)vec10 * (uint32_t)env[5]) >> 0x10) ^ v2[3]; |
583 | if (inst1 & 0x10) { |
584 | temp = buffs0[x ^ S] + vec10; |
585 | temp = clamp_s16(temp); |
586 | buffs0[x ^ S] = temp; |
587 | temp = buffs1[x ^ S] + vec9; |
588 | temp = clamp_s16(temp); |
589 | buffs1[x ^ S] = temp; |
590 | } else { |
591 | temp = buffs0[x ^ S] + vec9; |
592 | temp = clamp_s16(temp); |
593 | buffs0[x ^ S] = temp; |
594 | temp = buffs1[x ^ S] + vec10; |
595 | temp = clamp_s16(temp); |
596 | buffs1[x ^ S] = temp; |
597 | } |
598 | } |
599 | bufft6 += adder; |
600 | bufft7 += adder; |
601 | buffs0 += adder; |
602 | buffs1 += adder; |
603 | buffs3 += adder; |
604 | count -= adder; |
605 | env[0] += (uint16_t)s5; |
606 | env[1] += (uint16_t)s5; |
607 | env[2] += (uint16_t)s6; |
608 | env[3] += (uint16_t)s6; |
609 | env[4] += (uint16_t)t3; |
610 | env[5] += (uint16_t)t3; |
611 | } |
612 | } |
613 | |
614 | static void DUPLICATE2(uint32_t inst1, uint32_t inst2) |
615 | { |
616 | unsigned short Count = (inst1 >> 16) & 0xff; |
617 | unsigned short In = inst1 & 0xffff; |
618 | unsigned short Out = (inst2 >> 16); |
619 | |
620 | unsigned short buff[64]; |
621 | |
622 | memcpy(buff, BufferSpace + In, 128); |
623 | |
624 | while (Count) { |
625 | memcpy(BufferSpace + Out, buff, 128); |
626 | Out += 128; |
627 | Count--; |
628 | } |
629 | } |
630 | |
631 | static void INTERL2(uint32_t inst1, uint32_t inst2) |
632 | { |
633 | short Count = inst1 & 0xffff; |
634 | unsigned short Out = inst2 & 0xffff; |
635 | unsigned short In = (inst2 >> 16); |
636 | |
637 | unsigned char *src, *dst; |
638 | src = (unsigned char *)(BufferSpace); /* [In]; */ |
639 | dst = (unsigned char *)(BufferSpace); /* [Out]; */ |
640 | while (Count) { |
641 | *(short *)(dst + (Out ^ S8)) = *(short *)(src + (In ^ S8)); |
642 | Out += 2; |
643 | In += 4; |
644 | Count--; |
645 | } |
646 | } |
647 | |
648 | /* TODO Needs accuracy verification... */ |
649 | static void INTERLEAVE2(uint32_t inst1, uint32_t inst2) |
650 | { |
651 | uint32_t inL, inR; |
652 | uint16_t *outbuff; |
653 | uint16_t *inSrcR; |
654 | uint16_t *inSrcL; |
655 | uint16_t Left, Right, Left2, Right2; |
656 | uint32_t count; |
657 | uint32_t x; |
658 | |
659 | count = ((inst1 >> 12) & 0xFF0); |
660 | if (count == 0) { |
661 | outbuff = (uint16_t *)(AudioOutBuffer + BufferSpace); |
662 | count = AudioCount; |
663 | } else |
664 | outbuff = (uint16_t *)((inst1 & 0xFFFF) + BufferSpace); |
665 | |
666 | inR = inst2 & 0xFFFF; |
667 | inL = (inst2 >> 16) & 0xFFFF; |
668 | |
669 | inSrcR = (uint16_t *)(BufferSpace + inR); |
670 | inSrcL = (uint16_t *)(BufferSpace + inL); |
671 | |
672 | for (x = 0; x < (count / 4); x++) { |
673 | Left = *(inSrcL++); |
674 | Right = *(inSrcR++); |
675 | Left2 = *(inSrcL++); |
676 | Right2 = *(inSrcR++); |
677 | |
678 | #ifdef M64P_BIG_ENDIAN |
679 | *(outbuff++) = Right; |
680 | *(outbuff++) = Left; |
681 | *(outbuff++) = Right2; |
682 | *(outbuff++) = Left2; |
683 | #else |
684 | *(outbuff++) = Right2; |
685 | *(outbuff++) = Left2; |
686 | *(outbuff++) = Right; |
687 | *(outbuff++) = Left; |
688 | #endif |
689 | } |
690 | } |
691 | |
692 | static void ADDMIXER(uint32_t inst1, uint32_t inst2) |
693 | { |
694 | short Count = (inst1 >> 12) & 0x00ff0; |
695 | uint16_t InBuffer = (inst2 >> 16); |
696 | uint16_t OutBuffer = inst2 & 0xffff; |
697 | int cntr; |
698 | |
699 | int16_t *inp, *outp; |
700 | int32_t temp; |
701 | inp = (int16_t *)(BufferSpace + InBuffer); |
702 | outp = (int16_t *)(BufferSpace + OutBuffer); |
703 | for (cntr = 0; cntr < Count; cntr += 2) { |
704 | temp = *outp + *inp; |
705 | temp = clamp_s16(temp); |
706 | *(outp++) = temp; |
707 | inp++; |
708 | } |
709 | } |
710 | |
711 | static void HILOGAIN(uint32_t inst1, uint32_t inst2) |
712 | { |
713 | uint16_t cnt = inst1 & 0xffff; |
714 | uint16_t out = (inst2 >> 16) & 0xffff; |
715 | int16_t hi = (int16_t)((inst1 >> 4) & 0xf000); |
716 | uint16_t lo = (inst1 >> 20) & 0xf; |
717 | int16_t *src = (int16_t *)(BufferSpace + out); |
718 | int32_t tmp, val; |
719 | |
720 | while (cnt) { |
721 | val = (int32_t) * src; |
722 | tmp = ((val * (int32_t)hi) >> 16) + (uint32_t)(val * lo); |
723 | tmp = clamp_s16(tmp); |
724 | *src = tmp; |
725 | src++; |
726 | cnt -= 2; |
727 | } |
728 | } |
729 | |
730 | static void FILTER2(uint32_t inst1, uint32_t inst2) |
731 | { |
732 | static int cnt = 0; |
733 | static int16_t *lutt6; |
734 | static int16_t *lutt5; |
735 | uint8_t *save = (rsp.RDRAM + (inst2 & 0xFFFFFF)); |
736 | uint8_t t4 = (uint8_t)((inst1 >> 0x10) & 0xFF); |
737 | int x; |
738 | short *inp1, *inp2; |
739 | int32_t out1[8]; |
740 | int16_t outbuff[0x3c0], *outp; |
741 | uint32_t inPtr; |
742 | |
743 | if (t4 > 1) { |
744 | /* Then set the cnt variable */ |
745 | cnt = (inst1 & 0xFFFF); |
746 | lutt6 = (int16_t *)save; |
747 | return; |
748 | } |
749 | |
750 | if (t4 == 0) |
751 | lutt5 = (short *)(save + 0x10); |
752 | |
753 | lutt5 = (short *)(save + 0x10); |
754 | |
755 | for (x = 0; x < 8; x++) { |
756 | int32_t a; |
757 | a = (lutt5[x] + lutt6[x]) >> 1; |
758 | lutt5[x] = lutt6[x] = (short)a; |
759 | } |
760 | inPtr = (uint32_t)(inst1 & 0xffff); |
761 | inp1 = (short *)(save); |
762 | outp = outbuff; |
763 | inp2 = (short *)(BufferSpace + inPtr); |
764 | for (x = 0; x < cnt; x += 0x10) { |
765 | out1[1] = inp1[0] * lutt6[6]; |
766 | out1[1] += inp1[3] * lutt6[7]; |
767 | out1[1] += inp1[2] * lutt6[4]; |
768 | out1[1] += inp1[5] * lutt6[5]; |
769 | out1[1] += inp1[4] * lutt6[2]; |
770 | out1[1] += inp1[7] * lutt6[3]; |
771 | out1[1] += inp1[6] * lutt6[0]; |
772 | out1[1] += inp2[1] * lutt6[1]; /* 1 */ |
773 | |
774 | out1[0] = inp1[3] * lutt6[6]; |
775 | out1[0] += inp1[2] * lutt6[7]; |
776 | out1[0] += inp1[5] * lutt6[4]; |
777 | out1[0] += inp1[4] * lutt6[5]; |
778 | out1[0] += inp1[7] * lutt6[2]; |
779 | out1[0] += inp1[6] * lutt6[3]; |
780 | out1[0] += inp2[1] * lutt6[0]; |
781 | out1[0] += inp2[0] * lutt6[1]; |
782 | |
783 | out1[3] = inp1[2] * lutt6[6]; |
784 | out1[3] += inp1[5] * lutt6[7]; |
785 | out1[3] += inp1[4] * lutt6[4]; |
786 | out1[3] += inp1[7] * lutt6[5]; |
787 | out1[3] += inp1[6] * lutt6[2]; |
788 | out1[3] += inp2[1] * lutt6[3]; |
789 | out1[3] += inp2[0] * lutt6[0]; |
790 | out1[3] += inp2[3] * lutt6[1]; |
791 | |
792 | out1[2] = inp1[5] * lutt6[6]; |
793 | out1[2] += inp1[4] * lutt6[7]; |
794 | out1[2] += inp1[7] * lutt6[4]; |
795 | out1[2] += inp1[6] * lutt6[5]; |
796 | out1[2] += inp2[1] * lutt6[2]; |
797 | out1[2] += inp2[0] * lutt6[3]; |
798 | out1[2] += inp2[3] * lutt6[0]; |
799 | out1[2] += inp2[2] * lutt6[1]; |
800 | |
801 | out1[5] = inp1[4] * lutt6[6]; |
802 | out1[5] += inp1[7] * lutt6[7]; |
803 | out1[5] += inp1[6] * lutt6[4]; |
804 | out1[5] += inp2[1] * lutt6[5]; |
805 | out1[5] += inp2[0] * lutt6[2]; |
806 | out1[5] += inp2[3] * lutt6[3]; |
807 | out1[5] += inp2[2] * lutt6[0]; |
808 | out1[5] += inp2[5] * lutt6[1]; |
809 | |
810 | out1[4] = inp1[7] * lutt6[6]; |
811 | out1[4] += inp1[6] * lutt6[7]; |
812 | out1[4] += inp2[1] * lutt6[4]; |
813 | out1[4] += inp2[0] * lutt6[5]; |
814 | out1[4] += inp2[3] * lutt6[2]; |
815 | out1[4] += inp2[2] * lutt6[3]; |
816 | out1[4] += inp2[5] * lutt6[0]; |
817 | out1[4] += inp2[4] * lutt6[1]; |
818 | |
819 | out1[7] = inp1[6] * lutt6[6]; |
820 | out1[7] += inp2[1] * lutt6[7]; |
821 | out1[7] += inp2[0] * lutt6[4]; |
822 | out1[7] += inp2[3] * lutt6[5]; |
823 | out1[7] += inp2[2] * lutt6[2]; |
824 | out1[7] += inp2[5] * lutt6[3]; |
825 | out1[7] += inp2[4] * lutt6[0]; |
826 | out1[7] += inp2[7] * lutt6[1]; |
827 | |
828 | out1[6] = inp2[1] * lutt6[6]; |
829 | out1[6] += inp2[0] * lutt6[7]; |
830 | out1[6] += inp2[3] * lutt6[4]; |
831 | out1[6] += inp2[2] * lutt6[5]; |
832 | out1[6] += inp2[5] * lutt6[2]; |
833 | out1[6] += inp2[4] * lutt6[3]; |
834 | out1[6] += inp2[7] * lutt6[0]; |
835 | out1[6] += inp2[6] * lutt6[1]; |
836 | outp[1] = /*CLAMP*/((out1[1] + 0x4000) >> 0xF); |
837 | outp[0] = /*CLAMP*/((out1[0] + 0x4000) >> 0xF); |
838 | outp[3] = /*CLAMP*/((out1[3] + 0x4000) >> 0xF); |
839 | outp[2] = /*CLAMP*/((out1[2] + 0x4000) >> 0xF); |
840 | outp[5] = /*CLAMP*/((out1[5] + 0x4000) >> 0xF); |
841 | outp[4] = /*CLAMP*/((out1[4] + 0x4000) >> 0xF); |
842 | outp[7] = /*CLAMP*/((out1[7] + 0x4000) >> 0xF); |
843 | outp[6] = /*CLAMP*/((out1[6] + 0x4000) >> 0xF); |
844 | inp1 = inp2; |
845 | inp2 += 8; |
846 | outp += 8; |
847 | } |
848 | memcpy(save, inp2 - 8, 0x10); |
849 | memcpy(BufferSpace + (inst1 & 0xffff), outbuff, cnt); |
850 | } |
851 | |
852 | static void SEGMENT2(uint32_t inst1, uint32_t inst2) |
853 | { |
854 | if (isZeldaABI) { |
855 | FILTER2(inst1, inst2); |
856 | return; |
857 | } |
858 | if ((inst1 & 0xffffff) == 0) { |
859 | isMKABI = true; |
860 | } else { |
861 | isMKABI = false; |
862 | isZeldaABI = true; |
863 | FILTER2(inst1, inst2); |
864 | } |
865 | } |
866 | |
/* Intentionally empty handler: commands routed here are ignored without
 * any message.
 */
static void UNKNOWN(uint32_t inst1, uint32_t inst2)
{
    (void)inst1;
    (void)inst2;
}
870 | |
871 | const acmd_callback_t ABI2[0x20] = { |
872 | SPNOOP , ADPCM2, CLEARBUFF2, UNKNOWN, ADDMIXER, RESAMPLE2, UNKNOWN, SEGMENT2, |
873 | SETBUFF2 , DUPLICATE2, DMEMMOVE2, LOADADPCM2, MIXER2, INTERLEAVE2, HILOGAIN, SETLOOP2, |
874 | SPNOOP, INTERL2 , ENVSETUP1, ENVMIXER2, LOADBUFF2, SAVEBUFF2, ENVSETUP2, SPNOOP, |
875 | HILOGAIN , SPNOOP, DUPLICATE2 , UNKNOWN , SPNOOP , SPNOOP , SPNOOP , SPNOOP |
876 | }; |
877 | /* NOTES: |
878 | * |
879 | * FILTER/SEGMENT - Still needs to be finished up... add FILTER? |
 * UNKNOWN #27 - Is this worth doing? Looks like a pain in the ass just for WaveRace64
881 | */ |
882 | |