df00ea13 |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus-rsp-hle - musyx.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2013 Bobby Smiles * |
5 | * * |
6 | * This program is free software; you can redistribute it and/or modify * |
7 | * it under the terms of the GNU General Public License as published by * |
8 | * the Free Software Foundation; either version 2 of the License, or * |
9 | * (at your option) any later version. * |
10 | * * |
11 | * This program is distributed in the hope that it will be useful, * |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
14 | * GNU General Public License for more details. * |
15 | * * |
16 | * You should have received a copy of the GNU General Public License * |
17 | * along with this program; if not, write to the * |
18 | * Free Software Foundation, Inc., * |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
20 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
21 | |
22 | #include <stdbool.h> |
23 | #include <stdint.h> |
24 | #include <string.h> |
25 | #include <stddef.h> |
26 | |
27 | #include "m64p_plugin.h" |
28 | #include "m64p_types.h" |
29 | #include "hle.h" |
30 | #include "musyx.h" |
31 | |
/* various constants */
/* number of int16_t samples held by each internal subframe (see musyx_t) */
enum { SUBFRAME_SIZE = 192 };
/* number of voice slots in one SFD entry, and number of bits scanned in the
 * voice bitmask */
enum { MAX_VOICES = 32 };

/* capacity, in int16_t samples, of the per-voice sample buffer */
enum { SAMPLE_BUFFER_SIZE = 0x200 };
37 | |
38 | |
/* Byte offsets of the fields of one SFD structure, relative to sfd_ptr.
 * SFD entries are laid out back to back, each followed by MAX_VOICES
 * VOICE structures starting at SFD_VOICES. */
enum {
    SFD_VOICE_COUNT     = 0x0,  /* not read anywhere in this file */
    SFD_SFX_INDEX       = 0x2,  /* u16: index handed to sfx_stage */
    SFD_VOICE_BITMASK   = 0x4,  /* u32: voices contributing to the base volumes */
    SFD_STATE_PTR       = 0x8,  /* u32: DRAM address of the STATE structure */
    SFD_SFX_PTR         = 0xc,  /* u32: DRAM address of the SFX structure (0 = none) */

    SFD_VOICES          = 0x10  /* start of the VOICE array */
};
48 | |
/* Byte offsets of the fields of one VOICE structure, relative to voice_ptr.
 * Offsets 0x40/0x42 are shared: they are read as two u16 for PCM16 voices
 * and as one u32 table pointer for ADPCM voices. */
enum {
    VOICE_ENV_BEGIN         = 0x00, /* 4 x u32: initial envelope values */
    VOICE_ENV_STEP          = 0x10, /* 4 x u32: per-sample envelope increments */
    VOICE_PITCH_Q16         = 0x20, /* u16: initial pitch accumulator */
    VOICE_PITCH_SHIFT       = 0x22, /* u16: pitch step (shifted left by 4 before use) */
    VOICE_CATSRC_0          = 0x24, /* CATSRC descriptor of the main segment */
    VOICE_CATSRC_1          = 0x30, /* CATSRC descriptor of the optional second segment */
    VOICE_ADPCM_FRAMES      = 0x3c, /* 2 x u8: ADPCM frame counts; first byte 0 selects PCM16 */
    VOICE_SKIP_SAMPLES      = 0x3e, /* 2 x u8: samples to skip in each segment */

    /* for PCM16 */
    VOICE_U16_40            = 0x40,
    VOICE_U16_42            = 0x42,

    /* for ADPCM */
    VOICE_ADPCM_TABLE_PTR   = 0x40,

    VOICE_INTERLEAVED_PTR   = 0x44, /* u32: output address; non-zero ends the voice list */
    VOICE_END_POINT         = 0x48,
    VOICE_RESTART_POINT     = 0x4a,
    VOICE_U16_4C            = 0x4c, /* not read anywhere in this file */
    VOICE_U16_4E            = 0x4e,

    VOICE_SIZE              = 0x50  /* size in bytes of one VOICE structure */
};
74 | |
/* Byte offsets inside a CATSRC descriptor: two (pointer, size) sources that
 * dma_cat8/dma_cat16 copy one after the other into a single destination. */
enum {
    CATSRC_PTR1     = 0x00, /* u32: DRAM address of the first source */
    CATSRC_PTR2     = 0x04, /* u32: DRAM address of the second source */
    CATSRC_SIZE1    = 0x08, /* u16: size in bytes of the first source */
    CATSRC_SIZE2    = 0x0a  /* u16: size in bytes of the second source (0 = unused) */
};
81 | |
/* Byte offsets inside the persistent STATE structure, relative to state_ptr */
enum {
    STATE_LAST_SAMPLE   = 0x0,   /* per voice: 4 x s16 last mixed samples (8 bytes each) */
    STATE_BASE_VOL      = 0x100, /* 4 high halfwords followed by 4 low halfwords */
    STATE_CC0           = 0x110, /* SUBFRAME_SIZE x s16: saved cc0 subframe */
    STATE_740_LAST4     = 0x290  /* 4 x s16: saved subframe_740_last4 */
};
88 | |
/* Byte offsets inside the SFX structure, relative to sfx_ptr */
enum {
    SFX_CBUFFER_PTR     = 0x00, /* u32: DRAM address of the circular delay buffer */
    SFX_CBUFFER_LENGTH  = 0x04, /* u32: length of that buffer, counted in samples */
    SFX_TAP_COUNT       = 0x08, /* u16: number of delay taps to mix */
    SFX_FIR4_HGAIN      = 0x0a, /* s16: gain applied to the FIR4 coefficients */
    SFX_TAP_DELAYS      = 0x0c, /* 8 x u32: tap delays, in samples */
    SFX_TAP_GAINS       = 0x2c, /* 8 x s16: tap gains */
    /* padding          = 0x3c */
    SFX_FIR4_HCOEFFS    = 0x40  /* 4 x s16: FIR4 coefficients */
};
99 | |
100 | |
/* struct definition */
/* Working state of one musyx_task run: the four subframes being mixed plus
 * the values carried from one subframe to the next. */
typedef struct {
    /* internal subframes */
    int16_t left[SUBFRAME_SIZE];
    int16_t right[SUBFRAME_SIZE];
    int16_t cc0[SUBFRAME_SIZE];
    int16_t e50[SUBFRAME_SIZE];

    /* internal subframes base volumes (same order: left, right, cc0, e50) */
    int32_t base_vol[4];

    /* last 4 samples of the previous tap-mixed subframe; sfx_stage places
     * them in front of the current one before running the FIR4 filter */
    int16_t subframe_740_last4[4];
} musyx_t;
115 | |
/* helper functions prototypes */

/* base volume handling */
static void load_base_vol(int32_t *base_vol, uint32_t address);
static void save_base_vol(const int32_t *base_vol, uint32_t address);
static void update_base_vol(int32_t *base_vol, uint32_t voice_mask,
                            uint32_t last_sample_ptr);

static void init_subframes(musyx_t *musyx);

/* voice stage: sample loading, ADPCM decoding and mixing */
static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr,
                            uint32_t last_sample_ptr);

static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr);
static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr);

static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples,
                               unsigned *segbase, unsigned *offset);
static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples,
                               unsigned *segbase, unsigned *offset);

static void adpcm_decode_frames(int16_t *dst, const uint8_t *src,
                                const int16_t *table, uint8_t count,
                                uint8_t skip_samples);

static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask,
                                          unsigned lshift, unsigned rshift);
static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src,
                                      const uint8_t *nibbles,
                                      unsigned int rshift);
static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src,
                                        const int16_t *cb_entry,
                                        const int16_t *last_samples,
                                        size_t size);

static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr,
                              const int16_t *samples, unsigned segbase,
                              unsigned offset, uint32_t last_sample_ptr);

/* sfx stage: delay taps and FIR4 filter */
static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx);
static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain);
static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs);


/* output stage */
static void interleave_stage(musyx_t *musyx, uint32_t output_ptr);


/* DRAM accessors */
static uint8_t *dram_u8(uint32_t address);
static uint16_t *dram_u16(uint32_t address);
static uint32_t *dram_u32(uint32_t address);

/* bulk copies between DRAM and host buffers; count is in elements */
static void load_u8(uint8_t *dst, uint32_t address, size_t count);
static void load_u16(uint16_t *dst, uint32_t address, size_t count);
static void load_u32(uint32_t *dst, uint32_t address, size_t count);

static void store_u16(const uint16_t *src, uint32_t address, size_t count);
170 | |
/* Round x up to the next multiple of amount.
 * The mask arithmetic only gives that result when amount is a power of two. */
static inline unsigned int align(unsigned int x, unsigned amount)
{
    const unsigned mask = amount - 1;

    return (x + mask) & ~mask;
}
176 | |
/* Reversed dot product: sum of x[i] * y[n - 1 - i] for i in [0, n).
 * Returns 0 when n is 0. */
static int32_t rdot(size_t n, const int16_t *x, const int16_t *y)
{
    int32_t sum = 0;
    size_t i;

    for (i = 0; i < n; ++i)
        sum += (int32_t)x[i] * (int32_t)y[n - 1 - i];

    return sum;
}
190 | |
191 | |
/* 4-term dot product of x and y where each product is scaled down by 15 bits
 * and the running sum is clamped to the int16_t range after every term. */
static int32_t dot4(const int16_t *x, const int16_t *y)
{
    int32_t accu = 0;
    size_t k = 0;

    while (k < 4) {
        const int32_t term = ((int32_t)x[k] * (int32_t)y[k]) >> 15;

        accu = clamp_s16(accu + term);
        ++k;
    }

    return accu;
}
202 | |
203 | /* Fast and dirty way of reading dram memory |
204 | * Assume properly aligned access |
205 | */ |
206 | static uint8_t *dram_u8(uint32_t address) |
207 | { |
208 | return (uint8_t *)&rsp.RDRAM[(address & 0xffffff) ^ S8]; |
209 | } |
210 | |
211 | static uint16_t *dram_u16(uint32_t address) |
212 | { |
213 | return (uint16_t *)&rsp.RDRAM[(address & 0xffffff) ^ S16]; |
214 | } |
215 | |
216 | static uint32_t *dram_u32(uint32_t address) |
217 | { |
218 | return (uint32_t *)&rsp.RDRAM[address & 0xffffff]; |
219 | } |
220 | |
221 | static void load_u8(uint8_t *dst, uint32_t address, size_t count) |
222 | { |
223 | while (count != 0) { |
224 | *(dst++) = *dram_u8(address); |
225 | address += 1; |
226 | --count; |
227 | } |
228 | } |
229 | |
230 | static void load_u16(uint16_t *dst, uint32_t address, size_t count) |
231 | { |
232 | while (count != 0) { |
233 | *(dst++) = *dram_u16(address); |
234 | address += 2; |
235 | --count; |
236 | } |
237 | } |
238 | |
/* Copy count 32-bit values from DRAM, starting at address, into dst.
 * 32-bit cells need no swizzle, so the whole run is copied in one memcpy. */
static void load_u32(uint32_t *dst, uint32_t address, size_t count)
{
    memcpy(dst, dram_u32(address), count * sizeof(*dst));
}
246 | |
247 | static void store_u16(const uint16_t *src, uint32_t address, size_t count) |
248 | { |
249 | while (count != 0) { |
250 | *dram_u16(address) = *(src++); |
251 | address += 2; |
252 | --count; |
253 | } |
254 | } |
255 | |
256 | /************************************************************************** |
257 | * MusyX audio ucode |
258 | **************************************************************************/ |
259 | void musyx_task(void) |
260 | { |
261 | const OSTask_t *const task = get_task(); |
262 | |
263 | uint32_t sfd_ptr = task->data_ptr; |
264 | uint32_t sfd_count = task->data_size; |
265 | uint32_t state_ptr; |
266 | musyx_t musyx; |
267 | |
268 | DebugMessage(M64MSG_VERBOSE, "musyx_task: *data=%x, #SF=%d", |
269 | sfd_ptr, |
270 | sfd_count); |
271 | |
272 | state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); |
273 | |
274 | /* load initial state */ |
275 | load_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); |
276 | load_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); |
277 | load_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, |
278 | 4); |
279 | |
280 | for (;;) { |
281 | /* parse SFD structure */ |
282 | uint16_t sfx_index = *dram_u16(sfd_ptr + SFD_SFX_INDEX); |
283 | uint32_t voice_mask = *dram_u32(sfd_ptr + SFD_VOICE_BITMASK); |
284 | uint32_t sfx_ptr = *dram_u32(sfd_ptr + SFD_SFX_PTR); |
285 | uint32_t voice_ptr = sfd_ptr + SFD_VOICES; |
286 | uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; |
287 | uint32_t output_ptr; |
288 | |
289 | /* initialize internal subframes using updated base volumes */ |
290 | update_base_vol(musyx.base_vol, voice_mask, last_sample_ptr); |
291 | init_subframes(&musyx); |
292 | |
293 | /* active voices get mixed into L,R,cc0,e50 subframes (optional) */ |
294 | output_ptr = voice_stage(&musyx, voice_ptr, last_sample_ptr); |
295 | |
296 | /* apply delay-based effects (optional) */ |
297 | sfx_stage(&musyx, sfx_ptr, sfx_index); |
298 | |
299 | /* emit interleaved L,R subframes */ |
300 | interleave_stage(&musyx, output_ptr); |
301 | |
302 | --sfd_count; |
303 | if (sfd_count == 0) |
304 | break; |
305 | |
306 | sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; |
307 | state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); |
308 | } |
309 | |
310 | /* writeback updated state */ |
311 | save_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); |
312 | store_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); |
313 | store_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, |
314 | 4); |
315 | } |
316 | |
317 | static void load_base_vol(int32_t *base_vol, uint32_t address) |
318 | { |
319 | base_vol[0] = ((uint32_t)(*dram_u16(address)) << 16) | (*dram_u16(address + 8)); |
320 | base_vol[1] = ((uint32_t)(*dram_u16(address + 2)) << 16) | (*dram_u16(address + 10)); |
321 | base_vol[2] = ((uint32_t)(*dram_u16(address + 4)) << 16) | (*dram_u16(address + 12)); |
322 | base_vol[3] = ((uint32_t)(*dram_u16(address + 6)) << 16) | (*dram_u16(address + 14)); |
323 | } |
324 | |
325 | static void save_base_vol(const int32_t *base_vol, uint32_t address) |
326 | { |
327 | unsigned k; |
328 | |
329 | for (k = 0; k < 4; ++k) { |
330 | *dram_u16(address) = (uint16_t)(base_vol[k] >> 16); |
331 | address += 2; |
332 | } |
333 | |
334 | for (k = 0; k < 4; ++k) { |
335 | *dram_u16(address) = (uint16_t)(base_vol[k]); |
336 | address += 2; |
337 | } |
338 | } |
339 | |
340 | static void update_base_vol(int32_t *base_vol, uint32_t voice_mask, |
341 | uint32_t last_sample_ptr) |
342 | { |
343 | unsigned i, k; |
344 | uint32_t mask; |
345 | |
346 | DebugMessage(M64MSG_VERBOSE, "base_vol voice_mask = %08x", voice_mask); |
347 | DebugMessage(M64MSG_VERBOSE, "BEFORE: base_vol = %08x %08x %08x %08x", |
348 | base_vol[0], base_vol[1], base_vol[2], base_vol[3]); |
349 | |
350 | /* optim: skip voices contributions entirely if voice_mask is empty */ |
351 | if (voice_mask != 0) { |
352 | for (i = 0, mask = 1; i < MAX_VOICES; |
353 | ++i, mask <<= 1, last_sample_ptr += 8) { |
354 | if ((voice_mask & mask) == 0) |
355 | continue; |
356 | |
357 | for (k = 0; k < 4; ++k) |
358 | base_vol[k] += (int16_t)*dram_u16(last_sample_ptr + k * 2); |
359 | } |
360 | } |
361 | |
362 | /* apply 3% decay */ |
363 | for (k = 0; k < 4; ++k) |
364 | base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; |
365 | |
366 | DebugMessage(M64MSG_VERBOSE, "AFTER: base_vol = %08x %08x %08x %08x", |
367 | base_vol[0], base_vol[1], base_vol[2], base_vol[3]); |
368 | } |
369 | |
370 | static void init_subframes(musyx_t *musyx) |
371 | { |
372 | unsigned i; |
373 | |
374 | int16_t base_cc0 = clamp_s16(musyx->base_vol[2]); |
375 | int16_t base_e50 = clamp_s16(musyx->base_vol[3]); |
376 | |
377 | int16_t *left = musyx->left; |
378 | int16_t *right = musyx->right; |
379 | int16_t *cc0 = musyx->cc0; |
380 | int16_t *e50 = musyx->e50; |
381 | |
382 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
383 | *(e50++) = base_e50; |
384 | *(left++) = clamp_s16(*cc0 + base_cc0); |
385 | *(right++) = clamp_s16(-*cc0 - base_cc0); |
386 | *(cc0++) = 0; |
387 | } |
388 | } |
389 | |
390 | /* Process voices, and returns interleaved subframe destination address */ |
391 | static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr, |
392 | uint32_t last_sample_ptr) |
393 | { |
394 | uint32_t output_ptr; |
395 | int i = 0; |
396 | |
397 | /* voice stage can be skipped if first voice has no samples */ |
398 | if (*dram_u16(voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) { |
399 | DebugMessage(M64MSG_VERBOSE, "Skipping Voice stage"); |
400 | output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); |
401 | } else { |
402 | /* otherwise process voices until a non null output_ptr is encountered */ |
403 | for (;;) { |
404 | /* load voice samples (PCM16 or APDCM) */ |
405 | int16_t samples[SAMPLE_BUFFER_SIZE]; |
406 | unsigned segbase; |
407 | unsigned offset; |
408 | |
409 | DebugMessage(M64MSG_VERBOSE, "Processing Voice #%d", i); |
410 | |
411 | if (*dram_u8(voice_ptr + VOICE_ADPCM_FRAMES) == 0) |
412 | load_samples_PCM16(voice_ptr, samples, &segbase, &offset); |
413 | else |
414 | load_samples_ADPCM(voice_ptr, samples, &segbase, &offset); |
415 | |
416 | /* mix them with each internal subframes */ |
417 | mix_voice_samples(musyx, voice_ptr, samples, segbase, offset, |
418 | last_sample_ptr + i * 8); |
419 | |
420 | /* check break condition */ |
421 | output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); |
422 | if (output_ptr != 0) |
423 | break; |
424 | |
425 | /* next voice */ |
426 | ++i; |
427 | voice_ptr += VOICE_SIZE; |
428 | } |
429 | } |
430 | |
431 | return output_ptr; |
432 | } |
433 | |
434 | static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr) |
435 | { |
436 | uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); |
437 | uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); |
438 | uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); |
439 | uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); |
440 | |
441 | size_t count1 = size1; |
442 | size_t count2 = size2; |
443 | |
444 | DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", |
445 | ptr1, |
446 | ptr2, |
447 | size1, |
448 | size2); |
449 | |
450 | load_u8(dst, ptr1, count1); |
451 | |
452 | if (size2 == 0) |
453 | return; |
454 | |
455 | load_u8(dst + count1, ptr2, count2); |
456 | } |
457 | |
458 | static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr) |
459 | { |
460 | uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); |
461 | uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); |
462 | uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); |
463 | uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); |
464 | |
465 | size_t count1 = size1 >> 1; |
466 | size_t count2 = size2 >> 1; |
467 | |
468 | DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", |
469 | ptr1, |
470 | ptr2, |
471 | size1, |
472 | size2); |
473 | |
474 | load_u16(dst, ptr1, count1); |
475 | |
476 | if (size2 == 0) |
477 | return; |
478 | |
479 | load_u16(dst + count1, ptr2, count2); |
480 | } |
481 | |
482 | static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples, |
483 | unsigned *segbase, unsigned *offset) |
484 | { |
485 | |
486 | uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES); |
487 | uint16_t u16_40 = *dram_u16(voice_ptr + VOICE_U16_40); |
488 | uint16_t u16_42 = *dram_u16(voice_ptr + VOICE_U16_42); |
489 | |
490 | unsigned count = align(u16_40 + u8_3e, 4); |
491 | |
492 | DebugMessage(M64MSG_VERBOSE, "Format: PCM16"); |
493 | |
494 | *segbase = SAMPLE_BUFFER_SIZE - count; |
495 | *offset = u8_3e; |
496 | |
497 | dma_cat16((uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0); |
498 | |
499 | if (u16_42 != 0) |
500 | dma_cat16((uint16_t *)samples, voice_ptr + VOICE_CATSRC_1); |
501 | } |
502 | |
503 | static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples, |
504 | unsigned *segbase, unsigned *offset) |
505 | { |
506 | /* decompressed samples cannot exceed 0x400 bytes; |
507 | * ADPCM has a compression ratio of 5/16 */ |
508 | uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16]; |
509 | int16_t adpcm_table[128]; |
510 | |
511 | uint8_t u8_3c = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES ); |
512 | uint8_t u8_3d = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES + 1); |
513 | uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES ); |
514 | uint8_t u8_3f = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES + 1); |
515 | uint32_t adpcm_table_ptr = *dram_u32(voice_ptr + VOICE_ADPCM_TABLE_PTR); |
516 | unsigned count; |
517 | |
518 | DebugMessage(M64MSG_VERBOSE, "Format: ADPCM"); |
519 | |
520 | DebugMessage(M64MSG_VERBOSE, "Loading ADPCM table: %08x", adpcm_table_ptr); |
521 | load_u16((uint16_t *)adpcm_table, adpcm_table_ptr, 128); |
522 | |
523 | count = u8_3c << 5; |
524 | |
525 | *segbase = SAMPLE_BUFFER_SIZE - count; |
526 | *offset = u8_3e & 0x1f; |
527 | |
528 | dma_cat8(buffer, voice_ptr + VOICE_CATSRC_0); |
529 | adpcm_decode_frames(samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e); |
530 | |
531 | if (u8_3d != 0) { |
532 | dma_cat8(buffer, voice_ptr + VOICE_CATSRC_1); |
533 | adpcm_decode_frames(samples, buffer, adpcm_table, u8_3d, u8_3f); |
534 | } |
535 | } |
536 | |
537 | static void adpcm_decode_frames(int16_t *dst, const uint8_t *src, |
538 | const int16_t *table, uint8_t count, |
539 | uint8_t skip_samples) |
540 | { |
541 | int16_t frame[32]; |
542 | const uint8_t *nibbles = src + 8; |
543 | unsigned i; |
544 | bool jump_gap = false; |
545 | |
546 | DebugMessage(M64MSG_VERBOSE, "ADPCM decode: count=%d, skip=%d", count, |
547 | skip_samples); |
548 | |
549 | if (skip_samples >= 32) { |
550 | jump_gap = true; |
551 | nibbles += 16; |
552 | src += 4; |
553 | } |
554 | |
555 | for (i = 0; i < count; ++i) { |
556 | uint8_t c2 = nibbles[0]; |
557 | |
558 | const int16_t *book = (c2 & 0xf0) + table; |
559 | unsigned int rshift = (c2 & 0x0f); |
560 | |
561 | adpcm_get_predicted_frame(frame, src, nibbles, rshift); |
562 | |
563 | memcpy(dst, frame, 2 * sizeof(frame[0])); |
564 | adpcm_decode_upto_8_samples(dst + 2, frame + 2, book, dst , 6); |
565 | adpcm_decode_upto_8_samples(dst + 8, frame + 8, book, dst + 6, 8); |
566 | adpcm_decode_upto_8_samples(dst + 16, frame + 16, book, dst + 14, 8); |
567 | adpcm_decode_upto_8_samples(dst + 24, frame + 24, book, dst + 22, 8); |
568 | |
569 | if (jump_gap) { |
570 | nibbles += 8; |
571 | src += 32; |
572 | } |
573 | |
574 | jump_gap = !jump_gap; |
575 | nibbles += 16; |
576 | src += 4; |
577 | dst += 32; |
578 | } |
579 | } |
580 | |
/* Extract one residual from byte: keep the bits selected by mask, move them
 * up by lshift so the nibble's top bit becomes the int16_t sign bit, then
 * shift right by rshift with sign extension. */
static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask,
                                          unsigned lshift, unsigned rshift)
{
    const int16_t widened =
        (int16_t)(((uint16_t)byte & (uint16_t)mask) << lshift);

    return (int16_t)(widened >> rshift); /* signed shift */
}
588 | |
/* Build the 32 raw values of one ADPCM frame in dst.
 *
 * dst[0] and dst[1] are the two big-endian 16-bit samples stored in src[0..3].
 * The other 30 values are the residuals from nibbles[1..15] (nibbles[0] is
 * the frame header and is skipped), high nibble first, each scaled by rshift.
 */
static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src,
                                      const uint8_t *nibbles,
                                      unsigned int rshift)
{
    unsigned int n;

    dst[0] = (src[0] << 8) | src[1];
    dst[1] = (src[2] << 8) | src[3];
    dst += 2;

    for (n = 1; n < 16; ++n, dst += 2) {
        const uint8_t packed = nibbles[n];

        dst[0] = adpcm_get_predicted_sample(packed, 0xf0,  8, rshift);
        dst[1] = adpcm_get_predicted_sample(packed, 0x0f, 12, rshift);
    }
}
605 | |
/* Decode up to 8 ADPCM samples.
 *
 * dst          receives size decoded samples
 * src          the size residuals of this group
 * cb_entry     codebook entry: 8 coefficients applied to last_samples[0]
 *              followed by 8 coefficients applied to last_samples[1]
 * last_samples the two samples decoded just before this group
 * size         number of samples to decode (at most 8)
 *
 * Each output is (residual * 2^11 + prediction) >> 11, clamped to int16_t.
 * The prediction combines the two previous samples with the earlier
 * residuals of this group, weighted by the second codebook row.
 */
static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src,
                                        const int16_t *cb_entry,
                                        const int16_t *last_samples,
                                        size_t size)
{
    const int16_t *const book1 = cb_entry;
    const int16_t *const book2 = cb_entry + 8;

    /* read before the loop: dst may directly follow last_samples */
    const int16_t l1 = last_samples[0];
    const int16_t l2 = last_samples[1];

    size_t i;
    int32_t accu;

    for (i = 0; i < size; ++i) {
        /* scale by 2^11 with a multiply: left-shifting a negative residual
         * is undefined behavior, the multiply gives the same value */
        accu = (int32_t)src[i] * (1 << 11);
        accu += book1[i] * l1 + book2[i] * l2 + rdot(i, book2, src);
        dst[i] = clamp_s16(accu >> 11);
    }
}
626 | |
627 | static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr, |
628 | const int16_t *samples, unsigned segbase, |
629 | unsigned offset, uint32_t last_sample_ptr) |
630 | { |
631 | int i, k; |
632 | |
633 | /* parse VOICE structure */ |
634 | const uint16_t pitch_q16 = *dram_u16(voice_ptr + VOICE_PITCH_Q16); |
635 | const uint16_t pitch_shift = *dram_u16(voice_ptr + VOICE_PITCH_SHIFT); /* Q4.12 */ |
636 | |
637 | const uint16_t end_point = *dram_u16(voice_ptr + VOICE_END_POINT); |
638 | const uint16_t restart_point = *dram_u16(voice_ptr + VOICE_RESTART_POINT); |
639 | |
640 | const uint16_t u16_4e = *dram_u16(voice_ptr + VOICE_U16_4E); |
641 | |
642 | /* init values and pointers */ |
643 | const int16_t *sample = samples + segbase + offset + u16_4e; |
644 | const int16_t *const sample_end = samples + segbase + end_point; |
645 | const int16_t *const sample_restart = samples + (restart_point & 0x7fff) + |
646 | (((restart_point & 0x8000) != 0) ? 0x000 : segbase); |
647 | |
648 | |
649 | uint32_t pitch_accu = pitch_q16; |
650 | uint32_t pitch_step = pitch_shift << 4; |
651 | |
652 | int32_t v4_env[4]; |
653 | int32_t v4_env_step[4]; |
654 | int16_t *v4_dst[4]; |
655 | int16_t v4[4]; |
656 | |
657 | load_u32((uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4); |
658 | load_u32((uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4); |
659 | |
660 | v4_dst[0] = musyx->left; |
661 | v4_dst[1] = musyx->right; |
662 | v4_dst[2] = musyx->cc0; |
663 | v4_dst[3] = musyx->e50; |
664 | |
665 | DebugMessage(M64MSG_VERBOSE, |
666 | "Voice debug: segbase=%d" |
667 | "\tu16_4e=%04x\n" |
668 | "\tpitch: frac0=%04x shift=%04x\n" |
669 | "\tend_point=%04x restart_point=%04x\n" |
670 | "\tenv = %08x %08x %08x %08x\n" |
671 | "\tenv_step = %08x %08x %08x %08x\n", |
672 | segbase, |
673 | u16_4e, |
674 | pitch_q16, pitch_shift, |
675 | end_point, restart_point, |
676 | v4_env[0], v4_env[1], v4_env[2], v4_env[3], |
677 | v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]); |
678 | |
679 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
680 | /* update sample and resample_lut pointers and then pitch_accu */ |
681 | const int16_t *lut = (int16_t *)(ResampleLUT + ((pitch_accu & 0xfc00) >> 8)); |
682 | int dist; |
683 | int16_t v; |
684 | |
685 | sample += (pitch_accu >> 16); |
686 | pitch_accu &= 0xffff; |
687 | pitch_accu += pitch_step; |
688 | |
689 | /* handle end/restart points */ |
690 | dist = sample - sample_end; |
691 | if (dist >= 0) |
692 | sample = sample_restart + dist; |
693 | |
694 | /* apply resample filter */ |
695 | v = clamp_s16(dot4(sample, lut)); |
696 | |
697 | for (k = 0; k < 4; ++k) { |
698 | /* envmix */ |
699 | int32_t accu = (v * (v4_env[k] >> 16)) >> 15; |
700 | v4[k] = clamp_s16(accu); |
701 | *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k])); |
702 | |
703 | /* update envelopes and dst pointers */ |
704 | ++(v4_dst[k]); |
705 | v4_env[k] += v4_env_step[k]; |
706 | } |
707 | } |
708 | |
709 | /* save last resampled sample */ |
710 | store_u16((uint16_t *)v4, last_sample_ptr, 4); |
711 | |
712 | DebugMessage(M64MSG_VERBOSE, "last_sample = %04x %04x %04x %04x", |
713 | v4[0], v4[1], v4[2], v4[3]); |
714 | } |
715 | |
716 | |
717 | static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx) |
718 | { |
719 | unsigned int i; |
720 | |
721 | int16_t buffer[SUBFRAME_SIZE + 4]; |
722 | int16_t *subframe = buffer + 4; |
723 | |
724 | uint32_t tap_delays[8]; |
725 | int16_t tap_gains[8]; |
726 | int16_t fir4_hcoeffs[4]; |
727 | |
728 | int16_t delayed[SUBFRAME_SIZE]; |
729 | int dpos, dlength; |
730 | |
731 | const uint32_t pos = idx * SUBFRAME_SIZE; |
732 | |
733 | uint32_t cbuffer_ptr; |
734 | uint32_t cbuffer_length; |
735 | uint16_t tap_count; |
736 | int16_t fir4_hgain; |
737 | |
738 | DebugMessage(M64MSG_VERBOSE, "SFX: %08x, idx=%d", sfx_ptr, idx); |
739 | |
740 | if (sfx_ptr == 0) |
741 | return; |
742 | |
743 | /* load sfx parameters */ |
744 | cbuffer_ptr = *dram_u32(sfx_ptr + SFX_CBUFFER_PTR); |
745 | cbuffer_length = *dram_u32(sfx_ptr + SFX_CBUFFER_LENGTH); |
746 | |
747 | tap_count = *dram_u16(sfx_ptr + SFX_TAP_COUNT); |
748 | |
749 | load_u32(tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8); |
750 | load_u16((uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8); |
751 | |
752 | fir4_hgain = *dram_u16(sfx_ptr + SFX_FIR4_HGAIN); |
753 | load_u16((uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4); |
754 | |
755 | DebugMessage(M64MSG_VERBOSE, "cbuffer: ptr=%08x length=%x", cbuffer_ptr, |
756 | cbuffer_length); |
757 | |
758 | DebugMessage(M64MSG_VERBOSE, "fir4: hgain=%04x hcoeff=%04x %04x %04x %04x", |
759 | fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], |
760 | fir4_hcoeffs[3]); |
761 | |
762 | DebugMessage(M64MSG_VERBOSE, |
763 | "tap count=%d\n" |
764 | "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n" |
765 | "gains: %04x %04x %04x %04x %04x %04x %04x %04x", |
766 | tap_count, |
767 | tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3], |
768 | tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7], |
769 | tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3], |
770 | tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]); |
771 | |
772 | /* mix up to 8 delayed subframes */ |
773 | memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); |
774 | for (i = 0; i < tap_count; ++i) { |
775 | |
776 | dpos = pos - tap_delays[i]; |
777 | if (dpos <= 0) |
778 | dpos += cbuffer_length; |
779 | dlength = SUBFRAME_SIZE; |
780 | |
781 | if (dpos + SUBFRAME_SIZE > cbuffer_length) { |
782 | dlength = cbuffer_length - dpos; |
783 | load_u16((uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength); |
784 | } |
785 | |
786 | load_u16((uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength); |
787 | |
788 | mix_subframes(subframe, delayed, tap_gains[i]); |
789 | } |
790 | |
791 | /* add resulting subframe to L/R subframes */ |
792 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
793 | int16_t v = subframe[i]; |
794 | musyx->left[i] = clamp_s16(musyx->left[i] + v); |
795 | musyx->right[i] = clamp_s16(musyx->right[i] + v); |
796 | } |
797 | |
798 | /* apply FIR4 filter and writeback filtered result */ |
799 | memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t)); |
800 | memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t)); |
801 | mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs); |
802 | store_u16((uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE); |
803 | } |
804 | |
805 | static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain) |
806 | { |
807 | unsigned int i; |
808 | |
809 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
810 | int32_t v = (hgain * x[i]) >> 15; |
811 | y[i] = clamp_s16(y[i] + v); |
812 | } |
813 | } |
814 | |
815 | static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs) |
816 | { |
817 | unsigned int i; |
818 | int32_t h[4]; |
819 | |
820 | h[0] = (hgain * hcoeffs[0]) >> 15; |
821 | h[1] = (hgain * hcoeffs[1]) >> 15; |
822 | h[2] = (hgain * hcoeffs[2]) >> 15; |
823 | h[3] = (hgain * hcoeffs[3]) >> 15; |
824 | |
825 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
826 | int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15; |
827 | y[i] = clamp_s16(y[i] + v); |
828 | } |
829 | } |
830 | |
831 | |
832 | static void interleave_stage(musyx_t *musyx, uint32_t output_ptr) |
833 | { |
834 | size_t i; |
835 | |
836 | int16_t base_left; |
837 | int16_t base_right; |
838 | |
839 | int16_t *left; |
840 | int16_t *right; |
841 | uint32_t *dst; |
842 | |
843 | DebugMessage(M64MSG_VERBOSE, "interleave: %08x", output_ptr); |
844 | |
845 | base_left = clamp_s16(musyx->base_vol[0]); |
846 | base_right = clamp_s16(musyx->base_vol[1]); |
847 | |
848 | left = musyx->left; |
849 | right = musyx->right; |
850 | dst = dram_u32(output_ptr); |
851 | |
852 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
853 | uint16_t l = clamp_s16(*(left++) + base_left); |
854 | uint16_t r = clamp_s16(*(right++) + base_right); |
855 | |
856 | *(dst++) = (l << 16) | r; |
857 | } |
858 | } |