RSP: Added some missing files
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / musyx.c
CommitLineData
df00ea13 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - musyx.c *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2013 Bobby Smiles *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include <stdbool.h>
23#include <stdint.h>
24#include <string.h>
25#include <stddef.h>
26
27#include "m64p_plugin.h"
28#include "m64p_types.h"
29#include "hle.h"
30#include "musyx.h"
31
/* Sizing constants used throughout the MusyX mixer. */
enum {
    SUBFRAME_SIZE      = 192,   /* int16_t samples in each internal subframe */
    MAX_VOICES         = 32,    /* voice slots stored in one SFD entry */
    SAMPLE_BUFFER_SIZE = 0x200  /* int16_t samples in a voice's decode buffer */
};
37
38
/* Byte offsets of the fields of one SFD entry in RDRAM. */
enum {
    SFD_VOICE_COUNT   = 0x0,  /* not read by this file */
    SFD_SFX_INDEX     = 0x2,  /* u16: subframe index handed to sfx_stage */
    SFD_VOICE_BITMASK = 0x4,  /* u32: one bit per voice slot */
    SFD_STATE_PTR     = 0x8,  /* u32: RDRAM address of the state block */
    SFD_SFX_PTR       = 0xc,  /* u32: RDRAM address of the SFX block, 0 = none */

    SFD_VOICES        = 0x10  /* first of MAX_VOICES voice records */
};
48
/* Byte offsets of the fields of one voice record (VOICE_SIZE bytes each). */
enum {
    VOICE_ENV_BEGIN       = 0x00,  /* 4 x u32: starting envelope values */
    VOICE_ENV_STEP        = 0x10,  /* 4 x u32: per-sample envelope increments */
    VOICE_PITCH_Q16       = 0x20,  /* u16: initial pitch accumulator */
    VOICE_PITCH_SHIFT     = 0x22,  /* u16: pitch step (Q4.12) */
    VOICE_CATSRC_0        = 0x24,  /* first CATSRC descriptor */
    VOICE_CATSRC_1        = 0x30,  /* second CATSRC descriptor */
    VOICE_ADPCM_FRAMES    = 0x3c,  /* u8 pair: frame counts; first == 0 selects PCM16 */
    VOICE_SKIP_SAMPLES    = 0x3e,  /* u8 pair: skip counts for each source */

    /* PCM16 interpretation of 0x40..0x43 */
    VOICE_U16_40          = 0x40,
    VOICE_U16_42          = 0x42,

    /* ADPCM interpretation of 0x40..0x43 */
    VOICE_ADPCM_TABLE_PTR = 0x40,  /* u32: RDRAM address of the 128-entry table */

    VOICE_INTERLEAVED_PTR = 0x44,  /* u32: output address, non-zero ends the voice walk */
    VOICE_END_POINT       = 0x48,  /* u16 */
    VOICE_RESTART_POINT   = 0x4a,  /* u16: bit 15 selects the restart base */
    VOICE_U16_4C          = 0x4c,  /* not read by this file */
    VOICE_U16_4E          = 0x4e,  /* u16: added to the initial sample position */

    VOICE_SIZE            = 0x50   /* stride between consecutive voice records */
};
74
/* Byte offsets inside a CATSRC descriptor: two source runs that are
 * concatenated by dma_cat8 / dma_cat16. */
enum {
    CATSRC_PTR1  = 0x00,  /* u32: RDRAM address of the first run */
    CATSRC_PTR2  = 0x04,  /* u32: RDRAM address of the second run */
    CATSRC_SIZE1 = 0x08,  /* u16: byte size of the first run */
    CATSRC_SIZE2 = 0x0a   /* u16: byte size of the second run, 0 = absent */
};
81
/* Byte offsets inside the persistent state block in RDRAM. */
enum {
    STATE_LAST_SAMPLE = 0x0,    /* 8 bytes (4 x s16) per voice slot */
    STATE_BASE_VOL    = 0x100,  /* 4 high halves followed by 4 low halves */
    STATE_CC0         = 0x110,  /* SUBFRAME_SIZE x s16 */
    STATE_740_LAST4   = 0x290   /* 4 x s16 */
};
88
/* Byte offsets inside the SFX parameter block in RDRAM. */
enum {
    SFX_CBUFFER_PTR    = 0x00,  /* u32: RDRAM address of the circular buffer */
    SFX_CBUFFER_LENGTH = 0x04,  /* u32: circular buffer length, in samples */
    SFX_TAP_COUNT      = 0x08,  /* u16: number of delay taps to mix */
    SFX_FIR4_HGAIN     = 0x0a,  /* s16: gain applied to the FIR4 coefficients */
    SFX_TAP_DELAYS     = 0x0c,  /* 8 x u32 */
    SFX_TAP_GAINS      = 0x2c,  /* 8 x s16 */
    /* 0x3c..0x3f: padding */
    SFX_FIR4_HCOEFFS   = 0x40   /* 4 x s16 */
};
99
100
101/* struct definition */
102typedef struct {
103 /* internal subframes */
104 int16_t left[SUBFRAME_SIZE];
105 int16_t right[SUBFRAME_SIZE];
106 int16_t cc0[SUBFRAME_SIZE];
107 int16_t e50[SUBFRAME_SIZE];
108
109 /* internal subframes base volumes */
110 int32_t base_vol[4];
111
112 /* */
113 int16_t subframe_740_last4[4];
114} musyx_t;
115
116/* helper functions prototypes */
117static void load_base_vol(int32_t *base_vol, uint32_t address);
118static void save_base_vol(const int32_t *base_vol, uint32_t address);
119static void update_base_vol(int32_t *base_vol, uint32_t voice_mask,
120 uint32_t last_sample_ptr);
121
122static void init_subframes(musyx_t *musyx);
123
124static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr,
125 uint32_t last_sample_ptr);
126
127static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr);
128static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr);
129
130static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples,
131 unsigned *segbase, unsigned *offset);
132static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples,
133 unsigned *segbase, unsigned *offset);
134
135static void adpcm_decode_frames(int16_t *dst, const uint8_t *src,
136 const int16_t *table, uint8_t count,
137 uint8_t skip_samples);
138
139static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask,
140 unsigned lshift, unsigned rshift);
141static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src,
142 const uint8_t *nibbles,
143 unsigned int rshift);
144static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src,
145 const int16_t *cb_entry,
146 const int16_t *last_samples,
147 size_t size);
148
149static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr,
150 const int16_t *samples, unsigned segbase,
151 unsigned offset, uint32_t last_sample_ptr);
152
153static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx);
154static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain);
155static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs);
156
157
158static void interleave_stage(musyx_t *musyx, uint32_t output_ptr);
159
160
161static uint8_t *dram_u8(uint32_t address);
162static uint16_t *dram_u16(uint32_t address);
163static uint32_t *dram_u32(uint32_t address);
164
165static void load_u8(uint8_t *dst, uint32_t address, size_t count);
166static void load_u16(uint16_t *dst, uint32_t address, size_t count);
167static void load_u32(uint32_t *dst, uint32_t address, size_t count);
168
169static void store_u16(const uint16_t *src, uint32_t address, size_t count);
170
/* Round x up to the next multiple of amount.
 * The mask arithmetic is only meaningful when amount is a power of two. */
static inline unsigned int align(unsigned int x, unsigned amount)
{
    const unsigned int mask = amount - 1u;

    return (x + mask) & ~mask;
}
176
/* Reversed dot product: sum of x[k] * y[n - 1 - k] for k in [0, n).
 * Returns 0 when n is 0. */
static int32_t rdot(size_t n, const int16_t *x, const int16_t *y)
{
    int32_t sum = 0;
    size_t k;

    for (k = 0; k < n; ++k)
        sum += (int32_t)x[k] * (int32_t)y[n - 1 - k];

    return sum;
}
190
191
/* 4-tap dot product in Q15: each product is shifted right by 15 and the
 * running sum is passed through clamp_s16 after every tap. */
static int32_t dot4(const int16_t *x, const int16_t *y)
{
    int32_t sum = 0;
    size_t k = 0;

    while (k < 4) {
        const int32_t term = ((int32_t)x[k] * (int32_t)y[k]) >> 15;

        sum = clamp_s16(sum + term);
        ++k;
    }

    return sum;
}
202
203/* Fast and dirty way of reading dram memory
204 * Assume properly aligned access
205 */
206static uint8_t *dram_u8(uint32_t address)
207{
208 return (uint8_t *)&rsp.RDRAM[(address & 0xffffff) ^ S8];
209}
210
211static uint16_t *dram_u16(uint32_t address)
212{
213 return (uint16_t *)&rsp.RDRAM[(address & 0xffffff) ^ S16];
214}
215
216static uint32_t *dram_u32(uint32_t address)
217{
218 return (uint32_t *)&rsp.RDRAM[address & 0xffffff];
219}
220
221static void load_u8(uint8_t *dst, uint32_t address, size_t count)
222{
223 while (count != 0) {
224 *(dst++) = *dram_u8(address);
225 address += 1;
226 --count;
227 }
228}
229
230static void load_u16(uint16_t *dst, uint32_t address, size_t count)
231{
232 while (count != 0) {
233 *(dst++) = *dram_u16(address);
234 address += 2;
235 --count;
236 }
237}
238
/* Copy `count` 32-bit words from RDRAM, starting at `address`, into dst.
 * Done as one memcpy from the first word's location, so only the start
 * address is masked (unlike load_u8 / load_u16). */
static void load_u32(uint32_t *dst, uint32_t address, size_t count)
{
    memcpy(dst, dram_u32(address), sizeof(*dst) * count);
}
246
247static void store_u16(const uint16_t *src, uint32_t address, size_t count)
248{
249 while (count != 0) {
250 *dram_u16(address) = *(src++);
251 address += 2;
252 --count;
253 }
254}
255
256/**************************************************************************
257 * MusyX audio ucode
258 **************************************************************************/
259void musyx_task(void)
260{
261 const OSTask_t *const task = get_task();
262
263 uint32_t sfd_ptr = task->data_ptr;
264 uint32_t sfd_count = task->data_size;
265 uint32_t state_ptr;
266 musyx_t musyx;
267
268 DebugMessage(M64MSG_VERBOSE, "musyx_task: *data=%x, #SF=%d",
269 sfd_ptr,
270 sfd_count);
271
272 state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR);
273
274 /* load initial state */
275 load_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL);
276 load_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE);
277 load_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4,
278 4);
279
280 for (;;) {
281 /* parse SFD structure */
282 uint16_t sfx_index = *dram_u16(sfd_ptr + SFD_SFX_INDEX);
283 uint32_t voice_mask = *dram_u32(sfd_ptr + SFD_VOICE_BITMASK);
284 uint32_t sfx_ptr = *dram_u32(sfd_ptr + SFD_SFX_PTR);
285 uint32_t voice_ptr = sfd_ptr + SFD_VOICES;
286 uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE;
287 uint32_t output_ptr;
288
289 /* initialize internal subframes using updated base volumes */
290 update_base_vol(musyx.base_vol, voice_mask, last_sample_ptr);
291 init_subframes(&musyx);
292
293 /* active voices get mixed into L,R,cc0,e50 subframes (optional) */
294 output_ptr = voice_stage(&musyx, voice_ptr, last_sample_ptr);
295
296 /* apply delay-based effects (optional) */
297 sfx_stage(&musyx, sfx_ptr, sfx_index);
298
299 /* emit interleaved L,R subframes */
300 interleave_stage(&musyx, output_ptr);
301
302 --sfd_count;
303 if (sfd_count == 0)
304 break;
305
306 sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE;
307 state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR);
308 }
309
310 /* writeback updated state */
311 save_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL);
312 store_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE);
313 store_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4,
314 4);
315}
316
317static void load_base_vol(int32_t *base_vol, uint32_t address)
318{
319 base_vol[0] = ((uint32_t)(*dram_u16(address)) << 16) | (*dram_u16(address + 8));
320 base_vol[1] = ((uint32_t)(*dram_u16(address + 2)) << 16) | (*dram_u16(address + 10));
321 base_vol[2] = ((uint32_t)(*dram_u16(address + 4)) << 16) | (*dram_u16(address + 12));
322 base_vol[3] = ((uint32_t)(*dram_u16(address + 6)) << 16) | (*dram_u16(address + 14));
323}
324
325static void save_base_vol(const int32_t *base_vol, uint32_t address)
326{
327 unsigned k;
328
329 for (k = 0; k < 4; ++k) {
330 *dram_u16(address) = (uint16_t)(base_vol[k] >> 16);
331 address += 2;
332 }
333
334 for (k = 0; k < 4; ++k) {
335 *dram_u16(address) = (uint16_t)(base_vol[k]);
336 address += 2;
337 }
338}
339
340static void update_base_vol(int32_t *base_vol, uint32_t voice_mask,
341 uint32_t last_sample_ptr)
342{
343 unsigned i, k;
344 uint32_t mask;
345
346 DebugMessage(M64MSG_VERBOSE, "base_vol voice_mask = %08x", voice_mask);
347 DebugMessage(M64MSG_VERBOSE, "BEFORE: base_vol = %08x %08x %08x %08x",
348 base_vol[0], base_vol[1], base_vol[2], base_vol[3]);
349
350 /* optim: skip voices contributions entirely if voice_mask is empty */
351 if (voice_mask != 0) {
352 for (i = 0, mask = 1; i < MAX_VOICES;
353 ++i, mask <<= 1, last_sample_ptr += 8) {
354 if ((voice_mask & mask) == 0)
355 continue;
356
357 for (k = 0; k < 4; ++k)
358 base_vol[k] += (int16_t)*dram_u16(last_sample_ptr + k * 2);
359 }
360 }
361
362 /* apply 3% decay */
363 for (k = 0; k < 4; ++k)
364 base_vol[k] = (base_vol[k] * 0x0000f850) >> 16;
365
366 DebugMessage(M64MSG_VERBOSE, "AFTER: base_vol = %08x %08x %08x %08x",
367 base_vol[0], base_vol[1], base_vol[2], base_vol[3]);
368}
369
370static void init_subframes(musyx_t *musyx)
371{
372 unsigned i;
373
374 int16_t base_cc0 = clamp_s16(musyx->base_vol[2]);
375 int16_t base_e50 = clamp_s16(musyx->base_vol[3]);
376
377 int16_t *left = musyx->left;
378 int16_t *right = musyx->right;
379 int16_t *cc0 = musyx->cc0;
380 int16_t *e50 = musyx->e50;
381
382 for (i = 0; i < SUBFRAME_SIZE; ++i) {
383 *(e50++) = base_e50;
384 *(left++) = clamp_s16(*cc0 + base_cc0);
385 *(right++) = clamp_s16(-*cc0 - base_cc0);
386 *(cc0++) = 0;
387 }
388}
389
390/* Process voices, and returns interleaved subframe destination address */
391static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr,
392 uint32_t last_sample_ptr)
393{
394 uint32_t output_ptr;
395 int i = 0;
396
397 /* voice stage can be skipped if first voice has no samples */
398 if (*dram_u16(voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) {
399 DebugMessage(M64MSG_VERBOSE, "Skipping Voice stage");
400 output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR);
401 } else {
402 /* otherwise process voices until a non null output_ptr is encountered */
403 for (;;) {
404 /* load voice samples (PCM16 or APDCM) */
405 int16_t samples[SAMPLE_BUFFER_SIZE];
406 unsigned segbase;
407 unsigned offset;
408
409 DebugMessage(M64MSG_VERBOSE, "Processing Voice #%d", i);
410
411 if (*dram_u8(voice_ptr + VOICE_ADPCM_FRAMES) == 0)
412 load_samples_PCM16(voice_ptr, samples, &segbase, &offset);
413 else
414 load_samples_ADPCM(voice_ptr, samples, &segbase, &offset);
415
416 /* mix them with each internal subframes */
417 mix_voice_samples(musyx, voice_ptr, samples, segbase, offset,
418 last_sample_ptr + i * 8);
419
420 /* check break condition */
421 output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR);
422 if (output_ptr != 0)
423 break;
424
425 /* next voice */
426 ++i;
427 voice_ptr += VOICE_SIZE;
428 }
429 }
430
431 return output_ptr;
432}
433
434static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr)
435{
436 uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1);
437 uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2);
438 uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1);
439 uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2);
440
441 size_t count1 = size1;
442 size_t count2 = size2;
443
444 DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x",
445 ptr1,
446 ptr2,
447 size1,
448 size2);
449
450 load_u8(dst, ptr1, count1);
451
452 if (size2 == 0)
453 return;
454
455 load_u8(dst + count1, ptr2, count2);
456}
457
458static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr)
459{
460 uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1);
461 uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2);
462 uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1);
463 uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2);
464
465 size_t count1 = size1 >> 1;
466 size_t count2 = size2 >> 1;
467
468 DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x",
469 ptr1,
470 ptr2,
471 size1,
472 size2);
473
474 load_u16(dst, ptr1, count1);
475
476 if (size2 == 0)
477 return;
478
479 load_u16(dst + count1, ptr2, count2);
480}
481
482static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples,
483 unsigned *segbase, unsigned *offset)
484{
485
486 uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES);
487 uint16_t u16_40 = *dram_u16(voice_ptr + VOICE_U16_40);
488 uint16_t u16_42 = *dram_u16(voice_ptr + VOICE_U16_42);
489
490 unsigned count = align(u16_40 + u8_3e, 4);
491
492 DebugMessage(M64MSG_VERBOSE, "Format: PCM16");
493
494 *segbase = SAMPLE_BUFFER_SIZE - count;
495 *offset = u8_3e;
496
497 dma_cat16((uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0);
498
499 if (u16_42 != 0)
500 dma_cat16((uint16_t *)samples, voice_ptr + VOICE_CATSRC_1);
501}
502
503static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples,
504 unsigned *segbase, unsigned *offset)
505{
506 /* decompressed samples cannot exceed 0x400 bytes;
507 * ADPCM has a compression ratio of 5/16 */
508 uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16];
509 int16_t adpcm_table[128];
510
511 uint8_t u8_3c = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES );
512 uint8_t u8_3d = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES + 1);
513 uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES );
514 uint8_t u8_3f = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES + 1);
515 uint32_t adpcm_table_ptr = *dram_u32(voice_ptr + VOICE_ADPCM_TABLE_PTR);
516 unsigned count;
517
518 DebugMessage(M64MSG_VERBOSE, "Format: ADPCM");
519
520 DebugMessage(M64MSG_VERBOSE, "Loading ADPCM table: %08x", adpcm_table_ptr);
521 load_u16((uint16_t *)adpcm_table, adpcm_table_ptr, 128);
522
523 count = u8_3c << 5;
524
525 *segbase = SAMPLE_BUFFER_SIZE - count;
526 *offset = u8_3e & 0x1f;
527
528 dma_cat8(buffer, voice_ptr + VOICE_CATSRC_0);
529 adpcm_decode_frames(samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e);
530
531 if (u8_3d != 0) {
532 dma_cat8(buffer, voice_ptr + VOICE_CATSRC_1);
533 adpcm_decode_frames(samples, buffer, adpcm_table, u8_3d, u8_3f);
534 }
535}
536
537static void adpcm_decode_frames(int16_t *dst, const uint8_t *src,
538 const int16_t *table, uint8_t count,
539 uint8_t skip_samples)
540{
541 int16_t frame[32];
542 const uint8_t *nibbles = src + 8;
543 unsigned i;
544 bool jump_gap = false;
545
546 DebugMessage(M64MSG_VERBOSE, "ADPCM decode: count=%d, skip=%d", count,
547 skip_samples);
548
549 if (skip_samples >= 32) {
550 jump_gap = true;
551 nibbles += 16;
552 src += 4;
553 }
554
555 for (i = 0; i < count; ++i) {
556 uint8_t c2 = nibbles[0];
557
558 const int16_t *book = (c2 & 0xf0) + table;
559 unsigned int rshift = (c2 & 0x0f);
560
561 adpcm_get_predicted_frame(frame, src, nibbles, rshift);
562
563 memcpy(dst, frame, 2 * sizeof(frame[0]));
564 adpcm_decode_upto_8_samples(dst + 2, frame + 2, book, dst , 6);
565 adpcm_decode_upto_8_samples(dst + 8, frame + 8, book, dst + 6, 8);
566 adpcm_decode_upto_8_samples(dst + 16, frame + 16, book, dst + 14, 8);
567 adpcm_decode_upto_8_samples(dst + 24, frame + 24, book, dst + 22, 8);
568
569 if (jump_gap) {
570 nibbles += 8;
571 src += 32;
572 }
573
574 jump_gap = !jump_gap;
575 nibbles += 16;
576 src += 4;
577 dst += 32;
578 }
579}
580
/* Extract one packed sample: mask `byte`, shift the result left by lshift so
 * it becomes a signed 16-bit value, then shift it right (arithmetically) by
 * rshift. */
static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask,
                                          unsigned lshift, unsigned rshift)
{
    const unsigned field = (unsigned)byte & (unsigned)mask;
    const int16_t widened = (int16_t)(field << lshift);

    /* signed shift: keeps the sign of the widened value */
    return (int16_t)(widened >> rshift);
}
588
/* Unpack one frame into 32 samples.
 *
 * dst[0] and dst[1] are assembled from the four bytes at src (high byte
 * first). The remaining 30 samples come from nibbles[1..15], two per byte,
 * high nibble first; nibbles[0] is not read here.
 */
static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src,
                                      const uint8_t *nibbles,
                                      unsigned int rshift)
{
    unsigned int b;

    dst[0] = (src[0] << 8) | src[1];
    dst[1] = (src[2] << 8) | src[3];

    for (b = 1; b < 16; ++b) {
        const uint8_t packed = nibbles[b];

        dst[2 * b]     = adpcm_get_predicted_sample(packed, 0xf0, 8, rshift);
        dst[2 * b + 1] = adpcm_get_predicted_sample(packed, 0x0f, 12, rshift);
    }
}
605
/* Decode up to 8 samples of an ADPCM group.
 *
 * Output i is src[i] scaled by 2^11, plus the codebook prediction from the
 * two previous output samples (book1[i] * l1 + book2[i] * l2), plus the
 * reversed dot product of book2 with the first i residuals. The sum is
 * scaled back by 2^11 and clamped to 16 bits.
 *
 * dst           receives `size` decoded samples
 * src           residuals for this group
 * cb_entry      codebook entry: 8 coefficients, then 8 more
 * last_samples  the two output samples preceding this group
 * size          number of samples to decode (callers pass 6 or 8)
 */
static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src,
                                        const int16_t *cb_entry,
                                        const int16_t *last_samples,
                                        size_t size)
{
    const int16_t *const book1 = cb_entry;
    const int16_t *const book2 = cb_entry + 8;

    const int16_t l1 = last_samples[0];
    const int16_t l2 = last_samples[1];

    size_t i;
    int32_t accu;

    for (i = 0; i < size; ++i) {
        /* multiply rather than shift: left-shifting a negative residual is
         * undefined behaviour, the product is well defined and identical */
        accu = (int32_t)src[i] * 2048;
        accu += book1[i] * l1 + book2[i] * l2 + rdot(i, book2, src);
        dst[i] = clamp_s16(accu >> 11);
    }
}
626
627static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr,
628 const int16_t *samples, unsigned segbase,
629 unsigned offset, uint32_t last_sample_ptr)
630{
631 int i, k;
632
633 /* parse VOICE structure */
634 const uint16_t pitch_q16 = *dram_u16(voice_ptr + VOICE_PITCH_Q16);
635 const uint16_t pitch_shift = *dram_u16(voice_ptr + VOICE_PITCH_SHIFT); /* Q4.12 */
636
637 const uint16_t end_point = *dram_u16(voice_ptr + VOICE_END_POINT);
638 const uint16_t restart_point = *dram_u16(voice_ptr + VOICE_RESTART_POINT);
639
640 const uint16_t u16_4e = *dram_u16(voice_ptr + VOICE_U16_4E);
641
642 /* init values and pointers */
643 const int16_t *sample = samples + segbase + offset + u16_4e;
644 const int16_t *const sample_end = samples + segbase + end_point;
645 const int16_t *const sample_restart = samples + (restart_point & 0x7fff) +
646 (((restart_point & 0x8000) != 0) ? 0x000 : segbase);
647
648
649 uint32_t pitch_accu = pitch_q16;
650 uint32_t pitch_step = pitch_shift << 4;
651
652 int32_t v4_env[4];
653 int32_t v4_env_step[4];
654 int16_t *v4_dst[4];
655 int16_t v4[4];
656
657 load_u32((uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4);
658 load_u32((uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4);
659
660 v4_dst[0] = musyx->left;
661 v4_dst[1] = musyx->right;
662 v4_dst[2] = musyx->cc0;
663 v4_dst[3] = musyx->e50;
664
665 DebugMessage(M64MSG_VERBOSE,
666 "Voice debug: segbase=%d"
667 "\tu16_4e=%04x\n"
668 "\tpitch: frac0=%04x shift=%04x\n"
669 "\tend_point=%04x restart_point=%04x\n"
670 "\tenv = %08x %08x %08x %08x\n"
671 "\tenv_step = %08x %08x %08x %08x\n",
672 segbase,
673 u16_4e,
674 pitch_q16, pitch_shift,
675 end_point, restart_point,
676 v4_env[0], v4_env[1], v4_env[2], v4_env[3],
677 v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]);
678
679 for (i = 0; i < SUBFRAME_SIZE; ++i) {
680 /* update sample and resample_lut pointers and then pitch_accu */
681 const int16_t *lut = (int16_t *)(ResampleLUT + ((pitch_accu & 0xfc00) >> 8));
682 int dist;
683 int16_t v;
684
685 sample += (pitch_accu >> 16);
686 pitch_accu &= 0xffff;
687 pitch_accu += pitch_step;
688
689 /* handle end/restart points */
690 dist = sample - sample_end;
691 if (dist >= 0)
692 sample = sample_restart + dist;
693
694 /* apply resample filter */
695 v = clamp_s16(dot4(sample, lut));
696
697 for (k = 0; k < 4; ++k) {
698 /* envmix */
699 int32_t accu = (v * (v4_env[k] >> 16)) >> 15;
700 v4[k] = clamp_s16(accu);
701 *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k]));
702
703 /* update envelopes and dst pointers */
704 ++(v4_dst[k]);
705 v4_env[k] += v4_env_step[k];
706 }
707 }
708
709 /* save last resampled sample */
710 store_u16((uint16_t *)v4, last_sample_ptr, 4);
711
712 DebugMessage(M64MSG_VERBOSE, "last_sample = %04x %04x %04x %04x",
713 v4[0], v4[1], v4[2], v4[3]);
714}
715
716
717static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx)
718{
719 unsigned int i;
720
721 int16_t buffer[SUBFRAME_SIZE + 4];
722 int16_t *subframe = buffer + 4;
723
724 uint32_t tap_delays[8];
725 int16_t tap_gains[8];
726 int16_t fir4_hcoeffs[4];
727
728 int16_t delayed[SUBFRAME_SIZE];
729 int dpos, dlength;
730
731 const uint32_t pos = idx * SUBFRAME_SIZE;
732
733 uint32_t cbuffer_ptr;
734 uint32_t cbuffer_length;
735 uint16_t tap_count;
736 int16_t fir4_hgain;
737
738 DebugMessage(M64MSG_VERBOSE, "SFX: %08x, idx=%d", sfx_ptr, idx);
739
740 if (sfx_ptr == 0)
741 return;
742
743 /* load sfx parameters */
744 cbuffer_ptr = *dram_u32(sfx_ptr + SFX_CBUFFER_PTR);
745 cbuffer_length = *dram_u32(sfx_ptr + SFX_CBUFFER_LENGTH);
746
747 tap_count = *dram_u16(sfx_ptr + SFX_TAP_COUNT);
748
749 load_u32(tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8);
750 load_u16((uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8);
751
752 fir4_hgain = *dram_u16(sfx_ptr + SFX_FIR4_HGAIN);
753 load_u16((uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4);
754
755 DebugMessage(M64MSG_VERBOSE, "cbuffer: ptr=%08x length=%x", cbuffer_ptr,
756 cbuffer_length);
757
758 DebugMessage(M64MSG_VERBOSE, "fir4: hgain=%04x hcoeff=%04x %04x %04x %04x",
759 fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2],
760 fir4_hcoeffs[3]);
761
762 DebugMessage(M64MSG_VERBOSE,
763 "tap count=%d\n"
764 "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n"
765 "gains: %04x %04x %04x %04x %04x %04x %04x %04x",
766 tap_count,
767 tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3],
768 tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7],
769 tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3],
770 tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]);
771
772 /* mix up to 8 delayed subframes */
773 memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0]));
774 for (i = 0; i < tap_count; ++i) {
775
776 dpos = pos - tap_delays[i];
777 if (dpos <= 0)
778 dpos += cbuffer_length;
779 dlength = SUBFRAME_SIZE;
780
781 if (dpos + SUBFRAME_SIZE > cbuffer_length) {
782 dlength = cbuffer_length - dpos;
783 load_u16((uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength);
784 }
785
786 load_u16((uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength);
787
788 mix_subframes(subframe, delayed, tap_gains[i]);
789 }
790
791 /* add resulting subframe to L/R subframes */
792 for (i = 0; i < SUBFRAME_SIZE; ++i) {
793 int16_t v = subframe[i];
794 musyx->left[i] = clamp_s16(musyx->left[i] + v);
795 musyx->right[i] = clamp_s16(musyx->right[i] + v);
796 }
797
798 /* apply FIR4 filter and writeback filtered result */
799 memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t));
800 memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t));
801 mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs);
802 store_u16((uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE);
803}
804
805static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain)
806{
807 unsigned int i;
808
809 for (i = 0; i < SUBFRAME_SIZE; ++i) {
810 int32_t v = (hgain * x[i]) >> 15;
811 y[i] = clamp_s16(y[i] + v);
812 }
813}
814
815static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs)
816{
817 unsigned int i;
818 int32_t h[4];
819
820 h[0] = (hgain * hcoeffs[0]) >> 15;
821 h[1] = (hgain * hcoeffs[1]) >> 15;
822 h[2] = (hgain * hcoeffs[2]) >> 15;
823 h[3] = (hgain * hcoeffs[3]) >> 15;
824
825 for (i = 0; i < SUBFRAME_SIZE; ++i) {
826 int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15;
827 y[i] = clamp_s16(y[i] + v);
828 }
829}
830
831
832static void interleave_stage(musyx_t *musyx, uint32_t output_ptr)
833{
834 size_t i;
835
836 int16_t base_left;
837 int16_t base_right;
838
839 int16_t *left;
840 int16_t *right;
841 uint32_t *dst;
842
843 DebugMessage(M64MSG_VERBOSE, "interleave: %08x", output_ptr);
844
845 base_left = clamp_s16(musyx->base_vol[0]);
846 base_right = clamp_s16(musyx->base_vol[1]);
847
848 left = musyx->left;
849 right = musyx->right;
850 dst = dram_u32(output_ptr);
851
852 for (i = 0; i < SUBFRAME_SIZE; ++i) {
853 uint16_t l = clamp_s16(*(left++) + base_left);
854 uint16_t r = clamp_s16(*(right++) + base_right);
855
856 *(dst++) = (l << 16) | r;
857 }
858}