| 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | * Mupen64plus-rsp-hle - musyx.c * |
| 3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
| 4 | * Copyright (C) 2013 Bobby Smiles * |
| 5 | * * |
| 6 | * This program is free software; you can redistribute it and/or modify * |
| 7 | * it under the terms of the GNU General Public License as published by * |
| 8 | * the Free Software Foundation; either version 2 of the License, or * |
| 9 | * (at your option) any later version. * |
| 10 | * * |
| 11 | * This program is distributed in the hope that it will be useful, * |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 14 | * GNU General Public License for more details. * |
| 15 | * * |
| 16 | * You should have received a copy of the GNU General Public License * |
| 17 | * along with this program; if not, write to the * |
| 18 | * Free Software Foundation, Inc., * |
| 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
| 20 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 21 | |
| 22 | #include <stdbool.h> |
| 23 | #include <stdint.h> |
| 24 | #include <string.h> |
| 25 | #include <stddef.h> |
| 26 | |
| 27 | #include "m64p_plugin.h" |
| 28 | #include "m64p_types.h" |
| 29 | #include "hle.h" |
| 30 | #include "musyx.h" |
| 31 | |
/* sizes shared by the whole mixer */
enum {
    SUBFRAME_SIZE      = 192,   /* number of int16 samples in each internal subframe */
    MAX_VOICES         = 32,    /* voice slots stored in one subframe descriptor */
    SAMPLE_BUFFER_SIZE = 0x200  /* int16 capacity of the per-voice sample buffer */
};
| 37 | |
| 38 | |
/* byte offsets of the fields of a subframe descriptor (SFD) in DRAM */
enum {
    SFD_VOICE_COUNT   = 0x00,
    SFD_SFX_INDEX     = 0x02,   /* u16 */
    SFD_VOICE_BITMASK = 0x04,   /* u32, one bit per voice slot */
    SFD_STATE_PTR     = 0x08,   /* u32, DRAM address of the persistent state */
    SFD_SFX_PTR       = 0x0c,   /* u32, DRAM address of the SFX parameters (0 = no SFX) */

    /* first entry of the voice array */
    SFD_VOICES        = 0x10
};
| 48 | |
/* byte offsets of the fields of one voice entry in DRAM */
enum {
    VOICE_ENV_BEGIN       = 0x00,   /* 4 x u32 initial envelope values */
    VOICE_ENV_STEP        = 0x10,   /* 4 x u32 per-sample envelope increments */
    VOICE_PITCH_Q16       = 0x20,   /* u16 initial pitch accumulator */
    VOICE_PITCH_SHIFT     = 0x22,   /* u16 pitch step */
    VOICE_CATSRC_0        = 0x24,   /* first concatenated-source descriptor */
    VOICE_CATSRC_1        = 0x30,   /* second concatenated-source descriptor */
    VOICE_ADPCM_FRAMES    = 0x3c,   /* two u8 frame counts; first one == 0 selects PCM16 */
    VOICE_SKIP_SAMPLES    = 0x3e,   /* two u8 skip counts */

    /* PCM16 voices only */
    VOICE_U16_40          = 0x40,
    VOICE_U16_42          = 0x42,

    /* ADPCM voices only (overlaps the two PCM16 fields above) */
    VOICE_ADPCM_TABLE_PTR = 0x40,

    VOICE_INTERLEAVED_PTR = 0x44,   /* u32 output address; non-zero ends the voice list */
    VOICE_END_POINT       = 0x48,
    VOICE_RESTART_POINT   = 0x4a,
    VOICE_U16_4C          = 0x4c,
    VOICE_U16_4E          = 0x4e,

    /* stride between two consecutive voice entries */
    VOICE_SIZE            = 0x50
};
| 74 | |
/* byte offsets inside a "catsrc" descriptor: two DRAM regions that get
 * loaded back to back into one destination buffer */
enum {
    CATSRC_PTR1  = 0x00,    /* u32 address of the first region */
    CATSRC_PTR2  = 0x04,    /* u32 address of the second region */
    CATSRC_SIZE1 = 0x08,    /* u16 size in bytes of the first region */
    CATSRC_SIZE2 = 0x0a     /* u16 size in bytes of the second region (0 = unused) */
};
| 81 | |
/* byte offsets inside the persistent state block in DRAM */
enum {
    STATE_LAST_SAMPLE = 0x000,  /* 4 x s16 per voice (8 bytes per voice slot) */
    STATE_BASE_VOL    = 0x100,  /* 4 high halves followed by 4 low halves */
    STATE_CC0         = 0x110,  /* one full subframe of s16 */
    STATE_740_LAST4   = 0x290   /* 4 x s16 */
};
| 88 | |
/* byte offsets inside the SFX parameter block in DRAM */
enum {
    SFX_CBUFFER_PTR    = 0x00,  /* u32 address of the circular delay buffer */
    SFX_CBUFFER_LENGTH = 0x04,  /* u32 length of the circular buffer, in samples */
    SFX_TAP_COUNT      = 0x08,  /* u16 number of delay taps in use */
    SFX_FIR4_HGAIN     = 0x0a,  /* s16 gain applied to the FIR4 coefficients */
    SFX_TAP_DELAYS     = 0x0c,  /* 8 x u32 */
    SFX_TAP_GAINS      = 0x2c,  /* 8 x s16 */
    /* 0x3c..0x3f: padding */
    SFX_FIR4_HCOEFFS   = 0x40   /* 4 x s16 */
};
| 99 | |
| 100 | |
| 101 | /* struct definition */ |
| 102 | typedef struct { |
| 103 | /* internal subframes */ |
| 104 | int16_t left[SUBFRAME_SIZE]; |
| 105 | int16_t right[SUBFRAME_SIZE]; |
| 106 | int16_t cc0[SUBFRAME_SIZE]; |
| 107 | int16_t e50[SUBFRAME_SIZE]; |
| 108 | |
| 109 | /* internal subframes base volumes */ |
| 110 | int32_t base_vol[4]; |
| 111 | |
| 112 | /* */ |
| 113 | int16_t subframe_740_last4[4]; |
| 114 | } musyx_t; |
| 115 | |
| 116 | /* helper functions prototypes */ |
| 117 | static void load_base_vol(int32_t *base_vol, uint32_t address); |
| 118 | static void save_base_vol(const int32_t *base_vol, uint32_t address); |
| 119 | static void update_base_vol(int32_t *base_vol, uint32_t voice_mask, |
| 120 | uint32_t last_sample_ptr); |
| 121 | |
| 122 | static void init_subframes(musyx_t *musyx); |
| 123 | |
| 124 | static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr, |
| 125 | uint32_t last_sample_ptr); |
| 126 | |
| 127 | static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr); |
| 128 | static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr); |
| 129 | |
| 130 | static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples, |
| 131 | unsigned *segbase, unsigned *offset); |
| 132 | static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples, |
| 133 | unsigned *segbase, unsigned *offset); |
| 134 | |
| 135 | static void adpcm_decode_frames(int16_t *dst, const uint8_t *src, |
| 136 | const int16_t *table, uint8_t count, |
| 137 | uint8_t skip_samples); |
| 138 | |
| 139 | static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask, |
| 140 | unsigned lshift, unsigned rshift); |
| 141 | static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src, |
| 142 | const uint8_t *nibbles, |
| 143 | unsigned int rshift); |
| 144 | static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src, |
| 145 | const int16_t *cb_entry, |
| 146 | const int16_t *last_samples, |
| 147 | size_t size); |
| 148 | |
| 149 | static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr, |
| 150 | const int16_t *samples, unsigned segbase, |
| 151 | unsigned offset, uint32_t last_sample_ptr); |
| 152 | |
| 153 | static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx); |
| 154 | static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain); |
| 155 | static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs); |
| 156 | |
| 157 | |
| 158 | static void interleave_stage(musyx_t *musyx, uint32_t output_ptr); |
| 159 | |
| 160 | |
| 161 | static uint8_t *dram_u8(uint32_t address); |
| 162 | static uint16_t *dram_u16(uint32_t address); |
| 163 | static uint32_t *dram_u32(uint32_t address); |
| 164 | |
| 165 | static void load_u8(uint8_t *dst, uint32_t address, size_t count); |
| 166 | static void load_u16(uint16_t *dst, uint32_t address, size_t count); |
| 167 | static void load_u32(uint32_t *dst, uint32_t address, size_t count); |
| 168 | |
| 169 | static void store_u16(const uint16_t *src, uint32_t address, size_t count); |
| 170 | |
/* Round x up to the next multiple of amount.
 * The bit trick below only gives that result when amount is a power of two. */
static inline unsigned int align(unsigned int x, unsigned amount)
{
    const unsigned mask = amount - 1;

    return (x + mask) & ~mask;
}
| 176 | |
/* Dot product of x[0..n-1] with y[0..n-1] taken in reverse order:
 * x[0]*y[n-1] + x[1]*y[n-2] + ... + x[n-1]*y[0].
 * Returns 0 when n is 0. */
static int32_t rdot(size_t n, const int16_t *x, const int16_t *y)
{
    int32_t sum = 0;
    size_t i;

    for (i = 0; i < n; ++i)
        sum += (int32_t)x[i] * (int32_t)y[n - 1 - i];

    return sum;
}
| 190 | |
| 191 | |
/* 4-term dot product of x and y where each product is scaled down by 15 bits
 * and the running sum is saturated to the int16 range after every term. */
static int32_t dot4(const int16_t *x, const int16_t *y)
{
    int32_t accu = 0;
    size_t k = 0;

    while (k < 4) {
        const int32_t term = ((int32_t)x[k] * (int32_t)y[k]) >> 15;

        accu = clamp_s16(accu + term);
        ++k;
    }

    return accu;
}
| 202 | |
| 203 | /* Fast and dirty way of reading dram memory |
| 204 | * Assume properly aligned access |
| 205 | */ |
| 206 | static uint8_t *dram_u8(uint32_t address) |
| 207 | { |
| 208 | return (uint8_t *)&rsp.RDRAM[(address & 0xffffff) ^ S8]; |
| 209 | } |
| 210 | |
| 211 | static uint16_t *dram_u16(uint32_t address) |
| 212 | { |
| 213 | return (uint16_t *)&rsp.RDRAM[(address & 0xffffff) ^ S16]; |
| 214 | } |
| 215 | |
| 216 | static uint32_t *dram_u32(uint32_t address) |
| 217 | { |
| 218 | return (uint32_t *)&rsp.RDRAM[address & 0xffffff]; |
| 219 | } |
| 220 | |
| 221 | static void load_u8(uint8_t *dst, uint32_t address, size_t count) |
| 222 | { |
| 223 | while (count != 0) { |
| 224 | *(dst++) = *dram_u8(address); |
| 225 | address += 1; |
| 226 | --count; |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | static void load_u16(uint16_t *dst, uint32_t address, size_t count) |
| 231 | { |
| 232 | while (count != 0) { |
| 233 | *(dst++) = *dram_u16(address); |
| 234 | address += 2; |
| 235 | --count; |
| 236 | } |
| 237 | } |
| 238 | |
/* Copy count 32-bit values from emulated DRAM, starting at address, into dst.
 * dram_u32 applies no XOR to the address, so the whole run is copied with a
 * single memcpy instead of word by word. */
static void load_u32(uint32_t *dst, uint32_t address, size_t count)
{
    memcpy(dst, dram_u32(address), sizeof(uint32_t) * count);
}
| 246 | |
| 247 | static void store_u16(const uint16_t *src, uint32_t address, size_t count) |
| 248 | { |
| 249 | while (count != 0) { |
| 250 | *dram_u16(address) = *(src++); |
| 251 | address += 2; |
| 252 | --count; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | /************************************************************************** |
| 257 | * MusyX audio ucode |
| 258 | **************************************************************************/ |
| 259 | void musyx_task(void) |
| 260 | { |
| 261 | const OSTask_t *const task = get_task(); |
| 262 | |
| 263 | uint32_t sfd_ptr = task->data_ptr; |
| 264 | uint32_t sfd_count = task->data_size; |
| 265 | uint32_t state_ptr; |
| 266 | musyx_t musyx; |
| 267 | |
| 268 | DebugMessage(M64MSG_VERBOSE, "musyx_task: *data=%x, #SF=%d", |
| 269 | sfd_ptr, |
| 270 | sfd_count); |
| 271 | |
| 272 | state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); |
| 273 | |
| 274 | /* load initial state */ |
| 275 | load_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); |
| 276 | load_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); |
| 277 | load_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, |
| 278 | 4); |
| 279 | |
| 280 | for (;;) { |
| 281 | /* parse SFD structure */ |
| 282 | uint16_t sfx_index = *dram_u16(sfd_ptr + SFD_SFX_INDEX); |
| 283 | uint32_t voice_mask = *dram_u32(sfd_ptr + SFD_VOICE_BITMASK); |
| 284 | uint32_t sfx_ptr = *dram_u32(sfd_ptr + SFD_SFX_PTR); |
| 285 | uint32_t voice_ptr = sfd_ptr + SFD_VOICES; |
| 286 | uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; |
| 287 | uint32_t output_ptr; |
| 288 | |
| 289 | /* initialize internal subframes using updated base volumes */ |
| 290 | update_base_vol(musyx.base_vol, voice_mask, last_sample_ptr); |
| 291 | init_subframes(&musyx); |
| 292 | |
| 293 | /* active voices get mixed into L,R,cc0,e50 subframes (optional) */ |
| 294 | output_ptr = voice_stage(&musyx, voice_ptr, last_sample_ptr); |
| 295 | |
| 296 | /* apply delay-based effects (optional) */ |
| 297 | sfx_stage(&musyx, sfx_ptr, sfx_index); |
| 298 | |
| 299 | /* emit interleaved L,R subframes */ |
| 300 | interleave_stage(&musyx, output_ptr); |
| 301 | |
| 302 | --sfd_count; |
| 303 | if (sfd_count == 0) |
| 304 | break; |
| 305 | |
| 306 | sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; |
| 307 | state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); |
| 308 | } |
| 309 | |
| 310 | /* writeback updated state */ |
| 311 | save_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); |
| 312 | store_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); |
| 313 | store_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, |
| 314 | 4); |
| 315 | } |
| 316 | |
| 317 | static void load_base_vol(int32_t *base_vol, uint32_t address) |
| 318 | { |
| 319 | base_vol[0] = ((uint32_t)(*dram_u16(address)) << 16) | (*dram_u16(address + 8)); |
| 320 | base_vol[1] = ((uint32_t)(*dram_u16(address + 2)) << 16) | (*dram_u16(address + 10)); |
| 321 | base_vol[2] = ((uint32_t)(*dram_u16(address + 4)) << 16) | (*dram_u16(address + 12)); |
| 322 | base_vol[3] = ((uint32_t)(*dram_u16(address + 6)) << 16) | (*dram_u16(address + 14)); |
| 323 | } |
| 324 | |
| 325 | static void save_base_vol(const int32_t *base_vol, uint32_t address) |
| 326 | { |
| 327 | unsigned k; |
| 328 | |
| 329 | for (k = 0; k < 4; ++k) { |
| 330 | *dram_u16(address) = (uint16_t)(base_vol[k] >> 16); |
| 331 | address += 2; |
| 332 | } |
| 333 | |
| 334 | for (k = 0; k < 4; ++k) { |
| 335 | *dram_u16(address) = (uint16_t)(base_vol[k]); |
| 336 | address += 2; |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | static void update_base_vol(int32_t *base_vol, uint32_t voice_mask, |
| 341 | uint32_t last_sample_ptr) |
| 342 | { |
| 343 | unsigned i, k; |
| 344 | uint32_t mask; |
| 345 | |
| 346 | DebugMessage(M64MSG_VERBOSE, "base_vol voice_mask = %08x", voice_mask); |
| 347 | DebugMessage(M64MSG_VERBOSE, "BEFORE: base_vol = %08x %08x %08x %08x", |
| 348 | base_vol[0], base_vol[1], base_vol[2], base_vol[3]); |
| 349 | |
| 350 | /* optim: skip voices contributions entirely if voice_mask is empty */ |
| 351 | if (voice_mask != 0) { |
| 352 | for (i = 0, mask = 1; i < MAX_VOICES; |
| 353 | ++i, mask <<= 1, last_sample_ptr += 8) { |
| 354 | if ((voice_mask & mask) == 0) |
| 355 | continue; |
| 356 | |
| 357 | for (k = 0; k < 4; ++k) |
| 358 | base_vol[k] += (int16_t)*dram_u16(last_sample_ptr + k * 2); |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | /* apply 3% decay */ |
| 363 | for (k = 0; k < 4; ++k) |
| 364 | base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; |
| 365 | |
| 366 | DebugMessage(M64MSG_VERBOSE, "AFTER: base_vol = %08x %08x %08x %08x", |
| 367 | base_vol[0], base_vol[1], base_vol[2], base_vol[3]); |
| 368 | } |
| 369 | |
| 370 | static void init_subframes(musyx_t *musyx) |
| 371 | { |
| 372 | unsigned i; |
| 373 | |
| 374 | int16_t base_cc0 = clamp_s16(musyx->base_vol[2]); |
| 375 | int16_t base_e50 = clamp_s16(musyx->base_vol[3]); |
| 376 | |
| 377 | int16_t *left = musyx->left; |
| 378 | int16_t *right = musyx->right; |
| 379 | int16_t *cc0 = musyx->cc0; |
| 380 | int16_t *e50 = musyx->e50; |
| 381 | |
| 382 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 383 | *(e50++) = base_e50; |
| 384 | *(left++) = clamp_s16(*cc0 + base_cc0); |
| 385 | *(right++) = clamp_s16(-*cc0 - base_cc0); |
| 386 | *(cc0++) = 0; |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | /* Process voices, and returns interleaved subframe destination address */ |
| 391 | static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr, |
| 392 | uint32_t last_sample_ptr) |
| 393 | { |
| 394 | uint32_t output_ptr; |
| 395 | int i = 0; |
| 396 | |
| 397 | /* voice stage can be skipped if first voice has no samples */ |
| 398 | if (*dram_u16(voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) { |
| 399 | DebugMessage(M64MSG_VERBOSE, "Skipping Voice stage"); |
| 400 | output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); |
| 401 | } else { |
| 402 | /* otherwise process voices until a non null output_ptr is encountered */ |
| 403 | for (;;) { |
| 404 | /* load voice samples (PCM16 or APDCM) */ |
| 405 | int16_t samples[SAMPLE_BUFFER_SIZE]; |
| 406 | unsigned segbase; |
| 407 | unsigned offset; |
| 408 | |
| 409 | DebugMessage(M64MSG_VERBOSE, "Processing Voice #%d", i); |
| 410 | |
| 411 | if (*dram_u8(voice_ptr + VOICE_ADPCM_FRAMES) == 0) |
| 412 | load_samples_PCM16(voice_ptr, samples, &segbase, &offset); |
| 413 | else |
| 414 | load_samples_ADPCM(voice_ptr, samples, &segbase, &offset); |
| 415 | |
| 416 | /* mix them with each internal subframes */ |
| 417 | mix_voice_samples(musyx, voice_ptr, samples, segbase, offset, |
| 418 | last_sample_ptr + i * 8); |
| 419 | |
| 420 | /* check break condition */ |
| 421 | output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); |
| 422 | if (output_ptr != 0) |
| 423 | break; |
| 424 | |
| 425 | /* next voice */ |
| 426 | ++i; |
| 427 | voice_ptr += VOICE_SIZE; |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | return output_ptr; |
| 432 | } |
| 433 | |
| 434 | static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr) |
| 435 | { |
| 436 | uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); |
| 437 | uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); |
| 438 | uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); |
| 439 | uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); |
| 440 | |
| 441 | size_t count1 = size1; |
| 442 | size_t count2 = size2; |
| 443 | |
| 444 | DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", |
| 445 | ptr1, |
| 446 | ptr2, |
| 447 | size1, |
| 448 | size2); |
| 449 | |
| 450 | load_u8(dst, ptr1, count1); |
| 451 | |
| 452 | if (size2 == 0) |
| 453 | return; |
| 454 | |
| 455 | load_u8(dst + count1, ptr2, count2); |
| 456 | } |
| 457 | |
| 458 | static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr) |
| 459 | { |
| 460 | uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); |
| 461 | uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); |
| 462 | uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); |
| 463 | uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); |
| 464 | |
| 465 | size_t count1 = size1 >> 1; |
| 466 | size_t count2 = size2 >> 1; |
| 467 | |
| 468 | DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", |
| 469 | ptr1, |
| 470 | ptr2, |
| 471 | size1, |
| 472 | size2); |
| 473 | |
| 474 | load_u16(dst, ptr1, count1); |
| 475 | |
| 476 | if (size2 == 0) |
| 477 | return; |
| 478 | |
| 479 | load_u16(dst + count1, ptr2, count2); |
| 480 | } |
| 481 | |
| 482 | static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples, |
| 483 | unsigned *segbase, unsigned *offset) |
| 484 | { |
| 485 | |
| 486 | uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES); |
| 487 | uint16_t u16_40 = *dram_u16(voice_ptr + VOICE_U16_40); |
| 488 | uint16_t u16_42 = *dram_u16(voice_ptr + VOICE_U16_42); |
| 489 | |
| 490 | unsigned count = align(u16_40 + u8_3e, 4); |
| 491 | |
| 492 | DebugMessage(M64MSG_VERBOSE, "Format: PCM16"); |
| 493 | |
| 494 | *segbase = SAMPLE_BUFFER_SIZE - count; |
| 495 | *offset = u8_3e; |
| 496 | |
| 497 | dma_cat16((uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0); |
| 498 | |
| 499 | if (u16_42 != 0) |
| 500 | dma_cat16((uint16_t *)samples, voice_ptr + VOICE_CATSRC_1); |
| 501 | } |
| 502 | |
| 503 | static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples, |
| 504 | unsigned *segbase, unsigned *offset) |
| 505 | { |
| 506 | /* decompressed samples cannot exceed 0x400 bytes; |
| 507 | * ADPCM has a compression ratio of 5/16 */ |
| 508 | uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16]; |
| 509 | int16_t adpcm_table[128]; |
| 510 | |
| 511 | uint8_t u8_3c = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES ); |
| 512 | uint8_t u8_3d = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES + 1); |
| 513 | uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES ); |
| 514 | uint8_t u8_3f = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES + 1); |
| 515 | uint32_t adpcm_table_ptr = *dram_u32(voice_ptr + VOICE_ADPCM_TABLE_PTR); |
| 516 | unsigned count; |
| 517 | |
| 518 | DebugMessage(M64MSG_VERBOSE, "Format: ADPCM"); |
| 519 | |
| 520 | DebugMessage(M64MSG_VERBOSE, "Loading ADPCM table: %08x", adpcm_table_ptr); |
| 521 | load_u16((uint16_t *)adpcm_table, adpcm_table_ptr, 128); |
| 522 | |
| 523 | count = u8_3c << 5; |
| 524 | |
| 525 | *segbase = SAMPLE_BUFFER_SIZE - count; |
| 526 | *offset = u8_3e & 0x1f; |
| 527 | |
| 528 | dma_cat8(buffer, voice_ptr + VOICE_CATSRC_0); |
| 529 | adpcm_decode_frames(samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e); |
| 530 | |
| 531 | if (u8_3d != 0) { |
| 532 | dma_cat8(buffer, voice_ptr + VOICE_CATSRC_1); |
| 533 | adpcm_decode_frames(samples, buffer, adpcm_table, u8_3d, u8_3f); |
| 534 | } |
| 535 | } |
| 536 | |
| 537 | static void adpcm_decode_frames(int16_t *dst, const uint8_t *src, |
| 538 | const int16_t *table, uint8_t count, |
| 539 | uint8_t skip_samples) |
| 540 | { |
| 541 | int16_t frame[32]; |
| 542 | const uint8_t *nibbles = src + 8; |
| 543 | unsigned i; |
| 544 | bool jump_gap = false; |
| 545 | |
| 546 | DebugMessage(M64MSG_VERBOSE, "ADPCM decode: count=%d, skip=%d", count, |
| 547 | skip_samples); |
| 548 | |
| 549 | if (skip_samples >= 32) { |
| 550 | jump_gap = true; |
| 551 | nibbles += 16; |
| 552 | src += 4; |
| 553 | } |
| 554 | |
| 555 | for (i = 0; i < count; ++i) { |
| 556 | uint8_t c2 = nibbles[0]; |
| 557 | |
| 558 | const int16_t *book = (c2 & 0xf0) + table; |
| 559 | unsigned int rshift = (c2 & 0x0f); |
| 560 | |
| 561 | adpcm_get_predicted_frame(frame, src, nibbles, rshift); |
| 562 | |
| 563 | memcpy(dst, frame, 2 * sizeof(frame[0])); |
| 564 | adpcm_decode_upto_8_samples(dst + 2, frame + 2, book, dst , 6); |
| 565 | adpcm_decode_upto_8_samples(dst + 8, frame + 8, book, dst + 6, 8); |
| 566 | adpcm_decode_upto_8_samples(dst + 16, frame + 16, book, dst + 14, 8); |
| 567 | adpcm_decode_upto_8_samples(dst + 24, frame + 24, book, dst + 22, 8); |
| 568 | |
| 569 | if (jump_gap) { |
| 570 | nibbles += 8; |
| 571 | src += 32; |
| 572 | } |
| 573 | |
| 574 | jump_gap = !jump_gap; |
| 575 | nibbles += 16; |
| 576 | src += 4; |
| 577 | dst += 32; |
| 578 | } |
| 579 | } |
| 580 | |
/* Extract one residual from a packed byte: keep the bits selected by mask,
 * move them up by lshift so that the nibble lands in the top bits of a 16-bit
 * value, reinterpret as signed, then shift right (sign-propagating) by rshift. */
static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask,
                                          unsigned lshift, unsigned rshift)
{
    const uint16_t selected = (uint16_t)byte & (uint16_t)mask;
    const int16_t positioned = (int16_t)(selected << lshift);

    return (int16_t)(positioned >> rshift);   /* signed shift */
}
| 588 | |
/* Unpack one ADPCM frame into 32 int16 values in dst.
 *
 * dst[0] and dst[1] are read from src as two big-endian 16-bit values.
 * dst[2..31] are the residuals unpacked from nibbles[1..15], high nibble first,
 * each shifted right by rshift (nibbles[0] is not used here).
 */
static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src,
                                      const uint8_t *nibbles,
                                      unsigned int rshift)
{
    unsigned int n;

    dst[0] = (src[0] << 8) | src[1];
    dst[1] = (src[2] << 8) | src[3];

    for (n = 1; n < 16; ++n) {
        const uint8_t packed = nibbles[n];

        dst[2 * n]     = adpcm_get_predicted_sample(packed, 0xf0, 8, rshift);
        dst[2 * n + 1] = adpcm_get_predicted_sample(packed, 0x0f, 12, rshift);
    }
}
| 605 | |
/* Decode up to 8 ADPCM samples.
 *
 * dst          - receives size decoded samples
 * src          - size residuals for this group
 * cb_entry     - codebook entry: 8 coefficients applied to last_samples[0]
 *                followed by 8 coefficients applied to last_samples[1]
 * last_samples - the two samples preceding this group
 * size         - number of samples to decode (at most 8)
 *
 * Each output is the residual (in Q11) plus the contribution of the two
 * previous samples and of the earlier residuals of the group, scaled back by
 * 11 bits and saturated to int16.
 */
static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src,
                                        const int16_t *cb_entry,
                                        const int16_t *last_samples,
                                        size_t size)
{
    const int16_t *const book1 = cb_entry;
    const int16_t *const book2 = cb_entry + 8;

    const int16_t l1 = last_samples[0];
    const int16_t l2 = last_samples[1];

    size_t i;
    int32_t accu;

    for (i = 0; i < size; ++i) {
        /* scale by 2^11 with a multiplication: residuals can be negative and
         * left-shifting a negative value is undefined behavior in C */
        accu = (int32_t)src[i] * (1 << 11);
        accu += book1[i] * l1 + book2[i] * l2 + rdot(i, book2, src);
        dst[i] = clamp_s16(accu >> 11);
    }
}
| 626 | |
| 627 | static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr, |
| 628 | const int16_t *samples, unsigned segbase, |
| 629 | unsigned offset, uint32_t last_sample_ptr) |
| 630 | { |
| 631 | int i, k; |
| 632 | |
| 633 | /* parse VOICE structure */ |
| 634 | const uint16_t pitch_q16 = *dram_u16(voice_ptr + VOICE_PITCH_Q16); |
| 635 | const uint16_t pitch_shift = *dram_u16(voice_ptr + VOICE_PITCH_SHIFT); /* Q4.12 */ |
| 636 | |
| 637 | const uint16_t end_point = *dram_u16(voice_ptr + VOICE_END_POINT); |
| 638 | const uint16_t restart_point = *dram_u16(voice_ptr + VOICE_RESTART_POINT); |
| 639 | |
| 640 | const uint16_t u16_4e = *dram_u16(voice_ptr + VOICE_U16_4E); |
| 641 | |
| 642 | /* init values and pointers */ |
| 643 | const int16_t *sample = samples + segbase + offset + u16_4e; |
| 644 | const int16_t *const sample_end = samples + segbase + end_point; |
| 645 | const int16_t *const sample_restart = samples + (restart_point & 0x7fff) + |
| 646 | (((restart_point & 0x8000) != 0) ? 0x000 : segbase); |
| 647 | |
| 648 | |
| 649 | uint32_t pitch_accu = pitch_q16; |
| 650 | uint32_t pitch_step = pitch_shift << 4; |
| 651 | |
| 652 | int32_t v4_env[4]; |
| 653 | int32_t v4_env_step[4]; |
| 654 | int16_t *v4_dst[4]; |
| 655 | int16_t v4[4]; |
| 656 | |
| 657 | load_u32((uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4); |
| 658 | load_u32((uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4); |
| 659 | |
| 660 | v4_dst[0] = musyx->left; |
| 661 | v4_dst[1] = musyx->right; |
| 662 | v4_dst[2] = musyx->cc0; |
| 663 | v4_dst[3] = musyx->e50; |
| 664 | |
| 665 | DebugMessage(M64MSG_VERBOSE, |
| 666 | "Voice debug: segbase=%d" |
| 667 | "\tu16_4e=%04x\n" |
| 668 | "\tpitch: frac0=%04x shift=%04x\n" |
| 669 | "\tend_point=%04x restart_point=%04x\n" |
| 670 | "\tenv = %08x %08x %08x %08x\n" |
| 671 | "\tenv_step = %08x %08x %08x %08x\n", |
| 672 | segbase, |
| 673 | u16_4e, |
| 674 | pitch_q16, pitch_shift, |
| 675 | end_point, restart_point, |
| 676 | v4_env[0], v4_env[1], v4_env[2], v4_env[3], |
| 677 | v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]); |
| 678 | |
| 679 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 680 | /* update sample and resample_lut pointers and then pitch_accu */ |
| 681 | const int16_t *lut = (int16_t *)(ResampleLUT + ((pitch_accu & 0xfc00) >> 8)); |
| 682 | int dist; |
| 683 | int16_t v; |
| 684 | |
| 685 | sample += (pitch_accu >> 16); |
| 686 | pitch_accu &= 0xffff; |
| 687 | pitch_accu += pitch_step; |
| 688 | |
| 689 | /* handle end/restart points */ |
| 690 | dist = sample - sample_end; |
| 691 | if (dist >= 0) |
| 692 | sample = sample_restart + dist; |
| 693 | |
| 694 | /* apply resample filter */ |
| 695 | v = clamp_s16(dot4(sample, lut)); |
| 696 | |
| 697 | for (k = 0; k < 4; ++k) { |
| 698 | /* envmix */ |
| 699 | int32_t accu = (v * (v4_env[k] >> 16)) >> 15; |
| 700 | v4[k] = clamp_s16(accu); |
| 701 | *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k])); |
| 702 | |
| 703 | /* update envelopes and dst pointers */ |
| 704 | ++(v4_dst[k]); |
| 705 | v4_env[k] += v4_env_step[k]; |
| 706 | } |
| 707 | } |
| 708 | |
| 709 | /* save last resampled sample */ |
| 710 | store_u16((uint16_t *)v4, last_sample_ptr, 4); |
| 711 | |
| 712 | DebugMessage(M64MSG_VERBOSE, "last_sample = %04x %04x %04x %04x", |
| 713 | v4[0], v4[1], v4[2], v4[3]); |
| 714 | } |
| 715 | |
| 716 | |
| 717 | static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx) |
| 718 | { |
| 719 | unsigned int i; |
| 720 | |
| 721 | int16_t buffer[SUBFRAME_SIZE + 4]; |
| 722 | int16_t *subframe = buffer + 4; |
| 723 | |
| 724 | uint32_t tap_delays[8]; |
| 725 | int16_t tap_gains[8]; |
| 726 | int16_t fir4_hcoeffs[4]; |
| 727 | |
| 728 | int16_t delayed[SUBFRAME_SIZE]; |
| 729 | int dpos, dlength; |
| 730 | |
| 731 | const uint32_t pos = idx * SUBFRAME_SIZE; |
| 732 | |
| 733 | uint32_t cbuffer_ptr; |
| 734 | uint32_t cbuffer_length; |
| 735 | uint16_t tap_count; |
| 736 | int16_t fir4_hgain; |
| 737 | |
| 738 | DebugMessage(M64MSG_VERBOSE, "SFX: %08x, idx=%d", sfx_ptr, idx); |
| 739 | |
| 740 | if (sfx_ptr == 0) |
| 741 | return; |
| 742 | |
| 743 | /* load sfx parameters */ |
| 744 | cbuffer_ptr = *dram_u32(sfx_ptr + SFX_CBUFFER_PTR); |
| 745 | cbuffer_length = *dram_u32(sfx_ptr + SFX_CBUFFER_LENGTH); |
| 746 | |
| 747 | tap_count = *dram_u16(sfx_ptr + SFX_TAP_COUNT); |
| 748 | |
| 749 | load_u32(tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8); |
| 750 | load_u16((uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8); |
| 751 | |
| 752 | fir4_hgain = *dram_u16(sfx_ptr + SFX_FIR4_HGAIN); |
| 753 | load_u16((uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4); |
| 754 | |
| 755 | DebugMessage(M64MSG_VERBOSE, "cbuffer: ptr=%08x length=%x", cbuffer_ptr, |
| 756 | cbuffer_length); |
| 757 | |
| 758 | DebugMessage(M64MSG_VERBOSE, "fir4: hgain=%04x hcoeff=%04x %04x %04x %04x", |
| 759 | fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], |
| 760 | fir4_hcoeffs[3]); |
| 761 | |
| 762 | DebugMessage(M64MSG_VERBOSE, |
| 763 | "tap count=%d\n" |
| 764 | "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n" |
| 765 | "gains: %04x %04x %04x %04x %04x %04x %04x %04x", |
| 766 | tap_count, |
| 767 | tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3], |
| 768 | tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7], |
| 769 | tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3], |
| 770 | tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]); |
| 771 | |
| 772 | /* mix up to 8 delayed subframes */ |
| 773 | memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); |
| 774 | for (i = 0; i < tap_count; ++i) { |
| 775 | |
| 776 | dpos = pos - tap_delays[i]; |
| 777 | if (dpos <= 0) |
| 778 | dpos += cbuffer_length; |
| 779 | dlength = SUBFRAME_SIZE; |
| 780 | |
| 781 | if (dpos + SUBFRAME_SIZE > cbuffer_length) { |
| 782 | dlength = cbuffer_length - dpos; |
| 783 | load_u16((uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength); |
| 784 | } |
| 785 | |
| 786 | load_u16((uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength); |
| 787 | |
| 788 | mix_subframes(subframe, delayed, tap_gains[i]); |
| 789 | } |
| 790 | |
| 791 | /* add resulting subframe to L/R subframes */ |
| 792 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 793 | int16_t v = subframe[i]; |
| 794 | musyx->left[i] = clamp_s16(musyx->left[i] + v); |
| 795 | musyx->right[i] = clamp_s16(musyx->right[i] + v); |
| 796 | } |
| 797 | |
| 798 | /* apply FIR4 filter and writeback filtered result */ |
| 799 | memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t)); |
| 800 | memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t)); |
| 801 | mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs); |
| 802 | store_u16((uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE); |
| 803 | } |
| 804 | |
| 805 | static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain) |
| 806 | { |
| 807 | unsigned int i; |
| 808 | |
| 809 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 810 | int32_t v = (hgain * x[i]) >> 15; |
| 811 | y[i] = clamp_s16(y[i] + v); |
| 812 | } |
| 813 | } |
| 814 | |
| 815 | static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs) |
| 816 | { |
| 817 | unsigned int i; |
| 818 | int32_t h[4]; |
| 819 | |
| 820 | h[0] = (hgain * hcoeffs[0]) >> 15; |
| 821 | h[1] = (hgain * hcoeffs[1]) >> 15; |
| 822 | h[2] = (hgain * hcoeffs[2]) >> 15; |
| 823 | h[3] = (hgain * hcoeffs[3]) >> 15; |
| 824 | |
| 825 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 826 | int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15; |
| 827 | y[i] = clamp_s16(y[i] + v); |
| 828 | } |
| 829 | } |
| 830 | |
| 831 | |
| 832 | static void interleave_stage(musyx_t *musyx, uint32_t output_ptr) |
| 833 | { |
| 834 | size_t i; |
| 835 | |
| 836 | int16_t base_left; |
| 837 | int16_t base_right; |
| 838 | |
| 839 | int16_t *left; |
| 840 | int16_t *right; |
| 841 | uint32_t *dst; |
| 842 | |
| 843 | DebugMessage(M64MSG_VERBOSE, "interleave: %08x", output_ptr); |
| 844 | |
| 845 | base_left = clamp_s16(musyx->base_vol[0]); |
| 846 | base_right = clamp_s16(musyx->base_vol[1]); |
| 847 | |
| 848 | left = musyx->left; |
| 849 | right = musyx->right; |
| 850 | dst = dram_u32(output_ptr); |
| 851 | |
| 852 | for (i = 0; i < SUBFRAME_SIZE; ++i) { |
| 853 | uint16_t l = clamp_s16(*(left++) + base_left); |
| 854 | uint16_t r = clamp_s16(*(right++) + base_right); |
| 855 | |
| 856 | *(dst++) = (l << 16) | r; |
| 857 | } |
| 858 | } |