From: ptitSeb Date: Mon, 16 Jun 2014 20:56:38 +0000 (+0200) Subject: RSP: Added some missing files X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=df00ea1301b9e755242364def666d5b12d470762;p=mupen64plus-pandora.git RSP: Added some missing files --- diff --git a/source/mupen64plus-rsp-hle/src/musyx.c b/source/mupen64plus-rsp-hle/src/musyx.c new file mode 100755 index 0000000..1c520c0 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/musyx.c @@ -0,0 +1,858 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - musyx.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2013 Bobby Smiles * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include +#include + +#include "m64p_plugin.h" +#include "m64p_types.h" +#include "hle.h" +#include "musyx.h" + +/* various constants */ +enum { SUBFRAME_SIZE = 192 }; +enum { MAX_VOICES = 32 }; + +enum { SAMPLE_BUFFER_SIZE = 0x200 }; + + +enum { + SFD_VOICE_COUNT = 0x0, + SFD_SFX_INDEX = 0x2, + SFD_VOICE_BITMASK = 0x4, + SFD_STATE_PTR = 0x8, + SFD_SFX_PTR = 0xc, + + SFD_VOICES = 0x10 +}; + +enum { + VOICE_ENV_BEGIN = 0x00, + VOICE_ENV_STEP = 0x10, + VOICE_PITCH_Q16 = 0x20, + VOICE_PITCH_SHIFT = 0x22, + VOICE_CATSRC_0 = 0x24, + VOICE_CATSRC_1 = 0x30, + VOICE_ADPCM_FRAMES = 0x3c, + VOICE_SKIP_SAMPLES = 0x3e, + + /* for PCM16 */ + VOICE_U16_40 = 0x40, + VOICE_U16_42 = 0x42, + + /* for ADPCM */ + VOICE_ADPCM_TABLE_PTR = 0x40, + + VOICE_INTERLEAVED_PTR = 0x44, + VOICE_END_POINT = 0x48, + VOICE_RESTART_POINT = 0x4a, + VOICE_U16_4C = 0x4c, + VOICE_U16_4E = 0x4e, + + VOICE_SIZE = 0x50 +}; + +enum { + CATSRC_PTR1 = 0x00, + CATSRC_PTR2 = 0x04, + CATSRC_SIZE1 = 0x08, + CATSRC_SIZE2 = 0x0a +}; + +enum { + STATE_LAST_SAMPLE = 0x0, + STATE_BASE_VOL = 0x100, + STATE_CC0 = 0x110, + STATE_740_LAST4 = 0x290 +}; + +enum { + SFX_CBUFFER_PTR = 0x00, + SFX_CBUFFER_LENGTH = 0x04, + SFX_TAP_COUNT = 0x08, + SFX_FIR4_HGAIN = 0x0a, + SFX_TAP_DELAYS = 0x0c, + SFX_TAP_GAINS = 0x2c, + /* padding = 0x3c */ + SFX_FIR4_HCOEFFS = 0x40 +}; + + +/* struct definition */ +typedef struct { + /* internal subframes */ + int16_t left[SUBFRAME_SIZE]; + int16_t right[SUBFRAME_SIZE]; + int16_t cc0[SUBFRAME_SIZE]; + int16_t e50[SUBFRAME_SIZE]; + + /* internal subframes base volumes */ + int32_t base_vol[4]; + + /* */ + int16_t subframe_740_last4[4]; +} musyx_t; + +/* helper functions prototypes */ +static void load_base_vol(int32_t *base_vol, uint32_t address); +static void save_base_vol(const int32_t *base_vol, uint32_t address); +static void update_base_vol(int32_t *base_vol, uint32_t voice_mask, + 
uint32_t last_sample_ptr); + +static void init_subframes(musyx_t *musyx); + +static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr, + uint32_t last_sample_ptr); + +static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr); +static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr); + +static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples, + unsigned *segbase, unsigned *offset); +static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples, + unsigned *segbase, unsigned *offset); + +static void adpcm_decode_frames(int16_t *dst, const uint8_t *src, + const int16_t *table, uint8_t count, + uint8_t skip_samples); + +static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask, + unsigned lshift, unsigned rshift); +static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src, + const uint8_t *nibbles, + unsigned int rshift); +static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src, + const int16_t *cb_entry, + const int16_t *last_samples, + size_t size); + +static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr, + const int16_t *samples, unsigned segbase, + unsigned offset, uint32_t last_sample_ptr); + +static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx); +static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain); +static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs); + + +static void interleave_stage(musyx_t *musyx, uint32_t output_ptr); + + +static uint8_t *dram_u8(uint32_t address); +static uint16_t *dram_u16(uint32_t address); +static uint32_t *dram_u32(uint32_t address); + +static void load_u8(uint8_t *dst, uint32_t address, size_t count); +static void load_u16(uint16_t *dst, uint32_t address, size_t count); +static void load_u32(uint32_t *dst, uint32_t address, size_t count); + +static void store_u16(const uint16_t *src, uint32_t address, size_t count); + +static inline unsigned int align(unsigned int x, unsigned amount) +{ + 
--amount; + return (x + amount) & ~amount; +} + +static int32_t rdot(size_t n, const int16_t *x, const int16_t *y) +{ + int32_t accu = 0; + + y += n; + + while (n != 0) { + accu += ((int32_t)*(x++) * (int32_t)*(--y)); + --n; + } + + return accu; +} + + +static int32_t dot4(const int16_t *x, const int16_t *y) +{ + size_t i; + int32_t accu = 0; + + for (i = 0; i < 4; ++i) + accu = clamp_s16(accu + (((int32_t)x[i] * (int32_t)y[i]) >> 15)); + + return accu; +} + +/* Fast and dirty way of reading dram memory + * Assume properly aligned access + */ +static uint8_t *dram_u8(uint32_t address) +{ + return (uint8_t *)&rsp.RDRAM[(address & 0xffffff) ^ S8]; +} + +static uint16_t *dram_u16(uint32_t address) +{ + return (uint16_t *)&rsp.RDRAM[(address & 0xffffff) ^ S16]; +} + +static uint32_t *dram_u32(uint32_t address) +{ + return (uint32_t *)&rsp.RDRAM[address & 0xffffff]; +} + +static void load_u8(uint8_t *dst, uint32_t address, size_t count) +{ + while (count != 0) { + *(dst++) = *dram_u8(address); + address += 1; + --count; + } +} + +static void load_u16(uint16_t *dst, uint32_t address, size_t count) +{ + while (count != 0) { + *(dst++) = *dram_u16(address); + address += 2; + --count; + } +} + +static void load_u32(uint32_t *dst, uint32_t address, size_t count) +{ + /* Optimization for uint32_t */ + const uint32_t *src = dram_u32(address); + + memcpy(dst, src, count * sizeof(uint32_t)); +} + +static void store_u16(const uint16_t *src, uint32_t address, size_t count) +{ + while (count != 0) { + *dram_u16(address) = *(src++); + address += 2; + --count; + } +} + +/************************************************************************** + * MusyX audio ucode + **************************************************************************/ +void musyx_task(void) +{ + const OSTask_t *const task = get_task(); + + uint32_t sfd_ptr = task->data_ptr; + uint32_t sfd_count = task->data_size; + uint32_t state_ptr; + musyx_t musyx; + + DebugMessage(M64MSG_VERBOSE, "musyx_task: *data=%x, 
#SF=%d", + sfd_ptr, + sfd_count); + + state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); + + /* load initial state */ + load_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); + load_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); + load_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, + 4); + + for (;;) { + /* parse SFD structure */ + uint16_t sfx_index = *dram_u16(sfd_ptr + SFD_SFX_INDEX); + uint32_t voice_mask = *dram_u32(sfd_ptr + SFD_VOICE_BITMASK); + uint32_t sfx_ptr = *dram_u32(sfd_ptr + SFD_SFX_PTR); + uint32_t voice_ptr = sfd_ptr + SFD_VOICES; + uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; + uint32_t output_ptr; + + /* initialize internal subframes using updated base volumes */ + update_base_vol(musyx.base_vol, voice_mask, last_sample_ptr); + init_subframes(&musyx); + + /* active voices get mixed into L,R,cc0,e50 subframes (optional) */ + output_ptr = voice_stage(&musyx, voice_ptr, last_sample_ptr); + + /* apply delay-based effects (optional) */ + sfx_stage(&musyx, sfx_ptr, sfx_index); + + /* emit interleaved L,R subframes */ + interleave_stage(&musyx, output_ptr); + + --sfd_count; + if (sfd_count == 0) + break; + + sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; + state_ptr = *dram_u32(sfd_ptr + SFD_STATE_PTR); + } + + /* writeback updated state */ + save_base_vol(musyx.base_vol, state_ptr + STATE_BASE_VOL); + store_u16((uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); + store_u16((uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4, + 4); +} + +static void load_base_vol(int32_t *base_vol, uint32_t address) +{ + base_vol[0] = ((uint32_t)(*dram_u16(address)) << 16) | (*dram_u16(address + 8)); + base_vol[1] = ((uint32_t)(*dram_u16(address + 2)) << 16) | (*dram_u16(address + 10)); + base_vol[2] = ((uint32_t)(*dram_u16(address + 4)) << 16) | (*dram_u16(address + 12)); + base_vol[3] = ((uint32_t)(*dram_u16(address + 6)) << 16) | (*dram_u16(address + 14)); +} + +static void 
save_base_vol(const int32_t *base_vol, uint32_t address) +{ + unsigned k; + + for (k = 0; k < 4; ++k) { + *dram_u16(address) = (uint16_t)(base_vol[k] >> 16); + address += 2; + } + + for (k = 0; k < 4; ++k) { + *dram_u16(address) = (uint16_t)(base_vol[k]); + address += 2; + } +} + +static void update_base_vol(int32_t *base_vol, uint32_t voice_mask, + uint32_t last_sample_ptr) +{ + unsigned i, k; + uint32_t mask; + + DebugMessage(M64MSG_VERBOSE, "base_vol voice_mask = %08x", voice_mask); + DebugMessage(M64MSG_VERBOSE, "BEFORE: base_vol = %08x %08x %08x %08x", + base_vol[0], base_vol[1], base_vol[2], base_vol[3]); + + /* optim: skip voices contributions entirely if voice_mask is empty */ + if (voice_mask != 0) { + for (i = 0, mask = 1; i < MAX_VOICES; + ++i, mask <<= 1, last_sample_ptr += 8) { + if ((voice_mask & mask) == 0) + continue; + + for (k = 0; k < 4; ++k) + base_vol[k] += (int16_t)*dram_u16(last_sample_ptr + k * 2); + } + } + + /* apply 3% decay */ + for (k = 0; k < 4; ++k) + base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; + + DebugMessage(M64MSG_VERBOSE, "AFTER: base_vol = %08x %08x %08x %08x", + base_vol[0], base_vol[1], base_vol[2], base_vol[3]); +} + +static void init_subframes(musyx_t *musyx) +{ + unsigned i; + + int16_t base_cc0 = clamp_s16(musyx->base_vol[2]); + int16_t base_e50 = clamp_s16(musyx->base_vol[3]); + + int16_t *left = musyx->left; + int16_t *right = musyx->right; + int16_t *cc0 = musyx->cc0; + int16_t *e50 = musyx->e50; + + for (i = 0; i < SUBFRAME_SIZE; ++i) { + *(e50++) = base_e50; + *(left++) = clamp_s16(*cc0 + base_cc0); + *(right++) = clamp_s16(-*cc0 - base_cc0); + *(cc0++) = 0; + } +} + +/* Process voices, and returns interleaved subframe destination address */ +static uint32_t voice_stage(musyx_t *musyx, uint32_t voice_ptr, + uint32_t last_sample_ptr) +{ + uint32_t output_ptr; + int i = 0; + + /* voice stage can be skipped if first voice has no samples */ + if (*dram_u16(voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) { + 
DebugMessage(M64MSG_VERBOSE, "Skipping Voice stage"); + output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); + } else { + /* otherwise process voices until a non null output_ptr is encountered */ + for (;;) { + /* load voice samples (PCM16 or APDCM) */ + int16_t samples[SAMPLE_BUFFER_SIZE]; + unsigned segbase; + unsigned offset; + + DebugMessage(M64MSG_VERBOSE, "Processing Voice #%d", i); + + if (*dram_u8(voice_ptr + VOICE_ADPCM_FRAMES) == 0) + load_samples_PCM16(voice_ptr, samples, &segbase, &offset); + else + load_samples_ADPCM(voice_ptr, samples, &segbase, &offset); + + /* mix them with each internal subframes */ + mix_voice_samples(musyx, voice_ptr, samples, segbase, offset, + last_sample_ptr + i * 8); + + /* check break condition */ + output_ptr = *dram_u32(voice_ptr + VOICE_INTERLEAVED_PTR); + if (output_ptr != 0) + break; + + /* next voice */ + ++i; + voice_ptr += VOICE_SIZE; + } + } + + return output_ptr; +} + +static void dma_cat8(uint8_t *dst, uint32_t catsrc_ptr) +{ + uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); + uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); + uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); + uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); + + size_t count1 = size1; + size_t count2 = size2; + + DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", + ptr1, + ptr2, + size1, + size2); + + load_u8(dst, ptr1, count1); + + if (size2 == 0) + return; + + load_u8(dst + count1, ptr2, count2); +} + +static void dma_cat16(uint16_t *dst, uint32_t catsrc_ptr) +{ + uint32_t ptr1 = *dram_u32(catsrc_ptr + CATSRC_PTR1); + uint32_t ptr2 = *dram_u32(catsrc_ptr + CATSRC_PTR2); + uint16_t size1 = *dram_u16(catsrc_ptr + CATSRC_SIZE1); + uint16_t size2 = *dram_u16(catsrc_ptr + CATSRC_SIZE2); + + size_t count1 = size1 >> 1; + size_t count2 = size2 >> 1; + + DebugMessage(M64MSG_VERBOSE, "dma_cat: %08x %08x %04x %04x", + ptr1, + ptr2, + size1, + size2); + + load_u16(dst, ptr1, count1); + + if (size2 == 0) + return; + + 
load_u16(dst + count1, ptr2, count2); +} + +static void load_samples_PCM16(uint32_t voice_ptr, int16_t *samples, + unsigned *segbase, unsigned *offset) +{ + + uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES); + uint16_t u16_40 = *dram_u16(voice_ptr + VOICE_U16_40); + uint16_t u16_42 = *dram_u16(voice_ptr + VOICE_U16_42); + + unsigned count = align(u16_40 + u8_3e, 4); + + DebugMessage(M64MSG_VERBOSE, "Format: PCM16"); + + *segbase = SAMPLE_BUFFER_SIZE - count; + *offset = u8_3e; + + dma_cat16((uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0); + + if (u16_42 != 0) + dma_cat16((uint16_t *)samples, voice_ptr + VOICE_CATSRC_1); +} + +static void load_samples_ADPCM(uint32_t voice_ptr, int16_t *samples, + unsigned *segbase, unsigned *offset) +{ + /* decompressed samples cannot exceed 0x400 bytes; + * ADPCM has a compression ratio of 5/16 */ + uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16]; + int16_t adpcm_table[128]; + + uint8_t u8_3c = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES ); + uint8_t u8_3d = *dram_u8(voice_ptr + VOICE_ADPCM_FRAMES + 1); + uint8_t u8_3e = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES ); + uint8_t u8_3f = *dram_u8(voice_ptr + VOICE_SKIP_SAMPLES + 1); + uint32_t adpcm_table_ptr = *dram_u32(voice_ptr + VOICE_ADPCM_TABLE_PTR); + unsigned count; + + DebugMessage(M64MSG_VERBOSE, "Format: ADPCM"); + + DebugMessage(M64MSG_VERBOSE, "Loading ADPCM table: %08x", adpcm_table_ptr); + load_u16((uint16_t *)adpcm_table, adpcm_table_ptr, 128); + + count = u8_3c << 5; + + *segbase = SAMPLE_BUFFER_SIZE - count; + *offset = u8_3e & 0x1f; + + dma_cat8(buffer, voice_ptr + VOICE_CATSRC_0); + adpcm_decode_frames(samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e); + + if (u8_3d != 0) { + dma_cat8(buffer, voice_ptr + VOICE_CATSRC_1); + adpcm_decode_frames(samples, buffer, adpcm_table, u8_3d, u8_3f); + } +} + +static void adpcm_decode_frames(int16_t *dst, const uint8_t *src, + const int16_t *table, uint8_t count, + uint8_t skip_samples) +{ + int16_t frame[32]; 
+ const uint8_t *nibbles = src + 8; + unsigned i; + bool jump_gap = false; + + DebugMessage(M64MSG_VERBOSE, "ADPCM decode: count=%d, skip=%d", count, + skip_samples); + + if (skip_samples >= 32) { + jump_gap = true; + nibbles += 16; + src += 4; + } + + for (i = 0; i < count; ++i) { + uint8_t c2 = nibbles[0]; + + const int16_t *book = (c2 & 0xf0) + table; + unsigned int rshift = (c2 & 0x0f); + + adpcm_get_predicted_frame(frame, src, nibbles, rshift); + + memcpy(dst, frame, 2 * sizeof(frame[0])); + adpcm_decode_upto_8_samples(dst + 2, frame + 2, book, dst , 6); + adpcm_decode_upto_8_samples(dst + 8, frame + 8, book, dst + 6, 8); + adpcm_decode_upto_8_samples(dst + 16, frame + 16, book, dst + 14, 8); + adpcm_decode_upto_8_samples(dst + 24, frame + 24, book, dst + 22, 8); + + if (jump_gap) { + nibbles += 8; + src += 32; + } + + jump_gap = !jump_gap; + nibbles += 16; + src += 4; + dst += 32; + } +} + +static int16_t adpcm_get_predicted_sample(uint8_t byte, uint8_t mask, + unsigned lshift, unsigned rshift) +{ + int16_t sample = ((uint16_t)byte & (uint16_t)mask) << lshift; + sample >>= rshift; /* signed */ + return sample; +} + +static void adpcm_get_predicted_frame(int16_t *dst, const uint8_t *src, + const uint8_t *nibbles, + unsigned int rshift) +{ + unsigned int i; + + *(dst++) = (src[0] << 8) | src[1]; + *(dst++) = (src[2] << 8) | src[3]; + + for (i = 1; i < 16; ++i) { + uint8_t byte = nibbles[i]; + + *(dst++) = adpcm_get_predicted_sample(byte, 0xf0, 8, rshift); + *(dst++) = adpcm_get_predicted_sample(byte, 0x0f, 12, rshift); + } +} + +static void adpcm_decode_upto_8_samples(int16_t *dst, const int16_t *src, + const int16_t *cb_entry, + const int16_t *last_samples, + size_t size) +{ + const int16_t *const book1 = cb_entry; + const int16_t *const book2 = cb_entry + 8; + + const int16_t l1 = last_samples[0]; + const int16_t l2 = last_samples[1]; + + size_t i; + int32_t accu; + + for (i = 0; i < size; ++i) { + accu = (int32_t)src[i] << 11; + accu += book1[i] * l1 + 
book2[i] * l2 + rdot(i, book2, src); + dst[i] = clamp_s16(accu >> 11); + } +} + +static void mix_voice_samples(musyx_t *musyx, uint32_t voice_ptr, + const int16_t *samples, unsigned segbase, + unsigned offset, uint32_t last_sample_ptr) +{ + int i, k; + + /* parse VOICE structure */ + const uint16_t pitch_q16 = *dram_u16(voice_ptr + VOICE_PITCH_Q16); + const uint16_t pitch_shift = *dram_u16(voice_ptr + VOICE_PITCH_SHIFT); /* Q4.12 */ + + const uint16_t end_point = *dram_u16(voice_ptr + VOICE_END_POINT); + const uint16_t restart_point = *dram_u16(voice_ptr + VOICE_RESTART_POINT); + + const uint16_t u16_4e = *dram_u16(voice_ptr + VOICE_U16_4E); + + /* init values and pointers */ + const int16_t *sample = samples + segbase + offset + u16_4e; + const int16_t *const sample_end = samples + segbase + end_point; + const int16_t *const sample_restart = samples + (restart_point & 0x7fff) + + (((restart_point & 0x8000) != 0) ? 0x000 : segbase); + + + uint32_t pitch_accu = pitch_q16; + uint32_t pitch_step = pitch_shift << 4; + + int32_t v4_env[4]; + int32_t v4_env_step[4]; + int16_t *v4_dst[4]; + int16_t v4[4]; + + load_u32((uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4); + load_u32((uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4); + + v4_dst[0] = musyx->left; + v4_dst[1] = musyx->right; + v4_dst[2] = musyx->cc0; + v4_dst[3] = musyx->e50; + + DebugMessage(M64MSG_VERBOSE, + "Voice debug: segbase=%d" + "\tu16_4e=%04x\n" + "\tpitch: frac0=%04x shift=%04x\n" + "\tend_point=%04x restart_point=%04x\n" + "\tenv = %08x %08x %08x %08x\n" + "\tenv_step = %08x %08x %08x %08x\n", + segbase, + u16_4e, + pitch_q16, pitch_shift, + end_point, restart_point, + v4_env[0], v4_env[1], v4_env[2], v4_env[3], + v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]); + + for (i = 0; i < SUBFRAME_SIZE; ++i) { + /* update sample and resample_lut pointers and then pitch_accu */ + const int16_t *lut = (int16_t *)(ResampleLUT + ((pitch_accu & 0xfc00) >> 8)); + int dist; + int16_t v; + + 
sample += (pitch_accu >> 16); + pitch_accu &= 0xffff; + pitch_accu += pitch_step; + + /* handle end/restart points */ + dist = sample - sample_end; + if (dist >= 0) + sample = sample_restart + dist; + + /* apply resample filter */ + v = clamp_s16(dot4(sample, lut)); + + for (k = 0; k < 4; ++k) { + /* envmix */ + int32_t accu = (v * (v4_env[k] >> 16)) >> 15; + v4[k] = clamp_s16(accu); + *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k])); + + /* update envelopes and dst pointers */ + ++(v4_dst[k]); + v4_env[k] += v4_env_step[k]; + } + } + + /* save last resampled sample */ + store_u16((uint16_t *)v4, last_sample_ptr, 4); + + DebugMessage(M64MSG_VERBOSE, "last_sample = %04x %04x %04x %04x", + v4[0], v4[1], v4[2], v4[3]); +} + + +static void sfx_stage(musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx) +{ + unsigned int i; + + int16_t buffer[SUBFRAME_SIZE + 4]; + int16_t *subframe = buffer + 4; + + uint32_t tap_delays[8]; + int16_t tap_gains[8]; + int16_t fir4_hcoeffs[4]; + + int16_t delayed[SUBFRAME_SIZE]; + int dpos, dlength; + + const uint32_t pos = idx * SUBFRAME_SIZE; + + uint32_t cbuffer_ptr; + uint32_t cbuffer_length; + uint16_t tap_count; + int16_t fir4_hgain; + + DebugMessage(M64MSG_VERBOSE, "SFX: %08x, idx=%d", sfx_ptr, idx); + + if (sfx_ptr == 0) + return; + + /* load sfx parameters */ + cbuffer_ptr = *dram_u32(sfx_ptr + SFX_CBUFFER_PTR); + cbuffer_length = *dram_u32(sfx_ptr + SFX_CBUFFER_LENGTH); + + tap_count = *dram_u16(sfx_ptr + SFX_TAP_COUNT); + + load_u32(tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8); + load_u16((uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8); + + fir4_hgain = *dram_u16(sfx_ptr + SFX_FIR4_HGAIN); + load_u16((uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4); + + DebugMessage(M64MSG_VERBOSE, "cbuffer: ptr=%08x length=%x", cbuffer_ptr, + cbuffer_length); + + DebugMessage(M64MSG_VERBOSE, "fir4: hgain=%04x hcoeff=%04x %04x %04x %04x", + fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], + fir4_hcoeffs[3]); + + 
DebugMessage(M64MSG_VERBOSE, + "tap count=%d\n" + "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n" + "gains: %04x %04x %04x %04x %04x %04x %04x %04x", + tap_count, + tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3], + tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7], + tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3], + tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]); + + /* mix up to 8 delayed subframes */ + memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); + for (i = 0; i < tap_count; ++i) { + + dpos = pos - tap_delays[i]; + if (dpos <= 0) + dpos += cbuffer_length; + dlength = SUBFRAME_SIZE; + + if (dpos + SUBFRAME_SIZE > cbuffer_length) { + dlength = cbuffer_length - dpos; + load_u16((uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength); + } + + load_u16((uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength); + + mix_subframes(subframe, delayed, tap_gains[i]); + } + + /* add resulting subframe to L/R subframes */ + for (i = 0; i < SUBFRAME_SIZE; ++i) { + int16_t v = subframe[i]; + musyx->left[i] = clamp_s16(musyx->left[i] + v); + musyx->right[i] = clamp_s16(musyx->right[i] + v); + } + + /* apply FIR4 filter and writeback filtered result */ + memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t)); + memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t)); + mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs); + store_u16((uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE); +} + +static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain) +{ + unsigned int i; + + for (i = 0; i < SUBFRAME_SIZE; ++i) { + int32_t v = (hgain * x[i]) >> 15; + y[i] = clamp_s16(y[i] + v); + } +} + +static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs) +{ + unsigned int i; + int32_t h[4]; + + h[0] = (hgain * hcoeffs[0]) >> 15; + h[1] = (hgain * hcoeffs[1]) >> 15; + h[2] = (hgain * hcoeffs[2]) >> 15; + h[3] = (hgain * hcoeffs[3]) >> 
15; + + for (i = 0; i < SUBFRAME_SIZE; ++i) { + int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15; + y[i] = clamp_s16(y[i] + v); + } +} + + +static void interleave_stage(musyx_t *musyx, uint32_t output_ptr) +{ + size_t i; + + int16_t base_left; + int16_t base_right; + + int16_t *left; + int16_t *right; + uint32_t *dst; + + DebugMessage(M64MSG_VERBOSE, "interleave: %08x", output_ptr); + + base_left = clamp_s16(musyx->base_vol[0]); + base_right = clamp_s16(musyx->base_vol[1]); + + left = musyx->left; + right = musyx->right; + dst = dram_u32(output_ptr); + + for (i = 0; i < SUBFRAME_SIZE; ++i) { + uint16_t l = clamp_s16(*(left++) + base_left); + uint16_t r = clamp_s16(*(right++) + base_right); + + *(dst++) = (l << 16) | r; + } +} diff --git a/source/mupen64plus-rsp-hle/src/musyx.h b/source/mupen64plus-rsp-hle/src/musyx.h new file mode 100755 index 0000000..ec97099 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/musyx.h @@ -0,0 +1,27 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - musyx.h * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2013 Bobby Smiles * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef MUSYX_H +#define MUSYX_H + +void musyx_task(void); + +#endif diff --git a/source/mupen64plus-rsp-hle/src/ucode1.c b/source/mupen64plus-rsp-hle/src/ucode1.c new file mode 100755 index 0000000..72ca868 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/ucode1.c @@ -0,0 +1,813 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - ucode1.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include +#include + +#include "m64p_plugin.h" +#include "hle.h" +#include "alist_internal.h" + +/******** DMEM Memory Map for ABI 1 *************** +Address/Range Description +------------- ------------------------------- +0x000..0x2BF UCodeData + 0x000-0x00F Constants - 0000 0001 0002 FFFF 0020 0800 7FFF 4000 + 0x010-0x02F Function Jump Table (16 Functions * 2 bytes each = 32) 0x20 + 0x030-0x03F Constants - F000 0F00 00F0 000F 0001 0010 0100 1000 + 0x040-0x03F Used by the Envelope Mixer (But what for?) 
+ 0x070-0x07F Used by the Envelope Mixer (But what for?) +0x2C0..0x31F +0x320..0x35F Segments +0x360 Audio In Buffer (Location) +0x362 Audio Out Buffer (Location) +0x364 Audio Buffer Size (Location) +0x366 Initial Volume for Left Channel +0x368 Initial Volume for Right Channel +0x36A Auxillary Buffer #1 (Location) +0x36C Auxillary Buffer #2 (Location) +0x36E Auxillary Buffer #3 (Location) +0x370 Loop Value (shared location) +0x370 Target Volume (Left) +0x372 Ramp?? (Left) +0x374 Rate?? (Left) +0x376 Target Volume (Right) +0x378 Ramp?? (Right) +0x37A Rate?? (Right) +0x37C Dry?? +0x37E Wet?? +0x380..0x4BF Alist data +0x4C0..0x4FF ADPCM CodeBook +0x500..0x5BF +0x5C0..0xF7F Buffers... +0xF80..0xFFF +***************************************************/ +#ifdef USE_EXPANSION +#define MEMMASK 0x7FFFFF +#else +#define MEMMASK 0x3FFFFF +#endif + +static void SPNOOP(uint32_t inst1, uint32_t inst2) +{ +} + +uint16_t AudioInBuffer; /* 0x0000(T8) */ +uint16_t AudioOutBuffer; /* 0x0002(T8) */ +uint16_t AudioCount; /* 0x0004(T8) */ +int16_t Vol_Left; /* 0x0006(T8) */ +int16_t Vol_Right; /* 0x0008(T8) */ +static uint16_t AudioAuxA; /* 0x000A(T8) */ +static uint16_t AudioAuxC; /* 0x000C(T8) */ +static uint16_t AudioAuxE; /* 0x000E(T8) */ +uint32_t loopval; /* 0x0010(T8) - Value set by A_SETLOOP : Possible conflict with SETVOLUME??? 
*/ +int16_t VolTrg_Left; /* 0x0010(T8) */ +int32_t VolRamp_Left; /* m_LeftVolTarget */ +int16_t VolTrg_Right; /* m_RightVol */ +int32_t VolRamp_Right; /* m_RightVolTarget */ +int16_t Env_Dry; /* 0x001C(T8) */ +int16_t Env_Wet; /* 0x001E(T8) */ + +uint8_t BufferSpace[0x10000]; + +short hleMixerWorkArea[256]; +uint16_t adpcmtable[0x88]; + +const uint16_t ResampleLUT [0x200] = { + 0x0C39, 0x66AD, 0x0D46, 0xFFDF, 0x0B39, 0x6696, 0x0E5F, 0xFFD8, + 0x0A44, 0x6669, 0x0F83, 0xFFD0, 0x095A, 0x6626, 0x10B4, 0xFFC8, + 0x087D, 0x65CD, 0x11F0, 0xFFBF, 0x07AB, 0x655E, 0x1338, 0xFFB6, + 0x06E4, 0x64D9, 0x148C, 0xFFAC, 0x0628, 0x643F, 0x15EB, 0xFFA1, + 0x0577, 0x638F, 0x1756, 0xFF96, 0x04D1, 0x62CB, 0x18CB, 0xFF8A, + 0x0435, 0x61F3, 0x1A4C, 0xFF7E, 0x03A4, 0x6106, 0x1BD7, 0xFF71, + 0x031C, 0x6007, 0x1D6C, 0xFF64, 0x029F, 0x5EF5, 0x1F0B, 0xFF56, + 0x022A, 0x5DD0, 0x20B3, 0xFF48, 0x01BE, 0x5C9A, 0x2264, 0xFF3A, + 0x015B, 0x5B53, 0x241E, 0xFF2C, 0x0101, 0x59FC, 0x25E0, 0xFF1E, + 0x00AE, 0x5896, 0x27A9, 0xFF10, 0x0063, 0x5720, 0x297A, 0xFF02, + 0x001F, 0x559D, 0x2B50, 0xFEF4, 0xFFE2, 0x540D, 0x2D2C, 0xFEE8, + 0xFFAC, 0x5270, 0x2F0D, 0xFEDB, 0xFF7C, 0x50C7, 0x30F3, 0xFED0, + 0xFF53, 0x4F14, 0x32DC, 0xFEC6, 0xFF2E, 0x4D57, 0x34C8, 0xFEBD, + 0xFF0F, 0x4B91, 0x36B6, 0xFEB6, 0xFEF5, 0x49C2, 0x38A5, 0xFEB0, + 0xFEDF, 0x47ED, 0x3A95, 0xFEAC, 0xFECE, 0x4611, 0x3C85, 0xFEAB, + 0xFEC0, 0x4430, 0x3E74, 0xFEAC, 0xFEB6, 0x424A, 0x4060, 0xFEAF, + 0xFEAF, 0x4060, 0x424A, 0xFEB6, 0xFEAC, 0x3E74, 0x4430, 0xFEC0, + 0xFEAB, 0x3C85, 0x4611, 0xFECE, 0xFEAC, 0x3A95, 0x47ED, 0xFEDF, + 0xFEB0, 0x38A5, 0x49C2, 0xFEF5, 0xFEB6, 0x36B6, 0x4B91, 0xFF0F, + 0xFEBD, 0x34C8, 0x4D57, 0xFF2E, 0xFEC6, 0x32DC, 0x4F14, 0xFF53, + 0xFED0, 0x30F3, 0x50C7, 0xFF7C, 0xFEDB, 0x2F0D, 0x5270, 0xFFAC, + 0xFEE8, 0x2D2C, 0x540D, 0xFFE2, 0xFEF4, 0x2B50, 0x559D, 0x001F, + 0xFF02, 0x297A, 0x5720, 0x0063, 0xFF10, 0x27A9, 0x5896, 0x00AE, + 0xFF1E, 0x25E0, 0x59FC, 0x0101, 0xFF2C, 0x241E, 0x5B53, 0x015B, + 0xFF3A, 0x2264, 0x5C9A, 0x01BE, 
0xFF48, 0x20B3, 0x5DD0, 0x022A, + 0xFF56, 0x1F0B, 0x5EF5, 0x029F, 0xFF64, 0x1D6C, 0x6007, 0x031C, + 0xFF71, 0x1BD7, 0x6106, 0x03A4, 0xFF7E, 0x1A4C, 0x61F3, 0x0435, + 0xFF8A, 0x18CB, 0x62CB, 0x04D1, 0xFF96, 0x1756, 0x638F, 0x0577, + 0xFFA1, 0x15EB, 0x643F, 0x0628, 0xFFAC, 0x148C, 0x64D9, 0x06E4, + 0xFFB6, 0x1338, 0x655E, 0x07AB, 0xFFBF, 0x11F0, 0x65CD, 0x087D, + 0xFFC8, 0x10B4, 0x6626, 0x095A, 0xFFD0, 0x0F83, 0x6669, 0x0A44, + 0xFFD8, 0x0E5F, 0x6696, 0x0B39, 0xFFDF, 0x0D46, 0x66AD, 0x0C39 +}; + +static void CLEARBUFF(uint32_t inst1, uint32_t inst2) +{ + uint32_t addr = (uint32_t)(inst1 & 0xffff); + uint32_t count = (uint32_t)(inst2 & 0xffff); + addr &= 0xFFFC; + memset(BufferSpace + addr, 0, (count + 3) & 0xFFFC); +} + +static void ENVMIXER(uint32_t inst1, uint32_t inst2) +{ + uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff); + uint32_t addy = (inst2 & 0xFFFFFF); + short *inp = (short *)(BufferSpace + AudioInBuffer); + short *out = (short *)(BufferSpace + AudioOutBuffer); + short *aux1 = (short *)(BufferSpace + AudioAuxA); + short *aux2 = (short *)(BufferSpace + AudioAuxC); + short *aux3 = (short *)(BufferSpace + AudioAuxE); + int32_t MainR; + int32_t MainL; + int32_t AuxR; + int32_t AuxL; + int i1, o1, a1, a2 = 0, a3 = 0; + unsigned short AuxIncRate = 1; + short zero[8]; + int32_t LVol, RVol; + int32_t LAcc, RAcc; + int32_t LTrg, RTrg; + int16_t Wet, Dry; + uint32_t ptr = 0; + int32_t RRamp, LRamp; + int32_t LAdderStart, RAdderStart, LAdderEnd, RAdderEnd; + int32_t oMainR, oMainL, oAuxR, oAuxL; + int x, y; + + memset(zero, 0, sizeof(zero)); + + if (flags & A_INIT) { + LVol = ((Vol_Left * (int32_t)VolRamp_Left)); + RVol = ((Vol_Right * (int32_t)VolRamp_Right)); + Wet = (int16_t)Env_Wet; + /* Save Wet/Dry values */ + Dry = (int16_t)Env_Dry; + /* Save Current Left/Right Targets */ + LTrg = (VolTrg_Left << 16); + RTrg = (VolTrg_Right << 16); + LAdderStart = Vol_Left << 16; + RAdderStart = Vol_Right << 16; + LAdderEnd = LVol; + RAdderEnd = RVol; + RRamp = VolRamp_Right; 
+ LRamp = VolRamp_Left; + } else { + /* Load LVol, RVol, LAcc, and RAcc (all 32bit) + * Load Wet, Dry, LTrg, RTrg + */ + memcpy((uint8_t *)hleMixerWorkArea, (rsp.RDRAM + addy), 80); + Wet = *(int16_t *)(hleMixerWorkArea + 0); /* 0-1 */ + Dry = *(int16_t *)(hleMixerWorkArea + 2); /* 2-3 */ + LTrg = *(int32_t *)(hleMixerWorkArea + 4); /* 4-5 */ + RTrg = *(int32_t *)(hleMixerWorkArea + 6); /* 6-7 */ + LRamp = *(int32_t *)(hleMixerWorkArea + 8); /* 8-9 (hleMixerWorkArea is a 16bit pointer) */ + RRamp = *(int32_t *)(hleMixerWorkArea + 10); /* 10-11 */ + LAdderEnd = *(int32_t *)(hleMixerWorkArea + 12); /* 12-13 */ + RAdderEnd = *(int32_t *)(hleMixerWorkArea + 14); /* 14-15 */ + LAdderStart = *(int32_t *)(hleMixerWorkArea + 16); /* 12-13 */ + RAdderStart = *(int32_t *)(hleMixerWorkArea + 18); /* 14-15 */ + } + + if (!(flags & A_AUX)) { + AuxIncRate = 0; + aux2 = aux3 = zero; + } + + oMainL = (Dry * (LTrg >> 16) + 0x4000) >> 15; + oAuxL = (Wet * (LTrg >> 16) + 0x4000) >> 15; + oMainR = (Dry * (RTrg >> 16) + 0x4000) >> 15; + oAuxR = (Wet * (RTrg >> 16) + 0x4000) >> 15; + + for (y = 0; y < AudioCount; y += 0x10) { + + if (LAdderStart != LTrg) { + LAcc = LAdderStart; + LVol = (LAdderEnd - LAdderStart) >> 3; + LAdderEnd = (int32_t)(((int64_t)LAdderEnd * (int64_t)LRamp) >> 16); + LAdderStart = (int32_t)(((int64_t)LAcc * (int64_t)LRamp) >> 16); + } else { + LAcc = LTrg; + LVol = 0; + } + + if (RAdderStart != RTrg) { + RAcc = RAdderStart; + RVol = (RAdderEnd - RAdderStart) >> 3; + RAdderEnd = (int32_t)(((int64_t)RAdderEnd * (int64_t)RRamp) >> 16); + RAdderStart = (int32_t)(((int64_t)RAcc * (int64_t)RRamp) >> 16); + } else { + RAcc = RTrg; + RVol = 0; + } + + for (x = 0; x < 8; x++) { + i1 = (int)inp[ptr ^ S]; + o1 = (int)out[ptr ^ S]; + a1 = (int)aux1[ptr ^ S]; + if (AuxIncRate) { + a2 = (int)aux2[ptr ^ S]; + a3 = (int)aux3[ptr ^ S]; + } + /* TODO: here... 
+ * LAcc = LTrg; + * RAcc = RTrg; + */ + + LAcc += LVol; + RAcc += RVol; + + if (LVol <= 0) { + /* Decrementing */ + if (LAcc < LTrg) { + LAcc = LTrg; + LAdderStart = LTrg; + MainL = oMainL; + AuxL = oAuxL; + } else { + MainL = (Dry * ((int32_t)LAcc >> 16) + 0x4000) >> 15; + AuxL = (Wet * ((int32_t)LAcc >> 16) + 0x4000) >> 15; + } + } else { + if (LAcc > LTrg) { + LAcc = LTrg; + LAdderStart = LTrg; + MainL = oMainL; + AuxL = oAuxL; + } else { + MainL = (Dry * ((int32_t)LAcc >> 16) + 0x4000) >> 15; + AuxL = (Wet * ((int32_t)LAcc >> 16) + 0x4000) >> 15; + } + } + + if (RVol <= 0) { + /* Decrementing */ + if (RAcc < RTrg) { + RAcc = RTrg; + RAdderStart = RTrg; + MainR = oMainR; + AuxR = oAuxR; + } else { + MainR = (Dry * ((int32_t)RAcc >> 16) + 0x4000) >> 15; + AuxR = (Wet * ((int32_t)RAcc >> 16) + 0x4000) >> 15; + } + } else { + if (RAcc > RTrg) { + RAcc = RTrg; + RAdderStart = RTrg; + MainR = oMainR; + AuxR = oAuxR; + } else { + MainR = (Dry * ((int32_t)RAcc >> 16) + 0x4000) >> 15; + AuxR = (Wet * ((int32_t)RAcc >> 16) + 0x4000) >> 15; + } + } + + o1 += ((i1 * MainR) + 0x4000) >> 15; + a1 += ((i1 * MainL) + 0x4000) >> 15; + + o1 = clamp_s16(o1); + a1 = clamp_s16(a1); + + out[ptr ^ S] = o1; + aux1[ptr ^ S] = a1; + if (AuxIncRate) { + a2 += ((i1 * AuxR) + 0x4000) >> 15; + a3 += ((i1 * AuxL) + 0x4000) >> 15; + + a2 = clamp_s16(a2); + a3 = clamp_s16(a3); + + aux2[ptr ^ S] = a2; + aux3[ptr ^ S] = a3; + } + ptr++; + } + } + + *(int16_t *)(hleMixerWorkArea + 0) = Wet; /* 0-1 */ + *(int16_t *)(hleMixerWorkArea + 2) = Dry; /* 2-3 */ + *(int32_t *)(hleMixerWorkArea + 4) = LTrg; /* 4-5 */ + *(int32_t *)(hleMixerWorkArea + 6) = RTrg; /* 6-7 */ + *(int32_t *)(hleMixerWorkArea + 8) = LRamp; /* 8-9 (hleMixerWorkArea is a 16bit pointer) */ + *(int32_t *)(hleMixerWorkArea + 10) = RRamp; /* 10-11 */ + *(int32_t *)(hleMixerWorkArea + 12) = LAdderEnd; /* 12-13 */ + *(int32_t *)(hleMixerWorkArea + 14) = RAdderEnd; /* 14-15 */ + *(int32_t *)(hleMixerWorkArea + 16) = LAdderStart; /* 12-13 
*/ + *(int32_t *)(hleMixerWorkArea + 18) = RAdderStart; /* 14-15 */ + memcpy(rsp.RDRAM + addy, (uint8_t *)hleMixerWorkArea, 80); +} + +static void RESAMPLE(uint32_t inst1, uint32_t inst2) +{ + unsigned char Flags = (uint8_t)((inst1 >> 16) & 0xff); + unsigned int Pitch = ((inst1 & 0xffff)) << 1; + uint32_t addy = (inst2 & 0xffffff); + unsigned int Accum = 0; + unsigned int location; + int16_t *lut; + short *dst = (short *)(BufferSpace); + int16_t *src = (int16_t *)(BufferSpace); + uint32_t srcPtr = (AudioInBuffer / 2); + uint32_t dstPtr = (AudioOutBuffer / 2); + int32_t temp; + int32_t accum; + int x, i; + srcPtr -= 4; + + if ((Flags & 0x1) == 0) { + for (x = 0; x < 4; x++) + src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S]; + Accum = *(uint16_t *)(rsp.RDRAM + addy + 10); + } else { + for (x = 0; x < 4; x++) + src[(srcPtr + x)^S] = 0; + } + + for (i = 0; i < ((AudioCount + 0xf) & 0xFFF0) / 2; i++) { + /* location is the fractional position between two samples */ + location = (Accum >> 0xa) * 4; + lut = (int16_t *)ResampleLUT + location; + + /* imul */ + temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 0)^S)) * ((int32_t)((int16_t)lut[0]))); + accum = (int32_t)(temp >> 15); + + temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 1)^S)) * ((int32_t)((int16_t)lut[1]))); + accum += (int32_t)(temp >> 15); + + temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 2)^S)) * ((int32_t)((int16_t)lut[2]))); + accum += (int32_t)(temp >> 15); + + temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 3)^S)) * ((int32_t)((int16_t)lut[3]))); + accum += (int32_t)(temp >> 15); + + accum = clamp_s16(accum); + + dst[dstPtr ^ S] = (accum); + dstPtr++; + Accum += Pitch; + srcPtr += (Accum >> 16); + Accum &= 0xffff; + } + for (x = 0; x < 4; x++) + ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S]; + *(uint16_t *)(rsp.RDRAM + addy + 10) = Accum; +} + +static void SETVOL(uint32_t inst1, uint32_t inst2) +{ + /* Might be better to unpack these depending on the flags... 
*/ + uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff); + uint16_t vol = (int16_t)(inst1 & 0xffff); + uint16_t volrate = (uint16_t)((inst2 & 0xffff)); + + if (flags & A_AUX) { + Env_Dry = (int16_t)vol; /* m_MainVol */ + Env_Wet = (int16_t)volrate; /* m_AuxVol */ + return; + } + + /* Set the Source(start) Volumes */ + if (flags & A_VOL) { + if (flags & A_LEFT) + Vol_Left = (int16_t)vol; + else + /* A_RIGHT */ + Vol_Right = (int16_t)vol; + return; + } + + /* 0x370 Loop Value (shared location) + * 0x370 Target Volume (Left) + */ + + /* Set the Ramping values Target, Ramp */ + if (flags & A_LEFT) { + VolTrg_Left = (int16_t)inst1; + VolRamp_Left = (int32_t)inst2; + } else { /* A_RIGHT */ + VolTrg_Right = (int16_t)inst1; + VolRamp_Right = (int32_t)inst2; + } +} + +static void UNKNOWN(uint32_t inst1, uint32_t inst2) {} + +static void SETLOOP(uint32_t inst1, uint32_t inst2) +{ + loopval = (inst2 & 0xffffff); +} + +/* TODO Work in progress! :) */ +static void ADPCM(uint32_t inst1, uint32_t inst2) +{ + unsigned char Flags = (uint8_t)(inst1 >> 16) & 0xff; + unsigned int Address = (inst2 & 0xffffff); + unsigned short inPtr = 0; + short *out = (short *)(BufferSpace + AudioOutBuffer); + short count = (short)AudioCount; + unsigned char icode; + unsigned char code; + int vscale; + unsigned short index; + unsigned short j; + int a[8]; + short *book1, *book2; + int l1; + int l2; + int inp1[8]; + int inp2[8]; + + memset(out, 0, 32); + + if (!(Flags & 0x1)) { + if (Flags & 0x2) + memcpy(out, &rsp.RDRAM[loopval & MEMMASK], 32); + else + memcpy(out, &rsp.RDRAM[Address], 32); + } + + l1 = out[14 ^ S]; + l2 = out[15 ^ S]; + out += 16; + while (count > 0) { + /* the first interation through, these values are + * either 0 in the case of A_INIT, from a special + * area of memory in the case of A_LOOP or just + * the values we calculated the last time + */ + + code = BufferSpace[(AudioInBuffer + inPtr)^S8]; + index = code & 0xf; + /* index into the adpcm code table */ + index <<= 4; + book1 = 
(short *)&adpcmtable[index]; + book2 = book1 + 8; + /* upper nibble is scale */ + code >>= 4; + /* very strange. 0x8000 would be .5 in 16:16 format + * so this appears to be a fractional scale based + * on the 12 based inverse of the scale value. note + * that this could be negative, in which case we do + * not use the calculated vscale value... see the + * if(code>12) check below + */ + vscale = (0x8000 >> ((12 - code) - 1)); + + /* coded adpcm data lies next */ + inPtr++; + j = 0; + /* loop of 8, for 8 coded nibbles from 4 bytes + * which yields 8 short pcm values + */ + while (j < 8) { + icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; + inPtr++; + + /* this will in effect be signed */ + inp1[j] = (int16_t)((icode & 0xf0) << 8); + if (code < 12) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + + inp1[j] = (int16_t)((icode & 0xf) << 12); + if (code < 12) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + } + j = 0; + while (j < 8) { + icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; + inPtr++; + + /* this will in effect be signed */ + inp2[j] = (short)((icode & 0xf0) << 8); + if (code < 12) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + + inp2[j] = (short)((icode & 0xf) << 12); + if (code < 12) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + } + + a[0] = (int)book1[0] * (int)l1; + a[0] += (int)book2[0] * (int)l2; + a[0] += (int)inp1[0] * (int)2048; + + a[1] = (int)book1[1] * (int)l1; + a[1] += (int)book2[1] * (int)l2; + a[1] += (int)book2[0] * inp1[0]; + a[1] += (int)inp1[1] * (int)2048; + + a[2] = (int)book1[2] * (int)l1; + a[2] += (int)book2[2] * (int)l2; + a[2] += (int)book2[1] * inp1[0]; + a[2] += (int)book2[0] * inp1[1]; + a[2] += (int)inp1[2] * (int)2048; + + a[3] = (int)book1[3] * (int)l1; + a[3] += (int)book2[3] * (int)l2; + a[3] += (int)book2[2] * inp1[0]; + a[3] += (int)book2[1] * inp1[1]; + a[3] += (int)book2[0] * inp1[2]; + a[3] += (int)inp1[3] * (int)2048; + + a[4] = (int)book1[4] * 
(int)l1; + a[4] += (int)book2[4] * (int)l2; + a[4] += (int)book2[3] * inp1[0]; + a[4] += (int)book2[2] * inp1[1]; + a[4] += (int)book2[1] * inp1[2]; + a[4] += (int)book2[0] * inp1[3]; + a[4] += (int)inp1[4] * (int)2048; + + a[5] = (int)book1[5] * (int)l1; + a[5] += (int)book2[5] * (int)l2; + a[5] += (int)book2[4] * inp1[0]; + a[5] += (int)book2[3] * inp1[1]; + a[5] += (int)book2[2] * inp1[2]; + a[5] += (int)book2[1] * inp1[3]; + a[5] += (int)book2[0] * inp1[4]; + a[5] += (int)inp1[5] * (int)2048; + + a[6] = (int)book1[6] * (int)l1; + a[6] += (int)book2[6] * (int)l2; + a[6] += (int)book2[5] * inp1[0]; + a[6] += (int)book2[4] * inp1[1]; + a[6] += (int)book2[3] * inp1[2]; + a[6] += (int)book2[2] * inp1[3]; + a[6] += (int)book2[1] * inp1[4]; + a[6] += (int)book2[0] * inp1[5]; + a[6] += (int)inp1[6] * (int)2048; + + a[7] = (int)book1[7] * (int)l1; + a[7] += (int)book2[7] * (int)l2; + a[7] += (int)book2[6] * inp1[0]; + a[7] += (int)book2[5] * inp1[1]; + a[7] += (int)book2[4] * inp1[2]; + a[7] += (int)book2[3] * inp1[3]; + a[7] += (int)book2[2] * inp1[4]; + a[7] += (int)book2[1] * inp1[5]; + a[7] += (int)book2[0] * inp1[6]; + a[7] += (int)inp1[7] * (int)2048; + + for (j = 0; j < 8; j++) { + a[j ^ S] >>= 11; + a[j ^ S] = clamp_s16(a[j ^ S]); + *(out++) = a[j ^ S]; + } + l1 = a[6]; + l2 = a[7]; + + a[0] = (int)book1[0] * (int)l1; + a[0] += (int)book2[0] * (int)l2; + a[0] += (int)inp2[0] * (int)2048; + + a[1] = (int)book1[1] * (int)l1; + a[1] += (int)book2[1] * (int)l2; + a[1] += (int)book2[0] * inp2[0]; + a[1] += (int)inp2[1] * (int)2048; + + a[2] = (int)book1[2] * (int)l1; + a[2] += (int)book2[2] * (int)l2; + a[2] += (int)book2[1] * inp2[0]; + a[2] += (int)book2[0] * inp2[1]; + a[2] += (int)inp2[2] * (int)2048; + + a[3] = (int)book1[3] * (int)l1; + a[3] += (int)book2[3] * (int)l2; + a[3] += (int)book2[2] * inp2[0]; + a[3] += (int)book2[1] * inp2[1]; + a[3] += (int)book2[0] * inp2[2]; + a[3] += (int)inp2[3] * (int)2048; + + a[4] = (int)book1[4] * (int)l1; + a[4] += 
(int)book2[4] * (int)l2; + a[4] += (int)book2[3] * inp2[0]; + a[4] += (int)book2[2] * inp2[1]; + a[4] += (int)book2[1] * inp2[2]; + a[4] += (int)book2[0] * inp2[3]; + a[4] += (int)inp2[4] * (int)2048; + + a[5] = (int)book1[5] * (int)l1; + a[5] += (int)book2[5] * (int)l2; + a[5] += (int)book2[4] * inp2[0]; + a[5] += (int)book2[3] * inp2[1]; + a[5] += (int)book2[2] * inp2[2]; + a[5] += (int)book2[1] * inp2[3]; + a[5] += (int)book2[0] * inp2[4]; + a[5] += (int)inp2[5] * (int)2048; + + a[6] = (int)book1[6] * (int)l1; + a[6] += (int)book2[6] * (int)l2; + a[6] += (int)book2[5] * inp2[0]; + a[6] += (int)book2[4] * inp2[1]; + a[6] += (int)book2[3] * inp2[2]; + a[6] += (int)book2[2] * inp2[3]; + a[6] += (int)book2[1] * inp2[4]; + a[6] += (int)book2[0] * inp2[5]; + a[6] += (int)inp2[6] * (int)2048; + + a[7] = (int)book1[7] * (int)l1; + a[7] += (int)book2[7] * (int)l2; + a[7] += (int)book2[6] * inp2[0]; + a[7] += (int)book2[5] * inp2[1]; + a[7] += (int)book2[4] * inp2[2]; + a[7] += (int)book2[3] * inp2[3]; + a[7] += (int)book2[2] * inp2[4]; + a[7] += (int)book2[1] * inp2[5]; + a[7] += (int)book2[0] * inp2[6]; + a[7] += (int)inp2[7] * (int)2048; + + for (j = 0; j < 8; j++) { + a[j ^ S] >>= 11; + a[j ^ S] = clamp_s16(a[j ^ S]); + *(out++) = a[j ^ S]; + } + l1 = a[6]; + l2 = a[7]; + + count -= 32; + } + out -= 16; + memcpy(&rsp.RDRAM[Address], out, 32); +} + +/* TODO memcpy causes static... endianess issue :( */ +static void LOADBUFF(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0; + if (AudioCount == 0) + return; + v0 = (inst2 & 0xfffffc); + memcpy(BufferSpace + (AudioInBuffer & 0xFFFC), rsp.RDRAM + v0, (AudioCount + 3) & 0xFFFC); +} + +/* TODO memcpy causes static... 
endianess issue :( */ +static void SAVEBUFF(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0; + if (AudioCount == 0) + return; + v0 = (inst2 & 0xfffffc); + memcpy(rsp.RDRAM + v0, BufferSpace + (AudioOutBuffer & 0xFFFC), (AudioCount + 3) & 0xFFFC); +} + +/* NOTE Should work ;-) */ +static void SETBUFF(uint32_t inst1, uint32_t inst2) +{ + if ((inst1 >> 0x10) & 0x8) { + /* A_AUX - Auxillary Sound Buffer Settings */ + AudioAuxA = (uint16_t)(inst1); + AudioAuxC = (uint16_t)((inst2 >> 0x10)); + AudioAuxE = (uint16_t)(inst2); + } else { + /* A_MAIN - Main Sound Buffer Settings */ + AudioInBuffer = (uint16_t)(inst1); /* 0x00 */ + AudioOutBuffer = (uint16_t)((inst2 >> 0x10)); /* 0x02 */ + AudioCount = (uint16_t)(inst2); /* 0x04 */ + } +} + +/* TODO Doesn't sound just right?... will fix when HLE is ready - 03-11-01 */ +static void DMEMMOVE(uint32_t inst1, uint32_t inst2) +{ + uint32_t cnt; + uint32_t v0 = (inst1 & 0xFFFF); + uint32_t v1 = (inst2 >> 0x10); + uint32_t count = ((inst2 + 3) & 0xfffc); + + if ((inst2 & 0xffff) == 0) + return; + + for (cnt = 0; cnt < count; cnt++) + *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8)); +} + +/* NOTE Loads an ADPCM table - Works 100% Now 03-13-01 */ +static void LOADADPCM(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0 = (inst2 & 0xffffff); + uint32_t x; + + uint16_t *table = (uint16_t *)(rsp.RDRAM + v0); + for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) { + adpcmtable[(0x0 + (x << 3))^S] = table[0]; + adpcmtable[(0x1 + (x << 3))^S] = table[1]; + + adpcmtable[(0x2 + (x << 3))^S] = table[2]; + adpcmtable[(0x3 + (x << 3))^S] = table[3]; + + adpcmtable[(0x4 + (x << 3))^S] = table[4]; + adpcmtable[(0x5 + (x << 3))^S] = table[5]; + + adpcmtable[(0x6 + (x << 3))^S] = table[6]; + adpcmtable[(0x7 + (x << 3))^S] = table[7]; + table += 8; + } +} + + +/* NOTE Works... 
- 3-11-01 */ +static void INTERLEAVE(uint32_t inst1, uint32_t inst2) +{ + uint32_t inL, inR; + uint16_t *outbuff = (uint16_t *)(AudioOutBuffer + BufferSpace); + uint16_t *inSrcR; + uint16_t *inSrcL; + uint16_t Left, Right, Left2, Right2; + int x; + + inL = inst2 & 0xFFFF; + inR = (inst2 >> 16) & 0xFFFF; + + inSrcR = (uint16_t *)(BufferSpace + inR); + inSrcL = (uint16_t *)(BufferSpace + inL); + + for (x = 0; x < (AudioCount / 4); x++) { + Left = *(inSrcL++); + Right = *(inSrcR++); + Left2 = *(inSrcL++); + Right2 = *(inSrcR++); + +#ifdef M64P_BIG_ENDIAN + *(outbuff++) = Right; + *(outbuff++) = Left; + *(outbuff++) = Right2; + *(outbuff++) = Left2; +#else + *(outbuff++) = Right2; + *(outbuff++) = Left2; + *(outbuff++) = Right; + *(outbuff++) = Left; +#endif + } +} + +/* NOTE Fixed a sign issue... 03-14-01 */ +static void MIXER(uint32_t inst1, uint32_t inst2) +{ + uint32_t dmemin = (uint16_t)(inst2 >> 0x10); + uint32_t dmemout = (uint16_t)(inst2 & 0xFFFF); + int32_t gain = (int16_t)(inst1 & 0xFFFF); + int32_t temp; + int x; + + if (AudioCount == 0) + return; + + for (x = 0; x < AudioCount; x += 2) { /* I think I can do this a lot easier */ + temp = (*(int16_t *)(BufferSpace + dmemin + x) * gain) >> 15; + temp += *(int16_t *)(BufferSpace + dmemout + x); + + temp = clamp_s16((int32_t)temp); + + *(uint16_t *)(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF); + } +} + +/* TOP Performance Hogs: + * Command: ADPCM - Calls: 48 - Total Time: 331226 - Avg Time: 6900.54 - Percent: 31.53% + * Command: ENVMIXER - Calls: 48 - Total Time: 408563 - Avg Time: 8511.73 - Percent: 38.90% + * Command: LOADBUFF - Calls: 56 - Total Time: 21551 - Avg Time: 384.84 - Percent: 2.05% + * Command: RESAMPLE - Calls: 48 - Total Time: 225922 - Avg Time: 4706.71 - Percent: 21.51% + * + * Command: ADPCM - Calls: 48 - Total Time: 391600 - Avg Time: 8158.33 - Percent: 32.52% + * Command: ENVMIXER - Calls: 48 - Total Time: 444091 - Avg Time: 9251.90 - Percent: 36.88% + * Command: LOADBUFF - Calls: 
58 - Total Time: 29945 - Avg Time: 516.29 - Percent: 2.49% + * Command: RESAMPLE - Calls: 48 - Total Time: 276354 - Avg Time: 5757.38 - Percent: 22.95% + */ + +/* NOTE Top performance hogs: MIXER, RESAMPLE, ENVMIXER. ABI1 below is the 16-entry (0x10) table of acmd_callback_t handlers for this ucode; slots 7 and 14 hold the UNKNOWN handler. NOTE(review): presumably indexed by the audio command id taken from the top byte of inst1 - confirm against the dispatcher. */ +const acmd_callback_t ABI1[0x10] = { + SPNOOP , ADPCM , CLEARBUFF, ENVMIXER , LOADBUFF, RESAMPLE , SAVEBUFF, UNKNOWN, + SETBUFF, SETVOL, DMEMMOVE , LOADADPCM , MIXER , INTERLEAVE, UNKNOWN , SETLOOP +}; diff --git a/source/mupen64plus-rsp-hle/src/ucode2.c b/source/mupen64plus-rsp-hle/src/ucode2.c new file mode 100755 index 0000000..0299fb8 --- /dev/null +++ b/source/mupen64plus-rsp-hle/src/ucode2.c @@ -0,0 +1,882 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus-rsp-hle - ucode2.c * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2009 Richard Goedeken * + * Copyright (C) 2002 Hacktarux * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include + +#include "m64p_plugin.h" +#include "m64p_types.h" +#include "hle.h" +#include "alist_internal.h" +#include "alist.h" + +static void SPNOOP(uint32_t inst1, uint32_t inst2) +{ + DebugMessage(M64MSG_ERROR, "Unknown/Unimplemented Audio Command %i in ABI 2", (int)(inst1 >> 24)); +} + + +static bool isMKABI = false; +static bool isZeldaABI = false; + +void init_ucode2(void) +{ + isMKABI = isZeldaABI = false; +} + +/* Loads an ADPCM table + * NOTE Works 100% Now 03-13-01 + */ +static void LOADADPCM2(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0 = (inst2 & 0xffffff); + uint32_t x; + /* Zelda2 Specific... */ + uint16_t *table = (uint16_t *)(rsp.RDRAM + v0); + + for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) { + adpcmtable[(0x0 + (x << 3))^S] = table[0]; + adpcmtable[(0x1 + (x << 3))^S] = table[1]; + + adpcmtable[(0x2 + (x << 3))^S] = table[2]; + adpcmtable[(0x3 + (x << 3))^S] = table[3]; + + adpcmtable[(0x4 + (x << 3))^S] = table[4]; + adpcmtable[(0x5 + (x << 3))^S] = table[5]; + + adpcmtable[(0x6 + (x << 3))^S] = table[6]; + adpcmtable[(0x7 + (x << 3))^S] = table[7]; + table += 8; + } +} + +static void SETLOOP2(uint32_t inst1, uint32_t inst2) +{ + loopval = inst2 & 0xffffff; /* No segment? */ +} + +static void SETBUFF2(uint32_t inst1, uint32_t inst2) +{ + AudioInBuffer = (uint16_t)(inst1); /* 0x00 */ + AudioOutBuffer = (uint16_t)((inst2 >> 0x10)); /* 0x02 */ + AudioCount = (uint16_t)(inst2); /* 0x04 */ +} + +/* NOTE Verified to be 100% Accurate... 
*/ +static void ADPCM2(uint32_t inst1, uint32_t inst2) +{ + unsigned char Flags = (uint8_t)(inst1 >> 16) & 0xff; + unsigned int Address = (inst2 & 0xffffff); + unsigned short inPtr = 0; + short *out = (short *)(BufferSpace + AudioOutBuffer); + short count = (short)AudioCount; + unsigned char icode; + unsigned char code; + int vscale; + unsigned short index; + unsigned short j; + int a[8]; + short *book1, *book2; + + uint8_t srange; + uint8_t mask1; + uint8_t mask2; + uint8_t shifter; + + int l1; + int l2; + int inp1[8]; + int inp2[8]; + + memset(out, 0, 32); + + /* Tricky lil Zelda MM and ABI2!!! hahaha I know your secrets! :DDD */ + if (Flags & 0x4) { + srange = 0xE; + mask1 = 0xC0; + mask2 = 0x30; + shifter = 10; + } else { + srange = 0xC; + mask1 = 0xf0; + mask2 = 0x0f; + shifter = 12; + } + + if (!(Flags & 0x1)) { + if (Flags & 0x2) + memcpy(out, &rsp.RDRAM[loopval], 32); + else + memcpy(out, &rsp.RDRAM[Address], 32); + } + + l1 = out[14 ^ S]; + l2 = out[15 ^ S]; + out += 16; + while (count > 0) { + code = BufferSpace[(AudioInBuffer + inPtr)^S8]; + index = code & 0xf; + index <<= 4; + book1 = (short *)&adpcmtable[index]; + book2 = book1 + 8; + code >>= 4; + vscale = (0x8000 >> ((srange - code) - 1)); + + inPtr++; + j = 0; + + while (j < 8) { + icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; + inPtr++; + + /* this will in effect be signed */ + inp1[j] = (int16_t)((icode & mask1) << 8); + if (code < srange) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + + inp1[j] = (int16_t)((icode & mask2) << shifter); + if (code < srange) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + + if (Flags & 4) { + /* this will in effect be signed */ + inp1[j] = (int16_t)((icode & 0xC) << 12); + if (code < 0xE) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + + inp1[j] = (int16_t)((icode & 0x3) << 14); + if (code < 0xE) + inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); + j++; + } + } + + + + j = 0; + while (j < 8) { + icode = 
BufferSpace[(AudioInBuffer + inPtr)^S8]; + inPtr++; + + inp2[j] = (int16_t)((icode & mask1) << 8); + if (code < srange) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + + inp2[j] = (int16_t)((icode & mask2) << shifter); + if (code < srange) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + + if (Flags & 4) { + inp2[j] = (int16_t)((icode & 0xC) << 12); + if (code < 0xE) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + + inp2[j] = (int16_t)((icode & 0x3) << 14); + if (code < 0xE) + inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); + j++; + } + } + + a[0] = (int)book1[0] * (int)l1; + a[0] += (int)book2[0] * (int)l2; + a[0] += (int)inp1[0] * (int)2048; + + a[1] = (int)book1[1] * (int)l1; + a[1] += (int)book2[1] * (int)l2; + a[1] += (int)book2[0] * inp1[0]; + a[1] += (int)inp1[1] * (int)2048; + + a[2] = (int)book1[2] * (int)l1; + a[2] += (int)book2[2] * (int)l2; + a[2] += (int)book2[1] * inp1[0]; + a[2] += (int)book2[0] * inp1[1]; + a[2] += (int)inp1[2] * (int)2048; + + a[3] = (int)book1[3] * (int)l1; + a[3] += (int)book2[3] * (int)l2; + a[3] += (int)book2[2] * inp1[0]; + a[3] += (int)book2[1] * inp1[1]; + a[3] += (int)book2[0] * inp1[2]; + a[3] += (int)inp1[3] * (int)2048; + + a[4] = (int)book1[4] * (int)l1; + a[4] += (int)book2[4] * (int)l2; + a[4] += (int)book2[3] * inp1[0]; + a[4] += (int)book2[2] * inp1[1]; + a[4] += (int)book2[1] * inp1[2]; + a[4] += (int)book2[0] * inp1[3]; + a[4] += (int)inp1[4] * (int)2048; + + a[5] = (int)book1[5] * (int)l1; + a[5] += (int)book2[5] * (int)l2; + a[5] += (int)book2[4] * inp1[0]; + a[5] += (int)book2[3] * inp1[1]; + a[5] += (int)book2[2] * inp1[2]; + a[5] += (int)book2[1] * inp1[3]; + a[5] += (int)book2[0] * inp1[4]; + a[5] += (int)inp1[5] * (int)2048; + + a[6] = (int)book1[6] * (int)l1; + a[6] += (int)book2[6] * (int)l2; + a[6] += (int)book2[5] * inp1[0]; + a[6] += (int)book2[4] * inp1[1]; + a[6] += (int)book2[3] * inp1[2]; + a[6] += (int)book2[2] * inp1[3]; + a[6] += (int)book2[1] 
* inp1[4]; + a[6] += (int)book2[0] * inp1[5]; + a[6] += (int)inp1[6] * (int)2048; + + a[7] = (int)book1[7] * (int)l1; + a[7] += (int)book2[7] * (int)l2; + a[7] += (int)book2[6] * inp1[0]; + a[7] += (int)book2[5] * inp1[1]; + a[7] += (int)book2[4] * inp1[2]; + a[7] += (int)book2[3] * inp1[3]; + a[7] += (int)book2[2] * inp1[4]; + a[7] += (int)book2[1] * inp1[5]; + a[7] += (int)book2[0] * inp1[6]; + a[7] += (int)inp1[7] * (int)2048; + + for (j = 0; j < 8; j++) { + a[j ^ S] >>= 11; + a[j ^ S] = clamp_s16(a[j ^ S]); + *(out++) = a[j ^ S]; + } + l1 = a[6]; + l2 = a[7]; + + a[0] = (int)book1[0] * (int)l1; + a[0] += (int)book2[0] * (int)l2; + a[0] += (int)inp2[0] * (int)2048; + + a[1] = (int)book1[1] * (int)l1; + a[1] += (int)book2[1] * (int)l2; + a[1] += (int)book2[0] * inp2[0]; + a[1] += (int)inp2[1] * (int)2048; + + a[2] = (int)book1[2] * (int)l1; + a[2] += (int)book2[2] * (int)l2; + a[2] += (int)book2[1] * inp2[0]; + a[2] += (int)book2[0] * inp2[1]; + a[2] += (int)inp2[2] * (int)2048; + + a[3] = (int)book1[3] * (int)l1; + a[3] += (int)book2[3] * (int)l2; + a[3] += (int)book2[2] * inp2[0]; + a[3] += (int)book2[1] * inp2[1]; + a[3] += (int)book2[0] * inp2[2]; + a[3] += (int)inp2[3] * (int)2048; + + a[4] = (int)book1[4] * (int)l1; + a[4] += (int)book2[4] * (int)l2; + a[4] += (int)book2[3] * inp2[0]; + a[4] += (int)book2[2] * inp2[1]; + a[4] += (int)book2[1] * inp2[2]; + a[4] += (int)book2[0] * inp2[3]; + a[4] += (int)inp2[4] * (int)2048; + + a[5] = (int)book1[5] * (int)l1; + a[5] += (int)book2[5] * (int)l2; + a[5] += (int)book2[4] * inp2[0]; + a[5] += (int)book2[3] * inp2[1]; + a[5] += (int)book2[2] * inp2[2]; + a[5] += (int)book2[1] * inp2[3]; + a[5] += (int)book2[0] * inp2[4]; + a[5] += (int)inp2[5] * (int)2048; + + a[6] = (int)book1[6] * (int)l1; + a[6] += (int)book2[6] * (int)l2; + a[6] += (int)book2[5] * inp2[0]; + a[6] += (int)book2[4] * inp2[1]; + a[6] += (int)book2[3] * inp2[2]; + a[6] += (int)book2[2] * inp2[3]; + a[6] += (int)book2[1] * inp2[4]; + a[6] += 
(int)book2[0] * inp2[5]; + a[6] += (int)inp2[6] * (int)2048; + + a[7] = (int)book1[7] * (int)l1; + a[7] += (int)book2[7] * (int)l2; + a[7] += (int)book2[6] * inp2[0]; + a[7] += (int)book2[5] * inp2[1]; + a[7] += (int)book2[4] * inp2[2]; + a[7] += (int)book2[3] * inp2[3]; + a[7] += (int)book2[2] * inp2[4]; + a[7] += (int)book2[1] * inp2[5]; + a[7] += (int)book2[0] * inp2[6]; + a[7] += (int)inp2[7] * (int)2048; + + for (j = 0; j < 8; j++) { + a[j ^ S] >>= 11; + a[j ^ S] = clamp_s16(a[j ^ S]); + *(out++) = a[j ^ S]; + } + l1 = a[6]; + l2 = a[7]; + + count -= 32; + } + out -= 16; + memcpy(&rsp.RDRAM[Address], out, 32); +} + +static void CLEARBUFF2(uint32_t inst1, uint32_t inst2) +{ + uint16_t addr = (uint16_t)(inst1 & 0xffff); + uint16_t count = (uint16_t)(inst2 & 0xffff); + if (count > 0) + memset(BufferSpace + addr, 0, count); +} + +/* TODO Needs accuracy verification... */ +static void LOADBUFF2(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0; + uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); + v0 = (inst2 & 0xfffffc); + memcpy(BufferSpace + (inst1 & 0xfffc), rsp.RDRAM + v0, (cnt + 3) & 0xFFFC); +} + +/* TODO Needs accuracy verification... */ +static void SAVEBUFF2(uint32_t inst1, uint32_t inst2) +{ + uint32_t v0; + uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); + v0 = (inst2 & 0xfffffc); + memcpy(rsp.RDRAM + v0, BufferSpace + (inst1 & 0xfffc), (cnt + 3) & 0xFFFC); +} + +/* TODO Needs accuracy verification... 
*/
+/* MIXER2 - mix one DMEM buffer into another with a signed gain.
+ *
+ * inst1: (inst1 >> 12) & 0xFF0 is the byte count; the low 16 bits are the
+ *        signed gain.
+ * inst2: high half = source byte offset, low half = destination byte offset
+ *        (both relative to BufferSpace).
+ *
+ * Every 16-bit source sample is multiplied by the gain, shifted right by 15,
+ * added to the destination sample, passed through clamp_s16() and stored
+ * back into the destination buffer.
+ */
+static void MIXER2(uint32_t inst1, uint32_t inst2)
+{
+    uint16_t dmemin = (uint16_t)(inst2 >> 0x10);
+    uint16_t dmemout = (uint16_t)(inst2 & 0xFFFF);
+    uint32_t count = ((inst1 >> 12) & 0xFF0);
+    int32_t gain = (int16_t)(inst1 & 0xFFFF);
+    int32_t temp;
+    unsigned int x;
+
+    for (x = 0; x < count; x += 2) {
+        /* TODO I think I can do this a lot easier */
+        temp = (*(int16_t *)(BufferSpace + dmemin + x) * gain) >> 15;
+        temp += *(int16_t *)(BufferSpace + dmemout + x);
+
+        temp = clamp_s16((int32_t)temp);
+
+        *(uint16_t *)(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF);
+    }
+}
+
+
+/* RESAMPLE2 - 4-tap resampler driven by ResampleLUT.
+ *
+ * inst1: bits 16..23 = flags, low 16 bits = pitch (doubled before use).
+ * inst2: low 24 bits = RDRAM address of the saved resampler state.
+ *
+ * Reads from AudioInBuffer and writes ((AudioCount + 0xf) & 0xFFF0) / 2
+ * samples to AudioOutBuffer.  When flag bit 0 is clear, the four samples
+ * preceding the input and the fractional accumulator (stored at state + 10)
+ * are restored from RDRAM; otherwise the four history samples are zeroed
+ * and the accumulator starts at 0.  The final history and accumulator are
+ * always written back to the state area.
+ */
+static void RESAMPLE2(uint32_t inst1, uint32_t inst2)
+{
+    unsigned char Flags = (uint8_t)((inst1 >> 16) & 0xff);
+    unsigned int Pitch = ((inst1 & 0xffff)) << 1;
+    uint32_t addy = (inst2 & 0xffffff);
+    unsigned int Accum = 0;
+    unsigned int location;
+    int16_t *lut;
+    short *dst = (short *)(BufferSpace);
+    int16_t *src = (int16_t *)(BufferSpace);
+    uint32_t srcPtr = (AudioInBuffer / 2);
+    uint32_t dstPtr = (AudioOutBuffer / 2);
+    int32_t temp;
+    int32_t accum;
+    int x, i;
+
+    /* the filter looks at the 4 samples in front of the input buffer */
+    srcPtr -= 4;
+
+    if ((Flags & 0x1) == 0) {
+        for (x = 0; x < 4; x++)
+            src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S];
+        Accum = *(uint16_t *)(rsp.RDRAM + addy + 10);
+    } else {
+        for (x = 0; x < 4; x++)
+            src[(srcPtr + x)^S] = 0;
+    }
+
+    for (i = 0; i < ((AudioCount + 0xf) & 0xFFF0) / 2; i++) {
+        /* top 6 bits of the 16-bit fraction select an 8-byte LUT row */
+        location = (((Accum * 0x40) >> 0x10) * 8);
+        lut = (int16_t *)(((uint8_t *)ResampleLUT) + location);
+
+        /* each tap is scaled down by 15 bits before being summed */
+        accum = 0;
+        for (x = 0; x < 4; x++) {
+            temp = (int32_t)src[(srcPtr + x)^S] * (int32_t)lut[x];
+            accum += (int32_t)(temp >> 15);
+        }
+
+        accum = clamp_s16(accum);
+
+        dst[dstPtr ^ S] = (int16_t)(accum);
+        dstPtr++;
+        Accum += Pitch;
+        srcPtr += (Accum >> 16);
+        Accum &= 0xffff;
+    }
+    for (x = 0; x < 4; x++)
+        ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S];
+    *(uint16_t *)(rsp.RDRAM + addy + 10) = (uint16_t)Accum;
+}
+
+/* TODO Needs accuracy verification... */
+/* DMEMMOVE2 - byte-wise copy inside BufferSpace.
+ *
+ * inst1 low half = source offset, inst2 high half = destination offset,
+ * inst2 low half = length (rounded up to a multiple of 4).  Nothing is
+ * copied when the length field is zero.  Addresses are XORed with S8.
+ */
+static void DMEMMOVE2(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t cnt;
+    uint32_t v0 = (inst1 & 0xFFFF);
+    uint32_t v1 = (inst2 >> 0x10);
+    uint32_t count = ((inst2 + 3) & 0xfffc);
+
+    if ((inst2 & 0xffff) == 0)
+        return;
+
+    for (cnt = 0; cnt < count; cnt++)
+        *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8));
+}
+
+/* Envelope state shared by ENVSETUP1, ENVSETUP2 and ENVMIXER2:
+ * env[0..5] are the current gains, s5/s6/t3 the per-iteration increments
+ * that ENVMIXER2 adds to env[0..1], env[2..3] and env[4..5] respectively.
+ */
+static uint32_t t3, s5, s6;
+static uint16_t env[8];
+
+/* ENVSETUP1 - latch t3, s5, s6 and the initial env[4]/env[5] values. */
+static void ENVSETUP1(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t tmp;
+
+    t3 = inst1 & 0xFFFF;
+    tmp = (inst1 >> 0x8) & 0xFF00;
+    env[4] = (uint16_t)tmp;
+    tmp += t3;
+    env[5] = (uint16_t)tmp;
+    s5 = inst2 >> 0x10;
+    s6 = inst2 & 0xFFFF;
+}
+
+/* ENVSETUP2 - set env[0..3] from inst2; env[1] and env[3] are the values of
+ * env[0] and env[2] advanced by the s5/s6 steps latched by ENVSETUP1.
+ */
+static void ENVSETUP2(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t tmp;
+
+    tmp = (inst2 >> 0x10);
+    env[0] = (uint16_t)tmp;
+    tmp += s5;
+    env[1] = (uint16_t)tmp;
+    tmp = inst2 & 0xffff;
+    env[2] = (uint16_t)tmp;
+    tmp += s6;
+    env[3] = (uint16_t)tmp;
+}
+
+/* Mix the 8 samples [first, first + 8) of buffs3 into the four outputs.
+ *
+ * gain9/gain10 scale the input for bufft6/bufft7, gain_aux scales those two
+ * results again for buffs0/buffs1.  v2[0..3] are XORed onto the four scaled
+ * values.  When swap is non-zero the two values feeding buffs0 and buffs1
+ * are exchanged.
+ */
+static void envmixer2_mix8(const int16_t *buffs3,
+                           int16_t *bufft6, int16_t *bufft7,
+                           int16_t *buffs0, int16_t *buffs1,
+                           int first, uint16_t gain9, uint16_t gain10,
+                           uint16_t gain_aux, const int16_t *v2, int swap)
+{
+    int16_t vec9, vec10;
+    int temp, x;
+
+    for (x = first; x < first + 8; x++) {
+        vec9 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)gain9) >> 0x10) ^ v2[0];
+        vec10 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)gain10) >> 0x10) ^ v2[1];
+        temp = bufft6[x ^ S] + vec9;
+        temp = clamp_s16(temp);
+        bufft6[x ^ S] = temp;
+        temp = bufft7[x ^ S] + vec10;
+        temp = clamp_s16(temp);
+        bufft7[x ^ S] = temp;
+        vec9 = (int16_t)(((int32_t)vec9 * (uint32_t)gain_aux) >> 0x10) ^ v2[2];
+        vec10 = (int16_t)(((int32_t)vec10 * (uint32_t)gain_aux) >> 0x10) ^ v2[3];
+        if (swap) {
+            temp = buffs0[x ^ S] + vec10;
+            temp = clamp_s16(temp);
+            buffs0[x ^ S] = temp;
+            temp = buffs1[x ^ S] + vec9;
+            temp = clamp_s16(temp);
+            buffs1[x ^ S] = temp;
+        } else {
+            temp = buffs0[x ^ S] + vec9;
+            temp = clamp_s16(temp);
+            buffs0[x ^ S] = temp;
+            temp = buffs1[x ^ S] + vec10;
+            temp = clamp_s16(temp);
+            buffs1[x ^ S] = temp;
+        }
+    }
+}
+
+/* ENVMIXER2 - envelope mixer using the state set by ENVSETUP1/ENVSETUP2.
+ *
+ * inst1: bits 12..19 select the input buffer, bits 8..15 the sample count,
+ *        bit 4 swaps the buffs0/buffs1 sources, bits 0..3 build v2[].
+ * inst2: four 8-bit fields select the bufft6, bufft7, buffs0 and buffs1
+ *        buffers (each field is a multiple-of-16 byte offset).
+ *
+ * Outside the MK ABI the steps s5/s6/t3 are doubled on every call and 16
+ * samples are processed per iteration (env[0,2,4] for the first 8,
+ * env[1,3,5] for the next 8).  With isMKABI set only 8 samples are processed
+ * per iteration, the swap bit is ignored and t3 is forced to 0.
+ *
+ * NOTE(review): v2[2] and v2[3] evaluate to 0/-4 and 0/-2 rather than 0/-1
+ * because both use ">> 1"; kept as in the original - confirm against the
+ * microcode before changing.
+ */
+static void ENVMIXER2(uint32_t inst1, uint32_t inst2)
+{
+    int16_t *bufft6, *bufft7, *buffs0, *buffs1;
+    int16_t *buffs3;
+    int32_t count;
+    uint32_t adder;
+    int16_t v2[4];
+    int swap;
+
+    buffs3 = (int16_t *)(BufferSpace + ((inst1 >> 0x0c) & 0x0ff0));
+    bufft6 = (int16_t *)(BufferSpace + ((inst2 >> 0x14) & 0x0ff0));
+    bufft7 = (int16_t *)(BufferSpace + ((inst2 >> 0x0c) & 0x0ff0));
+    buffs0 = (int16_t *)(BufferSpace + ((inst2 >> 0x04) & 0x0ff0));
+    buffs1 = (int16_t *)(BufferSpace + ((inst2 << 0x04) & 0x0ff0));
+
+    v2[0] = 0 - (int16_t)((inst1 & 0x2) >> 1);
+    v2[1] = 0 - (int16_t)((inst1 & 0x1));
+    v2[2] = 0 - (int16_t)((inst1 & 0x8) >> 1);
+    v2[3] = 0 - (int16_t)((inst1 & 0x4) >> 1);
+
+    count = (inst1 >> 8) & 0xff;
+
+    if (!isMKABI) {
+        s5 *= 2;
+        s6 *= 2;
+        t3 *= 2;
+        adder = 0x10;
+    } else {
+        inst1 = 0;
+        adder = 0x8;
+        t3 = 0;
+    }
+
+    /* evaluated after the branch above: the MK ABI clears inst1 */
+    swap = (inst1 & 0x10) != 0;
+
+    while (count > 0) {
+        envmixer2_mix8(buffs3, bufft6, bufft7, buffs0, buffs1,
+                       0x0, env[0], env[2], env[4], v2, swap);
+
+        if (!isMKABI)
+            envmixer2_mix8(buffs3, bufft6, bufft7, buffs0, buffs1,
+                           0x8, env[1], env[3], env[5], v2, swap);
+
+        bufft6 += adder;
+        bufft7 += adder;
+        buffs0 += adder;
+        buffs1 += adder;
+        buffs3 += adder;
+        count -= adder;
+        env[0] += (uint16_t)s5;
+        env[1] += (uint16_t)s5;
+        env[2] += (uint16_t)s6;
+        env[3] += (uint16_t)s6;
+        env[4] += (uint16_t)t3;
+        env[5] += (uint16_t)t3;
+    }
+}
+
+/* DUPLICATE2 - copy the 128 bytes at offset In to Count consecutive
+ * 128-byte slots starting at offset Out.  The source is snapshotted first,
+ * so overlapping source/destination ranges are safe.
+ */
+static void DUPLICATE2(uint32_t inst1, uint32_t inst2)
+{
+    unsigned short Count = (inst1 >> 16) & 0xff;
+    unsigned short In = inst1 & 0xffff;
+    unsigned short Out = (inst2 >> 16);
+
+    unsigned short buff[64];
+
+    memcpy(buff, BufferSpace + In, 128);
+
+    while (Count) {
+        memcpy(BufferSpace + Out, buff, 128);
+        Out += 128;
+        Count--;
+    }
+}
+
+/* INTERL2 - copy Count 16-bit values from In to Out, advancing the source
+ * by 4 bytes and the destination by 2 bytes per value (i.e. every second
+ * 16-bit word of the source is taken).
+ */
+static void INTERL2(uint32_t inst1, uint32_t inst2)
+{
+    short Count = inst1 & 0xffff;
+    unsigned short Out = inst2 & 0xffff;
+    unsigned short In = (inst2 >> 16);
+
+    unsigned char *src, *dst;
+    src = (unsigned char *)(BufferSpace); /* [In]; */
+    dst = (unsigned char *)(BufferSpace); /* [Out]; */
+    while (Count) {
+        *(short *)(dst + (Out ^ S8)) = *(short *)(src + (In ^ S8));
+        Out += 2;
+        In += 4;
+        Count--;
+    }
+}
+
+/* TODO Needs accuracy verification...
*/
+/* INTERLEAVE2 - interleave two mono buffers into one stereo buffer.
+ *
+ * inst1: (inst1 >> 12) & 0xFF0 is the output byte count and the low 16 bits
+ *        the output offset; a count of 0 selects AudioOutBuffer/AudioCount.
+ * inst2: high half = left input offset, low half = right input offset.
+ *
+ * Two samples per channel are consumed per iteration; the order in which
+ * they are emitted depends on M64P_BIG_ENDIAN.
+ */
+static void INTERLEAVE2(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t inL, inR;
+    uint16_t *outbuff;
+    uint16_t *inSrcR;
+    uint16_t *inSrcL;
+    uint16_t Left, Right, Left2, Right2;
+    uint32_t count;
+    uint32_t x;
+
+    count = ((inst1 >> 12) & 0xFF0);
+    if (count == 0) {
+        outbuff = (uint16_t *)(AudioOutBuffer + BufferSpace);
+        count = AudioCount;
+    } else {
+        outbuff = (uint16_t *)((inst1 & 0xFFFF) + BufferSpace);
+    }
+
+    inR = inst2 & 0xFFFF;
+    inL = (inst2 >> 16) & 0xFFFF;
+
+    inSrcR = (uint16_t *)(BufferSpace + inR);
+    inSrcL = (uint16_t *)(BufferSpace + inL);
+
+    for (x = 0; x < (count / 4); x++) {
+        Left = *(inSrcL++);
+        Right = *(inSrcR++);
+        Left2 = *(inSrcL++);
+        Right2 = *(inSrcR++);
+
+#ifdef M64P_BIG_ENDIAN
+        *(outbuff++) = Right;
+        *(outbuff++) = Left;
+        *(outbuff++) = Right2;
+        *(outbuff++) = Left2;
+#else
+        *(outbuff++) = Right2;
+        *(outbuff++) = Left2;
+        *(outbuff++) = Right;
+        *(outbuff++) = Left;
+#endif
+    }
+}
+
+/* ADDMIXER - add the buffer at inst2's high half into the buffer at inst2's
+ * low half, sample by sample, passing each sum through clamp_s16().
+ * (inst1 >> 12) & 0xff0 is the byte count.
+ */
+static void ADDMIXER(uint32_t inst1, uint32_t inst2)
+{
+    short Count = (inst1 >> 12) & 0x00ff0;
+    uint16_t InBuffer = (inst2 >> 16);
+    uint16_t OutBuffer = inst2 & 0xffff;
+    int cntr;
+
+    int16_t *inp, *outp;
+    int32_t temp;
+    inp = (int16_t *)(BufferSpace + InBuffer);
+    outp = (int16_t *)(BufferSpace + OutBuffer);
+    for (cntr = 0; cntr < Count; cntr += 2) {
+        temp = *outp + *inp;
+        temp = clamp_s16(temp);
+        *(outp++) = temp;
+        inp++;
+    }
+}
+
+/* HILOGAIN - scale a buffer in place by a split gain.
+ *
+ * inst1: low 16 bits = byte count, (inst1 >> 4) & 0xf000 = "hi" part
+ *        (signed), (inst1 >> 20) & 0xf = "lo" part.
+ * inst2: high half = buffer offset.
+ *
+ * Each sample becomes clamp_s16(((val * hi) >> 16) + val * lo).
+ *
+ * The byte count is converted to a sample count up front.  The previous
+ * "cnt -= 2" loop never terminated for an odd byte count because the
+ * unsigned 16-bit counter wrapped past zero.
+ */
+static void HILOGAIN(uint32_t inst1, uint32_t inst2)
+{
+    uint16_t cnt = inst1 & 0xffff;
+    uint16_t out = (inst2 >> 16) & 0xffff;
+    int16_t hi = (int16_t)((inst1 >> 4) & 0xf000);
+    uint16_t lo = (inst1 >> 20) & 0xf;
+    int16_t *src = (int16_t *)(BufferSpace + out);
+    int32_t tmp, val;
+    uint16_t samples;
+
+    for (samples = cnt >> 1; samples != 0; samples--) {
+        val = (int32_t) * src;
+        tmp = ((val * (int32_t)hi) >> 16) + (uint32_t)(val * lo);
+        tmp = clamp_s16(tmp);
+        *src = tmp;
+        src++;
+    }
+}
+
+/* FILTER2 - 8-tap FIR filter over a DMEM buffer.
+ *
+ * Called with bits 16..23 of inst1 (t4) > 1 it only records the byte count
+ * (low 16 bits of inst1) and the coefficient table address (inst2, in RDRAM)
+ * for later calls.  Otherwise inst2 addresses a state area in RDRAM: the
+ * first 16 bytes hold the previous 8 input samples, the next 16 bytes a
+ * second coefficient set which is averaged with the recorded one (both
+ * tables are overwritten with the average).  The buffer at offset
+ * (inst1 & 0xffff) is filtered in place and the last 8 input samples are
+ * saved back to the state area.
+ *
+ * Samples and coefficients are addressed with index ^ 1, exactly as the
+ * hand-unrolled original did with its literal indices.
+ *
+ * NOTE(review): lutt6 is only valid after a t4 > 1 call, and outbuff holds
+ * 0x780 bytes while cnt comes unchecked from the command - confirm that
+ * callers cannot exceed it.
+ */
+static void FILTER2(uint32_t inst1, uint32_t inst2)
+{
+    static int cnt = 0;
+    static int16_t *lutt6;
+    static int16_t *lutt5;
+    uint8_t *save = (rsp.RDRAM + (inst2 & 0xFFFFFF));
+    uint8_t t4 = (uint8_t)((inst1 >> 0x10) & 0xFF);
+    int x;
+    short *inp1, *inp2;
+    int16_t outbuff[0x3c0], *outp;
+    uint32_t inPtr;
+
+    if (t4 > 1) {
+        /* Then set the cnt variable */
+        cnt = (inst1 & 0xFFFF);
+        lutt6 = (int16_t *)save;
+        return;
+    }
+
+    lutt5 = (short *)(save + 0x10);
+
+    for (x = 0; x < 8; x++) {
+        int32_t a;
+        a = (lutt5[x] + lutt6[x]) >> 1;
+        lutt5[x] = lutt6[x] = (short)a;
+    }
+    inPtr = (uint32_t)(inst1 & 0xffff);
+    inp1 = (short *)(save);
+    outp = outbuff;
+    inp2 = (short *)(BufferSpace + inPtr);
+    for (x = 0; x < cnt; x += 0x10) {
+        int m, k;
+
+        /* Logical output m uses logical inputs m+1 .. m+8 of the 16-sample
+         * window formed by inp1 (previous 8) followed by inp2 (current 8),
+         * with coefficients taken in reverse order.
+         */
+        for (m = 0; m < 8; m++) {
+            int32_t acc = 0;
+
+            for (k = 0; k < 8; k++) {
+                int n = m + 1 + k;
+                short sample = (n < 8) ? inp1[n ^ 1] : inp2[(n - 8) ^ 1];
+
+                acc += sample * lutt6[(7 - k) ^ 1];
+            }
+            outp[m ^ 1] = /*CLAMP*/((acc + 0x4000) >> 0xF);
+        }
+        inp1 = inp2;
+        inp2 += 8;
+        outp += 8;
+    }
+    memcpy(save, inp2 - 8, 0x10);
+    memcpy(BufferSpace + (inst1 & 0xffff), outbuff, cnt);
+}
+
+/* SEGMENT2 - command 7 doubles as ABI detection.
+ *
+ * Once isZeldaABI is set every call is forwarded to FILTER2.  Before that,
+ * a command whose low 24 bits of inst1 are zero sets isMKABI; any other
+ * value clears isMKABI, sets isZeldaABI and is handled by FILTER2.
+ */
+static void SEGMENT2(uint32_t inst1, uint32_t inst2)
+{
+    if (isZeldaABI) {
+        FILTER2(inst1, inst2);
+        return;
+    }
+    if ((inst1 & 0xffffff) == 0) {
+        isMKABI = true;
+    } else {
+        isMKABI = false;
+        isZeldaABI = true;
+        FILTER2(inst1, inst2);
+    }
+}
+
+/* UNKNOWN - placeholder for commands that are not implemented; does nothing. */
+static void UNKNOWN(uint32_t inst1, uint32_t inst2)
+{
+}
+
+/* Command dispatch table for this ABI, indexed by command number. */
+const acmd_callback_t ABI2[0x20] = {
+    SPNOOP,   ADPCM2,     CLEARBUFF2, UNKNOWN,    ADDMIXER,  RESAMPLE2,   UNKNOWN,  SEGMENT2,
+    SETBUFF2, DUPLICATE2, DMEMMOVE2,  LOADADPCM2, MIXER2,    INTERLEAVE2, HILOGAIN, SETLOOP2,
+    SPNOOP,   INTERL2,    ENVSETUP1,  ENVMIXER2,  LOADBUFF2, SAVEBUFF2,   ENVSETUP2, SPNOOP,
+    HILOGAIN, SPNOOP,     DUPLICATE2, UNKNOWN,    SPNOOP,    SPNOOP,      SPNOOP,   SPNOOP
+};
+/* NOTES:
+ *
+ * FILTER/SEGMENT - Still needs to be finished up... add FILTER?
+ * UNKNOWN #27 - Is this worth doing? Looks like a pain in the ass just for WaveRace64
+ */
+
diff --git a/source/mupen64plus-rsp-hle/src/ucode3.c b/source/mupen64plus-rsp-hle/src/ucode3.c
new file mode 100755
index 0000000..5ef735b
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/ucode3.c
@@ -0,0 +1,648 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Mupen64plus-rsp-hle - ucode3.c                                        *
+ *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
+ *   Copyright (C) 2009 Richard Goedeken                                   *
+ *   Copyright (C) 2002 Hacktarux                                          *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "m64p_plugin.h"
+#include "hle.h"
+#include "alist_internal.h"
+
+/* SETVOL3 - latch volume / ramp parameters for ENVMIXER3.
+ *
+ * Bits 16..23 of inst1 are the flags:
+ *   0x4 and 0x2 set : Vol_Left = inst1, Env_Dry = inst2 >> 16, Env_Wet = inst2
+ *   0x4 only        : VolTrg_Right = inst1, VolRamp_Right = inst2
+ *   0x4 clear       : VolTrg_Left  = inst1, VolRamp_Left  = inst2
+ * (16-bit values are taken from the low half of the word unless noted).
+ */
+static void SETVOL3(uint32_t inst1, uint32_t inst2)
+{
+    uint8_t Flags = (uint8_t)(inst1 >> 0x10);
+    if (Flags & 0x4) { /* 288 */
+        if (Flags & 0x2) { /* 290 */
+            Vol_Left  = (int16_t)inst1; /* 0x50 */
+            Env_Dry   = (int16_t)(inst2 >> 0x10); /* 0x4E */
+            Env_Wet   = (int16_t)inst2; /* 0x4C */
+        } else {
+            VolTrg_Right  = (int16_t)inst1; /* 0x46 */
+            VolRamp_Right = (int32_t)inst2; /* 0x48/0x4A */
+        }
+    } else {
+        VolTrg_Left  = (int16_t)inst1; /* 0x40 */
+        VolRamp_Left = (int32_t)inst2; /* 0x42/0x44 */
+    }
+}
+
+/* ENVMIXER3 - ramped-volume mixer working on fixed DMEM buffers.
+ *
+ * inst1: bits 16..23 = flags (A_INIT starts a new ramp), low 16 bits are
+ *        stored into Vol_Right.
+ * inst2: low 24 bits = RDRAM address of the 80-byte state block.
+ *
+ * Mixes the 0x170 / 2 input samples at BufferSpace + 0x4F0 into the buffers
+ * at 0x9D0 and 0xB40 using the Dry gain and into 0xCB0 and 0xE20 using the
+ * Wet gain, each combined with the left or right ramped volume.  Without
+ * A_INIT the ramp state is reloaded from RDRAM; it is always written back
+ * at the end.
+ */
+static void ENVMIXER3(uint32_t inst1, uint32_t inst2)
+{
+    uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff);
+    uint32_t addy = (inst2 & 0xFFFFFF);
+
+    short *inp  = (short *)(BufferSpace + 0x4F0);
+    short *out  = (short *)(BufferSpace + 0x9D0);
+    short *aux1 = (short *)(BufferSpace + 0xB40);
+    short *aux2 = (short *)(BufferSpace + 0xCB0);
+    short *aux3 = (short *)(BufferSpace + 0xE20);
+    int32_t MainR;
+    int32_t MainL;
+    int32_t AuxR;
+    int32_t AuxL;
+    int i1, o1, a1, a2, a3;
+    int y;
+
+    int32_t LAdder, LAcc, LVol;
+    int32_t RAdder, RAcc, RVol;
+    /* Most significant part of the Ramp Value */
+    int16_t RSig, LSig;
+    int16_t Wet, Dry;
+    int16_t LTrg, RTrg;
+
+    Vol_Right = (int16_t)inst1;
+
+    if (flags & A_INIT) {
+        LAdder = VolRamp_Left / 8;
+        LAcc = 0;
+        LVol = Vol_Left;
+        LSig = (int16_t)(VolRamp_Left >> 16);
+
+        RAdder = VolRamp_Right / 8;
+        RAcc = 0;
+        RVol = Vol_Right;
+        RSig = (int16_t)(VolRamp_Right >> 16);
+
+        /* Save Wet/Dry values */
+        Wet = (int16_t)Env_Wet;
+        Dry = (int16_t)Env_Dry;
+        /* Save Current Left/Right Targets */
+        LTrg = VolTrg_Left;
+        RTrg = VolTrg_Right;
+    } else {
+        memcpy((uint8_t *)hleMixerWorkArea, rsp.RDRAM + addy, 80);
+        Wet    = *(int16_t *)(hleMixerWorkArea +  0); /* 0-1 */
+        Dry    = *(int16_t *)(hleMixerWorkArea +  2); /* 2-3 */
+        LTrg   = *(int16_t *)(hleMixerWorkArea +  4); /* 4-5 */
+        RTrg   = *(int16_t *)(hleMixerWorkArea +  6); /* 6-7 */
+        LAdder = *(int32_t *)(hleMixerWorkArea +  8); /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
+        RAdder = *(int32_t *)(hleMixerWorkArea + 10); /* 10-11 */
+        LAcc   = *(int32_t *)(hleMixerWorkArea + 12); /* 12-13 */
+        RAcc   = *(int32_t *)(hleMixerWorkArea + 14); /* 14-15 */
+        LVol   = *(int32_t *)(hleMixerWorkArea + 16); /* 16-17 */
+        RVol   = *(int32_t *)(hleMixerWorkArea + 18); /* 18-19 */
+        LSig   = *(int16_t *)(hleMixerWorkArea + 20); /* 20-21 */
+        RSig   = *(int16_t *)(hleMixerWorkArea + 22); /* 22-23 */
+    }
+
+    for (y = 0; y < (0x170 / 2); y++) {
+
+        /* Left: advance the ramp, carry the integer part into the volume */
+        LAcc += LAdder;
+        LVol += (LAcc >> 16);
+        LAcc &= 0xFFFF;
+
+        /* Right */
+        RAcc += RAdder;
+        RVol += (RAcc >> 16);
+        RAcc &= 0xFFFF;
+
+        /* Clamp Left: stop at the target in the direction of the ramp */
+        if (LSig >= 0) { /* VLT */
+            if (LVol > LTrg)
+                LVol = LTrg;
+        } else { /* VGE */
+            if (LVol < LTrg)
+                LVol = LTrg;
+        }
+
+        /* Clamp Right */
+        if (RSig >= 0) { /* VLT */
+            if (RVol > RTrg)
+                RVol = RTrg;
+        } else { /* VGE */
+            if (RVol < RTrg)
+                RVol = RTrg;
+        }
+
+        /* Dry path: out / aux1 */
+        MainL = ((Dry * LVol) + 0x4000) >> 15;
+        MainR = ((Dry * RVol) + 0x4000) >> 15;
+
+        o1 = out [y ^ S];
+        a1 = aux1[y ^ S];
+        i1 = inp [y ^ S];
+
+        o1 += ((i1 * MainL) + 0x4000) >> 15;
+        a1 += ((i1 * MainR) + 0x4000) >> 15;
+
+        o1 = clamp_s16(o1);
+        a1 = clamp_s16(a1);
+
+        out[y ^ S] = o1;
+        aux1[y ^ S] = a1;
+
+        /* Wet path: aux2 / aux3 */
+        a2 = aux2[y ^ S];
+        a3 = aux3[y ^ S];
+
+        AuxL = ((Wet * LVol) + 0x4000) >> 15;
+        AuxR = ((Wet * RVol) + 0x4000) >> 15;
+
+        a2 += ((i1 * AuxL) + 0x4000) >> 15;
+        a3 += ((i1 * AuxR) + 0x4000) >> 15;
+
+        a2 = clamp_s16(a2);
+        a3 = clamp_s16(a3);
+
+        aux2[y ^ S] = a2;
+        aux3[y ^ S] = a3;
+    }
+
+    *(int16_t *)(hleMixerWorkArea +  0) = Wet; /* 0-1 */
+    *(int16_t *)(hleMixerWorkArea +  2) = Dry; /* 2-3 */
+    *(int16_t *)(hleMixerWorkArea +  4) = LTrg; /* 4-5 */
+    *(int16_t *)(hleMixerWorkArea +  6) = RTrg; /* 6-7 */
+    *(int32_t *)(hleMixerWorkArea +  8) = LAdder; /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
+    *(int32_t *)(hleMixerWorkArea + 10) = RAdder; /* 10-11 */
+    *(int32_t *)(hleMixerWorkArea + 12) = LAcc; /* 12-13 */
+    *(int32_t *)(hleMixerWorkArea + 14) = RAcc; /* 14-15 */
+    *(int32_t *)(hleMixerWorkArea + 16) = LVol; /* 16-17 */
+    *(int32_t *)(hleMixerWorkArea + 18) = RVol; /* 18-19 */
+    *(int16_t *)(hleMixerWorkArea + 20) = LSig; /* 20-21 */
+    *(int16_t *)(hleMixerWorkArea + 22) = RSig; /* 22-23 */
+    memcpy(rsp.RDRAM + addy, (uint8_t *)hleMixerWorkArea, 80);
+}
+
+/* CLEARBUFF3 - zero `count` bytes (low half of inst2) of BufferSpace,
+ * starting at 0x4f0 plus the offset in the low half of inst1.
+ */
+static void CLEARBUFF3(uint32_t inst1, uint32_t inst2)
+{
+    uint16_t addr = (uint16_t)(inst1 & 0xffff);
+    uint16_t count = (uint16_t)(inst2 & 0xffff);
+    memset(BufferSpace + addr + 0x4f0, 0, count);
+}
+
+/* TODO Needs accuracy verification...
*/
+/* MIXER3 - mix 0x170 bytes of one DMEM buffer into another.
+ *
+ * The high and low halves of inst2 are the source and destination offsets
+ * (both biased by 0x4f0); the low 16 bits of inst1 are the signed gain.
+ * Each source sample is multiplied by the gain, shifted right by 15, added
+ * to the destination sample and passed through clamp_s16().
+ */
+static void MIXER3(uint32_t inst1, uint32_t inst2)
+{
+    const uint16_t src_off = (uint16_t)(inst2 >> 0x10) + 0x4f0;
+    const uint16_t dst_off = (uint16_t)(inst2 & 0xFFFF) + 0x4f0;
+    const int32_t gain = (int16_t)(inst1 & 0xFFFF);
+    int pos = 0;
+
+    while (pos < 0x170) {
+        int32_t mixed;
+
+        /* TODO I think I can do this a lot easier */
+        mixed = (*(int16_t *)(BufferSpace + src_off + pos) * gain) >> 15;
+        mixed += *(int16_t *)(BufferSpace + dst_off + pos);
+        mixed = clamp_s16((int32_t)mixed);
+
+        *(uint16_t *)(BufferSpace + dst_off + pos) = (uint16_t)(mixed & 0xFFFF);
+        pos += 2;
+    }
+}
+
+/* LOADBUFF3 - copy from RDRAM (inst2, 4-byte aligned) into BufferSpace at
+ * 0x4f0 + (inst1 & 0xffc).  The length is inst1 >> 12, rounded up to a
+ * multiple of 4 and masked to 0xFFC.
+ */
+static void LOADBUFF3(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t length = (((inst1 >> 0xC) + 3) & 0xFFC);
+    uint32_t dmem_off = (inst1 & 0xffc) + 0x4f0;
+    uint32_t dram_off = (inst2 & 0xfffffc);
+
+    memcpy(BufferSpace + dmem_off, rsp.RDRAM + dram_off, length);
+}
+
+/* SAVEBUFF3 - the reverse of LOADBUFF3: copy from BufferSpace at
+ * 0x4f0 + (inst1 & 0xffc) out to RDRAM at inst2 (4-byte aligned).
+ */
+static void SAVEBUFF3(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t length = (((inst1 >> 0xC) + 3) & 0xFFC);
+    uint32_t dmem_off = (inst1 & 0xffc) + 0x4f0;
+    uint32_t dram_off = (inst2 & 0xfffffc);
+
+    memcpy(rsp.RDRAM + dram_off, BufferSpace + dmem_off, length);
+}
+
+/* Loads an ADPCM table
+ * NOTE Works 100% Now 03-13-01
+ *
+ * The table is read from RDRAM at the low 24 bits of inst2 in rows of eight
+ * 16-bit entries; (inst1 & 0xffff) >> 4 rows are copied into adpcmtable,
+ * with each destination index XORed with S.
+ */
+static void LOADADPCM3(uint32_t inst1, uint32_t inst2)
+{
+    const uint32_t rows = (inst1 & 0xffff) >> 0x4;
+    const uint16_t *row_src = (uint16_t *)(rsp.RDRAM + (inst2 & 0xffffff));
+    uint32_t row, col;
+
+    for (row = 0; row < rows; row++, row_src += 8) {
+        for (col = 0; col < 8; col++)
+            adpcmtable[(col + (row << 3))^S] = row_src[col];
+    }
+}
+
+/* TODO Needs accuracy verification...
*/
+/* DMEMMOVE3 - byte-wise copy inside BufferSpace.
+ *
+ * inst1 low half = source offset, inst2 high half = destination offset
+ * (both biased by 0x4f0); the length is the low half of inst2 rounded up to
+ * a multiple of 4.  Addresses are XORed with S8.
+ */
+static void DMEMMOVE3(uint32_t inst1, uint32_t inst2)
+{
+    uint32_t cnt;
+    uint32_t v0 = (inst1 & 0xFFFF) + 0x4f0;
+    uint32_t v1 = (inst2 >> 0x10) + 0x4f0;
+    uint32_t count = ((inst2 + 3) & 0xfffc);
+
+    for (cnt = 0; cnt < count; cnt++)
+        *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8));
+}
+
+/* SETLOOP3 - remember the RDRAM address ADPCM3 reads its state from when
+ * its loop flag (0x2) is set.
+ */
+static void SETLOOP3(uint32_t inst1, uint32_t inst2)
+{
+    loopval = (inst2 & 0xffffff);
+}
+
+/* Unpack 8 coded nibbles (4 bytes starting at BufferSpace[0x4f0 + *inPtr])
+ * into inp[0..7] and advance *inPtr past them.  Each nibble is moved to the
+ * top of a signed 16-bit value; when code < 12 it is additionally scaled by
+ * vscale (>> 16).
+ */
+static void adpcm3_unpack8(unsigned short *inPtr, unsigned char code,
+                           int vscale, int *inp)
+{
+    unsigned char icode;
+    unsigned short j = 0;
+
+    while (j < 8) {
+        icode = BufferSpace[(0x4f0 + *inPtr)^S8];
+        (*inPtr)++;
+
+        /* this will in effect be signed */
+        inp[j] = (int16_t)((icode & 0xf0) << 8);
+        if (code < 12)
+            inp[j] = ((int)((int)inp[j] * (int)vscale) >> 16);
+        j++;
+
+        inp[j] = (int16_t)((icode & 0xf) << 12);
+        if (code < 12)
+            inp[j] = ((int)((int)inp[j] * (int)vscale) >> 16);
+        j++;
+    }
+}
+
+/* Compute the 8 predictor sums for one half frame:
+ *   a[i] = book1[i] * l1 + book2[i] * l2
+ *        + sum over k < i of book2[i - 1 - k] * inp[k]
+ *        + inp[i] * 2048
+ * The terms are accumulated in the same order as the unrolled original.
+ */
+static void adpcm3_predict8(int *a, const short *book1, const short *book2,
+                            int l1, int l2, const int *inp)
+{
+    int i, k;
+
+    for (i = 0; i < 8; i++) {
+        a[i] = (int)book1[i] * (int)l1;
+        a[i] += (int)book2[i] * (int)l2;
+        for (k = 0; k < i; k++)
+            a[i] += (int)book2[i - 1 - k] * inp[k];
+        a[i] += (int)inp[i] * (int)2048;
+    }
+}
+
+/* TODO Verified to be 100% Accurate... */
+/* ADPCM3 - ADPCM decoder.
+ *
+ * inst1: low 24 bits = RDRAM address of the 32-byte decoder state.
+ * inst2: bits 28..31 = flags, bits 16..27 = count, bits 12..15 = input
+ *        offset, bits 0..11 = output offset (both relative to 0x4f0).
+ *
+ * The first 16 output samples are the previous state: zero when flag 0x1 is
+ * set, otherwise loaded from loopval (flag 0x2) or from the state address.
+ * Each 9-byte input frame (1 header byte + 8 data bytes) yields 16 samples
+ * and decrements count by 32.  The last 16 samples are saved back to the
+ * state address.
+ */
+static void ADPCM3(uint32_t inst1, uint32_t inst2)
+{
+    unsigned char Flags = (uint8_t)(inst2 >> 0x1c) & 0xff;
+    unsigned int Address = (inst1 & 0xffffff);
+    unsigned short inPtr = (inst2 >> 12) & 0xf;
+    short *out = (short *)(BufferSpace + (inst2 & 0xfff) + 0x4f0);
+    short count = (short)((inst2 >> 16) & 0xfff);
+    unsigned char code;
+    int vscale;
+    unsigned short index;
+    unsigned short j;
+    int a[8];
+    short *book1, *book2;
+    int l1;
+    int l2;
+    int inp[2][8];
+    int half;
+
+    memset(out, 0, 32);
+
+    if (!(Flags & 0x1)) {
+        if (Flags & 0x2)
+            memcpy(out, &rsp.RDRAM[loopval], 32);
+        else
+            memcpy(out, &rsp.RDRAM[Address], 32);
+    }
+
+    l1 = out[14 ^ S];
+    l2 = out[15 ^ S];
+    out += 16;
+    while (count > 0) {
+        /* the first iteration through, these values are
+         * either 0 in the case of A_INIT, from a special
+         * area of memory in the case of A_LOOP or just
+         * the values we calculated the last time
+         */
+
+        code = BufferSpace[(0x4f0 + inPtr)^S8];
+        index = code & 0xf;
+        /* index into the adpcm code table */
+        index <<= 4;
+        book1 = (short *)&adpcmtable[index];
+        book2 = book1 + 8;
+        /* upper nibble is scale */
+        code >>= 4;
+        /* very strange. 0x8000 would be .5 in 16:16 format
+         * so this appears to be a fractional scale based
+         * on the 12 based inverse of the scale value.
+         *
+         * For code >= 12 the shift count would be negative, which is
+         * undefined behaviour in C; vscale is not used in that case (see
+         * the code < 12 checks in adpcm3_unpack8), so it is simply set
+         * to 0 instead of evaluating the shift.
+         */
+        vscale = (code < 12) ? (0x8000 >> ((12 - code) - 1)) : 0;
+
+        /* coded adpcm data lies next */
+        inPtr++;
+
+        /* 2 x 4 bytes of coded nibbles yield 2 x 8 short pcm values */
+        adpcm3_unpack8(&inPtr, code, vscale, inp[0]);
+        adpcm3_unpack8(&inPtr, code, vscale, inp[1]);
+
+        for (half = 0; half < 2; half++) {
+            adpcm3_predict8(a, book1, book2, l1, l2, inp[half]);
+
+            for (j = 0; j < 8; j++) {
+                a[j ^ S] >>= 11;
+                a[j ^ S] = clamp_s16(a[j ^ S]);
+                *(out++) = a[j ^ S];
+            }
+            /* the last two decoded samples seed the next prediction */
+            l1 = a[6];
+            l2 = a[7];
+        }
+
+        count -= 32;
+    }
+    out -= 16;
+    memcpy(&rsp.RDRAM[Address], out, 32);
+}
+
+/* RESAMPLE3 - 4-tap resampler driven by ResampleLUT, fixed 0x170-byte output.
+ *
+ * inst1: low 24 bits = RDRAM address of the saved resampler state.
+ * inst2: bits 30..31 = flags, bits 14..29 = pitch (doubled before use),
+ *        bits 2..13 = input offset (relative to 0x4f0); a non-zero value in
+ *        bits 0..1 selects the output buffer at 0x660 instead of 0x4f0.
+ *
+ * When flag bit 0 is clear, the four history samples and the fractional
+ * accumulator (stored at state + 10) are restored from RDRAM; otherwise the
+ * history is zeroed.  Both are always written back at the end.
+ */
+static void RESAMPLE3(uint32_t inst1, uint32_t inst2)
+{
+    unsigned char Flags = (uint8_t)((inst2 >> 0x1e));
+    unsigned int Pitch = ((inst2 >> 0xe) & 0xffff) << 1;
+    uint32_t addy = (inst1 & 0xffffff);
+    unsigned int Accum = 0;
+    unsigned int location;
+    int16_t *lut;
+    short *dst = (short *)(BufferSpace);
+    int16_t *src = (int16_t *)(BufferSpace);
+    uint32_t srcPtr = ((((inst2 >> 2) & 0xfff) + 0x4f0) / 2);
+    uint32_t dstPtr;
+    int32_t temp;
+    int32_t accum;
+    int x, i;
+
+    /* the filter looks at the 4 samples in front of the input buffer */
+    srcPtr -= 4;
+
+    if (inst2 & 0x3)
+        dstPtr = 0x660 / 2;
+    else
+        dstPtr = 0x4f0 / 2;
+
+    if ((Flags & 0x1) == 0) {
+        for (x = 0; x < 4; x++)
+            src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S];
+        Accum = *(uint16_t *)(rsp.RDRAM + addy + 10);
+    } else {
+        for (x = 0; x < 4; x++)
+            src[(srcPtr + x)^S] = 0;
+    }
+
+    for (i = 0; i < 0x170 / 2; i++) {
+        /* top 6 bits of the 16-bit fraction select an 8-byte LUT row */
+        location = (((Accum * 0x40) >> 0x10) * 8);
+        lut = (int16_t *)(((uint8_t *)ResampleLUT) + location);
+
+        /* each tap is scaled down by 15 bits before being summed */
+        accum = 0;
+        for (x = 0; x < 4; x++) {
+            temp = (int32_t)src[(srcPtr + x)^S] * (int32_t)lut[x];
+            accum += (int32_t)(temp >> 15);
+        }
+
+        accum = clamp_s16(accum);
+
+        dst[dstPtr ^ S] = (accum);
+        dstPtr++;
+        Accum += Pitch;
+        srcPtr += (Accum >> 16);
+        Accum &= 0xffff;
+    }
+    for (x = 0; x < 4; x++)
+        ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S];
+    *(uint16_t *)(rsp.RDRAM + addy + 10) = Accum;
+}
+
+/* TODO Needs accuracy verification... */
+/* INTERLEAVE3 - interleave the buffers at 0x9d0 (left) and 0xb40 (right)
+ * into the buffer at 0x4f0.  Both command words are ignored; 0x170 / 4
+ * iterations each consume two samples per channel.  The emission order
+ * depends on M64P_BIG_ENDIAN.
+ */
+static void INTERLEAVE3(uint32_t inst1, uint32_t inst2)
+{
+    uint16_t *outbuff = (uint16_t *)(BufferSpace + 0x4f0);
+    uint16_t *inSrcR;
+    uint16_t *inSrcL;
+    uint16_t Left, Right, Left2, Right2;
+    int x;
+
+    inSrcR = (uint16_t *)(BufferSpace + 0xb40);
+    inSrcL = (uint16_t *)(BufferSpace + 0x9d0);
+
+    for (x = 0; x < (0x170 / 4); x++) {
+        Left = *(inSrcL++);
+        Right = *(inSrcR++);
+        Left2 = *(inSrcL++);
+        Right2 = *(inSrcR++);
+
+#ifdef M64P_BIG_ENDIAN
+        *(outbuff++) = Right;
+        *(outbuff++) = Left;
+        *(outbuff++) = Right2;
+        *(outbuff++) = Left2;
+#else
+        *(outbuff++) = Right2;
+        *(outbuff++) = Left2;
+        *(outbuff++) = Right;
+        *(outbuff++) = Left;
+#endif
+    }
+}
+
+/* WHATISTHIS - command 0xE; intentionally does nothing. */
+static void WHATISTHIS(uint32_t inst1, uint32_t inst2)
+{
+}
+
+/* Address latched by MP3ADDY (low 24 bits of inst2). */
+static uint32_t setaddr;
+static void MP3ADDY(uint32_t inst1, uint32_t inst2)
+{
+    setaddr = (inst2 & 0xffffff);
+}
+
+/*
+FFT = Fast Fourier Transform
+DCT = Discrete Cosine Transform
+MPEG-1 Layer 3 retains Layer 2's 1152-sample window, as well as the FFT polyphase filter for
+backward compatibility, but adds a modified DCT filter. DCT's advantages over DFTs (discrete
+Fourier transforms) include half as many multiply-accumulate operations and half the
+generated coefficients because the sinusoidal portion of the calculation is absent, and DCT
+generally involves simpler math. The finite lengths of a conventional DCTs' bandpass impulse
+responses, however, may result in block-boundary effects. MDCTs overlap the analysis blocks
+and lowpass-filter the decoded audio to remove aliases, eliminating these effects. MDCTs also
+have a higher transform coding gain than the standard DCT, and their basic functions
+correspond to better bandpass response.
+
+MPEG-1 Layer 3's DCT sub-bands are unequally sized, and correspond to the human auditory
+system's critical bands. In Layer 3 decoders must support both constant- and variable-bit-rate
+bit streams. (However, many Layer 1 and 2 decoders also handle variable bit rates). Finally,
+Layer 3 encoders Huffman-code the quantized coefficients before archiving or transmission for
+additional lossless compression. Bit streams range from 32 to 320 kbps, and 128-kbps rates
+achieve near-CD quality, an important specification to enable dual-channel ISDN
+(integrated-services-digital-network) to be the future high-bandwidth pipe to the home.
+
+*/
+/* DISABLE - command 0; intentionally does nothing. */
+static void DISABLE(uint32_t inst1, uint32_t inst2)
+{
+}
+
+
+/* Command dispatch table for this ABI, indexed by command number. */
+const acmd_callback_t ABI3[0x10] = {
+    DISABLE, ADPCM3,  CLEARBUFF3, ENVMIXER3,  LOADBUFF3, RESAMPLE3,   SAVEBUFF3,  MP3,
+    MP3ADDY, SETVOL3, DMEMMOVE3,  LOADADPCM3, MIXER3,    INTERLEAVE3, WHATISTHIS, SETLOOP3
+};
diff --git a/source/mupen64plus-rsp-hle/src/ucode3mp3.c b/source/mupen64plus-rsp-hle/src/ucode3mp3.c
new file mode 100755
index 0000000..e32b90a
--- /dev/null
+++ b/source/mupen64plus-rsp-hle/src/ucode3mp3.c
@@ -0,0 +1,707 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Mupen64plus-rsp-hle - ucode3mp3.c                                     *
+ *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
+ *   Copyright (C) 2009 Richard Goedeken                                   *
+ *   Copyright (C) 2002 Hacktarux                                          *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +# include +#include + +#include "m64p_plugin.h" +#include "hle.h" + +static const uint16_t DeWindowLUT [0x420] = { + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, + 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, + 0x7D3A, 0x2D2C, 
0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 0x10EA, 0x00EE, 
0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 
0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008, + 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C, + 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0008, + 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C, + 0x0671, 0xF4C3, 0x031C, 0xFECA, 0x0063, 0xFFEE, 0x0000, 0x0009, + 0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4, + 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x0009, + 
0x0056, 0x0080, 0x038F, 0x06EE, 0x0DCE, 0x367E, 0x7F4D, 0xB6A4, + 0x07C8, 0xF55C, 0x033B, 0xFEE6, 0x0062, 0xFFEF, 0x0000, 0x000A, + 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4, + 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000A, + 0x0058, 0x0098, 0x038C, 0x078C, 0x0D08, 0x39A4, 0x7FB0, 0xB9C4, + 0x0905, 0xF5F9, 0x0354, 0xFF02, 0x0061, 0xFFF1, 0x0000, 0x000B, + 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7, + 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000B, + 0x005B, 0x00AF, 0x0385, 0x082B, 0x0C2B, 0x3CCB, 0x7FEB, 0xBCE7, + 0x0A2A, 0xF697, 0x0369, 0xFF1D, 0x005F, 0xFFF2, 0x0000, 0x000D, + 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E, + 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x000D, + 0x005D, 0x00C8, 0x037A, 0x08CA, 0x0B37, 0x3FF2, 0x7FFF, 0xC00E, + 0x0B37, 0xF736, 0x037A, 0xFF38, 0x005D, 0xFFF3, 0x0000, 0x0000 +}; + +static uint8_t mp3data[0x1000]; + +static int32_t v[32]; + +static void MP3AB0(void) +{ + /* Part 2 - 100% Accurate */ + static const uint16_t LUT2[8] = { + 0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4, + 0x1916, 0x4A50, 0xA268, 0x78AE + }; + static const uint16_t LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A }; + int i; + + for (i = 0; i < 8; i++) { + v[16 + i] = v[0 + i] + v[8 + i]; + v[24 + i] = ((v[0 + i] - v[8 + i]) * LUT2[i]) >> 0x10; + } + + /* Part 3: 4-wide butterflies */ + + for (i = 0; i < 4; i++) { + v[0 + i] = v[16 + i] + v[20 + i]; + v[4 + i] = ((v[16 + i] - v[20 + i]) * LUT3[i]) >> 0x10; + + v[8 + i] = v[24 + i] + v[28 + i]; + v[12 + i] = ((v[24 + i] - v[28 + i]) * LUT3[i]) >> 0x10; + } + + /* Part 4: 2-wide butterflies - 100% Accurate */ + + for (i = 0; i < 16; i += 4) { + v[16 + i] = v[0 + i] + v[2 + i]; + v[18 + i] = ((v[0 + i] - v[2 + i]) * 0xEC84) >> 0x10; + + v[17 + i] = v[1 + i] + v[3 + i]; + v[19 + i] = ((v[1 + i] - v[3 + i]) * 0x61F8) >> 0x10; + } +} + +static void InnerLoop(); + +static uint32_t inPtr, outPtr; + +static uint32_t t6;/* = 0x08A0; - 
I think these are temporary storage buffers */ +static uint32_t t5;/* = 0x0AC0; */ +static uint32_t t4;/* = (inst1 & 0x1E); */ + +void MP3(uint32_t inst1, uint32_t inst2) +{ + /* Initialization Code */ + uint32_t readPtr; /* s5 */ + uint32_t writePtr; /* s6 */ + uint32_t tmp; + int cnt, cnt2; + + /* I think these are temporary storage buffers */ + t6 = 0x08A0; + t5 = 0x0AC0; + t4 = (inst1 & 0x1E); + + writePtr = inst2 & 0xFFFFFF; + readPtr = writePtr; + /* Just do that for efficiency... may remove and use directly later anyway */ + memcpy(mp3data + 0xCE8, rsp.RDRAM + readPtr, 8); + /* This must be a header byte or whatnot */ + readPtr += 8; + + for (cnt = 0; cnt < 0x480; cnt += 0x180) { + /* DMA: 0xCF0 <- RDRAM[s5] : 0x180 */ + memcpy(mp3data + 0xCF0, rsp.RDRAM + readPtr, 0x180); + inPtr = 0xCF0; /* s7 */ + outPtr = 0xE70; /* s3 */ +/* --------------- Inner Loop Start -------------------- */ + for (cnt2 = 0; cnt2 < 0x180; cnt2 += 0x40) { + t6 &= 0xFFE0; + t5 &= 0xFFE0; + t6 |= t4; + t5 |= t4; + InnerLoop(); + t4 = (t4 - 2) & 0x1E; + tmp = t6; + t6 = t5; + t5 = tmp; + inPtr += 0x40; + } +/* --------------- Inner Loop End -------------------- */ + memcpy(rsp.RDRAM + writePtr, mp3data + 0xe70, 0x180); + writePtr += 0x180; + readPtr += 0x180; + } +} + + + +static void InnerLoop(void) +{ + /* Part 1: 100% Accurate */ + + /* 0, 1, 3, 2, 7, 6, 4, 5, 7, 6, 4, 5, 0, 1, 3, 2 */ + static const uint16_t LUT6[16] = { + 0xFFB2, 0xFD3A, 0xF10A, 0xF854, + 0xBDAE, 0xCDA0, 0xE76C, 0xDB94, + 0x1920, 0x4B20, 0xAC7C, 0x7C68, + 0xABEC, 0x9880, 0xDAE8, 0x839C + }; + int i; + uint32_t t0; + uint32_t t1; + uint32_t t2; + uint32_t t3; + int32_t v2 = 0, v4 = 0, v6 = 0, v8 = 0; + uint32_t offset; + uint32_t addptr; + int x; + int32_t mult6; + int32_t mult4; + int tmp; + int32_t hi0; + int32_t hi1; + int32_t vt; + + v[0] = *(int16_t *)(mp3data + inPtr + (0x00 ^ S16)); + v[31] = *(int16_t *)(mp3data + inPtr + (0x3E ^ S16)); + v[0] += v[31]; + v[1] = *(int16_t *)(mp3data + inPtr + (0x02 ^ S16)); 
+ v[30] = *(int16_t *)(mp3data + inPtr + (0x3C ^ S16)); + v[1] += v[30]; + v[2] = *(int16_t *)(mp3data + inPtr + (0x06 ^ S16)); + v[28] = *(int16_t *)(mp3data + inPtr + (0x38 ^ S16)); + v[2] += v[28]; + v[3] = *(int16_t *)(mp3data + inPtr + (0x04 ^ S16)); + v[29] = *(int16_t *)(mp3data + inPtr + (0x3A ^ S16)); + v[3] += v[29]; + + v[4] = *(int16_t *)(mp3data + inPtr + (0x0E ^ S16)); + v[24] = *(int16_t *)(mp3data + inPtr + (0x30 ^ S16)); + v[4] += v[24]; + v[5] = *(int16_t *)(mp3data + inPtr + (0x0C ^ S16)); + v[25] = *(int16_t *)(mp3data + inPtr + (0x32 ^ S16)); + v[5] += v[25]; + v[6] = *(int16_t *)(mp3data + inPtr + (0x08 ^ S16)); + v[27] = *(int16_t *)(mp3data + inPtr + (0x36 ^ S16)); + v[6] += v[27]; + v[7] = *(int16_t *)(mp3data + inPtr + (0x0A ^ S16)); + v[26] = *(int16_t *)(mp3data + inPtr + (0x34 ^ S16)); + v[7] += v[26]; + + v[8] = *(int16_t *)(mp3data + inPtr + (0x1E ^ S16)); + v[16] = *(int16_t *)(mp3data + inPtr + (0x20 ^ S16)); + v[8] += v[16]; + v[9] = *(int16_t *)(mp3data + inPtr + (0x1C ^ S16)); + v[17] = *(int16_t *)(mp3data + inPtr + (0x22 ^ S16)); + v[9] += v[17]; + v[10] = *(int16_t *)(mp3data + inPtr + (0x18 ^ S16)); + v[19] = *(int16_t *)(mp3data + inPtr + (0x26 ^ S16)); + v[10] += v[19]; + v[11] = *(int16_t *)(mp3data + inPtr + (0x1A ^ S16)); + v[18] = *(int16_t *)(mp3data + inPtr + (0x24 ^ S16)); + v[11] += v[18]; + + v[12] = *(int16_t *)(mp3data + inPtr + (0x10 ^ S16)); + v[23] = *(int16_t *)(mp3data + inPtr + (0x2E ^ S16)); + v[12] += v[23]; + v[13] = *(int16_t *)(mp3data + inPtr + (0x12 ^ S16)); + v[22] = *(int16_t *)(mp3data + inPtr + (0x2C ^ S16)); + v[13] += v[22]; + v[14] = *(int16_t *)(mp3data + inPtr + (0x16 ^ S16)); + v[20] = *(int16_t *)(mp3data + inPtr + (0x28 ^ S16)); + v[14] += v[20]; + v[15] = *(int16_t *)(mp3data + inPtr + (0x14 ^ S16)); + v[21] = *(int16_t *)(mp3data + inPtr + (0x2A ^ S16)); + v[15] += v[21]; + + /* Part 2-4 */ + + MP3AB0(); + + /* Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!! 
*/ + + t0 = t6 + 0x100; + t1 = t6 + 0x200; + t2 = t5 + 0x100; + t3 = t5 + 0x200; + + /* 0x13A8 */ + v[1] = 0; + v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10; + + v[16] = -v[16] - v[17]; + v[2] = v[18] + v[19]; + /* ** Store v[11] -> (T6 + 0)** */ + *(int16_t *)(mp3data + ((t6 + (short)0x0))) = (short)v[11]; + + + v[11] = -v[11]; + /* ** Store v[16] -> (T3 + 0)** */ + *(int16_t *)(mp3data + ((t3 + (short)0x0))) = (short)v[16]; + /* ** Store v[11] -> (T5 + 0)** */ + *(int16_t *)(mp3data + ((t5 + (short)0x0))) = (short)v[11]; + /* 0x13E8 - Verified.... */ + v[2] = -v[2]; + /* ** Store v[2] -> (T2 + 0)** */ + *(int16_t *)(mp3data + ((t2 + (short)0x0))) = (short)v[2]; + v[3] = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2]; + /* ** Store v[3] -> (T0 + 0)** */ + *(int16_t *)(mp3data + ((t0 + (short)0x0))) = (short)v[3]; + /* 0x1400 - Verified */ + v[4] = -v[20] - v[21]; + v[6] = v[22] + v[23]; + v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10; + /* ** Store v[4] -> (T3 + 0xFF80) */ + *(int16_t *)(mp3data + ((t3 + (short)0xFF80))) = (short)v[4]; + v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10; + v[5] = v[5] - v[4]; + v[7] = v[7] - v[5]; + v[6] = v[6] + v[6]; + v[5] = v[5] - v[6]; + v[4] = -v[4] - v[6]; + /* *** Store v[7] -> (T1 + 0xFF80) */ + *(int16_t *)(mp3data + ((t1 + (short)0xFF80))) = (short)v[7]; + /* *** Store v[4] -> (T2 + 0xFF80) */ + *(int16_t *)(mp3data + ((t2 + (short)0xFF80))) = (short)v[4]; + /* *** Store v[5] -> (T0 + 0xFF80) */ + *(int16_t *)(mp3data + ((t0 + (short)0xFF80))) = (short)v[5]; + v[8] = v[24] + v[25]; + + + v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10; + v[2] = v[8] + v[9]; + v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10; + v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10; + + v[10] = v[26] + v[27]; + v[10] = v[10] + v[10]; + v[12] = v[28] + v[29]; + v[12] = v[12] + v[12]; + v[14] = v[30] + v[31]; + v[3] = v[8] + v[10]; + v[14] = v[14] + v[14]; + v[13] = (v[13] - v[2]) + v[12]; + v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]); + v[14] = -(v[14] + 
v[14]) + v[3]; + v[17] = v[13] - v[10]; + v[9] = v[9] + v[14]; + /* ** Store v[9] -> (T6 + 0x40) */ + *(int16_t *)(mp3data + ((t6 + (short)0x40))) = (short)v[9]; + v[11] = v[11] - v[13]; + /* ** Store v[17] -> (T0 + 0xFFC0) */ + *(int16_t *)(mp3data + ((t0 + (short)0xFFC0))) = (short)v[17]; + v[12] = v[8] - v[12]; + /* ** Store v[11] -> (T0 + 0x40) */ + *(int16_t *)(mp3data + ((t0 + (short)0x40))) = (short)v[11]; + v[8] = -v[8]; + /* ** Store v[15] -> (T1 + 0xFFC0) */ + *(int16_t *)(mp3data + ((t1 + (short)0xFFC0))) = (short)v[15]; + v[10] = -v[10] - v[12]; + /* ** Store v[12] -> (T2 + 0x40) */ + *(int16_t *)(mp3data + ((t2 + (short)0x40))) = (short)v[12]; + /* ** Store v[8] -> (T3 + 0xFFC0) */ + *(int16_t *)(mp3data + ((t3 + (short)0xFFC0))) = (short)v[8]; + /* ** Store v[14] -> (T5 + 0x40) */ + *(int16_t *)(mp3data + ((t5 + (short)0x40))) = (short)v[14]; + /* ** Store v[10] -> (T2 + 0xFFC0) */ + *(int16_t *)(mp3data + ((t2 + (short)0xFFC0))) = (short)v[10]; + /* 0x14FC - Verified... */ + + /* Part 6 - 100% Accurate */ + + v[0] = *(int16_t *)(mp3data + inPtr + (0x00 ^ S16)); + v[31] = *(int16_t *)(mp3data + inPtr + (0x3E ^ S16)); + v[0] -= v[31]; + v[1] = *(int16_t *)(mp3data + inPtr + (0x02 ^ S16)); + v[30] = *(int16_t *)(mp3data + inPtr + (0x3C ^ S16)); + v[1] -= v[30]; + v[2] = *(int16_t *)(mp3data + inPtr + (0x06 ^ S16)); + v[28] = *(int16_t *)(mp3data + inPtr + (0x38 ^ S16)); + v[2] -= v[28]; + v[3] = *(int16_t *)(mp3data + inPtr + (0x04 ^ S16)); + v[29] = *(int16_t *)(mp3data + inPtr + (0x3A ^ S16)); + v[3] -= v[29]; + + v[4] = *(int16_t *)(mp3data + inPtr + (0x0E ^ S16)); + v[24] = *(int16_t *)(mp3data + inPtr + (0x30 ^ S16)); + v[4] -= v[24]; + v[5] = *(int16_t *)(mp3data + inPtr + (0x0C ^ S16)); + v[25] = *(int16_t *)(mp3data + inPtr + (0x32 ^ S16)); + v[5] -= v[25]; + v[6] = *(int16_t *)(mp3data + inPtr + (0x08 ^ S16)); + v[27] = *(int16_t *)(mp3data + inPtr + (0x36 ^ S16)); + v[6] -= v[27]; + v[7] = *(int16_t *)(mp3data + inPtr + (0x0A ^ S16)); + v[26] 
= *(int16_t *)(mp3data + inPtr + (0x34 ^ S16)); + v[7] -= v[26]; + + v[8] = *(int16_t *)(mp3data + inPtr + (0x1E ^ S16)); + v[16] = *(int16_t *)(mp3data + inPtr + (0x20 ^ S16)); + v[8] -= v[16]; + v[9] = *(int16_t *)(mp3data + inPtr + (0x1C ^ S16)); + v[17] = *(int16_t *)(mp3data + inPtr + (0x22 ^ S16)); + v[9] -= v[17]; + v[10] = *(int16_t *)(mp3data + inPtr + (0x18 ^ S16)); + v[19] = *(int16_t *)(mp3data + inPtr + (0x26 ^ S16)); + v[10] -= v[19]; + v[11] = *(int16_t *)(mp3data + inPtr + (0x1A ^ S16)); + v[18] = *(int16_t *)(mp3data + inPtr + (0x24 ^ S16)); + v[11] -= v[18]; + + v[12] = *(int16_t *)(mp3data + inPtr + (0x10 ^ S16)); + v[23] = *(int16_t *)(mp3data + inPtr + (0x2E ^ S16)); + v[12] -= v[23]; + v[13] = *(int16_t *)(mp3data + inPtr + (0x12 ^ S16)); + v[22] = *(int16_t *)(mp3data + inPtr + (0x2C ^ S16)); + v[13] -= v[22]; + v[14] = *(int16_t *)(mp3data + inPtr + (0x16 ^ S16)); + v[20] = *(int16_t *)(mp3data + inPtr + (0x28 ^ S16)); + v[14] -= v[20]; + v[15] = *(int16_t *)(mp3data + inPtr + (0x14 ^ S16)); + v[21] = *(int16_t *)(mp3data + inPtr + (0x2A ^ S16)); + v[15] -= v[21]; + + for (i = 0; i < 16; i++) + v[0 + i] = (v[0 + i] * LUT6[i]) >> 0x10; + v[0] = v[0] + v[0]; + v[1] = v[1] + v[1]; + v[2] = v[2] + v[2]; + v[3] = v[3] + v[3]; + v[4] = v[4] + v[4]; + v[5] = v[5] + v[5]; + v[6] = v[6] + v[6]; + v[7] = v[7] + v[7]; + v[12] = v[12] + v[12]; + v[13] = v[13] + v[13]; + v[15] = v[15] + v[15]; + + MP3AB0(); + + /* Part 7: - 100% Accurate + SSV - Unoptimized */ + + v[0] = (v[17] + v[16]) >> 1; + v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10; + v[2] = -v[18] - v[19]; + v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10; + v[4] = v[20] + v[21] + v[0]; + v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1]; + v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2]; + v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3]; + /* 0x16A8 */ + /* Save v[0] -> (T3 + 0xFFE0) */ + *(int16_t *)(mp3data + ((t3 + (short)0xFFE0))) = (short) - v[0]; + 
v[8] = v[24] + v[25]; + v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10; + v[10] = ((v[26] + v[27]) << 1) + v[8]; + v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9]; + v[12] = v[4] - ((v[28] + v[29]) << 1); + /* ** Store v12 -> (T2 + 0x20) */ + *(int16_t *)(mp3data + ((t2 + (short)0x20))) = (short)v[12]; + v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5]; + v[14] = v[30] + v[31]; + v[14] = v[14] + v[14]; + v[14] = v[14] + v[14]; + v[14] = v[6] - v[14]; + v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7]; + /* Store v14 -> (T5 + 0x20) */ + *(int16_t *)(mp3data + ((t5 + (short)0x20))) = (short)v[14]; + v[14] = v[14] + v[1]; + /* Store v[14] -> (T6 + 0x20) */ + *(int16_t *)(mp3data + ((t6 + (short)0x20))) = (short)v[14]; + /* Store v[15] -> (T1 + 0xFFE0) */ + *(int16_t *)(mp3data + ((t1 + (short)0xFFE0))) = (short)v[15]; + v[9] = v[9] + v[10]; + v[1] = v[1] + v[6]; + v[6] = v[10] - v[6]; + v[1] = v[9] - v[1]; + /* Store v[6] -> (T5 + 0x60) */ + *(int16_t *)(mp3data + ((t5 + (short)0x60))) = (short)v[6]; + v[10] = v[10] + v[2]; + v[10] = v[4] - v[10]; + /* Store v[10] -> (T2 + 0xFFA0) */ + *(int16_t *)(mp3data + ((t2 + (short)0xFFA0))) = (short)v[10]; + v[12] = v[2] - v[12]; + /* Store v[12] -> (T2 + 0xFFE0) */ + *(int16_t *)(mp3data + ((t2 + (short)0xFFE0))) = (short)v[12]; + v[5] = v[4] + v[5]; + v[4] = v[8] - v[4]; + /* Store v[4] -> (T2 + 0x60) */ + *(int16_t *)(mp3data + ((t2 + (short)0x60))) = (short)v[4]; + v[0] = v[0] - v[8]; + /* Store v[0] -> (T3 + 0xFFA0) */ + *(int16_t *)(mp3data + ((t3 + (short)0xFFA0))) = (short)v[0]; + v[7] = v[7] - v[11]; + /* Store v[7] -> (T1 + 0xFFA0) */ + *(int16_t *)(mp3data + ((t1 + (short)0xFFA0))) = (short)v[7]; + v[11] = v[11] - v[3]; + /* Store v[1] -> (T6 + 0x60) */ + *(int16_t *)(mp3data + ((t6 + (short)0x60))) = (short)v[1]; + v[11] = v[11] - v[5]; + /* Store v[11] -> (T0 + 0x60) */ + *(int16_t *)(mp3data + ((t0 + (short)0x60))) = (short)v[11]; + v[3] = v[3] - v[13]; + /* Store v[3] -> (T0 + 0x20) */ + 
*(int16_t *)(mp3data + ((t0 + (short)0x20))) = (short)v[3]; + v[13] = v[13] + v[2]; + /* Store v[13] -> (T0 + 0xFFE0) */ + *(int16_t *)(mp3data + ((t0 + (short)0xFFE0))) = (short)v[13]; + v[2] = (v[5] - v[2]) - v[9]; + /* Store v[2] -> (T0 + 0xFFA0) */ + *(int16_t *)(mp3data + ((t0 + (short)0xFFA0))) = (short)v[2]; + /* 0x7A8 - Verified... */ + + /* Step 8 - Dewindowing */ + + addptr = t6 & 0xFFE0; + + offset = 0x10 - (t4 >> 1); + for (x = 0; x < 8; x++) { + int32_t v0; + int32_t v18; + v2 = v4 = v6 = v8 = 0; + + for (i = 7; i >= 0; i--) { + v2 += ((int) * (int16_t *)(mp3data + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(mp3data + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + v6 += ((int) * (int16_t *)(mp3data + (addptr) + 0x20) * (short)DeWindowLUT[offset + 0x20] + 0x4000) >> 0xF; + v8 += ((int) * (int16_t *)(mp3data + (addptr) + 0x30) * (short)DeWindowLUT[offset + 0x28] + 0x4000) >> 0xF; + addptr += 2; + offset++; + } + v0 = v2 + v4; + v18 = v6 + v8; + /* Clamp(v0); */ + /* Clamp(v18); */ + /* clamp??? 
*/ + *(int16_t *)(mp3data + (outPtr ^ S16)) = v0; + *(int16_t *)(mp3data + ((outPtr + 2)^S16)) = v18; + outPtr += 4; + addptr += 0x30; + offset += 0x38; + } + + offset = 0x10 - (t4 >> 1) + 8 * 0x40; + v2 = v4 = 0; + for (i = 0; i < 4; i++) { + v2 += ((int) * (int16_t *)(mp3data + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v2 += ((int) * (int16_t *)(mp3data + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + addptr += 2; + offset++; + v4 += ((int) * (int16_t *)(mp3data + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(mp3data + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + addptr += 2; + offset++; + } + mult6 = *(int32_t *)(mp3data + 0xCE8); + mult4 = *(int32_t *)(mp3data + 0xCEC); + if (t4 & 0x2) { + v2 = (v2 **(uint32_t *)(mp3data + 0xCE8)) >> 0x10; + *(int16_t *)(mp3data + (outPtr ^ S16)) = v2; + } else { + v4 = (v4 **(uint32_t *)(mp3data + 0xCE8)) >> 0x10; + *(int16_t *)(mp3data + (outPtr ^ S16)) = v4; + mult4 = *(uint32_t *)(mp3data + 0xCE8); + } + addptr -= 0x50; + + for (x = 0; x < 8; x++) { + int32_t v0; + int32_t v18; + v2 = v4 = v6 = v8 = 0; + + offset = (0x22F - (t4 >> 1) + x * 0x40); + + for (i = 0; i < 4; i++) { + v2 += ((int) * (int16_t *)(mp3data + (addptr) + 0x20) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v2 -= ((int) * (int16_t *)(mp3data + ((addptr + 2)) + 0x20) * (short)DeWindowLUT[offset + 0x01] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(mp3data + (addptr) + 0x30) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + v4 -= ((int) * (int16_t *)(mp3data + ((addptr + 2)) + 0x30) * (short)DeWindowLUT[offset + 0x09] + 0x4000) >> 0xF; + v6 += ((int) * (int16_t *)(mp3data + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x20] + 0x4000) >> 0xF; + v6 -= ((int) * (int16_t *)(mp3data + ((addptr + 2)) + 0x00) * (short)DeWindowLUT[offset + 0x21] + 0x4000) >> 0xF; + v8 += ((int) * (int16_t *)(mp3data + (addptr) 
+ 0x10) * (short)DeWindowLUT[offset + 0x28] + 0x4000) >> 0xF; + v8 -= ((int) * (int16_t *)(mp3data + ((addptr + 2)) + 0x10) * (short)DeWindowLUT[offset + 0x29] + 0x4000) >> 0xF; + addptr += 4; + offset += 2; + } + v0 = v2 + v4; + v18 = v6 + v8; + /* Clamp(v0); */ + /* Clamp(v18); */ + /* clamp??? */ + *(int16_t *)(mp3data + ((outPtr + 2)^S16)) = v0; + *(int16_t *)(mp3data + ((outPtr + 4)^S16)) = v18; + outPtr += 4; + addptr -= 0x50; + } + + tmp = outPtr; + hi0 = mult6; + hi1 = mult4; + + hi0 = (int)hi0 >> 0x10; + hi1 = (int)hi1 >> 0x10; + for (i = 0; i < 8; i++) { + /* v0 */ + vt = (*(int16_t *)(mp3data + ((tmp - 0x40)^S16)) * hi0); + if (vt > 32767) { + vt = 32767; + } else { + if (vt < -32767) + vt = -32767; + } + *(int16_t *)((uint8_t *)mp3data + ((tmp - 0x40)^S16)) = (int16_t)vt; + + /* v17 */ + vt = (*(int16_t *)(mp3data + ((tmp - 0x30)^S16)) * hi0); + if (vt > 32767) { + vt = 32767; + } else { + if (vt < -32767) + vt = -32767; + } + *(int16_t *)((uint8_t *)mp3data + ((tmp - 0x30)^S16)) = vt; + + /* v2 */ + vt = (*(int16_t *)(mp3data + ((tmp - 0x1E)^S16)) * hi1); + if (vt > 32767) { + vt = 32767; + } else { + if (vt < -32767) + vt = -32767; + } + *(int16_t *)((uint8_t *)mp3data + ((tmp - 0x1E)^S16)) = vt; + + /* v4 */ + vt = (*(int16_t *)(mp3data + ((tmp - 0xE)^S16)) * hi1); + if (vt > 32767) { + vt = 32767; + } else { + if (vt < -32767) + vt = -32767; + } + *(int16_t *)((uint8_t *)mp3data + ((tmp - 0xE)^S16)) = vt; + tmp += 2; + } +} +