[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / ucode3.c

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus-rsp-hle - ucode3.c                                        *
 *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
 *   Copyright (C) 2009 Richard Goedeken                                   *
 *   Copyright (C) 2002 Hacktarux                                          *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

# include <string.h>
#include <stdint.h>

#include "m64p_plugin.h"
#include "hle.h"
#include "alist_internal.h"

/* Stores volume / envelope parameters into the mixer globals.
 *
 * Bits 16..23 of inst1 select which group is written:
 *   bit 2 clear          -> left target + left ramp
 *   bit 2 set, bit 1 clear -> right target + right ramp
 *   bit 2 set, bit 1 set   -> left volume + dry/wet envelope amounts
 * The low 16 bits of inst1 carry the 16-bit value; inst2 carries either the
 * 32-bit ramp or the packed dry (high half) / wet (low half) pair.
 */
static void SETVOL3(uint32_t inst1, uint32_t inst2)
{
    const uint8_t select = (uint8_t)(inst1 >> 0x10);
    const int16_t value16 = (int16_t)inst1;

    if (!(select & 0x4)) {
        VolTrg_Left  = value16;        /* 0x40 */
        VolRamp_Left = (int32_t)inst2; /* 0x42/0x44 */
        return;
    }

    if (!(select & 0x2)) {
        VolTrg_Right  = value16;        /* 0x46 */
        VolRamp_Right = (int32_t)inst2; /* 0x48/0x4A */
        return;
    }

    /* both bit 2 (288) and bit 1 (290) set */
    Vol_Left = value16;                  /* 0x50 */
    Env_Dry  = (int16_t)(inst2 >> 0x10); /* 0x4E */
    Env_Wet  = (int16_t)inst2;           /* 0x4C */
}

/* Envelope mixer.
 *
 * Mixes 0x170/2 16-bit samples from the input buffer (DMEM 0x4F0) into four
 * accumulation buffers: `out` (0x9D0) and `aux1` (0xB40) receive the input
 * scaled by the dry gain times the left / right volume, `aux2` (0xCB0) and
 * `aux3` (0xE20) receive it scaled by the wet gain times the left / right
 * volume.  The left and right volumes are ramped per sample and clamped at
 * their targets.
 *
 * inst1: bits 16..23 = flags (A_INIT starts a fresh envelope from the
 *        globals set by SETVOL3), low 16 bits = initial right volume.
 * inst2: low 24 bits = RDRAM offset of the 80-byte state block that is
 *        loaded when A_INIT is clear and always written back at the end.
 */
static void ENVMIXER3(uint32_t inst1, uint32_t inst2)
{
    uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff);
    uint32_t addy = (inst2 & 0xFFFFFF);

    short *inp = (short *)(BufferSpace + 0x4F0);
    short *out = (short *)(BufferSpace + 0x9D0);
    short *aux1 = (short *)(BufferSpace + 0xB40);
    short *aux2 = (short *)(BufferSpace + 0xCB0);
    short *aux3 = (short *)(BufferSpace + 0xE20);
    int32_t MainR;
    int32_t MainL;
    int32_t AuxR;
    int32_t AuxL;
    int i1, o1, a1, a2, a3;
    int y;

    /* Per-channel ramp state: step, 16-bit fractional accumulator, volume */
    int32_t LAdder, LAcc, LVol;
    int32_t RAdder, RAcc, RVol;
    /* Most significant part of the Ramp Value (its sign picks the clamp side) */
    int16_t RSig, LSig;
    int16_t Wet, Dry;
    int16_t LTrg, RTrg;

    /* The right volume is always taken from this command, even when the
     * remaining state is restored from RDRAM below. */
    Vol_Right = (int16_t)inst1;

    if (flags & A_INIT) {
        LAdder = VolRamp_Left / 8;
        LAcc  = 0;
        LVol  = Vol_Left;
        LSig = (int16_t)(VolRamp_Left >> 16);

        RAdder = VolRamp_Right / 8;
        RAcc  = 0;
        RVol  = Vol_Right;
        RSig = (int16_t)(VolRamp_Right >> 16);

        /* Save Wet/Dry values */
        Wet = (int16_t)Env_Wet;
        Dry = (int16_t)Env_Dry;
        /* Save Current Left/Right Targets */
        LTrg = VolTrg_Left;
        RTrg = VolTrg_Right;
    } else {
        /* Resume from the state block saved by the previous call */
        memcpy((uint8_t *)hleMixerWorkArea, rsp.RDRAM + addy, 80);
        Wet    = *(int16_t *)(hleMixerWorkArea +  0); /* 0-1 */
        Dry    = *(int16_t *)(hleMixerWorkArea +  2); /* 2-3 */
        LTrg   = *(int16_t *)(hleMixerWorkArea +  4); /* 4-5 */
        RTrg   = *(int16_t *)(hleMixerWorkArea +  6); /* 6-7 */
        LAdder = *(int32_t *)(hleMixerWorkArea +  8); /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
        RAdder = *(int32_t *)(hleMixerWorkArea + 10); /* 10-11 */
        LAcc   = *(int32_t *)(hleMixerWorkArea + 12); /* 12-13 */
        RAcc   = *(int32_t *)(hleMixerWorkArea + 14); /* 14-15 */
        LVol   = *(int32_t *)(hleMixerWorkArea + 16); /* 16-17 */
        RVol   = *(int32_t *)(hleMixerWorkArea + 18); /* 18-19 */
        LSig   = *(int16_t *)(hleMixerWorkArea + 20); /* 20-21 */
        RSig   = *(int16_t *)(hleMixerWorkArea + 22); /* 22-23 */
    }

    for (y = 0; y < (0x170 / 2); y++) {

        /* Left: add the step, carry the integer part into the volume and
         * keep only the 16-bit fraction in the accumulator */
        LAcc += LAdder;
        LVol += (LAcc >> 16);
        LAcc &= 0xFFFF;

        /* Right */
        RAcc += RAdder;
        RVol += (RAcc >> 16);
        RAcc &= 0xFFFF;
/****************************************************************/
        /* Clamp Left: a non-negative ramp may not rise above the target,
         * a negative ramp may not fall below it */
        if (LSig >= 0) { /* VLT */
            if (LVol > LTrg)
                LVol = LTrg;
        } else { /* VGE */
            if (LVol < LTrg)
                LVol = LTrg;
        }

        /* Clamp Right */
        if (RSig >= 0) { /* VLT */
            if (RVol > RTrg)
                RVol = RTrg;
        } else { /* VGE */
            if (RVol < RTrg)
                RVol = RTrg;
        }
/****************************************************************/
        /* Dry gains, rounded (+0x4000) before dropping 15 fraction bits */
        MainL = ((Dry * LVol) + 0x4000) >> 15;
        MainR = ((Dry * RVol) + 0x4000) >> 15;

        o1 = out [y ^ S];
        a1 = aux1[y ^ S];
        i1 = inp [y ^ S];

        o1 += ((i1 * MainL) + 0x4000) >> 15;
        a1 += ((i1 * MainR) + 0x4000) >> 15;

/****************************************************************/
        o1 = clamp_s16(o1);
        a1 = clamp_s16(a1);

/****************************************************************/

        out[y ^ S] = o1;
        aux1[y ^ S] = a1;

/****************************************************************/
        a2 = aux2[y ^ S];
        a3 = aux3[y ^ S];

        /* Wet gains use the same ramped volumes */
        AuxL  = ((Wet * LVol) + 0x4000) >> 15;
        AuxR  = ((Wet * RVol) + 0x4000) >> 15;

        a2 += ((i1 * AuxL) + 0x4000) >> 15;
        a3 += ((i1 * AuxR) + 0x4000) >> 15;

        a2 = clamp_s16(a2);
        a3 = clamp_s16(a3);

        aux2[y ^ S] = a2;
        aux3[y ^ S] = a3;
    }

    /* Persist the envelope state so the next non-INIT call can resume */
    *(int16_t *)(hleMixerWorkArea +  0) = Wet; /* 0-1 */
    *(int16_t *)(hleMixerWorkArea +  2) = Dry; /* 2-3 */
    *(int16_t *)(hleMixerWorkArea +  4) = LTrg; /* 4-5 */
    *(int16_t *)(hleMixerWorkArea +  6) = RTrg; /* 6-7 */
    *(int32_t *)(hleMixerWorkArea +  8) = LAdder; /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
    *(int32_t *)(hleMixerWorkArea + 10) = RAdder; /* 10-11 */
    *(int32_t *)(hleMixerWorkArea + 12) = LAcc; /* 12-13 */
    *(int32_t *)(hleMixerWorkArea + 14) = RAcc; /* 14-15 */
    *(int32_t *)(hleMixerWorkArea + 16) = LVol; /* 16-17 */
    *(int32_t *)(hleMixerWorkArea + 18) = RVol; /* 18-19 */
    *(int16_t *)(hleMixerWorkArea + 20) = LSig; /* 20-21 */
    *(int16_t *)(hleMixerWorkArea + 22) = RSig; /* 22-23 */
    memcpy(rsp.RDRAM + addy, (uint8_t *)hleMixerWorkArea, 80);
}

/* Zero-fills a region of the audio DMEM buffer.
 * inst1 low 16 bits: byte offset relative to DMEM 0x4f0.
 * inst2 low 16 bits: number of bytes to clear.
 */
static void CLEARBUFF3(uint32_t inst1, uint32_t inst2)
{
    const uint16_t dmem_off = (uint16_t)(inst1 & 0xffff);
    const uint16_t nbytes   = (uint16_t)(inst2 & 0xffff);

    memset(BufferSpace + dmem_off + 0x4f0, 0, nbytes);
}

/* TODO Needs accuracy verification... */
/* Adds a gain-scaled copy of one DMEM buffer into another.
 * inst1 low 16 bits : signed gain (product is shifted right by 15).
 * inst2 high 16 bits: source offset, low 16 bits: destination offset, both
 *                     relative to DMEM 0x4f0 and wrapped to 16 bits.
 * 0x170 bytes (one 16-bit sample every 2 bytes) are processed; each result
 * is clamped to the signed 16-bit range before being stored.
 */
static void MIXER3(uint32_t inst1, uint32_t inst2)
{
    const uint16_t src_off = (uint16_t)(inst2 >> 0x10)  + 0x4f0;
    const uint16_t dst_off = (uint16_t)(inst2 & 0xFFFF) + 0x4f0;
    const int32_t  gain    = (int16_t)(inst1 & 0xFFFF);
    int byte_pos = 0;

    while (byte_pos < 0x170) {
        int16_t *in_sample  = (int16_t *)(BufferSpace + src_off + byte_pos);
        int16_t *out_sample = (int16_t *)(BufferSpace + dst_off + byte_pos);
        int32_t mixed;

        /* TODO I think I can do this a lot easier */
        mixed  = (*in_sample * gain) >> 15;
        mixed += *out_sample;
        mixed  = clamp_s16((int32_t)mixed);

        *(uint16_t *)(BufferSpace + dst_off + byte_pos) = (uint16_t)(mixed & 0xFFFF);

        byte_pos += 2;
    }
}

/* Copies a block from RDRAM into the audio DMEM buffer.
 * inst1 bits 0..11  : DMEM byte offset (masked to a multiple of 4), relative to 0x4f0.
 * inst1 bits 12..   : byte count, rounded up to a multiple of 4 and masked to 0xFFC.
 * inst2 bits 0..23  : RDRAM byte offset (masked to a multiple of 4).
 */
static void LOADBUFF3(uint32_t inst1, uint32_t inst2)
{
    const uint32_t dram_off = inst2 & 0xfffffc;
    const uint32_t dmem_off = 0x4f0 + (inst1 & 0xffc);
    const uint32_t nbytes   = ((inst1 >> 0xC) + 3) & 0xFFC;

    memcpy(BufferSpace + dmem_off, rsp.RDRAM + dram_off, nbytes);
}

/* Copies a block from the audio DMEM buffer out to RDRAM.
 * inst1 bits 0..11  : DMEM byte offset (masked to a multiple of 4), relative to 0x4f0.
 * inst1 bits 12..   : byte count, rounded up to a multiple of 4 and masked to 0xFFC.
 * inst2 bits 0..23  : RDRAM byte offset (masked to a multiple of 4).
 */
static void SAVEBUFF3(uint32_t inst1, uint32_t inst2)
{
    const uint32_t dram_off = inst2 & 0xfffffc;
    const uint32_t dmem_off = 0x4f0 + (inst1 & 0xffc);
    const uint32_t nbytes   = ((inst1 >> 0xC) + 3) & 0xFFC;

    memcpy(rsp.RDRAM + dram_off, BufferSpace + dmem_off, nbytes);
}

/* Loads an ADPCM table
 * NOTE Works 100% Now 03-13-01
 *
 * inst1 low 16 bits: table size in bytes (8 entries are copied per 16 bytes).
 * inst2 low 24 bits: RDRAM byte offset of the table.
 * Each 16-bit entry is stored into adpcmtable with its index XORed by S.
 */
static void LOADADPCM3(uint32_t inst1, uint32_t inst2)
{
    const uint32_t dram_off = inst2 & 0xffffff;
    const uint32_t rows     = (inst1 & 0xffff) >> 0x4;
    uint16_t *entry = (uint16_t *)(rsp.RDRAM + dram_off);
    uint32_t row;
    uint32_t col;

    for (row = 0; row < rows; row++, entry += 8) {
        for (col = 0; col < 8; col++)
            adpcmtable[(col + (row << 3)) ^ S] = entry[col];
    }
}

/* TODO Needs accuracy verification... */
/* Byte-wise copy inside the audio DMEM buffer.
 * inst1 low 16 bits : source offset relative to 0x4f0.
 * inst2 high 16 bits: destination offset relative to 0x4f0.
 * inst2 low 16 bits : byte count, rounded up to a multiple of 4.
 * Every byte address is XORed with S8 on both the read and the write side.
 */
static void DMEMMOVE3(uint32_t inst1, uint32_t inst2)
{
    const uint32_t src_base = (inst1 & 0xFFFF) + 0x4f0;
    const uint32_t dst_base = (inst2 >> 0x10) + 0x4f0;
    const uint32_t nbytes   = (inst2 + 3) & 0xfffc;
    uint32_t i = 0;

    while (i < nbytes) {
        uint8_t *from = (uint8_t *)(BufferSpace + ((i + src_base) ^ S8));
        uint8_t *to   = (uint8_t *)(BufferSpace + ((i + dst_base) ^ S8));

        *to = *from;
        i++;
    }
}

/* Records the loop address: the low 24 bits of inst2 are stored in loopval.
 * inst1 is not used.
 */
static void SETLOOP3(uint32_t inst1, uint32_t inst2)
{
    const uint32_t address_mask = 0xffffff;

    loopval = inst2 & address_mask;
}

/* Reads 4 bytes of packed 4-bit codes from DMEM (offset *in_pos relative to
 * 0x4f0, advanced by 4) and expands them into 8 sign-extended residuals in
 * dst[0..7], applying the frame scale when `scaled` is non-zero. */
static void adpcm3_unpack8(int *dst, unsigned short *in_pos, int scaled, int vscale)
{
    unsigned short j = 0;

    while (j < 8) {
        unsigned char icode = BufferSpace[(0x4f0 + *in_pos)^S8];
        (*in_pos)++;

        /* high nibble moved to the top of a 16-bit word: in effect signed */
        dst[j] = (int16_t)((icode & 0xf0) << 8);
        if (scaled)
            dst[j] = ((int)((int)dst[j] * (int)vscale) >> 16);
        j++;

        /* low nibble, same treatment */
        dst[j] = (int16_t)((icode & 0xf) << 12);
        if (scaled)
            dst[j] = ((int)((int)dst[j] * (int)vscale) >> 16);
        j++;
    }
}

/* Runs the two-history predictor over 8 residuals, appends the 8 clamped
 * samples at `out`, updates *l1 / *l2 with the last two results and returns
 * the advanced output pointer. */
static short *adpcm3_predict8(short *out, const short *book1, const short *book2,
                              const int *inp, int *l1, int *l2)
{
    int a[8];
    unsigned short i;
    unsigned short k;

    for (i = 0; i < 8; i++) {
        a[i]  = (int)book1[i] * (int)*l1;
        a[i] += (int)book2[i] * (int)*l2;
        /* contribution of the earlier residuals of this group */
        for (k = 0; k < i; k++)
            a[i] += (int)book2[i - 1 - k] * inp[k];
        a[i] += (int)inp[i] * (int)2048;
    }

    for (i = 0; i < 8; i++) {
        a[i ^ S] >>= 11;
        a[i ^ S] = clamp_s16(a[i ^ S]);
        *(out++) = a[i ^ S];
    }
    *l1 = a[6];
    *l2 = a[7];

    return out;
}

/* TODO Verified to be 100% Accurate... */
/* ADPCM decoder.
 *
 * inst1 low 24 bits : RDRAM offset of the 32-byte decoder state; the last 16
 *                     decoded samples are written back there on exit.
 * inst2 bits 28..31 : flags. Bit 0 set starts from a zeroed state; otherwise
 *                     bit 1 set loads the state from RDRAM[loopval], and
 *                     bit 1 clear loads it from RDRAM[Address].
 * inst2 bits 16..27 : count; the loop runs while it is positive and each
 *                     iteration subtracts 32.
 * inst2 bits 12..15 : starting input offset relative to DMEM 0x4f0.
 * inst2 bits 0..11  : output offset relative to DMEM 0x4f0.
 *
 * Each iteration consumes one header byte (low nibble = codebook index,
 * high nibble = scale) plus 8 data bytes and produces 16 samples.
 */
static void ADPCM3(uint32_t inst1, uint32_t inst2)
{
    unsigned char Flags = (uint8_t)(inst2 >> 0x1c) & 0xff;
    unsigned int Address = (inst1 & 0xffffff);
    unsigned short inPtr = (inst2 >> 12) & 0xf;
    short *out = (short *)(BufferSpace + (inst2 & 0xfff) + 0x4f0);
    short count = (short)((inst2 >> 16) & 0xfff);
    unsigned char code;
    int vscale;
    int scaled;
    unsigned short index;
    short *book1, *book2;
    int l1;
    int l2;
    int inp1[8];
    int inp2[8];

    memset(out, 0, 32);

    if (!(Flags & 0x1)) {
        if (Flags & 0x2)
            memcpy(out, &rsp.RDRAM[loopval], 32);
        else
            memcpy(out, &rsp.RDRAM[Address], 32);
    }

    /* the first iteration through, these values are
     * either 0 in the case of A_INIT, from a special
     * area of memory in the case of A_LOOP or just
     * the values we calculated the last time
     */
    l1 = out[14 ^ S];
    l2 = out[15 ^ S];
    out += 16;
    while (count > 0) {
        code = BufferSpace[(0x4f0 + inPtr)^S8];
        index = code & 0xf;
        /* index into the adpcm code table */
        index <<= 4;
        book1 = (short *)&adpcmtable[index];
        book2 = book1 + 8;
        /* upper nibble is scale */
        code >>= 4;
        /* 0x8000 would be .5 in 16:16 format, so this is a fractional scale
         * derived from (12 - scale).  For scale >= 12 the residuals are used
         * unscaled; the shift count would be negative there (undefined
         * behaviour), so the shift is only evaluated when it is valid.
         */
        scaled = (code < 12);
        vscale = scaled ? (0x8000 >> ((12 - code) - 1)) : 0;

        /* coded adpcm data lies next */
        inPtr++;

        /* two groups of 8 coded nibbles (4 bytes each),
         * each yielding 8 short pcm residuals
         */
        adpcm3_unpack8(inp1, &inPtr, scaled, vscale);
        adpcm3_unpack8(inp2, &inPtr, scaled, vscale);

        out = adpcm3_predict8(out, book1, book2, inp1, &l1, &l2);
        out = adpcm3_predict8(out, book1, book2, inp2, &l1, &l2);

        count -= 32;
    }
    /* save the last 16 decoded samples as the state for the next call */
    out -= 16;
    memcpy(&rsp.RDRAM[Address], out, 32);
}

/* 4-tap resampler operating on 16-bit samples in the audio DMEM buffer.
 *
 * inst1 low 24 bits : RDRAM offset of the saved state (4 history samples,
 *                     plus the 16-bit fractional position at byte offset 10).
 * inst2 bits 30..31 : flags; bit 0 set starts with zeroed history and a
 *                     zero fractional position instead of loading the state.
 * inst2 bits 14..29 : pitch, doubled to form the per-sample position step.
 * inst2 bits 2..13  : input byte offset relative to DMEM 0x4f0.
 * inst2 bits 0..1   : non-zero selects output at DMEM 0x660, zero at 0x4f0.
 *
 * Produces 0x170/2 output samples, then stores the 4 samples at the final
 * read position and the fractional position back to RDRAM.
 */
static void RESAMPLE3(uint32_t inst1, uint32_t inst2)
{
    const unsigned char mode       = (uint8_t)((inst2 >> 0x1e));
    const unsigned int  step       = ((inst2 >> 0xe) & 0xffff) << 1;
    const uint32_t      state_addr = (inst1 & 0xffffff);
    int16_t *const      dmem16     = (int16_t *)(BufferSpace);
    /* input index in 16-bit units, backed up by the 4 history samples */
    uint32_t in_idx  = ((((inst2 >> 2) & 0xfff) + 0x4f0) / 2) - 4;
    uint32_t out_idx = (inst2 & 0x3) ? (0x660 / 2) : (0x4f0 / 2);
    unsigned int frac = 0;
    int n;
    int k;

    if (mode & 0x1) {
        for (k = 0; k < 4; k++)
            dmem16[(in_idx + k)^S] = 0;
    } else {
        for (k = 0; k < 4; k++)
            dmem16[(in_idx + k)^S] = ((uint16_t *)rsp.RDRAM)[((state_addr / 2) + k)^S];
        frac = *(uint16_t *)(rsp.RDRAM + state_addr + 10);
    }

    for (n = 0; n < 0x170 / 2; n++) {
        /* byte offset of the 4-coefficient row selected by the fraction */
        const unsigned int lut_off = (((frac * 0x40) >> 0x10) * 8);
        int16_t *coef = (int16_t *)(((uint8_t *)ResampleLUT) + lut_off);
        int32_t sum = 0;

        for (k = 0; k < 4; k++) {
            int32_t prod = ((int32_t)dmem16[(in_idx + k)^S] * ((int32_t)((int16_t)coef[k])));
            sum += (int32_t)(prod >> 15);
        }

        sum = clamp_s16(sum);

        dmem16[out_idx ^ S] = (sum);
        out_idx++;

        /* advance: integer part moves the read index, fraction is kept */
        frac += step;
        in_idx += (frac >> 16);
        frac &= 0xffff;
    }

    for (k = 0; k < 4; k++)
        ((uint16_t *)rsp.RDRAM)[((state_addr / 2) + k)^S] = dmem16[(in_idx + k)^S];
    *(uint16_t *)(rsp.RDRAM + state_addr + 10) = frac;
}

/* TODO Needs accuracy verification... */
/* Interleaves the two 16-bit sample buffers at DMEM 0x9d0 (left) and 0xb40
 * (right) into the buffer at DMEM 0x4f0.  Two samples per channel are
 * consumed per iteration, 0x170/4 iterations in total; the order in which
 * the four words are emitted depends on M64P_BIG_ENDIAN.  Neither command
 * word is used.
 */
static void INTERLEAVE3(uint32_t inst1, uint32_t inst2)
{
    uint16_t *left  = (uint16_t *)(BufferSpace + 0x9d0);
    uint16_t *right = (uint16_t *)(BufferSpace + 0xb40);
    uint16_t *dst   = (uint16_t *)(BufferSpace + 0x4f0);
    int pair;

    for (pair = 0; pair < (0x170 / 4); pair++, left += 2, right += 2, dst += 4) {
        const uint16_t l_first  = left[0];
        const uint16_t r_first  = right[0];
        const uint16_t l_second = left[1];
        const uint16_t r_second = right[1];

#ifdef M64P_BIG_ENDIAN
        dst[0] = r_first;
        dst[1] = l_first;
        dst[2] = r_second;
        dst[3] = l_second;
#else
        dst[0] = r_second;
        dst[1] = l_second;
        dst[2] = r_first;
        dst[3] = l_first;
#endif
    }
}

/* Handler installed at slot 0xE of the ABI3 table.  It ignores both command
 * words and performs no work.
 */
static void WHATISTHIS(uint32_t inst1, uint32_t inst2)
{
}

/* Address most recently supplied through the MP3ADDY command. */
static uint32_t setaddr;

/* Stores the low 24 bits of inst2 in setaddr.  inst1 is not used. */
static void MP3ADDY(uint32_t inst1, uint32_t inst2)
{
    const uint32_t address_mask = 0xffffff;

    setaddr = inst2 & address_mask;
}

/*
FFT = Fast Fourier Transform
DCT = Discrete Cosine Transform
MPEG-1 Layer 3 retains Layer 2's 1152-sample window, as well as the FFT polyphase filter for
backward compatibility, but adds a modified DCT filter. DCT's advantages over DFTs (discrete
Fourier transforms) include half as many multiply-accumulate operations and half the
generated coefficients because the sinusoidal portion of the calculation is absent, and DCT
generally involves simpler math. The finite lengths of a conventional DCT's bandpass impulse
responses, however, may result in block-boundary effects. MDCTs overlap the analysis blocks
and lowpass-filter the decoded audio to remove aliases, eliminating these effects. MDCTs also
have a higher transform coding gain than the standard DCT, and their basis functions
correspond to better bandpass response.

MPEG-1 Layer 3's DCT sub-bands are unequally sized, and correspond to the human auditory
system's critical bands. Layer 3 decoders must support both constant- and variable-bit-rate
bit streams. (However, many Layer 1 and 2 decoders also handle variable bit rates.) Finally,
Layer 3 encoders Huffman-code the quantized coefficients before archiving or transmission for
additional lossless compression. Bit streams range from 32 to 320 kbps, and 128-kbps rates
achieve near-CD quality, an important specification to enable dual-channel ISDN
(integrated-services-digital-network) to be the future high-bandwidth pipe to the home.

*/
/* Handler installed at slot 0x0 of the ABI3 table.  It ignores both command
 * words and performs no work.
 */
static void DISABLE(uint32_t inst1, uint32_t inst2)
{
}


/* Command dispatch table for this microcode: entry N handles audio command N.
 * MP3 is not defined in this file.
 */
const acmd_callback_t ABI3[0x10] = {
    DISABLE,     /* 0x0 */
    ADPCM3,      /* 0x1 */
    CLEARBUFF3,  /* 0x2 */
    ENVMIXER3,   /* 0x3 */
    LOADBUFF3,   /* 0x4 */
    RESAMPLE3,   /* 0x5 */
    SAVEBUFF3,   /* 0x6 */
    MP3,         /* 0x7 */
    MP3ADDY,     /* 0x8 */
    SETVOL3,     /* 0x9 */
    DMEMMOVE3,   /* 0xA */
    LOADADPCM3,  /* 0xB */
    MIXER3,      /* 0xC */
    INTERLEAVE3, /* 0xD */
    WHATISTHIS,  /* 0xE */
    SETLOOP3     /* 0xF */
};
Commit	Line	Data
	1	/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
	2	* Mupen64plus-rsp-hle - ucode3.c *
	3	* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
	4	* Copyright (C) 2009 Richard Goedeken *
	5	* Copyright (C) 2002 Hacktarux *
	6	* *
	7	* This program is free software; you can redistribute it and/or modify *
	8	* it under the terms of the GNU General Public License as published by *
	9	* the Free Software Foundation; either version 2 of the License, or *
	10	* (at your option) any later version. *
	11	* *
	12	* This program is distributed in the hope that it will be useful, *
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of *
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
	15	* GNU General Public License for more details. *
	16	* *
	17	* You should have received a copy of the GNU General Public License *
	18	* along with this program; if not, write to the *
	19	* Free Software Foundation, Inc., *
	20	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
	21	* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
	22
	23	# include <string.h>
	24	#include <stdint.h>
	25
	26	#include "m64p_plugin.h"
	27	#include "hle.h"
	28	#include "alist_internal.h"
	29
	30	static void SETVOL3(uint32_t inst1, uint32_t inst2)
	31	{
	32	uint8_t Flags = (uint8_t)(inst1 >> 0x10);
	33	if (Flags & 0x4) { /* 288 */
	34	if (Flags & 0x2) { /* 290 */
	35	Vol_Left = (int16_t)inst1; /* 0x50 */
	36	Env_Dry = (int16_t)(inst2 >> 0x10); /* 0x4E */
	37	Env_Wet = (int16_t)inst2; /* 0x4C */
	38	} else {
	39	VolTrg_Right = (int16_t)inst1; /* 0x46 */
	40	VolRamp_Right = (int32_t)inst2; /* 0x48/0x4A */
	41	}
	42	} else {
	43	VolTrg_Left = (int16_t)inst1; /* 0x40 */
	44	VolRamp_Left = (int32_t)inst2; /* 0x42/0x44 */
	45	}
	46	}
	47
	48	static void ENVMIXER3(uint32_t inst1, uint32_t inst2)
	49	{
	50	uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff);
	51	uint32_t addy = (inst2 & 0xFFFFFF);
	52
	53	short inp = (short )(BufferSpace + 0x4F0);
	54	short out = (short )(BufferSpace + 0x9D0);
	55	short aux1 = (short )(BufferSpace + 0xB40);
	56	short aux2 = (short )(BufferSpace + 0xCB0);
	57	short aux3 = (short )(BufferSpace + 0xE20);
	58	int32_t MainR;
	59	int32_t MainL;
	60	int32_t AuxR;
	61	int32_t AuxL;
	62	int i1, o1, a1, a2, a3;
	63	short zero[8];
	64	int y;
	65
	66	int32_t LAdder, LAcc, LVol;
	67	int32_t RAdder, RAcc, RVol;
	68	/* Most significant part of the Ramp Value */
	69	int16_t RSig, LSig;
	70	int16_t Wet, Dry;
	71	int16_t LTrg, RTrg;
	72
	73	memset(zero, 0, sizeof(zero));
	74
	75	Vol_Right = (int16_t)inst1;
	76
	77	if (flags & A_INIT) {
	78	LAdder = VolRamp_Left / 8;
	79	LAcc = 0;
	80	LVol = Vol_Left;
	81	LSig = (int16_t)(VolRamp_Left >> 16);
	82
	83	RAdder = VolRamp_Right / 8;
	84	RAcc = 0;
	85	RVol = Vol_Right;
	86	RSig = (int16_t)(VolRamp_Right >> 16);
	87
	88	/* Save Wet/Dry values */
	89	Wet = (int16_t)Env_Wet;
	90	Dry = (int16_t)Env_Dry;
	91	/* Save Current Left/Right Targets */
	92	LTrg = VolTrg_Left;
	93	RTrg = VolTrg_Right;
	94	} else {
	95	memcpy((uint8_t *)hleMixerWorkArea, rsp.RDRAM + addy, 80);
	96	Wet = (int16_t )(hleMixerWorkArea + 0); /* 0-1 */
	97	Dry = (int16_t )(hleMixerWorkArea + 2); /* 2-3 */
	98	LTrg = (int16_t )(hleMixerWorkArea + 4); /* 4-5 */
	99	RTrg = (int16_t )(hleMixerWorkArea + 6); /* 6-7 */
	100	LAdder = (int32_t )(hleMixerWorkArea + 8); /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
	101	RAdder = (int32_t )(hleMixerWorkArea + 10); /* 10-11 */
	102	LAcc = (int32_t )(hleMixerWorkArea + 12); /* 12-13 */
	103	RAcc = (int32_t )(hleMixerWorkArea + 14); /* 14-15 */
	104	LVol = (int32_t )(hleMixerWorkArea + 16); /* 16-17 */
	105	RVol = (int32_t )(hleMixerWorkArea + 18); /* 18-19 */
	106	LSig = (int16_t )(hleMixerWorkArea + 20); /* 20-21 */
	107	RSig = (int16_t )(hleMixerWorkArea + 22); /* 22-23 */
	108	}
	109
	110	for (y = 0; y < (0x170 / 2); y++) {
	111
	112	/* Left */
	113	LAcc += LAdder;
	114	LVol += (LAcc >> 16);
	115	LAcc &= 0xFFFF;
	116
	117	/* Right */
	118	RAcc += RAdder;
	119	RVol += (RAcc >> 16);
	120	RAcc &= 0xFFFF;
	121	/****************************************************************/
	122	/* Clamp Left */
	123	if (LSig >= 0) { /* VLT */
	124	if (LVol > LTrg)
	125	LVol = LTrg;
	126	} else { /* VGE */
	127	if (LVol < LTrg)
	128	LVol = LTrg;
	129	}
	130
	131	/* Clamp Right */
	132	if (RSig >= 0) { /* VLT */
	133	if (RVol > RTrg)
	134	RVol = RTrg;
	135	} else { /* VGE */
	136	if (RVol < RTrg)
	137	RVol = RTrg;
	138	}
	139	/****************************************************************/
	140	MainL = ((Dry * LVol) + 0x4000) >> 15;
	141	MainR = ((Dry * RVol) + 0x4000) >> 15;
	142
	143	o1 = out [y ^ S];
	144	a1 = aux1[y ^ S];
	145	i1 = inp [y ^ S];
	146
	147	o1 += ((i1 * MainL) + 0x4000) >> 15;
	148	a1 += ((i1 * MainR) + 0x4000) >> 15;
	149
	150	/****************************************************************/
	151	o1 = clamp_s16(o1);
	152	a1 = clamp_s16(a1);
	153
	154	/****************************************************************/
	155
	156	out[y ^ S] = o1;
	157	aux1[y ^ S] = a1;
	158
	159	/****************************************************************/
	160	a2 = aux2[y ^ S];
	161	a3 = aux3[y ^ S];
	162
	163	AuxL = ((Wet * LVol) + 0x4000) >> 15;
	164	AuxR = ((Wet * RVol) + 0x4000) >> 15;
	165
	166	a2 += ((i1 * AuxL) + 0x4000) >> 15;
	167	a3 += ((i1 * AuxR) + 0x4000) >> 15;
	168
	169	a2 = clamp_s16(a2);
	170	a3 = clamp_s16(a3);
	171
	172	aux2[y ^ S] = a2;
	173	aux3[y ^ S] = a3;
	174	}
	175
	176	(int16_t )(hleMixerWorkArea + 0) = Wet; /* 0-1 */
	177	(int16_t )(hleMixerWorkArea + 2) = Dry; /* 2-3 */
	178	(int16_t )(hleMixerWorkArea + 4) = LTrg; /* 4-5 */
	179	(int16_t )(hleMixerWorkArea + 6) = RTrg; /* 6-7 */
	180	(int32_t )(hleMixerWorkArea + 8) = LAdder; /* 8-9 (hleMixerWorkArea is a 16bit pointer) */
	181	(int32_t )(hleMixerWorkArea + 10) = RAdder; /* 10-11 */
	182	(int32_t )(hleMixerWorkArea + 12) = LAcc; /* 12-13 */
	183	(int32_t )(hleMixerWorkArea + 14) = RAcc; /* 14-15 */
	184	(int32_t )(hleMixerWorkArea + 16) = LVol; /* 16-17 */
	185	(int32_t )(hleMixerWorkArea + 18) = RVol; /* 18-19 */
	186	(int16_t )(hleMixerWorkArea + 20) = LSig; /* 20-21 */
	187	(int16_t )(hleMixerWorkArea + 22) = RSig; /* 22-23 */
	188	memcpy(rsp.RDRAM + addy, (uint8_t *)hleMixerWorkArea, 80);
	189	}
	190
	191	static void CLEARBUFF3(uint32_t inst1, uint32_t inst2)
	192	{
	193	uint16_t addr = (uint16_t)(inst1 & 0xffff);
	194	uint16_t count = (uint16_t)(inst2 & 0xffff);
	195	memset(BufferSpace + addr + 0x4f0, 0, count);
	196	}
	197
	198	/* TODO Needs accuracy verification... */
	199	static void MIXER3(uint32_t inst1, uint32_t inst2)
	200	{
	201	uint16_t dmemin = (uint16_t)(inst2 >> 0x10) + 0x4f0;
	202	uint16_t dmemout = (uint16_t)(inst2 & 0xFFFF) + 0x4f0;
	203	int32_t gain = (int16_t)(inst1 & 0xFFFF);
	204	int32_t temp;
	205	int x;
	206
	207	for (x = 0; x < 0x170; x += 2) {
	208	/* TODO I think I can do this a lot easier */
	209	temp = ((int16_t )(BufferSpace + dmemin + x) * gain) >> 15;
	210	temp += (int16_t )(BufferSpace + dmemout + x);
	211
	212	temp = clamp_s16((int32_t)temp);
	213
	214	(uint16_t )(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF);
	215	}
	216	}
	217
	218	static void LOADBUFF3(uint32_t inst1, uint32_t inst2)
	219	{
	220	uint32_t v0 = (inst2 & 0xfffffc);
	221	uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC);
	222	uint32_t src = (inst1 & 0xffc) + 0x4f0;
	223	memcpy(BufferSpace + src, rsp.RDRAM + v0, cnt);
	224	}
	225
	226	static void SAVEBUFF3(uint32_t inst1, uint32_t inst2)
	227	{
	228	uint32_t v0 = (inst2 & 0xfffffc);
	229	uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC);
	230	uint32_t src = (inst1 & 0xffc) + 0x4f0;
	231	memcpy(rsp.RDRAM + v0, BufferSpace + src, cnt);
	232	}
	233
	234	/* Loads an ADPCM table
	235	* NOTE Works 100% Now 03-13-01
	236	*/
	237	static void LOADADPCM3(uint32_t inst1, uint32_t inst2)
	238	{
	239	uint32_t v0 = (inst2 & 0xffffff);
	240	uint32_t x;
	241
	242	uint16_t table = (uint16_t )(rsp.RDRAM + v0);
	243	for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) {
	244	adpcmtable[(0x0 + (x << 3))^S] = table[0];
	245	adpcmtable[(0x1 + (x << 3))^S] = table[1];
	246
	247	adpcmtable[(0x2 + (x << 3))^S] = table[2];
	248	adpcmtable[(0x3 + (x << 3))^S] = table[3];
	249
	250	adpcmtable[(0x4 + (x << 3))^S] = table[4];
	251	adpcmtable[(0x5 + (x << 3))^S] = table[5];
	252
	253	adpcmtable[(0x6 + (x << 3))^S] = table[6];
	254	adpcmtable[(0x7 + (x << 3))^S] = table[7];
	255	table += 8;
	256	}
	257	}
	258
	259	/* TODO Needs accuracy verification... */
	260	static void DMEMMOVE3(uint32_t inst1, uint32_t inst2)
	261	{
	262	uint32_t cnt;
	263	uint32_t v0 = (inst1 & 0xFFFF) + 0x4f0;
	264	uint32_t v1 = (inst2 >> 0x10) + 0x4f0;
	265	uint32_t count = ((inst2 + 3) & 0xfffc);
	266
	267	for (cnt = 0; cnt < count; cnt++)
	268	(uint8_t )(BufferSpace + ((cnt + v1)^S8)) = (uint8_t )(BufferSpace + ((cnt + v0)^S8));
	269	}
	270
	271	static void SETLOOP3(uint32_t inst1, uint32_t inst2)
	272	{
	273	loopval = (inst2 & 0xffffff);
	274	}
	275
	276	/* TODO Verified to be 100% Accurate... */
	277	static void ADPCM3(uint32_t inst1, uint32_t inst2)
	278	{
	279	unsigned char Flags = (uint8_t)(inst2 >> 0x1c) & 0xff;
	280	unsigned int Address = (inst1 & 0xffffff);
	281	unsigned short inPtr = (inst2 >> 12) & 0xf;
	282	short out = (short )(BufferSpace + (inst2 & 0xfff) + 0x4f0);
	283	short count = (short)((inst2 >> 16) & 0xfff);
	284	unsigned char icode;
	285	unsigned char code;
	286	int vscale;
	287	unsigned short index;
	288	unsigned short j;
	289	int a[8];
	290	short book1, book2;
	291	int l1;
	292	int l2;
	293	int inp1[8];
	294	int inp2[8];
	295
	296	memset(out, 0, 32);
	297
	298	if (!(Flags & 0x1)) {
	299	if (Flags & 0x2)
	300	memcpy(out, &rsp.RDRAM[loopval], 32);
	301	else
	302	memcpy(out, &rsp.RDRAM[Address], 32);
	303	}
	304
	305	l1 = out[14 ^ S];
	306	l2 = out[15 ^ S];
	307	out += 16;
	308	while (count > 0) {
	309	/* the first interation through, these values are
	310	* either 0 in the case of A_INIT, from a special
	311	* area of memory in the case of A_LOOP or just
	312	* the values we calculated the last time
	313	*/
	314
	315	code = BufferSpace[(0x4f0 + inPtr)^S8];
	316	index = code & 0xf;
	317	/* index into the adpcm code table */
	318	index <<= 4;
	319	book1 = (short *)&adpcmtable[index];
	320	book2 = book1 + 8;
	321	/* upper nibble is scale */
	322	code >>= 4;
	323	/* very strange. 0x8000 would be .5 in 16:16 format
	324	* so this appears to be a fractional scale based
	325	* on the 12 based inverse of the scale value. note
	326	* that this could be negative, in which case we do
	327	* not use the calculated vscale value... see the
	328	* if(code>12) check below
	329	*/
	330	vscale = (0x8000 >> ((12 - code) - 1));
	331
	332	/* coded adpcm data lies next */
	333	inPtr++;
	334	j = 0;
	335	/* loop of 8, for 8 coded nibbles from 4 bytes
	336	* which yields 8 short pcm values
	337	*/
	338	while (j < 8) {
	339	icode = BufferSpace[(0x4f0 + inPtr)^S8];
	340	inPtr++;
	341
	342	/* this will in effect be signed */
	343	inp1[j] = (int16_t)((icode & 0xf0) << 8);
	344	if (code < 12)
	345	inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16);
	346	j++;
	347
	348	inp1[j] = (int16_t)((icode & 0xf) << 12);
	349	if (code < 12)
	350	inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16);
	351	j++;
	352	}
	353	j = 0;
	354	while (j < 8) {
	355	icode = BufferSpace[(0x4f0 + inPtr)^S8];
	356	inPtr++;
	357
	358	/* this will in effect be signed */
	359	inp2[j] = (short)((icode & 0xf0) << 8);
	360	if (code < 12)
	361	inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16);
	362	j++;
	363
	364	inp2[j] = (short)((icode & 0xf) << 12);
	365	if (code < 12)
	366	inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16);
	367	j++;
	368	}
	369
	370	a[0] = (int)book1[0] * (int)l1;
	371	a[0] += (int)book2[0] * (int)l2;
	372	a[0] += (int)inp1[0] * (int)2048;
	373
	374	a[1] = (int)book1[1] * (int)l1;
	375	a[1] += (int)book2[1] * (int)l2;
	376	a[1] += (int)book2[0] * inp1[0];
	377	a[1] += (int)inp1[1] * (int)2048;
	378
	379	a[2] = (int)book1[2] * (int)l1;
	380	a[2] += (int)book2[2] * (int)l2;
	381	a[2] += (int)book2[1] * inp1[0];
	382	a[2] += (int)book2[0] * inp1[1];
	383	a[2] += (int)inp1[2] * (int)2048;
	384
	385	a[3] = (int)book1[3] * (int)l1;
	386	a[3] += (int)book2[3] * (int)l2;
	387	a[3] += (int)book2[2] * inp1[0];
	388	a[3] += (int)book2[1] * inp1[1];
	389	a[3] += (int)book2[0] * inp1[2];
	390	a[3] += (int)inp1[3] * (int)2048;
	391
	392	a[4] = (int)book1[4] * (int)l1;
	393	a[4] += (int)book2[4] * (int)l2;
	394	a[4] += (int)book2[3] * inp1[0];
	395	a[4] += (int)book2[2] * inp1[1];
	396	a[4] += (int)book2[1] * inp1[2];
	397	a[4] += (int)book2[0] * inp1[3];
	398	a[4] += (int)inp1[4] * (int)2048;
	399
	400	a[5] = (int)book1[5] * (int)l1;
	401	a[5] += (int)book2[5] * (int)l2;
	402	a[5] += (int)book2[4] * inp1[0];
	403	a[5] += (int)book2[3] * inp1[1];
	404	a[5] += (int)book2[2] * inp1[2];
	405	a[5] += (int)book2[1] * inp1[3];
	406	a[5] += (int)book2[0] * inp1[4];
	407	a[5] += (int)inp1[5] * (int)2048;
	408
	409	a[6] = (int)book1[6] * (int)l1;
	410	a[6] += (int)book2[6] * (int)l2;
	411	a[6] += (int)book2[5] * inp1[0];
	412	a[6] += (int)book2[4] * inp1[1];
	413	a[6] += (int)book2[3] * inp1[2];
	414	a[6] += (int)book2[2] * inp1[3];
	415	a[6] += (int)book2[1] * inp1[4];
	416	a[6] += (int)book2[0] * inp1[5];
	417	a[6] += (int)inp1[6] * (int)2048;
	418
	419	a[7] = (int)book1[7] * (int)l1;
	420	a[7] += (int)book2[7] * (int)l2;
	421	a[7] += (int)book2[6] * inp1[0];
	422	a[7] += (int)book2[5] * inp1[1];
	423	a[7] += (int)book2[4] * inp1[2];
	424	a[7] += (int)book2[3] * inp1[3];
	425	a[7] += (int)book2[2] * inp1[4];
	426	a[7] += (int)book2[1] * inp1[5];
	427	a[7] += (int)book2[0] * inp1[6];
	428	a[7] += (int)inp1[7] * (int)2048;
	429
	430	for (j = 0; j < 8; j++) {
	431	a[j ^ S] >>= 11;
	432	a[j ^ S] = clamp_s16(a[j ^ S]);
	433	*(out++) = a[j ^ S];
	434	}
	435	l1 = a[6];
	436	l2 = a[7];
	437
	438	a[0] = (int)book1[0] * (int)l1;
	439	a[0] += (int)book2[0] * (int)l2;
	440	a[0] += (int)inp2[0] * (int)2048;
	441
	442	a[1] = (int)book1[1] * (int)l1;
	443	a[1] += (int)book2[1] * (int)l2;
	444	a[1] += (int)book2[0] * inp2[0];
	445	a[1] += (int)inp2[1] * (int)2048;
	446
	447	a[2] = (int)book1[2] * (int)l1;
	448	a[2] += (int)book2[2] * (int)l2;
	449	a[2] += (int)book2[1] * inp2[0];
	450	a[2] += (int)book2[0] * inp2[1];
	451	a[2] += (int)inp2[2] * (int)2048;
	452
	453	a[3] = (int)book1[3] * (int)l1;
	454	a[3] += (int)book2[3] * (int)l2;
	455	a[3] += (int)book2[2] * inp2[0];
	456	a[3] += (int)book2[1] * inp2[1];
	457	a[3] += (int)book2[0] * inp2[2];
	458	a[3] += (int)inp2[3] * (int)2048;
	459
	460	a[4] = (int)book1[4] * (int)l1;
	461	a[4] += (int)book2[4] * (int)l2;
	462	a[4] += (int)book2[3] * inp2[0];
	463	a[4] += (int)book2[2] * inp2[1];
	464	a[4] += (int)book2[1] * inp2[2];
	465	a[4] += (int)book2[0] * inp2[3];
	466	a[4] += (int)inp2[4] * (int)2048;
	467
	468	a[5] = (int)book1[5] * (int)l1;
	469	a[5] += (int)book2[5] * (int)l2;
	470	a[5] += (int)book2[4] * inp2[0];
	471	a[5] += (int)book2[3] * inp2[1];
	472	a[5] += (int)book2[2] * inp2[2];
	473	a[5] += (int)book2[1] * inp2[3];
	474	a[5] += (int)book2[0] * inp2[4];
	475	a[5] += (int)inp2[5] * (int)2048;
	476
	477	a[6] = (int)book1[6] * (int)l1;
	478	a[6] += (int)book2[6] * (int)l2;
	479	a[6] += (int)book2[5] * inp2[0];
	480	a[6] += (int)book2[4] * inp2[1];
	481	a[6] += (int)book2[3] * inp2[2];
	482	a[6] += (int)book2[2] * inp2[3];
	483	a[6] += (int)book2[1] * inp2[4];
	484	a[6] += (int)book2[0] * inp2[5];
	485	a[6] += (int)inp2[6] * (int)2048;
	486
	487	a[7] = (int)book1[7] * (int)l1;
	488	a[7] += (int)book2[7] * (int)l2;
	489	a[7] += (int)book2[6] * inp2[0];
	490	a[7] += (int)book2[5] * inp2[1];
	491	a[7] += (int)book2[4] * inp2[2];
	492	a[7] += (int)book2[3] * inp2[3];
	493	a[7] += (int)book2[2] * inp2[4];
	494	a[7] += (int)book2[1] * inp2[5];
	495	a[7] += (int)book2[0] * inp2[6];
	496	a[7] += (int)inp2[7] * (int)2048;
	497
	498	for (j = 0; j < 8; j++) {
	499	a[j ^ S] >>= 11;
	500	a[j ^ S] = clamp_s16(a[j ^ S]);
	501	*(out++) = a[j ^ S];
	502	}
	503	l1 = a[6];
	504	l2 = a[7];
	505
	506	count -= 32;
	507	}
	508	out -= 16;
	509	memcpy(&rsp.RDRAM[Address], out, 32);
	510	}
	511
	512	static void RESAMPLE3(uint32_t inst1, uint32_t inst2)
	513	{
	514	unsigned char Flags = (uint8_t)((inst2 >> 0x1e));
	515	unsigned int Pitch = ((inst2 >> 0xe) & 0xffff) << 1;
	516	uint32_t addy = (inst1 & 0xffffff);
	517	unsigned int Accum = 0;
	518	unsigned int location;
	519	int16_t *lut;
	520	short *dst;
	521	int16_t *src;
	522	uint32_t srcPtr = ((((inst2 >> 2) & 0xfff) + 0x4f0) / 2);
	523	uint32_t dstPtr;
	524	int32_t temp;
	525	int32_t accum;
	526	int x, i;
	527
	528	dst = (short *)(BufferSpace);
	529	src = (int16_t *)(BufferSpace);
	530
	531	srcPtr -= 4;
	532
	533	if (inst2 & 0x3)
	534	dstPtr = 0x660 / 2;
	535	else
	536	dstPtr = 0x4f0 / 2;
	537
	538	if ((Flags & 0x1) == 0) {
	539	for (x = 0; x < 4; x++)
	540	src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S];
	541	Accum = (uint16_t )(rsp.RDRAM + addy + 10);
	542	} else {
	543	for (x = 0; x < 4; x++)
	544	src[(srcPtr + x)^S] = 0;
	545	}
	546
	547	for (i = 0; i < 0x170 / 2; i++) {
	548	location = (((Accum * 0x40) >> 0x10) * 8);
	549	lut = (int16_t )(((uint8_t )ResampleLUT) + location);
	550
	551	temp = ((int32_t) * (int16_t )(src + ((srcPtr + 0)^S)) ((int32_t)((int16_t)lut[0])));
	552	accum = (int32_t)(temp >> 15);
	553
	554	temp = ((int32_t) * (int16_t )(src + ((srcPtr + 1)^S)) ((int32_t)((int16_t)lut[1])));
	555	accum += (int32_t)(temp >> 15);
	556
	557	temp = ((int32_t) * (int16_t )(src + ((srcPtr + 2)^S)) ((int32_t)((int16_t)lut[2])));
	558	accum += (int32_t)(temp >> 15);
	559
	560	temp = ((int32_t) * (int16_t )(src + ((srcPtr + 3)^S)) ((int32_t)((int16_t)lut[3])));
	561	accum += (int32_t)(temp >> 15);
	562
	563	accum = clamp_s16(accum);
	564
	565	dst[dstPtr ^ S] = (accum);
	566	dstPtr++;
	567	Accum += Pitch;
	568	srcPtr += (Accum >> 16);
	569	Accum &= 0xffff;
	570	}
	571	for (x = 0; x < 4; x++)
	572	((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S];
	573	(uint16_t )(rsp.RDRAM + addy + 10) = Accum;
	574	}
	575
	576	/* TODO Needs accuracy verification... */
	577	static void INTERLEAVE3(uint32_t inst1, uint32_t inst2)
	578	{
	579	uint16_t outbuff = (uint16_t )(BufferSpace + 0x4f0);
	580	uint16_t *inSrcR;
	581	uint16_t *inSrcL;
	582	uint16_t Left, Right, Left2, Right2;
	583	int x;
	584
	585	inSrcR = (uint16_t *)(BufferSpace + 0xb40);
	586	inSrcL = (uint16_t *)(BufferSpace + 0x9d0);
	587
	588	for (x = 0; x < (0x170 / 4); x++) {
	589	Left = *(inSrcL++);
	590	Right = *(inSrcR++);
	591	Left2 = *(inSrcL++);
	592	Right2 = *(inSrcR++);
	593
	594	#ifdef M64P_BIG_ENDIAN
	595	*(outbuff++) = Right;
	596	*(outbuff++) = Left;
	597	*(outbuff++) = Right2;
	598	*(outbuff++) = Left2;
	599	#else
	600	*(outbuff++) = Right2;
	601	*(outbuff++) = Left2;
	602	*(outbuff++) = Right;
	603	*(outbuff++) = Left;
	604	#endif
	605	}
	606	}
	607
	608	static void WHATISTHIS(uint32_t inst1, uint32_t inst2)
	609	{
	610	}
	611
	612	static uint32_t setaddr;
	613	static void MP3ADDY(uint32_t inst1, uint32_t inst2)
	614	{
	615	setaddr = (inst2 & 0xffffff);
	616	}
	617
	618	/*
	619	FFT = Fast Fourier Transform
	620	DCT = Discrete Cosine Transform
	621	MPEG-1 Layer 3 retains Layer 2's 1152-sample window, as well as the FFT polyphase filter for
	622	backward compatibility, but adds a modified DCT filter. DCT's advantages over DFTs (discrete
	623	Fourier transforms) include half as many multiply-accumulate operations and half the
	624	generated coefficients because the sinusoidal portion of the calculation is absent, and DCT
generally involves simpler math. The finite length of a conventional DCT's bandpass impulse
responses, however, may result in block-boundary effects. MDCTs overlap the analysis blocks
and lowpass-filter the decoded audio to remove aliases, eliminating these effects. MDCTs also
have a higher transform coding gain than the standard DCT, and their basis functions
	629	correspond to better bandpass response.
	630
	631	MPEG-1 Layer 3's DCT sub-bands are unequally sized, and correspond to the human auditory
system's critical bands. Layer 3 decoders must support both constant- and variable-bit-rate
	633	bit streams. (However, many Layer 1 and 2 decoders also handle variable bit rates). Finally,
	634	Layer 3 encoders Huffman-code the quantized coefficients before archiving or transmission for
	635	additional lossless compression. Bit streams range from 32 to 320 kbps, and 128-kbps rates
	636	achieve near-CD quality, an important specification to enable dual-channel ISDN
	637	(integrated-services-digital-network) to be the future high-bandwidth pipe to the home.
	638
	639	*/
	640	static void DISABLE(uint32_t inst1, uint32_t inst2)
	641	{
	642	}
	643
	644
	645	const acmd_callback_t ABI3[0x10] = {
	646	DISABLE , ADPCM3 , CLEARBUFF3, ENVMIXER3 , LOADBUFF3, RESAMPLE3 , SAVEBUFF3, MP3,
	647	MP3ADDY, SETVOL3, DMEMMOVE3 , LOADADPCM3 , MIXER3 , INTERLEAVE3, WHATISTHIS , SETLOOP3
	648	};