| 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | * Mupen64plus-rsp-hle - ucode3.c * |
| 3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
| 4 | * Copyright (C) 2009 Richard Goedeken * |
| 5 | * Copyright (C) 2002 Hacktarux * |
| 6 | * * |
| 7 | * This program is free software; you can redistribute it and/or modify * |
| 8 | * it under the terms of the GNU General Public License as published by * |
| 9 | * the Free Software Foundation; either version 2 of the License, or * |
| 10 | * (at your option) any later version. * |
| 11 | * * |
| 12 | * This program is distributed in the hope that it will be useful, * |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 15 | * GNU General Public License for more details. * |
| 16 | * * |
| 17 | * You should have received a copy of the GNU General Public License * |
| 18 | * along with this program; if not, write to the * |
| 19 | * Free Software Foundation, Inc., * |
| 20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
| 21 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 22 | |
| 23 | # include <string.h> |
| 24 | #include <stdint.h> |
| 25 | |
| 26 | #include "m64p_plugin.h" |
| 27 | #include "hle.h" |
| 28 | #include "alist_internal.h" |
| 29 | |
| 30 | static void SETVOL3(uint32_t inst1, uint32_t inst2) |
| 31 | { |
| 32 | uint8_t Flags = (uint8_t)(inst1 >> 0x10); |
| 33 | if (Flags & 0x4) { /* 288 */ |
| 34 | if (Flags & 0x2) { /* 290 */ |
| 35 | Vol_Left = (int16_t)inst1; /* 0x50 */ |
| 36 | Env_Dry = (int16_t)(inst2 >> 0x10); /* 0x4E */ |
| 37 | Env_Wet = (int16_t)inst2; /* 0x4C */ |
| 38 | } else { |
| 39 | VolTrg_Right = (int16_t)inst1; /* 0x46 */ |
| 40 | VolRamp_Right = (int32_t)inst2; /* 0x48/0x4A */ |
| 41 | } |
| 42 | } else { |
| 43 | VolTrg_Left = (int16_t)inst1; /* 0x40 */ |
| 44 | VolRamp_Left = (int32_t)inst2; /* 0x42/0x44 */ |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | static void ENVMIXER3(uint32_t inst1, uint32_t inst2) |
| 49 | { |
| 50 | uint8_t flags = (uint8_t)((inst1 >> 16) & 0xff); |
| 51 | uint32_t addy = (inst2 & 0xFFFFFF); |
| 52 | |
| 53 | short *inp = (short *)(BufferSpace + 0x4F0); |
| 54 | short *out = (short *)(BufferSpace + 0x9D0); |
| 55 | short *aux1 = (short *)(BufferSpace + 0xB40); |
| 56 | short *aux2 = (short *)(BufferSpace + 0xCB0); |
| 57 | short *aux3 = (short *)(BufferSpace + 0xE20); |
| 58 | int32_t MainR; |
| 59 | int32_t MainL; |
| 60 | int32_t AuxR; |
| 61 | int32_t AuxL; |
| 62 | int i1, o1, a1, a2, a3; |
| 63 | short zero[8]; |
| 64 | int y; |
| 65 | |
| 66 | int32_t LAdder, LAcc, LVol; |
| 67 | int32_t RAdder, RAcc, RVol; |
| 68 | /* Most significant part of the Ramp Value */ |
| 69 | int16_t RSig, LSig; |
| 70 | int16_t Wet, Dry; |
| 71 | int16_t LTrg, RTrg; |
| 72 | |
| 73 | memset(zero, 0, sizeof(zero)); |
| 74 | |
| 75 | Vol_Right = (int16_t)inst1; |
| 76 | |
| 77 | if (flags & A_INIT) { |
| 78 | LAdder = VolRamp_Left / 8; |
| 79 | LAcc = 0; |
| 80 | LVol = Vol_Left; |
| 81 | LSig = (int16_t)(VolRamp_Left >> 16); |
| 82 | |
| 83 | RAdder = VolRamp_Right / 8; |
| 84 | RAcc = 0; |
| 85 | RVol = Vol_Right; |
| 86 | RSig = (int16_t)(VolRamp_Right >> 16); |
| 87 | |
| 88 | /* Save Wet/Dry values */ |
| 89 | Wet = (int16_t)Env_Wet; |
| 90 | Dry = (int16_t)Env_Dry; |
| 91 | /* Save Current Left/Right Targets */ |
| 92 | LTrg = VolTrg_Left; |
| 93 | RTrg = VolTrg_Right; |
| 94 | } else { |
| 95 | memcpy((uint8_t *)hleMixerWorkArea, rsp.RDRAM + addy, 80); |
| 96 | Wet = *(int16_t *)(hleMixerWorkArea + 0); /* 0-1 */ |
| 97 | Dry = *(int16_t *)(hleMixerWorkArea + 2); /* 2-3 */ |
| 98 | LTrg = *(int16_t *)(hleMixerWorkArea + 4); /* 4-5 */ |
| 99 | RTrg = *(int16_t *)(hleMixerWorkArea + 6); /* 6-7 */ |
| 100 | LAdder = *(int32_t *)(hleMixerWorkArea + 8); /* 8-9 (hleMixerWorkArea is a 16bit pointer) */ |
| 101 | RAdder = *(int32_t *)(hleMixerWorkArea + 10); /* 10-11 */ |
| 102 | LAcc = *(int32_t *)(hleMixerWorkArea + 12); /* 12-13 */ |
| 103 | RAcc = *(int32_t *)(hleMixerWorkArea + 14); /* 14-15 */ |
| 104 | LVol = *(int32_t *)(hleMixerWorkArea + 16); /* 16-17 */ |
| 105 | RVol = *(int32_t *)(hleMixerWorkArea + 18); /* 18-19 */ |
| 106 | LSig = *(int16_t *)(hleMixerWorkArea + 20); /* 20-21 */ |
| 107 | RSig = *(int16_t *)(hleMixerWorkArea + 22); /* 22-23 */ |
| 108 | } |
| 109 | |
| 110 | for (y = 0; y < (0x170 / 2); y++) { |
| 111 | |
| 112 | /* Left */ |
| 113 | LAcc += LAdder; |
| 114 | LVol += (LAcc >> 16); |
| 115 | LAcc &= 0xFFFF; |
| 116 | |
| 117 | /* Right */ |
| 118 | RAcc += RAdder; |
| 119 | RVol += (RAcc >> 16); |
| 120 | RAcc &= 0xFFFF; |
| 121 | /****************************************************************/ |
| 122 | /* Clamp Left */ |
| 123 | if (LSig >= 0) { /* VLT */ |
| 124 | if (LVol > LTrg) |
| 125 | LVol = LTrg; |
| 126 | } else { /* VGE */ |
| 127 | if (LVol < LTrg) |
| 128 | LVol = LTrg; |
| 129 | } |
| 130 | |
| 131 | /* Clamp Right */ |
| 132 | if (RSig >= 0) { /* VLT */ |
| 133 | if (RVol > RTrg) |
| 134 | RVol = RTrg; |
| 135 | } else { /* VGE */ |
| 136 | if (RVol < RTrg) |
| 137 | RVol = RTrg; |
| 138 | } |
| 139 | /****************************************************************/ |
| 140 | MainL = ((Dry * LVol) + 0x4000) >> 15; |
| 141 | MainR = ((Dry * RVol) + 0x4000) >> 15; |
| 142 | |
| 143 | o1 = out [y ^ S]; |
| 144 | a1 = aux1[y ^ S]; |
| 145 | i1 = inp [y ^ S]; |
| 146 | |
| 147 | o1 += ((i1 * MainL) + 0x4000) >> 15; |
| 148 | a1 += ((i1 * MainR) + 0x4000) >> 15; |
| 149 | |
| 150 | /****************************************************************/ |
| 151 | o1 = clamp_s16(o1); |
| 152 | a1 = clamp_s16(a1); |
| 153 | |
| 154 | /****************************************************************/ |
| 155 | |
| 156 | out[y ^ S] = o1; |
| 157 | aux1[y ^ S] = a1; |
| 158 | |
| 159 | /****************************************************************/ |
| 160 | a2 = aux2[y ^ S]; |
| 161 | a3 = aux3[y ^ S]; |
| 162 | |
| 163 | AuxL = ((Wet * LVol) + 0x4000) >> 15; |
| 164 | AuxR = ((Wet * RVol) + 0x4000) >> 15; |
| 165 | |
| 166 | a2 += ((i1 * AuxL) + 0x4000) >> 15; |
| 167 | a3 += ((i1 * AuxR) + 0x4000) >> 15; |
| 168 | |
| 169 | a2 = clamp_s16(a2); |
| 170 | a3 = clamp_s16(a3); |
| 171 | |
| 172 | aux2[y ^ S] = a2; |
| 173 | aux3[y ^ S] = a3; |
| 174 | } |
| 175 | |
| 176 | *(int16_t *)(hleMixerWorkArea + 0) = Wet; /* 0-1 */ |
| 177 | *(int16_t *)(hleMixerWorkArea + 2) = Dry; /* 2-3 */ |
| 178 | *(int16_t *)(hleMixerWorkArea + 4) = LTrg; /* 4-5 */ |
| 179 | *(int16_t *)(hleMixerWorkArea + 6) = RTrg; /* 6-7 */ |
| 180 | *(int32_t *)(hleMixerWorkArea + 8) = LAdder; /* 8-9 (hleMixerWorkArea is a 16bit pointer) */ |
| 181 | *(int32_t *)(hleMixerWorkArea + 10) = RAdder; /* 10-11 */ |
| 182 | *(int32_t *)(hleMixerWorkArea + 12) = LAcc; /* 12-13 */ |
| 183 | *(int32_t *)(hleMixerWorkArea + 14) = RAcc; /* 14-15 */ |
| 184 | *(int32_t *)(hleMixerWorkArea + 16) = LVol; /* 16-17 */ |
| 185 | *(int32_t *)(hleMixerWorkArea + 18) = RVol; /* 18-19 */ |
| 186 | *(int16_t *)(hleMixerWorkArea + 20) = LSig; /* 20-21 */ |
| 187 | *(int16_t *)(hleMixerWorkArea + 22) = RSig; /* 22-23 */ |
| 188 | memcpy(rsp.RDRAM + addy, (uint8_t *)hleMixerWorkArea, 80); |
| 189 | } |
| 190 | |
| 191 | static void CLEARBUFF3(uint32_t inst1, uint32_t inst2) |
| 192 | { |
| 193 | uint16_t addr = (uint16_t)(inst1 & 0xffff); |
| 194 | uint16_t count = (uint16_t)(inst2 & 0xffff); |
| 195 | memset(BufferSpace + addr + 0x4f0, 0, count); |
| 196 | } |
| 197 | |
| 198 | /* TODO Needs accuracy verification... */ |
| 199 | static void MIXER3(uint32_t inst1, uint32_t inst2) |
| 200 | { |
| 201 | uint16_t dmemin = (uint16_t)(inst2 >> 0x10) + 0x4f0; |
| 202 | uint16_t dmemout = (uint16_t)(inst2 & 0xFFFF) + 0x4f0; |
| 203 | int32_t gain = (int16_t)(inst1 & 0xFFFF); |
| 204 | int32_t temp; |
| 205 | int x; |
| 206 | |
| 207 | for (x = 0; x < 0x170; x += 2) { |
| 208 | /* TODO I think I can do this a lot easier */ |
| 209 | temp = (*(int16_t *)(BufferSpace + dmemin + x) * gain) >> 15; |
| 210 | temp += *(int16_t *)(BufferSpace + dmemout + x); |
| 211 | |
| 212 | temp = clamp_s16((int32_t)temp); |
| 213 | |
| 214 | *(uint16_t *)(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | static void LOADBUFF3(uint32_t inst1, uint32_t inst2) |
| 219 | { |
| 220 | uint32_t v0 = (inst2 & 0xfffffc); |
| 221 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
| 222 | uint32_t src = (inst1 & 0xffc) + 0x4f0; |
| 223 | memcpy(BufferSpace + src, rsp.RDRAM + v0, cnt); |
| 224 | } |
| 225 | |
| 226 | static void SAVEBUFF3(uint32_t inst1, uint32_t inst2) |
| 227 | { |
| 228 | uint32_t v0 = (inst2 & 0xfffffc); |
| 229 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
| 230 | uint32_t src = (inst1 & 0xffc) + 0x4f0; |
| 231 | memcpy(rsp.RDRAM + v0, BufferSpace + src, cnt); |
| 232 | } |
| 233 | |
| 234 | /* Loads an ADPCM table |
| 235 | * NOTE Works 100% Now 03-13-01 |
| 236 | */ |
| 237 | static void LOADADPCM3(uint32_t inst1, uint32_t inst2) |
| 238 | { |
| 239 | uint32_t v0 = (inst2 & 0xffffff); |
| 240 | uint32_t x; |
| 241 | |
| 242 | uint16_t *table = (uint16_t *)(rsp.RDRAM + v0); |
| 243 | for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) { |
| 244 | adpcmtable[(0x0 + (x << 3))^S] = table[0]; |
| 245 | adpcmtable[(0x1 + (x << 3))^S] = table[1]; |
| 246 | |
| 247 | adpcmtable[(0x2 + (x << 3))^S] = table[2]; |
| 248 | adpcmtable[(0x3 + (x << 3))^S] = table[3]; |
| 249 | |
| 250 | adpcmtable[(0x4 + (x << 3))^S] = table[4]; |
| 251 | adpcmtable[(0x5 + (x << 3))^S] = table[5]; |
| 252 | |
| 253 | adpcmtable[(0x6 + (x << 3))^S] = table[6]; |
| 254 | adpcmtable[(0x7 + (x << 3))^S] = table[7]; |
| 255 | table += 8; |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | /* TODO Needs accuracy verification... */ |
| 260 | static void DMEMMOVE3(uint32_t inst1, uint32_t inst2) |
| 261 | { |
| 262 | uint32_t cnt; |
| 263 | uint32_t v0 = (inst1 & 0xFFFF) + 0x4f0; |
| 264 | uint32_t v1 = (inst2 >> 0x10) + 0x4f0; |
| 265 | uint32_t count = ((inst2 + 3) & 0xfffc); |
| 266 | |
| 267 | for (cnt = 0; cnt < count; cnt++) |
| 268 | *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8)); |
| 269 | } |
| 270 | |
| 271 | static void SETLOOP3(uint32_t inst1, uint32_t inst2) |
| 272 | { |
| 273 | loopval = (inst2 & 0xffffff); |
| 274 | } |
| 275 | |
| 276 | /* TODO Verified to be 100% Accurate... */ |
| 277 | static void ADPCM3(uint32_t inst1, uint32_t inst2) |
| 278 | { |
| 279 | unsigned char Flags = (uint8_t)(inst2 >> 0x1c) & 0xff; |
| 280 | unsigned int Address = (inst1 & 0xffffff); |
| 281 | unsigned short inPtr = (inst2 >> 12) & 0xf; |
| 282 | short *out = (short *)(BufferSpace + (inst2 & 0xfff) + 0x4f0); |
| 283 | short count = (short)((inst2 >> 16) & 0xfff); |
| 284 | unsigned char icode; |
| 285 | unsigned char code; |
| 286 | int vscale; |
| 287 | unsigned short index; |
| 288 | unsigned short j; |
| 289 | int a[8]; |
| 290 | short *book1, *book2; |
| 291 | int l1; |
| 292 | int l2; |
| 293 | int inp1[8]; |
| 294 | int inp2[8]; |
| 295 | |
| 296 | memset(out, 0, 32); |
| 297 | |
| 298 | if (!(Flags & 0x1)) { |
| 299 | if (Flags & 0x2) |
| 300 | memcpy(out, &rsp.RDRAM[loopval], 32); |
| 301 | else |
| 302 | memcpy(out, &rsp.RDRAM[Address], 32); |
| 303 | } |
| 304 | |
| 305 | l1 = out[14 ^ S]; |
| 306 | l2 = out[15 ^ S]; |
| 307 | out += 16; |
| 308 | while (count > 0) { |
| 309 | /* the first interation through, these values are |
| 310 | * either 0 in the case of A_INIT, from a special |
| 311 | * area of memory in the case of A_LOOP or just |
| 312 | * the values we calculated the last time |
| 313 | */ |
| 314 | |
| 315 | code = BufferSpace[(0x4f0 + inPtr)^S8]; |
| 316 | index = code & 0xf; |
| 317 | /* index into the adpcm code table */ |
| 318 | index <<= 4; |
| 319 | book1 = (short *)&adpcmtable[index]; |
| 320 | book2 = book1 + 8; |
| 321 | /* upper nibble is scale */ |
| 322 | code >>= 4; |
| 323 | /* very strange. 0x8000 would be .5 in 16:16 format |
| 324 | * so this appears to be a fractional scale based |
| 325 | * on the 12 based inverse of the scale value. note |
| 326 | * that this could be negative, in which case we do |
| 327 | * not use the calculated vscale value... see the |
| 328 | * if(code>12) check below |
| 329 | */ |
| 330 | vscale = (0x8000 >> ((12 - code) - 1)); |
| 331 | |
| 332 | /* coded adpcm data lies next */ |
| 333 | inPtr++; |
| 334 | j = 0; |
| 335 | /* loop of 8, for 8 coded nibbles from 4 bytes |
| 336 | * which yields 8 short pcm values |
| 337 | */ |
| 338 | while (j < 8) { |
| 339 | icode = BufferSpace[(0x4f0 + inPtr)^S8]; |
| 340 | inPtr++; |
| 341 | |
| 342 | /* this will in effect be signed */ |
| 343 | inp1[j] = (int16_t)((icode & 0xf0) << 8); |
| 344 | if (code < 12) |
| 345 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 346 | j++; |
| 347 | |
| 348 | inp1[j] = (int16_t)((icode & 0xf) << 12); |
| 349 | if (code < 12) |
| 350 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 351 | j++; |
| 352 | } |
| 353 | j = 0; |
| 354 | while (j < 8) { |
| 355 | icode = BufferSpace[(0x4f0 + inPtr)^S8]; |
| 356 | inPtr++; |
| 357 | |
| 358 | /* this will in effect be signed */ |
| 359 | inp2[j] = (short)((icode & 0xf0) << 8); |
| 360 | if (code < 12) |
| 361 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 362 | j++; |
| 363 | |
| 364 | inp2[j] = (short)((icode & 0xf) << 12); |
| 365 | if (code < 12) |
| 366 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 367 | j++; |
| 368 | } |
| 369 | |
| 370 | a[0] = (int)book1[0] * (int)l1; |
| 371 | a[0] += (int)book2[0] * (int)l2; |
| 372 | a[0] += (int)inp1[0] * (int)2048; |
| 373 | |
| 374 | a[1] = (int)book1[1] * (int)l1; |
| 375 | a[1] += (int)book2[1] * (int)l2; |
| 376 | a[1] += (int)book2[0] * inp1[0]; |
| 377 | a[1] += (int)inp1[1] * (int)2048; |
| 378 | |
| 379 | a[2] = (int)book1[2] * (int)l1; |
| 380 | a[2] += (int)book2[2] * (int)l2; |
| 381 | a[2] += (int)book2[1] * inp1[0]; |
| 382 | a[2] += (int)book2[0] * inp1[1]; |
| 383 | a[2] += (int)inp1[2] * (int)2048; |
| 384 | |
| 385 | a[3] = (int)book1[3] * (int)l1; |
| 386 | a[3] += (int)book2[3] * (int)l2; |
| 387 | a[3] += (int)book2[2] * inp1[0]; |
| 388 | a[3] += (int)book2[1] * inp1[1]; |
| 389 | a[3] += (int)book2[0] * inp1[2]; |
| 390 | a[3] += (int)inp1[3] * (int)2048; |
| 391 | |
| 392 | a[4] = (int)book1[4] * (int)l1; |
| 393 | a[4] += (int)book2[4] * (int)l2; |
| 394 | a[4] += (int)book2[3] * inp1[0]; |
| 395 | a[4] += (int)book2[2] * inp1[1]; |
| 396 | a[4] += (int)book2[1] * inp1[2]; |
| 397 | a[4] += (int)book2[0] * inp1[3]; |
| 398 | a[4] += (int)inp1[4] * (int)2048; |
| 399 | |
| 400 | a[5] = (int)book1[5] * (int)l1; |
| 401 | a[5] += (int)book2[5] * (int)l2; |
| 402 | a[5] += (int)book2[4] * inp1[0]; |
| 403 | a[5] += (int)book2[3] * inp1[1]; |
| 404 | a[5] += (int)book2[2] * inp1[2]; |
| 405 | a[5] += (int)book2[1] * inp1[3]; |
| 406 | a[5] += (int)book2[0] * inp1[4]; |
| 407 | a[5] += (int)inp1[5] * (int)2048; |
| 408 | |
| 409 | a[6] = (int)book1[6] * (int)l1; |
| 410 | a[6] += (int)book2[6] * (int)l2; |
| 411 | a[6] += (int)book2[5] * inp1[0]; |
| 412 | a[6] += (int)book2[4] * inp1[1]; |
| 413 | a[6] += (int)book2[3] * inp1[2]; |
| 414 | a[6] += (int)book2[2] * inp1[3]; |
| 415 | a[6] += (int)book2[1] * inp1[4]; |
| 416 | a[6] += (int)book2[0] * inp1[5]; |
| 417 | a[6] += (int)inp1[6] * (int)2048; |
| 418 | |
| 419 | a[7] = (int)book1[7] * (int)l1; |
| 420 | a[7] += (int)book2[7] * (int)l2; |
| 421 | a[7] += (int)book2[6] * inp1[0]; |
| 422 | a[7] += (int)book2[5] * inp1[1]; |
| 423 | a[7] += (int)book2[4] * inp1[2]; |
| 424 | a[7] += (int)book2[3] * inp1[3]; |
| 425 | a[7] += (int)book2[2] * inp1[4]; |
| 426 | a[7] += (int)book2[1] * inp1[5]; |
| 427 | a[7] += (int)book2[0] * inp1[6]; |
| 428 | a[7] += (int)inp1[7] * (int)2048; |
| 429 | |
| 430 | for (j = 0; j < 8; j++) { |
| 431 | a[j ^ S] >>= 11; |
| 432 | a[j ^ S] = clamp_s16(a[j ^ S]); |
| 433 | *(out++) = a[j ^ S]; |
| 434 | } |
| 435 | l1 = a[6]; |
| 436 | l2 = a[7]; |
| 437 | |
| 438 | a[0] = (int)book1[0] * (int)l1; |
| 439 | a[0] += (int)book2[0] * (int)l2; |
| 440 | a[0] += (int)inp2[0] * (int)2048; |
| 441 | |
| 442 | a[1] = (int)book1[1] * (int)l1; |
| 443 | a[1] += (int)book2[1] * (int)l2; |
| 444 | a[1] += (int)book2[0] * inp2[0]; |
| 445 | a[1] += (int)inp2[1] * (int)2048; |
| 446 | |
| 447 | a[2] = (int)book1[2] * (int)l1; |
| 448 | a[2] += (int)book2[2] * (int)l2; |
| 449 | a[2] += (int)book2[1] * inp2[0]; |
| 450 | a[2] += (int)book2[0] * inp2[1]; |
| 451 | a[2] += (int)inp2[2] * (int)2048; |
| 452 | |
| 453 | a[3] = (int)book1[3] * (int)l1; |
| 454 | a[3] += (int)book2[3] * (int)l2; |
| 455 | a[3] += (int)book2[2] * inp2[0]; |
| 456 | a[3] += (int)book2[1] * inp2[1]; |
| 457 | a[3] += (int)book2[0] * inp2[2]; |
| 458 | a[3] += (int)inp2[3] * (int)2048; |
| 459 | |
| 460 | a[4] = (int)book1[4] * (int)l1; |
| 461 | a[4] += (int)book2[4] * (int)l2; |
| 462 | a[4] += (int)book2[3] * inp2[0]; |
| 463 | a[4] += (int)book2[2] * inp2[1]; |
| 464 | a[4] += (int)book2[1] * inp2[2]; |
| 465 | a[4] += (int)book2[0] * inp2[3]; |
| 466 | a[4] += (int)inp2[4] * (int)2048; |
| 467 | |
| 468 | a[5] = (int)book1[5] * (int)l1; |
| 469 | a[5] += (int)book2[5] * (int)l2; |
| 470 | a[5] += (int)book2[4] * inp2[0]; |
| 471 | a[5] += (int)book2[3] * inp2[1]; |
| 472 | a[5] += (int)book2[2] * inp2[2]; |
| 473 | a[5] += (int)book2[1] * inp2[3]; |
| 474 | a[5] += (int)book2[0] * inp2[4]; |
| 475 | a[5] += (int)inp2[5] * (int)2048; |
| 476 | |
| 477 | a[6] = (int)book1[6] * (int)l1; |
| 478 | a[6] += (int)book2[6] * (int)l2; |
| 479 | a[6] += (int)book2[5] * inp2[0]; |
| 480 | a[6] += (int)book2[4] * inp2[1]; |
| 481 | a[6] += (int)book2[3] * inp2[2]; |
| 482 | a[6] += (int)book2[2] * inp2[3]; |
| 483 | a[6] += (int)book2[1] * inp2[4]; |
| 484 | a[6] += (int)book2[0] * inp2[5]; |
| 485 | a[6] += (int)inp2[6] * (int)2048; |
| 486 | |
| 487 | a[7] = (int)book1[7] * (int)l1; |
| 488 | a[7] += (int)book2[7] * (int)l2; |
| 489 | a[7] += (int)book2[6] * inp2[0]; |
| 490 | a[7] += (int)book2[5] * inp2[1]; |
| 491 | a[7] += (int)book2[4] * inp2[2]; |
| 492 | a[7] += (int)book2[3] * inp2[3]; |
| 493 | a[7] += (int)book2[2] * inp2[4]; |
| 494 | a[7] += (int)book2[1] * inp2[5]; |
| 495 | a[7] += (int)book2[0] * inp2[6]; |
| 496 | a[7] += (int)inp2[7] * (int)2048; |
| 497 | |
| 498 | for (j = 0; j < 8; j++) { |
| 499 | a[j ^ S] >>= 11; |
| 500 | a[j ^ S] = clamp_s16(a[j ^ S]); |
| 501 | *(out++) = a[j ^ S]; |
| 502 | } |
| 503 | l1 = a[6]; |
| 504 | l2 = a[7]; |
| 505 | |
| 506 | count -= 32; |
| 507 | } |
| 508 | out -= 16; |
| 509 | memcpy(&rsp.RDRAM[Address], out, 32); |
| 510 | } |
| 511 | |
| 512 | static void RESAMPLE3(uint32_t inst1, uint32_t inst2) |
| 513 | { |
| 514 | unsigned char Flags = (uint8_t)((inst2 >> 0x1e)); |
| 515 | unsigned int Pitch = ((inst2 >> 0xe) & 0xffff) << 1; |
| 516 | uint32_t addy = (inst1 & 0xffffff); |
| 517 | unsigned int Accum = 0; |
| 518 | unsigned int location; |
| 519 | int16_t *lut; |
| 520 | short *dst; |
| 521 | int16_t *src; |
| 522 | uint32_t srcPtr = ((((inst2 >> 2) & 0xfff) + 0x4f0) / 2); |
| 523 | uint32_t dstPtr; |
| 524 | int32_t temp; |
| 525 | int32_t accum; |
| 526 | int x, i; |
| 527 | |
| 528 | dst = (short *)(BufferSpace); |
| 529 | src = (int16_t *)(BufferSpace); |
| 530 | |
| 531 | srcPtr -= 4; |
| 532 | |
| 533 | if (inst2 & 0x3) |
| 534 | dstPtr = 0x660 / 2; |
| 535 | else |
| 536 | dstPtr = 0x4f0 / 2; |
| 537 | |
| 538 | if ((Flags & 0x1) == 0) { |
| 539 | for (x = 0; x < 4; x++) |
| 540 | src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S]; |
| 541 | Accum = *(uint16_t *)(rsp.RDRAM + addy + 10); |
| 542 | } else { |
| 543 | for (x = 0; x < 4; x++) |
| 544 | src[(srcPtr + x)^S] = 0; |
| 545 | } |
| 546 | |
| 547 | for (i = 0; i < 0x170 / 2; i++) { |
| 548 | location = (((Accum * 0x40) >> 0x10) * 8); |
| 549 | lut = (int16_t *)(((uint8_t *)ResampleLUT) + location); |
| 550 | |
| 551 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 0)^S)) * ((int32_t)((int16_t)lut[0]))); |
| 552 | accum = (int32_t)(temp >> 15); |
| 553 | |
| 554 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 1)^S)) * ((int32_t)((int16_t)lut[1]))); |
| 555 | accum += (int32_t)(temp >> 15); |
| 556 | |
| 557 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 2)^S)) * ((int32_t)((int16_t)lut[2]))); |
| 558 | accum += (int32_t)(temp >> 15); |
| 559 | |
| 560 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 3)^S)) * ((int32_t)((int16_t)lut[3]))); |
| 561 | accum += (int32_t)(temp >> 15); |
| 562 | |
| 563 | accum = clamp_s16(accum); |
| 564 | |
| 565 | dst[dstPtr ^ S] = (accum); |
| 566 | dstPtr++; |
| 567 | Accum += Pitch; |
| 568 | srcPtr += (Accum >> 16); |
| 569 | Accum &= 0xffff; |
| 570 | } |
| 571 | for (x = 0; x < 4; x++) |
| 572 | ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S]; |
| 573 | *(uint16_t *)(rsp.RDRAM + addy + 10) = Accum; |
| 574 | } |
| 575 | |
| 576 | /* TODO Needs accuracy verification... */ |
| 577 | static void INTERLEAVE3(uint32_t inst1, uint32_t inst2) |
| 578 | { |
| 579 | uint16_t *outbuff = (uint16_t *)(BufferSpace + 0x4f0); |
| 580 | uint16_t *inSrcR; |
| 581 | uint16_t *inSrcL; |
| 582 | uint16_t Left, Right, Left2, Right2; |
| 583 | int x; |
| 584 | |
| 585 | inSrcR = (uint16_t *)(BufferSpace + 0xb40); |
| 586 | inSrcL = (uint16_t *)(BufferSpace + 0x9d0); |
| 587 | |
| 588 | for (x = 0; x < (0x170 / 4); x++) { |
| 589 | Left = *(inSrcL++); |
| 590 | Right = *(inSrcR++); |
| 591 | Left2 = *(inSrcL++); |
| 592 | Right2 = *(inSrcR++); |
| 593 | |
| 594 | #ifdef M64P_BIG_ENDIAN |
| 595 | *(outbuff++) = Right; |
| 596 | *(outbuff++) = Left; |
| 597 | *(outbuff++) = Right2; |
| 598 | *(outbuff++) = Left2; |
| 599 | #else |
| 600 | *(outbuff++) = Right2; |
| 601 | *(outbuff++) = Left2; |
| 602 | *(outbuff++) = Right; |
| 603 | *(outbuff++) = Left; |
| 604 | #endif |
| 605 | } |
| 606 | } |
| 607 | |
/* WHATISTHIS: handler for a command whose purpose is not known here.
 * Deliberately does nothing; both command words are ignored.
 */
static void WHATISTHIS(uint32_t inst1, uint32_t inst2)
{
    (void)inst1;
    (void)inst2;
}
| 611 | |
/* Address most recently supplied through MP3ADDY (24 bits).
 * NOTE(review): no reader is visible in this part of the file - confirm
 * where it is consumed.
 */
static uint32_t setaddr;

/* MP3ADDY: record the low 24 bits of inst2 in setaddr. inst1 is not used. */
static void MP3ADDY(uint32_t inst1, uint32_t inst2)
{
    (void)inst1;

    setaddr = inst2 & 0xffffff;
}
| 617 | |
| 618 | /* |
| 619 | FFT = Fast Fourier Transform |
| 620 | DCT = Discrete Cosine Transform |
| 621 | MPEG-1 Layer 3 retains Layer 2's 1152-sample window, as well as the FFT polyphase filter for |
| 622 | backward compatibility, but adds a modified DCT filter. DCT's advantages over DFTs (discrete |
| 623 | Fourier transforms) include half as many multiply-accumulate operations and half the |
| 624 | generated coefficients because the sinusoidal portion of the calculation is absent, and DCT |
generally involves simpler math. The finite lengths of a conventional DCT's bandpass impulse
| 626 | responses, however, may result in block-boundary effects. MDCTs overlap the analysis blocks |
| 627 | and lowpass-filter the decoded audio to remove aliases, eliminating these effects. MDCTs also |
have a higher transform coding gain than the standard DCT, and their basis functions
| 629 | correspond to better bandpass response. |
| 630 | |
| 631 | MPEG-1 Layer 3's DCT sub-bands are unequally sized, and correspond to the human auditory |
system's critical bands. Layer 3 decoders must support both constant- and variable-bit-rate
| 633 | bit streams. (However, many Layer 1 and 2 decoders also handle variable bit rates). Finally, |
| 634 | Layer 3 encoders Huffman-code the quantized coefficients before archiving or transmission for |
| 635 | additional lossless compression. Bit streams range from 32 to 320 kbps, and 128-kbps rates |
| 636 | achieve near-CD quality, an important specification to enable dual-channel ISDN |
| 637 | (integrated-services-digital-network) to be the future high-bandwidth pipe to the home. |
| 638 | |
| 639 | */ |
/* DISABLE: no-op command handler; both command words are ignored. */
static void DISABLE(uint32_t inst1, uint32_t inst2)
{
    (void)inst1;
    (void)inst2;
}
| 643 | |
| 644 | |
| 645 | const acmd_callback_t ABI3[0x10] = { |
| 646 | DISABLE , ADPCM3 , CLEARBUFF3, ENVMIXER3 , LOADBUFF3, RESAMPLE3 , SAVEBUFF3, MP3, |
| 647 | MP3ADDY, SETVOL3, DMEMMOVE3 , LOADADPCM3 , MIXER3 , INTERLEAVE3, WHATISTHIS , SETLOOP3 |
| 648 | }; |