| 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | * Mupen64plus-rsp-hle - ucode2.c * |
| 3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
| 4 | * Copyright (C) 2009 Richard Goedeken * |
| 5 | * Copyright (C) 2002 Hacktarux * |
| 6 | * * |
| 7 | * This program is free software; you can redistribute it and/or modify * |
| 8 | * it under the terms of the GNU General Public License as published by * |
| 9 | * the Free Software Foundation; either version 2 of the License, or * |
| 10 | * (at your option) any later version. * |
| 11 | * * |
| 12 | * This program is distributed in the hope that it will be useful, * |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 15 | * GNU General Public License for more details. * |
| 16 | * * |
| 17 | * You should have received a copy of the GNU General Public License * |
| 18 | * along with this program; if not, write to the * |
| 19 | * Free Software Foundation, Inc., * |
| 20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
| 21 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 22 | |
| 23 | #include <string.h> |
| 24 | #include <stdbool.h> |
| 25 | #include <stdint.h> |
| 26 | |
| 27 | #include "m64p_plugin.h" |
| 28 | #include "m64p_types.h" |
| 29 | #include "hle.h" |
| 30 | #include "alist_internal.h" |
| 31 | #include "alist.h" |
| 32 | |
| 33 | static void SPNOOP(uint32_t inst1, uint32_t inst2) |
| 34 | { |
| 35 | DebugMessage(M64MSG_ERROR, "Unknown/Unimplemented Audio Command %i in ABI 2", (int)(inst1 >> 24)); |
| 36 | } |
| 37 | |
| 38 | |
/* ABI variant flags; SEGMENT2 sets them, ENVMIXER2 reads isMKABI. */
static bool isMKABI = false;
static bool isZeldaABI = false;

/* Resets both ABI variant flags to their initial (unset) state. */
void init_ucode2(void)
{
    isZeldaABI = false;
    isMKABI = false;
}
| 46 | |
| 47 | /* Loads an ADPCM table |
| 48 | * NOTE Works 100% Now 03-13-01 |
| 49 | */ |
| 50 | static void LOADADPCM2(uint32_t inst1, uint32_t inst2) |
| 51 | { |
| 52 | uint32_t v0 = (inst2 & 0xffffff); |
| 53 | uint32_t x; |
| 54 | /* Zelda2 Specific... */ |
| 55 | uint16_t *table = (uint16_t *)(rsp.RDRAM + v0); |
| 56 | |
| 57 | for (x = 0; x < ((inst1 & 0xffff) >> 0x4); x++) { |
| 58 | adpcmtable[(0x0 + (x << 3))^S] = table[0]; |
| 59 | adpcmtable[(0x1 + (x << 3))^S] = table[1]; |
| 60 | |
| 61 | adpcmtable[(0x2 + (x << 3))^S] = table[2]; |
| 62 | adpcmtable[(0x3 + (x << 3))^S] = table[3]; |
| 63 | |
| 64 | adpcmtable[(0x4 + (x << 3))^S] = table[4]; |
| 65 | adpcmtable[(0x5 + (x << 3))^S] = table[5]; |
| 66 | |
| 67 | adpcmtable[(0x6 + (x << 3))^S] = table[6]; |
| 68 | adpcmtable[(0x7 + (x << 3))^S] = table[7]; |
| 69 | table += 8; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | static void SETLOOP2(uint32_t inst1, uint32_t inst2) |
| 74 | { |
| 75 | loopval = inst2 & 0xffffff; /* No segment? */ |
| 76 | } |
| 77 | |
| 78 | static void SETBUFF2(uint32_t inst1, uint32_t inst2) |
| 79 | { |
| 80 | AudioInBuffer = (uint16_t)(inst1); /* 0x00 */ |
| 81 | AudioOutBuffer = (uint16_t)((inst2 >> 0x10)); /* 0x02 */ |
| 82 | AudioCount = (uint16_t)(inst2); /* 0x04 */ |
| 83 | } |
| 84 | |
| 85 | /* NOTE Verified to be 100% Accurate... */ |
| 86 | static void ADPCM2(uint32_t inst1, uint32_t inst2) |
| 87 | { |
| 88 | unsigned char Flags = (uint8_t)(inst1 >> 16) & 0xff; |
| 89 | unsigned int Address = (inst2 & 0xffffff); |
| 90 | unsigned short inPtr = 0; |
| 91 | short *out = (short *)(BufferSpace + AudioOutBuffer); |
| 92 | short count = (short)AudioCount; |
| 93 | unsigned char icode; |
| 94 | unsigned char code; |
| 95 | int vscale; |
| 96 | unsigned short index; |
| 97 | unsigned short j; |
| 98 | int a[8]; |
| 99 | short *book1, *book2; |
| 100 | |
| 101 | uint8_t srange; |
| 102 | uint8_t mask1; |
| 103 | uint8_t mask2; |
| 104 | uint8_t shifter; |
| 105 | |
| 106 | int l1; |
| 107 | int l2; |
| 108 | int inp1[8]; |
| 109 | int inp2[8]; |
| 110 | |
| 111 | memset(out, 0, 32); |
| 112 | |
| 113 | /* Tricky lil Zelda MM and ABI2!!! hahaha I know your secrets! :DDD */ |
| 114 | if (Flags & 0x4) { |
| 115 | srange = 0xE; |
| 116 | mask1 = 0xC0; |
| 117 | mask2 = 0x30; |
| 118 | shifter = 10; |
| 119 | } else { |
| 120 | srange = 0xC; |
| 121 | mask1 = 0xf0; |
| 122 | mask2 = 0x0f; |
| 123 | shifter = 12; |
| 124 | } |
| 125 | |
| 126 | if (!(Flags & 0x1)) { |
| 127 | if (Flags & 0x2) |
| 128 | memcpy(out, &rsp.RDRAM[loopval], 32); |
| 129 | else |
| 130 | memcpy(out, &rsp.RDRAM[Address], 32); |
| 131 | } |
| 132 | |
| 133 | l1 = out[14 ^ S]; |
| 134 | l2 = out[15 ^ S]; |
| 135 | out += 16; |
| 136 | while (count > 0) { |
| 137 | code = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
| 138 | index = code & 0xf; |
| 139 | index <<= 4; |
| 140 | book1 = (short *)&adpcmtable[index]; |
| 141 | book2 = book1 + 8; |
| 142 | code >>= 4; |
| 143 | vscale = (0x8000 >> ((srange - code) - 1)); |
| 144 | |
| 145 | inPtr++; |
| 146 | j = 0; |
| 147 | |
| 148 | while (j < 8) { |
| 149 | icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
| 150 | inPtr++; |
| 151 | |
| 152 | /* this will in effect be signed */ |
| 153 | inp1[j] = (int16_t)((icode & mask1) << 8); |
| 154 | if (code < srange) |
| 155 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 156 | j++; |
| 157 | |
| 158 | inp1[j] = (int16_t)((icode & mask2) << shifter); |
| 159 | if (code < srange) |
| 160 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 161 | j++; |
| 162 | |
| 163 | if (Flags & 4) { |
| 164 | /* this will in effect be signed */ |
| 165 | inp1[j] = (int16_t)((icode & 0xC) << 12); |
| 166 | if (code < 0xE) |
| 167 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 168 | j++; |
| 169 | |
| 170 | inp1[j] = (int16_t)((icode & 0x3) << 14); |
| 171 | if (code < 0xE) |
| 172 | inp1[j] = ((int)((int)inp1[j] * (int)vscale) >> 16); |
| 173 | j++; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | |
| 178 | |
| 179 | j = 0; |
| 180 | while (j < 8) { |
| 181 | icode = BufferSpace[(AudioInBuffer + inPtr)^S8]; |
| 182 | inPtr++; |
| 183 | |
| 184 | inp2[j] = (int16_t)((icode & mask1) << 8); |
| 185 | if (code < srange) |
| 186 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 187 | j++; |
| 188 | |
| 189 | inp2[j] = (int16_t)((icode & mask2) << shifter); |
| 190 | if (code < srange) |
| 191 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 192 | j++; |
| 193 | |
| 194 | if (Flags & 4) { |
| 195 | inp2[j] = (int16_t)((icode & 0xC) << 12); |
| 196 | if (code < 0xE) |
| 197 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 198 | j++; |
| 199 | |
| 200 | inp2[j] = (int16_t)((icode & 0x3) << 14); |
| 201 | if (code < 0xE) |
| 202 | inp2[j] = ((int)((int)inp2[j] * (int)vscale) >> 16); |
| 203 | j++; |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | a[0] = (int)book1[0] * (int)l1; |
| 208 | a[0] += (int)book2[0] * (int)l2; |
| 209 | a[0] += (int)inp1[0] * (int)2048; |
| 210 | |
| 211 | a[1] = (int)book1[1] * (int)l1; |
| 212 | a[1] += (int)book2[1] * (int)l2; |
| 213 | a[1] += (int)book2[0] * inp1[0]; |
| 214 | a[1] += (int)inp1[1] * (int)2048; |
| 215 | |
| 216 | a[2] = (int)book1[2] * (int)l1; |
| 217 | a[2] += (int)book2[2] * (int)l2; |
| 218 | a[2] += (int)book2[1] * inp1[0]; |
| 219 | a[2] += (int)book2[0] * inp1[1]; |
| 220 | a[2] += (int)inp1[2] * (int)2048; |
| 221 | |
| 222 | a[3] = (int)book1[3] * (int)l1; |
| 223 | a[3] += (int)book2[3] * (int)l2; |
| 224 | a[3] += (int)book2[2] * inp1[0]; |
| 225 | a[3] += (int)book2[1] * inp1[1]; |
| 226 | a[3] += (int)book2[0] * inp1[2]; |
| 227 | a[3] += (int)inp1[3] * (int)2048; |
| 228 | |
| 229 | a[4] = (int)book1[4] * (int)l1; |
| 230 | a[4] += (int)book2[4] * (int)l2; |
| 231 | a[4] += (int)book2[3] * inp1[0]; |
| 232 | a[4] += (int)book2[2] * inp1[1]; |
| 233 | a[4] += (int)book2[1] * inp1[2]; |
| 234 | a[4] += (int)book2[0] * inp1[3]; |
| 235 | a[4] += (int)inp1[4] * (int)2048; |
| 236 | |
| 237 | a[5] = (int)book1[5] * (int)l1; |
| 238 | a[5] += (int)book2[5] * (int)l2; |
| 239 | a[5] += (int)book2[4] * inp1[0]; |
| 240 | a[5] += (int)book2[3] * inp1[1]; |
| 241 | a[5] += (int)book2[2] * inp1[2]; |
| 242 | a[5] += (int)book2[1] * inp1[3]; |
| 243 | a[5] += (int)book2[0] * inp1[4]; |
| 244 | a[5] += (int)inp1[5] * (int)2048; |
| 245 | |
| 246 | a[6] = (int)book1[6] * (int)l1; |
| 247 | a[6] += (int)book2[6] * (int)l2; |
| 248 | a[6] += (int)book2[5] * inp1[0]; |
| 249 | a[6] += (int)book2[4] * inp1[1]; |
| 250 | a[6] += (int)book2[3] * inp1[2]; |
| 251 | a[6] += (int)book2[2] * inp1[3]; |
| 252 | a[6] += (int)book2[1] * inp1[4]; |
| 253 | a[6] += (int)book2[0] * inp1[5]; |
| 254 | a[6] += (int)inp1[6] * (int)2048; |
| 255 | |
| 256 | a[7] = (int)book1[7] * (int)l1; |
| 257 | a[7] += (int)book2[7] * (int)l2; |
| 258 | a[7] += (int)book2[6] * inp1[0]; |
| 259 | a[7] += (int)book2[5] * inp1[1]; |
| 260 | a[7] += (int)book2[4] * inp1[2]; |
| 261 | a[7] += (int)book2[3] * inp1[3]; |
| 262 | a[7] += (int)book2[2] * inp1[4]; |
| 263 | a[7] += (int)book2[1] * inp1[5]; |
| 264 | a[7] += (int)book2[0] * inp1[6]; |
| 265 | a[7] += (int)inp1[7] * (int)2048; |
| 266 | |
| 267 | for (j = 0; j < 8; j++) { |
| 268 | a[j ^ S] >>= 11; |
| 269 | a[j ^ S] = clamp_s16(a[j ^ S]); |
| 270 | *(out++) = a[j ^ S]; |
| 271 | } |
| 272 | l1 = a[6]; |
| 273 | l2 = a[7]; |
| 274 | |
| 275 | a[0] = (int)book1[0] * (int)l1; |
| 276 | a[0] += (int)book2[0] * (int)l2; |
| 277 | a[0] += (int)inp2[0] * (int)2048; |
| 278 | |
| 279 | a[1] = (int)book1[1] * (int)l1; |
| 280 | a[1] += (int)book2[1] * (int)l2; |
| 281 | a[1] += (int)book2[0] * inp2[0]; |
| 282 | a[1] += (int)inp2[1] * (int)2048; |
| 283 | |
| 284 | a[2] = (int)book1[2] * (int)l1; |
| 285 | a[2] += (int)book2[2] * (int)l2; |
| 286 | a[2] += (int)book2[1] * inp2[0]; |
| 287 | a[2] += (int)book2[0] * inp2[1]; |
| 288 | a[2] += (int)inp2[2] * (int)2048; |
| 289 | |
| 290 | a[3] = (int)book1[3] * (int)l1; |
| 291 | a[3] += (int)book2[3] * (int)l2; |
| 292 | a[3] += (int)book2[2] * inp2[0]; |
| 293 | a[3] += (int)book2[1] * inp2[1]; |
| 294 | a[3] += (int)book2[0] * inp2[2]; |
| 295 | a[3] += (int)inp2[3] * (int)2048; |
| 296 | |
| 297 | a[4] = (int)book1[4] * (int)l1; |
| 298 | a[4] += (int)book2[4] * (int)l2; |
| 299 | a[4] += (int)book2[3] * inp2[0]; |
| 300 | a[4] += (int)book2[2] * inp2[1]; |
| 301 | a[4] += (int)book2[1] * inp2[2]; |
| 302 | a[4] += (int)book2[0] * inp2[3]; |
| 303 | a[4] += (int)inp2[4] * (int)2048; |
| 304 | |
| 305 | a[5] = (int)book1[5] * (int)l1; |
| 306 | a[5] += (int)book2[5] * (int)l2; |
| 307 | a[5] += (int)book2[4] * inp2[0]; |
| 308 | a[5] += (int)book2[3] * inp2[1]; |
| 309 | a[5] += (int)book2[2] * inp2[2]; |
| 310 | a[5] += (int)book2[1] * inp2[3]; |
| 311 | a[5] += (int)book2[0] * inp2[4]; |
| 312 | a[5] += (int)inp2[5] * (int)2048; |
| 313 | |
| 314 | a[6] = (int)book1[6] * (int)l1; |
| 315 | a[6] += (int)book2[6] * (int)l2; |
| 316 | a[6] += (int)book2[5] * inp2[0]; |
| 317 | a[6] += (int)book2[4] * inp2[1]; |
| 318 | a[6] += (int)book2[3] * inp2[2]; |
| 319 | a[6] += (int)book2[2] * inp2[3]; |
| 320 | a[6] += (int)book2[1] * inp2[4]; |
| 321 | a[6] += (int)book2[0] * inp2[5]; |
| 322 | a[6] += (int)inp2[6] * (int)2048; |
| 323 | |
| 324 | a[7] = (int)book1[7] * (int)l1; |
| 325 | a[7] += (int)book2[7] * (int)l2; |
| 326 | a[7] += (int)book2[6] * inp2[0]; |
| 327 | a[7] += (int)book2[5] * inp2[1]; |
| 328 | a[7] += (int)book2[4] * inp2[2]; |
| 329 | a[7] += (int)book2[3] * inp2[3]; |
| 330 | a[7] += (int)book2[2] * inp2[4]; |
| 331 | a[7] += (int)book2[1] * inp2[5]; |
| 332 | a[7] += (int)book2[0] * inp2[6]; |
| 333 | a[7] += (int)inp2[7] * (int)2048; |
| 334 | |
| 335 | for (j = 0; j < 8; j++) { |
| 336 | a[j ^ S] >>= 11; |
| 337 | a[j ^ S] = clamp_s16(a[j ^ S]); |
| 338 | *(out++) = a[j ^ S]; |
| 339 | } |
| 340 | l1 = a[6]; |
| 341 | l2 = a[7]; |
| 342 | |
| 343 | count -= 32; |
| 344 | } |
| 345 | out -= 16; |
| 346 | memcpy(&rsp.RDRAM[Address], out, 32); |
| 347 | } |
| 348 | |
| 349 | static void CLEARBUFF2(uint32_t inst1, uint32_t inst2) |
| 350 | { |
| 351 | uint16_t addr = (uint16_t)(inst1 & 0xffff); |
| 352 | uint16_t count = (uint16_t)(inst2 & 0xffff); |
| 353 | if (count > 0) |
| 354 | memset(BufferSpace + addr, 0, count); |
| 355 | } |
| 356 | |
| 357 | /* TODO Needs accuracy verification... */ |
| 358 | static void LOADBUFF2(uint32_t inst1, uint32_t inst2) |
| 359 | { |
| 360 | uint32_t v0; |
| 361 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
| 362 | v0 = (inst2 & 0xfffffc); |
| 363 | memcpy(BufferSpace + (inst1 & 0xfffc), rsp.RDRAM + v0, (cnt + 3) & 0xFFFC); |
| 364 | } |
| 365 | |
| 366 | /* TODO Needs accuracy verification... */ |
| 367 | static void SAVEBUFF2(uint32_t inst1, uint32_t inst2) |
| 368 | { |
| 369 | uint32_t v0; |
| 370 | uint32_t cnt = (((inst1 >> 0xC) + 3) & 0xFFC); |
| 371 | v0 = (inst2 & 0xfffffc); |
| 372 | memcpy(rsp.RDRAM + v0, BufferSpace + (inst1 & 0xfffc), (cnt + 3) & 0xFFFC); |
| 373 | } |
| 374 | |
| 375 | /* TODO Needs accuracy verification... */ |
| 376 | static void MIXER2(uint32_t inst1, uint32_t inst2) |
| 377 | { |
| 378 | uint16_t dmemin = (uint16_t)(inst2 >> 0x10); |
| 379 | uint16_t dmemout = (uint16_t)(inst2 & 0xFFFF); |
| 380 | uint32_t count = ((inst1 >> 12) & 0xFF0); |
| 381 | int32_t gain = (int16_t)(inst1 & 0xFFFF); |
| 382 | int32_t temp; |
| 383 | unsigned int x; |
| 384 | |
| 385 | for (x = 0; x < count; x += 2) { |
| 386 | /* TODO I think I can do this a lot easier */ |
| 387 | temp = (*(int16_t *)(BufferSpace + dmemin + x) * gain) >> 15; |
| 388 | temp += *(int16_t *)(BufferSpace + dmemout + x); |
| 389 | |
| 390 | temp = clamp_s16((int32_t)temp); |
| 391 | |
| 392 | *(uint16_t *)(BufferSpace + dmemout + x) = (uint16_t)(temp & 0xFFFF); |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | |
| 397 | static void RESAMPLE2(uint32_t inst1, uint32_t inst2) |
| 398 | { |
| 399 | unsigned char Flags = (uint8_t)((inst1 >> 16) & 0xff); |
| 400 | unsigned int Pitch = ((inst1 & 0xffff)) << 1; |
| 401 | uint32_t addy = (inst2 & 0xffffff); |
| 402 | unsigned int Accum = 0; |
| 403 | unsigned int location; |
| 404 | int16_t *lut; |
| 405 | short *dst; |
| 406 | int16_t *src; |
| 407 | uint32_t srcPtr = (AudioInBuffer / 2); |
| 408 | uint32_t dstPtr = (AudioOutBuffer / 2); |
| 409 | int32_t temp; |
| 410 | int32_t accum; |
| 411 | int x, i; |
| 412 | |
| 413 | dst = (short *)(BufferSpace); |
| 414 | src = (int16_t *)(BufferSpace); |
| 415 | |
| 416 | if (addy > (1024 * 1024 * 8)) |
| 417 | addy = (inst2 & 0xffffff); |
| 418 | |
| 419 | srcPtr -= 4; |
| 420 | |
| 421 | if ((Flags & 0x1) == 0) { |
| 422 | for (x = 0; x < 4; x++) |
| 423 | src[(srcPtr + x)^S] = ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S]; |
| 424 | Accum = *(uint16_t *)(rsp.RDRAM + addy + 10); |
| 425 | } else { |
| 426 | for (x = 0; x < 4; x++) |
| 427 | src[(srcPtr + x)^S] = 0; |
| 428 | } |
| 429 | |
| 430 | for (i = 0; i < ((AudioCount + 0xf) & 0xFFF0) / 2; i++) { |
| 431 | location = (((Accum * 0x40) >> 0x10) * 8); |
| 432 | lut = (int16_t *)(((uint8_t *)ResampleLUT) + location); |
| 433 | |
| 434 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 0)^S)) * ((int32_t)((int16_t)lut[0]))); |
| 435 | accum = (int32_t)(temp >> 15); |
| 436 | |
| 437 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 1)^S)) * ((int32_t)((int16_t)lut[1]))); |
| 438 | accum += (int32_t)(temp >> 15); |
| 439 | |
| 440 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 2)^S)) * ((int32_t)((int16_t)lut[2]))); |
| 441 | accum += (int32_t)(temp >> 15); |
| 442 | |
| 443 | temp = ((int32_t) * (int16_t *)(src + ((srcPtr + 3)^S)) * ((int32_t)((int16_t)lut[3]))); |
| 444 | accum += (int32_t)(temp >> 15); |
| 445 | |
| 446 | accum = clamp_s16(accum); |
| 447 | |
| 448 | dst[dstPtr ^ S] = (int16_t)(accum); |
| 449 | dstPtr++; |
| 450 | Accum += Pitch; |
| 451 | srcPtr += (Accum >> 16); |
| 452 | Accum &= 0xffff; |
| 453 | } |
| 454 | for (x = 0; x < 4; x++) |
| 455 | ((uint16_t *)rsp.RDRAM)[((addy / 2) + x)^S] = src[(srcPtr + x)^S]; |
| 456 | *(uint16_t *)(rsp.RDRAM + addy + 10) = (uint16_t)Accum; |
| 457 | } |
| 458 | |
| 459 | /* TODO Needs accuracy verification... */ |
| 460 | static void DMEMMOVE2(uint32_t inst1, uint32_t inst2) |
| 461 | { |
| 462 | uint32_t cnt; |
| 463 | uint32_t v0 = (inst1 & 0xFFFF); |
| 464 | uint32_t v1 = (inst2 >> 0x10); |
| 465 | uint32_t count = ((inst2 + 3) & 0xfffc); |
| 466 | |
| 467 | if ((inst2 & 0xffff) == 0) |
| 468 | return; |
| 469 | |
| 470 | for (cnt = 0; cnt < count; cnt++) |
| 471 | *(uint8_t *)(BufferSpace + ((cnt + v1)^S8)) = *(uint8_t *)(BufferSpace + ((cnt + v0)^S8)); |
| 472 | } |
| 473 | |
/* Envelope state shared by ENVSETUP1, ENVSETUP2 and ENVMIXER2. */
static uint32_t t3, s5, s6;
static uint16_t env[8];

/* First envelope setup command.
 *   t3     <- low 16 bits of inst1
 *   env[4] <- bits 16..23 of inst1, placed in the high byte
 *   env[5] <- env[4] value plus t3 (truncated to 16 bits)
 *   s5     <- high 16 bits of inst2
 *   s6     <- low 16 bits of inst2
 */
static void ENVSETUP1(uint32_t inst1, uint32_t inst2)
{
    const uint32_t base = (inst1 >> 0x8) & 0xFF00;

    t3 = inst1 & 0xFFFF;
    s5 = inst2 >> 0x10;
    s6 = inst2 & 0xFFFF;

    env[4] = (uint16_t)base;
    env[5] = (uint16_t)(base + t3);
}
| 489 | |
| 490 | static void ENVSETUP2(uint32_t inst1, uint32_t inst2) |
| 491 | { |
| 492 | uint32_t tmp; |
| 493 | |
| 494 | tmp = (inst2 >> 0x10); |
| 495 | env[0] = (uint16_t)tmp; |
| 496 | tmp += s5; |
| 497 | env[1] = (uint16_t)tmp; |
| 498 | tmp = inst2 & 0xffff; |
| 499 | env[2] = (uint16_t)tmp; |
| 500 | tmp += s6; |
| 501 | env[3] = (uint16_t)tmp; |
| 502 | } |
| 503 | |
| 504 | static void ENVMIXER2(uint32_t inst1, uint32_t inst2) |
| 505 | { |
| 506 | int16_t *bufft6, *bufft7, *buffs0, *buffs1; |
| 507 | int16_t *buffs3; |
| 508 | int32_t count; |
| 509 | uint32_t adder; |
| 510 | |
| 511 | int16_t vec9, vec10; |
| 512 | |
| 513 | int16_t v2[8]; |
| 514 | |
| 515 | buffs3 = (int16_t *)(BufferSpace + ((inst1 >> 0x0c) & 0x0ff0)); |
| 516 | bufft6 = (int16_t *)(BufferSpace + ((inst2 >> 0x14) & 0x0ff0)); |
| 517 | bufft7 = (int16_t *)(BufferSpace + ((inst2 >> 0x0c) & 0x0ff0)); |
| 518 | buffs0 = (int16_t *)(BufferSpace + ((inst2 >> 0x04) & 0x0ff0)); |
| 519 | buffs1 = (int16_t *)(BufferSpace + ((inst2 << 0x04) & 0x0ff0)); |
| 520 | |
| 521 | |
| 522 | v2[0] = 0 - (int16_t)((inst1 & 0x2) >> 1); |
| 523 | v2[1] = 0 - (int16_t)((inst1 & 0x1)); |
| 524 | v2[2] = 0 - (int16_t)((inst1 & 0x8) >> 1); |
| 525 | v2[3] = 0 - (int16_t)((inst1 & 0x4) >> 1); |
| 526 | |
| 527 | count = (inst1 >> 8) & 0xff; |
| 528 | |
| 529 | if (!isMKABI) { |
| 530 | s5 *= 2; |
| 531 | s6 *= 2; |
| 532 | t3 *= 2; |
| 533 | adder = 0x10; |
| 534 | } else { |
| 535 | inst1 = 0; |
| 536 | adder = 0x8; |
| 537 | t3 = 0; |
| 538 | } |
| 539 | |
| 540 | |
| 541 | while (count > 0) { |
| 542 | int temp, x; |
| 543 | for (x = 0; x < 0x8; x++) { |
| 544 | vec9 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[0]) >> 0x10) ^ v2[0]; |
| 545 | vec10 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[2]) >> 0x10) ^ v2[1]; |
| 546 | temp = bufft6[x ^ S] + vec9; |
| 547 | temp = clamp_s16(temp); |
| 548 | bufft6[x ^ S] = temp; |
| 549 | temp = bufft7[x ^ S] + vec10; |
| 550 | temp = clamp_s16(temp); |
| 551 | bufft7[x ^ S] = temp; |
| 552 | vec9 = (int16_t)(((int32_t)vec9 * (uint32_t)env[4]) >> 0x10) ^ v2[2]; |
| 553 | vec10 = (int16_t)(((int32_t)vec10 * (uint32_t)env[4]) >> 0x10) ^ v2[3]; |
| 554 | if (inst1 & 0x10) { |
| 555 | temp = buffs0[x ^ S] + vec10; |
| 556 | temp = clamp_s16(temp); |
| 557 | buffs0[x ^ S] = temp; |
| 558 | temp = buffs1[x ^ S] + vec9; |
| 559 | temp = clamp_s16(temp); |
| 560 | buffs1[x ^ S] = temp; |
| 561 | } else { |
| 562 | temp = buffs0[x ^ S] + vec9; |
| 563 | temp = clamp_s16(temp); |
| 564 | buffs0[x ^ S] = temp; |
| 565 | temp = buffs1[x ^ S] + vec10; |
| 566 | temp = clamp_s16(temp); |
| 567 | buffs1[x ^ S] = temp; |
| 568 | } |
| 569 | } |
| 570 | |
| 571 | if (!isMKABI) |
| 572 | for (x = 0x8; x < 0x10; x++) { |
| 573 | vec9 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[1]) >> 0x10) ^ v2[0]; |
| 574 | vec10 = (int16_t)(((int32_t)buffs3[x ^ S] * (uint32_t)env[3]) >> 0x10) ^ v2[1]; |
| 575 | temp = bufft6[x ^ S] + vec9; |
| 576 | temp = clamp_s16(temp); |
| 577 | bufft6[x ^ S] = temp; |
| 578 | temp = bufft7[x ^ S] + vec10; |
| 579 | temp = clamp_s16(temp); |
| 580 | bufft7[x ^ S] = temp; |
| 581 | vec9 = (int16_t)(((int32_t)vec9 * (uint32_t)env[5]) >> 0x10) ^ v2[2]; |
| 582 | vec10 = (int16_t)(((int32_t)vec10 * (uint32_t)env[5]) >> 0x10) ^ v2[3]; |
| 583 | if (inst1 & 0x10) { |
| 584 | temp = buffs0[x ^ S] + vec10; |
| 585 | temp = clamp_s16(temp); |
| 586 | buffs0[x ^ S] = temp; |
| 587 | temp = buffs1[x ^ S] + vec9; |
| 588 | temp = clamp_s16(temp); |
| 589 | buffs1[x ^ S] = temp; |
| 590 | } else { |
| 591 | temp = buffs0[x ^ S] + vec9; |
| 592 | temp = clamp_s16(temp); |
| 593 | buffs0[x ^ S] = temp; |
| 594 | temp = buffs1[x ^ S] + vec10; |
| 595 | temp = clamp_s16(temp); |
| 596 | buffs1[x ^ S] = temp; |
| 597 | } |
| 598 | } |
| 599 | bufft6 += adder; |
| 600 | bufft7 += adder; |
| 601 | buffs0 += adder; |
| 602 | buffs1 += adder; |
| 603 | buffs3 += adder; |
| 604 | count -= adder; |
| 605 | env[0] += (uint16_t)s5; |
| 606 | env[1] += (uint16_t)s5; |
| 607 | env[2] += (uint16_t)s6; |
| 608 | env[3] += (uint16_t)s6; |
| 609 | env[4] += (uint16_t)t3; |
| 610 | env[5] += (uint16_t)t3; |
| 611 | } |
| 612 | } |
| 613 | |
| 614 | static void DUPLICATE2(uint32_t inst1, uint32_t inst2) |
| 615 | { |
| 616 | unsigned short Count = (inst1 >> 16) & 0xff; |
| 617 | unsigned short In = inst1 & 0xffff; |
| 618 | unsigned short Out = (inst2 >> 16); |
| 619 | |
| 620 | unsigned short buff[64]; |
| 621 | |
| 622 | memcpy(buff, BufferSpace + In, 128); |
| 623 | |
| 624 | while (Count) { |
| 625 | memcpy(BufferSpace + Out, buff, 128); |
| 626 | Out += 128; |
| 627 | Count--; |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | static void INTERL2(uint32_t inst1, uint32_t inst2) |
| 632 | { |
| 633 | short Count = inst1 & 0xffff; |
| 634 | unsigned short Out = inst2 & 0xffff; |
| 635 | unsigned short In = (inst2 >> 16); |
| 636 | |
| 637 | unsigned char *src, *dst; |
| 638 | src = (unsigned char *)(BufferSpace); /* [In]; */ |
| 639 | dst = (unsigned char *)(BufferSpace); /* [Out]; */ |
| 640 | while (Count) { |
| 641 | *(short *)(dst + (Out ^ S8)) = *(short *)(src + (In ^ S8)); |
| 642 | Out += 2; |
| 643 | In += 4; |
| 644 | Count--; |
| 645 | } |
| 646 | } |
| 647 | |
| 648 | /* TODO Needs accuracy verification... */ |
| 649 | static void INTERLEAVE2(uint32_t inst1, uint32_t inst2) |
| 650 | { |
| 651 | uint32_t inL, inR; |
| 652 | uint16_t *outbuff; |
| 653 | uint16_t *inSrcR; |
| 654 | uint16_t *inSrcL; |
| 655 | uint16_t Left, Right, Left2, Right2; |
| 656 | uint32_t count; |
| 657 | uint32_t x; |
| 658 | |
| 659 | count = ((inst1 >> 12) & 0xFF0); |
| 660 | if (count == 0) { |
| 661 | outbuff = (uint16_t *)(AudioOutBuffer + BufferSpace); |
| 662 | count = AudioCount; |
| 663 | } else |
| 664 | outbuff = (uint16_t *)((inst1 & 0xFFFF) + BufferSpace); |
| 665 | |
| 666 | inR = inst2 & 0xFFFF; |
| 667 | inL = (inst2 >> 16) & 0xFFFF; |
| 668 | |
| 669 | inSrcR = (uint16_t *)(BufferSpace + inR); |
| 670 | inSrcL = (uint16_t *)(BufferSpace + inL); |
| 671 | |
| 672 | for (x = 0; x < (count / 4); x++) { |
| 673 | Left = *(inSrcL++); |
| 674 | Right = *(inSrcR++); |
| 675 | Left2 = *(inSrcL++); |
| 676 | Right2 = *(inSrcR++); |
| 677 | |
| 678 | #ifdef M64P_BIG_ENDIAN |
| 679 | *(outbuff++) = Right; |
| 680 | *(outbuff++) = Left; |
| 681 | *(outbuff++) = Right2; |
| 682 | *(outbuff++) = Left2; |
| 683 | #else |
| 684 | *(outbuff++) = Right2; |
| 685 | *(outbuff++) = Left2; |
| 686 | *(outbuff++) = Right; |
| 687 | *(outbuff++) = Left; |
| 688 | #endif |
| 689 | } |
| 690 | } |
| 691 | |
| 692 | static void ADDMIXER(uint32_t inst1, uint32_t inst2) |
| 693 | { |
| 694 | short Count = (inst1 >> 12) & 0x00ff0; |
| 695 | uint16_t InBuffer = (inst2 >> 16); |
| 696 | uint16_t OutBuffer = inst2 & 0xffff; |
| 697 | int cntr; |
| 698 | |
| 699 | int16_t *inp, *outp; |
| 700 | int32_t temp; |
| 701 | inp = (int16_t *)(BufferSpace + InBuffer); |
| 702 | outp = (int16_t *)(BufferSpace + OutBuffer); |
| 703 | for (cntr = 0; cntr < Count; cntr += 2) { |
| 704 | temp = *outp + *inp; |
| 705 | temp = clamp_s16(temp); |
| 706 | *(outp++) = temp; |
| 707 | inp++; |
| 708 | } |
| 709 | } |
| 710 | |
| 711 | static void HILOGAIN(uint32_t inst1, uint32_t inst2) |
| 712 | { |
| 713 | uint16_t cnt = inst1 & 0xffff; |
| 714 | uint16_t out = (inst2 >> 16) & 0xffff; |
| 715 | int16_t hi = (int16_t)((inst1 >> 4) & 0xf000); |
| 716 | uint16_t lo = (inst1 >> 20) & 0xf; |
| 717 | int16_t *src = (int16_t *)(BufferSpace + out); |
| 718 | int32_t tmp, val; |
| 719 | |
| 720 | while (cnt) { |
| 721 | val = (int32_t) * src; |
| 722 | tmp = ((val * (int32_t)hi) >> 16) + (uint32_t)(val * lo); |
| 723 | tmp = clamp_s16(tmp); |
| 724 | *src = tmp; |
| 725 | src++; |
| 726 | cnt -= 2; |
| 727 | } |
| 728 | } |
| 729 | |
| 730 | static void FILTER2(uint32_t inst1, uint32_t inst2) |
| 731 | { |
| 732 | static int cnt = 0; |
| 733 | static int16_t *lutt6; |
| 734 | static int16_t *lutt5; |
| 735 | uint8_t *save = (rsp.RDRAM + (inst2 & 0xFFFFFF)); |
| 736 | uint8_t t4 = (uint8_t)((inst1 >> 0x10) & 0xFF); |
| 737 | int x; |
| 738 | short *inp1, *inp2; |
| 739 | int32_t out1[8]; |
| 740 | int16_t outbuff[0x3c0], *outp; |
| 741 | uint32_t inPtr; |
| 742 | |
| 743 | if (t4 > 1) { |
| 744 | /* Then set the cnt variable */ |
| 745 | cnt = (inst1 & 0xFFFF); |
| 746 | lutt6 = (int16_t *)save; |
| 747 | return; |
| 748 | } |
| 749 | |
| 750 | if (t4 == 0) |
| 751 | lutt5 = (short *)(save + 0x10); |
| 752 | |
| 753 | lutt5 = (short *)(save + 0x10); |
| 754 | |
| 755 | for (x = 0; x < 8; x++) { |
| 756 | int32_t a; |
| 757 | a = (lutt5[x] + lutt6[x]) >> 1; |
| 758 | lutt5[x] = lutt6[x] = (short)a; |
| 759 | } |
| 760 | inPtr = (uint32_t)(inst1 & 0xffff); |
| 761 | inp1 = (short *)(save); |
| 762 | outp = outbuff; |
| 763 | inp2 = (short *)(BufferSpace + inPtr); |
| 764 | for (x = 0; x < cnt; x += 0x10) { |
| 765 | out1[1] = inp1[0] * lutt6[6]; |
| 766 | out1[1] += inp1[3] * lutt6[7]; |
| 767 | out1[1] += inp1[2] * lutt6[4]; |
| 768 | out1[1] += inp1[5] * lutt6[5]; |
| 769 | out1[1] += inp1[4] * lutt6[2]; |
| 770 | out1[1] += inp1[7] * lutt6[3]; |
| 771 | out1[1] += inp1[6] * lutt6[0]; |
| 772 | out1[1] += inp2[1] * lutt6[1]; /* 1 */ |
| 773 | |
| 774 | out1[0] = inp1[3] * lutt6[6]; |
| 775 | out1[0] += inp1[2] * lutt6[7]; |
| 776 | out1[0] += inp1[5] * lutt6[4]; |
| 777 | out1[0] += inp1[4] * lutt6[5]; |
| 778 | out1[0] += inp1[7] * lutt6[2]; |
| 779 | out1[0] += inp1[6] * lutt6[3]; |
| 780 | out1[0] += inp2[1] * lutt6[0]; |
| 781 | out1[0] += inp2[0] * lutt6[1]; |
| 782 | |
| 783 | out1[3] = inp1[2] * lutt6[6]; |
| 784 | out1[3] += inp1[5] * lutt6[7]; |
| 785 | out1[3] += inp1[4] * lutt6[4]; |
| 786 | out1[3] += inp1[7] * lutt6[5]; |
| 787 | out1[3] += inp1[6] * lutt6[2]; |
| 788 | out1[3] += inp2[1] * lutt6[3]; |
| 789 | out1[3] += inp2[0] * lutt6[0]; |
| 790 | out1[3] += inp2[3] * lutt6[1]; |
| 791 | |
| 792 | out1[2] = inp1[5] * lutt6[6]; |
| 793 | out1[2] += inp1[4] * lutt6[7]; |
| 794 | out1[2] += inp1[7] * lutt6[4]; |
| 795 | out1[2] += inp1[6] * lutt6[5]; |
| 796 | out1[2] += inp2[1] * lutt6[2]; |
| 797 | out1[2] += inp2[0] * lutt6[3]; |
| 798 | out1[2] += inp2[3] * lutt6[0]; |
| 799 | out1[2] += inp2[2] * lutt6[1]; |
| 800 | |
| 801 | out1[5] = inp1[4] * lutt6[6]; |
| 802 | out1[5] += inp1[7] * lutt6[7]; |
| 803 | out1[5] += inp1[6] * lutt6[4]; |
| 804 | out1[5] += inp2[1] * lutt6[5]; |
| 805 | out1[5] += inp2[0] * lutt6[2]; |
| 806 | out1[5] += inp2[3] * lutt6[3]; |
| 807 | out1[5] += inp2[2] * lutt6[0]; |
| 808 | out1[5] += inp2[5] * lutt6[1]; |
| 809 | |
| 810 | out1[4] = inp1[7] * lutt6[6]; |
| 811 | out1[4] += inp1[6] * lutt6[7]; |
| 812 | out1[4] += inp2[1] * lutt6[4]; |
| 813 | out1[4] += inp2[0] * lutt6[5]; |
| 814 | out1[4] += inp2[3] * lutt6[2]; |
| 815 | out1[4] += inp2[2] * lutt6[3]; |
| 816 | out1[4] += inp2[5] * lutt6[0]; |
| 817 | out1[4] += inp2[4] * lutt6[1]; |
| 818 | |
| 819 | out1[7] = inp1[6] * lutt6[6]; |
| 820 | out1[7] += inp2[1] * lutt6[7]; |
| 821 | out1[7] += inp2[0] * lutt6[4]; |
| 822 | out1[7] += inp2[3] * lutt6[5]; |
| 823 | out1[7] += inp2[2] * lutt6[2]; |
| 824 | out1[7] += inp2[5] * lutt6[3]; |
| 825 | out1[7] += inp2[4] * lutt6[0]; |
| 826 | out1[7] += inp2[7] * lutt6[1]; |
| 827 | |
| 828 | out1[6] = inp2[1] * lutt6[6]; |
| 829 | out1[6] += inp2[0] * lutt6[7]; |
| 830 | out1[6] += inp2[3] * lutt6[4]; |
| 831 | out1[6] += inp2[2] * lutt6[5]; |
| 832 | out1[6] += inp2[5] * lutt6[2]; |
| 833 | out1[6] += inp2[4] * lutt6[3]; |
| 834 | out1[6] += inp2[7] * lutt6[0]; |
| 835 | out1[6] += inp2[6] * lutt6[1]; |
| 836 | outp[1] = /*CLAMP*/((out1[1] + 0x4000) >> 0xF); |
| 837 | outp[0] = /*CLAMP*/((out1[0] + 0x4000) >> 0xF); |
| 838 | outp[3] = /*CLAMP*/((out1[3] + 0x4000) >> 0xF); |
| 839 | outp[2] = /*CLAMP*/((out1[2] + 0x4000) >> 0xF); |
| 840 | outp[5] = /*CLAMP*/((out1[5] + 0x4000) >> 0xF); |
| 841 | outp[4] = /*CLAMP*/((out1[4] + 0x4000) >> 0xF); |
| 842 | outp[7] = /*CLAMP*/((out1[7] + 0x4000) >> 0xF); |
| 843 | outp[6] = /*CLAMP*/((out1[6] + 0x4000) >> 0xF); |
| 844 | inp1 = inp2; |
| 845 | inp2 += 8; |
| 846 | outp += 8; |
| 847 | } |
| 848 | memcpy(save, inp2 - 8, 0x10); |
| 849 | memcpy(BufferSpace + (inst1 & 0xffff), outbuff, cnt); |
| 850 | } |
| 851 | |
| 852 | static void SEGMENT2(uint32_t inst1, uint32_t inst2) |
| 853 | { |
| 854 | if (isZeldaABI) { |
| 855 | FILTER2(inst1, inst2); |
| 856 | return; |
| 857 | } |
| 858 | if ((inst1 & 0xffffff) == 0) { |
| 859 | isMKABI = true; |
| 860 | } else { |
| 861 | isMKABI = false; |
| 862 | isZeldaABI = true; |
| 863 | FILTER2(inst1, inst2); |
| 864 | } |
| 865 | } |
| 866 | |
/* Handler for command slots that are deliberately ignored: does nothing. */
static void UNKNOWN(uint32_t inst1, uint32_t inst2)
{
    (void)inst1;
    (void)inst2;
}
| 870 | |
| 871 | const acmd_callback_t ABI2[0x20] = { |
| 872 | SPNOOP , ADPCM2, CLEARBUFF2, UNKNOWN, ADDMIXER, RESAMPLE2, UNKNOWN, SEGMENT2, |
| 873 | SETBUFF2 , DUPLICATE2, DMEMMOVE2, LOADADPCM2, MIXER2, INTERLEAVE2, HILOGAIN, SETLOOP2, |
| 874 | SPNOOP, INTERL2 , ENVSETUP1, ENVMIXER2, LOADBUFF2, SAVEBUFF2, ENVSETUP2, SPNOOP, |
| 875 | HILOGAIN , SPNOOP, DUPLICATE2 , UNKNOWN , SPNOOP , SPNOOP , SPNOOP , SPNOOP |
| 876 | }; |
| 877 | /* NOTES: |
| 878 | * |
| 879 | * FILTER/SEGMENT - Still needs to be finished up... add FILTER? |
 * UNKNOWN #27 - Is this worth doing? Looks like a pain in the ass just for WaveRace64
| 881 | */ |
| 882 | |