| 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | * Mupen64plus-rsp-hle - jpeg.c * |
| 3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
| 4 | * Copyright (C) 2012 Bobby Smiles * |
| 5 | * Copyright (C) 2009 Richard Goedeken * |
| 6 | * Copyright (C) 2002 Hacktarux * |
| 7 | * * |
| 8 | * This program is free software; you can redistribute it and/or modify * |
| 9 | * it under the terms of the GNU General Public License as published by * |
| 10 | * the Free Software Foundation; either version 2 of the License, or * |
| 11 | * (at your option) any later version. * |
| 12 | * * |
| 13 | * This program is distributed in the hope that it will be useful, * |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 16 | * GNU General Public License for more details. * |
| 17 | * * |
| 18 | * You should have received a copy of the GNU General Public License * |
| 19 | * along with this program; if not, write to the * |
| 20 | * Free Software Foundation, Inc., * |
| 21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
| 22 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 23 | |
| 24 | #include <assert.h> |
| 25 | #include <stdlib.h> |
| 26 | #include <stdint.h> |
| 27 | |
| 28 | #define M64P_PLUGIN_PROTOTYPES 1 |
| 29 | #include "m64p_types.h" |
| 30 | #include "m64p_plugin.h" |
| 31 | #include "hle.h" |
| 32 | |
/* number of 16-bit coefficients in one 8x8 subblock */
#define SUBBLOCK_SIZE 64

/* Callback writing one line of a tile to rdram at `address`.
 * `y` and `u` point into the luma and chroma data of a decoded macroblock. */
typedef void (*tile_line_emitter_t)(
        const int16_t *y,
        const int16_t *u,
        uint32_t address);

/* Callback decoding, in place, a macroblock made of `subblock_count`
 * subblocks, using the three dequantization tables in `qtables`. */
typedef void (*std_macroblock_decoder_t)(
        int16_t *macroblock,
        unsigned int subblock_count,
        const int16_t qtables[3][SUBBLOCK_SIZE]);
| 37 | |
| 38 | /* rdram operations */ |
| 39 | // FIXME: these functions deserve their own module |
| 40 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count); |
| 41 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count); |
| 42 | static uint32_t rdram_read_u32(uint32_t address); |
| 43 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count); |
| 44 | |
| 45 | /* standard jpeg ucode decoder */ |
| 46 | static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line); |
| 47 | |
| 48 | /* helper functions */ |
| 49 | static uint8_t clamp_u8(int16_t x); |
| 50 | static int16_t clamp_s12(int16_t x); |
| 51 | static int16_t clamp_s16(int32_t x); |
| 52 | static uint16_t clamp_RGBA_component(int16_t x); |
| 53 | |
| 54 | /* pixel conversion & foratting */ |
| 55 | static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v); |
| 56 | static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v); |
| 57 | |
| 58 | /* tile line emitters */ |
| 59 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address); |
| 60 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address); |
| 61 | |
| 62 | /* macroblocks operations */ |
| 63 | static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); |
| 64 | static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
| 65 | static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
| 66 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
| 67 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
| 68 | |
| 69 | /* subblocks operations */ |
| 70 | static void TransposeSubBlock(int16_t *dst, const int16_t *src); |
| 71 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src); |
| 72 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table); |
| 73 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); |
| 74 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); |
| 75 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); |
| 76 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride); |
| 77 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); |
| 78 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src); |
| 79 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); |
| 80 | |
| 81 | /* transposed dequantization table */ |
| 82 | static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = |
| 83 | { |
| 84 | 16, 12, 14, 14, 18, 24, 49, 72, |
| 85 | 11, 12, 13, 17, 22, 35, 64, 92, |
| 86 | 10, 14, 16, 22, 37, 55, 78, 95, |
| 87 | 16, 19, 24, 29, 56, 64, 87, 98, |
| 88 | 24, 26, 40, 51, 68, 81, 103, 112, |
| 89 | 40, 58, 57, 87, 109, 104, 121, 100, |
| 90 | 51, 60, 69, 80, 103, 113, 120, 103, |
| 91 | 61, 55, 56, 62, 77, 92, 101, 99 |
| 92 | }; |
| 93 | |
| 94 | /* zig-zag indices */ |
| 95 | static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = |
| 96 | { |
| 97 | 0, 1, 5, 6, 14, 15, 27, 28, |
| 98 | 2, 4, 7, 13, 16, 26, 29, 42, |
| 99 | 3, 8, 12, 17, 25, 30, 41, 43, |
| 100 | 9, 11, 18, 24, 31, 40, 44, 53, |
| 101 | 10, 19, 23, 32, 39, 45, 52, 54, |
| 102 | 20, 22, 33, 38, 46, 51, 55, 60, |
| 103 | 21, 34, 37, 47, 50, 56, 59, 61, |
| 104 | 35, 36, 48, 49, 57, 58, 62, 63 |
| 105 | }; |
| 106 | |
| 107 | /* transposition indices */ |
| 108 | static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = |
| 109 | { |
| 110 | 0, 8, 16, 24, 32, 40, 48, 56, |
| 111 | 1, 9, 17, 25, 33, 41, 49, 57, |
| 112 | 2, 10, 18, 26, 34, 42, 50, 58, |
| 113 | 3, 11, 19, 27, 35, 43, 51, 59, |
| 114 | 4, 12, 20, 28, 36, 44, 52, 60, |
| 115 | 5, 13, 21, 29, 37, 45, 53, 61, |
| 116 | 6, 14, 22, 30, 38, 46, 54, 62, |
| 117 | 7, 15, 23, 31, 39, 47, 55, 63 |
| 118 | }; |
| 119 | |
| 120 | |
| 121 | |
/* IDCT related constants.
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen such that C4 = 1.
 * IDCT_K holds the combinations of Cn used by InverseDCT1D. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] =
{
    /* [0]  C2-C6        */  0.765366865f,
    /* [1] -C2-C6        */ -1.847759065f,
    /* [2]  C5-C3        */ -0.390180644f,
    /* [3] -C5-C3        */ -1.961570561f,
    /* [4]  C1+C3-C5-C7  */  1.501321110f,
    /* [5]  C1+C3-C5+C7  */  2.053119869f,
    /* [6]  C1+C3+C5-C7  */  3.072711027f,
    /* [7] -C1+C3+C5-C7  */  0.298631336f,
    /* [8]  C7-C3        */ -0.899976223f,
    /* [9] -C1-C3        */ -2.562915448f
};
| 139 | |
| 140 | |
| 141 | /* global functions */ |
| 142 | |
| 143 | /*************************************************************************** |
| 144 | * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. |
| 145 | **************************************************************************/ |
| 146 | void jpeg_decode_PS0() |
| 147 | { |
| 148 | jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine); |
| 149 | } |
| 150 | |
| 151 | /*************************************************************************** |
| 152 | * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and |
| 153 | * Pokemon Stadium 2. |
| 154 | **************************************************************************/ |
| 155 | void jpeg_decode_PS() |
| 156 | { |
| 157 | jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine); |
| 158 | } |
| 159 | |
| 160 | /*************************************************************************** |
| 161 | * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. |
| 162 | **************************************************************************/ |
| 163 | void jpeg_decode_OB() |
| 164 | { |
| 165 | int16_t qtable[SUBBLOCK_SIZE]; |
| 166 | unsigned int mb; |
| 167 | |
| 168 | int32_t y_dc = 0; |
| 169 | int32_t u_dc = 0; |
| 170 | int32_t v_dc = 0; |
| 171 | |
| 172 | const OSTask_t * const task = get_task(); |
| 173 | |
| 174 | uint32_t address = task->data_ptr; |
| 175 | const unsigned int macroblock_count = task->data_size; |
| 176 | const int qscale = task->yield_data_size; |
| 177 | |
| 178 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", |
| 179 | address, |
| 180 | macroblock_count, |
| 181 | qscale); |
| 182 | |
| 183 | if (qscale != 0) |
| 184 | { |
| 185 | if (qscale > 0) |
| 186 | { |
| 187 | ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); |
| 188 | } |
| 189 | else |
| 190 | { |
| 191 | RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | for (mb = 0; mb < macroblock_count; ++mb) |
| 196 | { |
| 197 | int16_t macroblock[6*SUBBLOCK_SIZE]; |
| 198 | |
| 199 | rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE); |
| 200 | DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); |
| 201 | EmitTilesMode2(EmitYUVTileLine, macroblock, address); |
| 202 | |
| 203 | address += (2*6*SUBBLOCK_SIZE); |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | |
| 208 | /* local functions */ |
| 209 | static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line) |
| 210 | { |
| 211 | int16_t qtables[3][SUBBLOCK_SIZE]; |
| 212 | unsigned int mb; |
| 213 | uint32_t address; |
| 214 | uint32_t macroblock_count; |
| 215 | uint32_t mode; |
| 216 | uint32_t qtableY_ptr; |
| 217 | uint32_t qtableU_ptr; |
| 218 | uint32_t qtableV_ptr; |
| 219 | unsigned int subblock_count; |
| 220 | unsigned int macroblock_size; |
| 221 | int16_t *macroblock; |
| 222 | const OSTask_t * const task = get_task(); |
| 223 | |
| 224 | if (task->flags & 0x1) |
| 225 | { |
| 226 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); |
| 227 | return; |
| 228 | } |
| 229 | |
| 230 | address = rdram_read_u32(task->data_ptr); |
| 231 | macroblock_count = rdram_read_u32(task->data_ptr + 4); |
| 232 | mode = rdram_read_u32(task->data_ptr + 8); |
| 233 | qtableY_ptr = rdram_read_u32(task->data_ptr + 12); |
| 234 | qtableU_ptr = rdram_read_u32(task->data_ptr + 16); |
| 235 | qtableV_ptr = rdram_read_u32(task->data_ptr + 20); |
| 236 | |
| 237 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", |
| 238 | version, |
| 239 | address, |
| 240 | macroblock_count, |
| 241 | mode, |
| 242 | qtableY_ptr, |
| 243 | qtableU_ptr, |
| 244 | qtableV_ptr); |
| 245 | |
| 246 | if (mode != 0 && mode != 2) |
| 247 | { |
| 248 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); |
| 249 | return; |
| 250 | } |
| 251 | |
| 252 | subblock_count = mode + 4; |
| 253 | macroblock_size = 2*subblock_count*SUBBLOCK_SIZE; |
| 254 | |
| 255 | rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); |
| 256 | rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); |
| 257 | rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); |
| 258 | |
| 259 | macroblock = malloc(sizeof(*macroblock) * macroblock_size); |
| 260 | if (!macroblock) |
| 261 | { |
| 262 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version); |
| 263 | return; |
| 264 | } |
| 265 | |
| 266 | for (mb = 0; mb < macroblock_count; ++mb) |
| 267 | { |
| 268 | rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1); |
| 269 | decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); |
| 270 | |
| 271 | if (mode == 0) |
| 272 | { |
| 273 | EmitTilesMode0(emit_line, macroblock, address); |
| 274 | } |
| 275 | else |
| 276 | { |
| 277 | EmitTilesMode2(emit_line, macroblock, address); |
| 278 | } |
| 279 | |
| 280 | address += macroblock_size; |
| 281 | } |
| 282 | free(macroblock); |
| 283 | } |
| 284 | |
/* Saturate x to the range [0, 255]. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
| 289 | |
/* Saturate x to the range [-0x800, 0x7f0].
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lowest  = -0x800;
    const int16_t highest = 0x7f0;

    return (x < lowest) ? lowest
         : (x > highest) ? highest
         : x;
}
| 295 | |
/* Saturate a 32-bit value to the range of a signed 16-bit integer. */
static int16_t clamp_s16(int32_t x)
{
    const int32_t saturated = (x > 32767) ? 32767
                            : (x < -32768) ? -32768
                            : x;

    return saturated;
}
| 301 | |
/* Saturate x to [0, 0xff0] and keep only bits 7 to 11 of the result,
 * i.e. the 5 most significant bits of a 12-bit color component. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t limited = x;

    if (limited < 0)
    {
        limited = 0;
    }
    else if (limited > 0xff0)
    {
        limited = 0xff0;
    }

    return (limited & 0xf80);
}
| 307 | |
/* Pack two luma samples and one chroma pair into a 32-bit word.
 * Each component is saturated to 8 bits; from most to least significant
 * byte the layout is U, Y1, V, Y2. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    uint32_t packed;

    packed = (uint32_t)clamp_u8(u);
    packed = (packed << 8) | (uint32_t)clamp_u8(y1);
    packed = (packed << 8) | (uint32_t)clamp_u8(v);
    packed = (packed << 8) | (uint32_t)clamp_u8(y2);

    return packed;
}
| 315 | |
/* Convert one YUV sample to a 16-bit RGBA pixel.
 * Luma is offset by 2048 before conversion. Each color component is reduced
 * to 5 bits by clamp_RGBA_component and the result is laid out as
 * R (bits 15-11), G (bits 10-6), B (bits 5-1), with bit 0 always set. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float luma     = (float)y + 2048.0f;
    const float chroma_u = (float)u;
    const float chroma_v = (float)v;

    const int16_t red   = (int16_t)(luma + 1.4025*chroma_v);
    const int16_t green = (int16_t)(luma - 0.3443*chroma_u - 0.7144*chroma_v);
    const int16_t blue  = (int16_t)(luma + 1.7729*chroma_u);

    uint16_t pixel = 1;

    pixel |= clamp_RGBA_component(red) << 4;
    pixel |= clamp_RGBA_component(green) >> 1;
    pixel |= clamp_RGBA_component(blue) >> 6;

    return pixel;
}
| 328 | |
| 329 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address) |
| 330 | { |
| 331 | uint32_t uyvy[8]; |
| 332 | |
| 333 | const int16_t * const v = u + SUBBLOCK_SIZE; |
| 334 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
| 335 | |
| 336 | uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); |
| 337 | uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); |
| 338 | uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); |
| 339 | uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); |
| 340 | uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); |
| 341 | uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); |
| 342 | uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); |
| 343 | uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); |
| 344 | |
| 345 | rdram_write_many_u32(uyvy, address, 8); |
| 346 | } |
| 347 | |
| 348 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address) |
| 349 | { |
| 350 | uint16_t rgba[16]; |
| 351 | |
| 352 | const int16_t * const v = u + SUBBLOCK_SIZE; |
| 353 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
| 354 | |
| 355 | rgba[0] = GetRGBA(y[0], u[0], v[0]); |
| 356 | rgba[1] = GetRGBA(y[1], u[0], v[0]); |
| 357 | rgba[2] = GetRGBA(y[2], u[1], v[1]); |
| 358 | rgba[3] = GetRGBA(y[3], u[1], v[1]); |
| 359 | rgba[4] = GetRGBA(y[4], u[2], v[2]); |
| 360 | rgba[5] = GetRGBA(y[5], u[2], v[2]); |
| 361 | rgba[6] = GetRGBA(y[6], u[3], v[3]); |
| 362 | rgba[7] = GetRGBA(y[7], u[3], v[3]); |
| 363 | rgba[8] = GetRGBA(y2[0], u[4], v[4]); |
| 364 | rgba[9] = GetRGBA(y2[1], u[4], v[4]); |
| 365 | rgba[10] = GetRGBA(y2[2], u[5], v[5]); |
| 366 | rgba[11] = GetRGBA(y2[3], u[5], v[5]); |
| 367 | rgba[12] = GetRGBA(y2[4], u[6], v[6]); |
| 368 | rgba[13] = GetRGBA(y2[5], u[6], v[6]); |
| 369 | rgba[14] = GetRGBA(y2[6], u[7], v[7]); |
| 370 | rgba[15] = GetRGBA(y2[7], u[7], v[7]); |
| 371 | |
| 372 | rdram_write_many_u16(rgba, address, 16); |
| 373 | } |
| 374 | |
| 375 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
| 376 | { |
| 377 | unsigned int i; |
| 378 | |
| 379 | unsigned int y_offset = 0; |
| 380 | unsigned int u_offset = 2*SUBBLOCK_SIZE; |
| 381 | |
| 382 | for (i = 0; i < 8; ++i) |
| 383 | { |
| 384 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
| 385 | |
| 386 | y_offset += 8; |
| 387 | u_offset += 8; |
| 388 | address += 32; |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
| 393 | { |
| 394 | unsigned int i; |
| 395 | |
| 396 | unsigned int y_offset = 0; |
| 397 | unsigned int u_offset = 4*SUBBLOCK_SIZE; |
| 398 | |
| 399 | for (i = 0; i < 8; ++i) |
| 400 | { |
| 401 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
| 402 | emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); |
| 403 | |
| 404 | y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16; |
| 405 | u_offset += 8; |
| 406 | address += 64; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) |
| 411 | { |
| 412 | int sb; |
| 413 | |
| 414 | for (sb = 0; sb < 6; ++sb) |
| 415 | { |
| 416 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
| 417 | |
| 418 | /* update DC */ |
| 419 | int32_t dc = (int32_t)macroblock[0]; |
| 420 | switch(sb) |
| 421 | { |
| 422 | case 0: case 1: case 2: case 3: |
| 423 | *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break; |
| 424 | case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break; |
| 425 | case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break; |
| 426 | } |
| 427 | |
| 428 | ZigZagSubBlock(tmp_sb, macroblock); |
| 429 | if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); } |
| 430 | TransposeSubBlock(macroblock, tmp_sb); |
| 431 | InverseDCTSubBlock(macroblock, macroblock); |
| 432 | |
| 433 | macroblock += SUBBLOCK_SIZE; |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) |
| 438 | { |
| 439 | unsigned int sb; |
| 440 | unsigned int q = 0; |
| 441 | |
| 442 | for (sb = 0; sb < subblock_count; ++sb) |
| 443 | { |
| 444 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
| 445 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
| 446 | |
| 447 | if (isChromaSubBlock) { ++q; } |
| 448 | |
| 449 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
| 450 | ZigZagSubBlock(tmp_sb, macroblock); |
| 451 | InverseDCTSubBlock(macroblock, tmp_sb); |
| 452 | |
| 453 | macroblock += SUBBLOCK_SIZE; |
| 454 | } |
| 455 | |
| 456 | } |
| 457 | |
| 458 | static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) |
| 459 | { |
| 460 | unsigned int sb; |
| 461 | unsigned int q = 0; |
| 462 | |
| 463 | for (sb = 0; sb < subblock_count; ++sb) |
| 464 | { |
| 465 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
| 466 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
| 467 | |
| 468 | if (isChromaSubBlock) { ++q; } |
| 469 | |
| 470 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
| 471 | ZigZagSubBlock(tmp_sb, macroblock); |
| 472 | InverseDCTSubBlock(macroblock, tmp_sb); |
| 473 | |
| 474 | if (isChromaSubBlock) |
| 475 | { |
| 476 | RescaleUVSubBlock(macroblock, macroblock); |
| 477 | } |
| 478 | else |
| 479 | { |
| 480 | RescaleYSubBlock(macroblock, macroblock); |
| 481 | } |
| 482 | |
| 483 | macroblock += SUBBLOCK_SIZE; |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | static void TransposeSubBlock(int16_t *dst, const int16_t *src) |
| 488 | { |
| 489 | ReorderSubBlock(dst, src, TRANSPOSE_TABLE); |
| 490 | } |
| 491 | |
| 492 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src) |
| 493 | { |
| 494 | ReorderSubBlock(dst, src, ZIGZAG_TABLE); |
| 495 | } |
| 496 | |
| 497 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table) |
| 498 | { |
| 499 | unsigned int i; |
| 500 | |
| 501 | /* source and destination sublocks cannot overlap */ |
| 502 | assert(abs(dst - src) > SUBBLOCK_SIZE); |
| 503 | |
| 504 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 505 | { |
| 506 | dst[i] = src[table[i]]; |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) |
| 511 | { |
| 512 | unsigned int i; |
| 513 | |
| 514 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 515 | { |
| 516 | int32_t v = src1[i] * src2[i]; |
| 517 | dst[i] = clamp_s16(v) << shift; |
| 518 | } |
| 519 | } |
| 520 | |
| 521 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) |
| 522 | { |
| 523 | unsigned int i; |
| 524 | |
| 525 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 526 | { |
| 527 | int32_t v = src[i] * scale; |
| 528 | dst[i] = clamp_s16(v); |
| 529 | } |
| 530 | } |
| 531 | |
| 532 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) |
| 533 | { |
| 534 | unsigned int i; |
| 535 | |
| 536 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 537 | { |
| 538 | dst[i] = src[i] >> shift; |
| 539 | } |
| 540 | } |
| 541 | |
| 542 | /*************************************************************************** |
| 543 | * Fast 2D IDCT using separable formulation and normalization |
| 544 | * Computations use single precision floats |
| 545 | * Implementation based on Wikipedia : |
| 546 | * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te |
| 547 | **************************************************************************/ |
| 548 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride) |
| 549 | { |
| 550 | float e[4]; |
| 551 | float f[4]; |
| 552 | float x26, x1357, x15, x37, x17, x35; |
| 553 | |
| 554 | x15 = IDCT_K[2] * (x[1] + x[5]); |
| 555 | x37 = IDCT_K[3] * (x[3] + x[7]); |
| 556 | x17 = IDCT_K[8] * (x[1] + x[7]); |
| 557 | x35 = IDCT_K[9] * (x[3] + x[5]); |
| 558 | x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]); |
| 559 | x26 = IDCT_C6 * (x[2] + x[6]); |
| 560 | |
| 561 | f[0] = x[0] + x[4]; |
| 562 | f[1] = x[0] - x[4]; |
| 563 | f[2] = x26 + IDCT_K[0]*x[2]; |
| 564 | f[3] = x26 + IDCT_K[1]*x[6]; |
| 565 | |
| 566 | e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17; |
| 567 | e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35; |
| 568 | e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35; |
| 569 | e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17; |
| 570 | |
| 571 | *dst = f[0] + f[2] + e[0]; dst += stride; |
| 572 | *dst = f[1] + f[3] + e[1]; dst += stride; |
| 573 | *dst = f[1] - f[3] + e[2]; dst += stride; |
| 574 | *dst = f[0] - f[2] + e[3]; dst += stride; |
| 575 | *dst = f[0] - f[2] - e[3]; dst += stride; |
| 576 | *dst = f[1] - f[3] - e[2]; dst += stride; |
| 577 | *dst = f[1] + f[3] - e[1]; dst += stride; |
| 578 | *dst = f[0] + f[2] - e[0]; dst += stride; |
| 579 | } |
| 580 | |
| 581 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) |
| 582 | { |
| 583 | float x[8]; |
| 584 | float block[SUBBLOCK_SIZE]; |
| 585 | unsigned int i, j; |
| 586 | |
| 587 | /* idct 1d on rows (+transposition) */ |
| 588 | for (i = 0; i < 8; ++i) |
| 589 | { |
| 590 | for (j = 0; j < 8; ++j) |
| 591 | { |
| 592 | x[j] = (float)src[i*8+j]; |
| 593 | } |
| 594 | |
| 595 | InverseDCT1D(x, &block[i], 8); |
| 596 | } |
| 597 | |
| 598 | /* idct 1d on columns (thanks to previous transposition) */ |
| 599 | for (i = 0; i < 8; ++i) |
| 600 | { |
| 601 | InverseDCT1D(&block[i*8], x, 1); |
| 602 | |
| 603 | /* C4 = 1 normalization implies a division by 8 */ |
| 604 | for (j = 0; j < 8; ++j) |
| 605 | { |
| 606 | dst[i+j*8] = (int16_t)x[j] >> 3; |
| 607 | } |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src) |
| 612 | { |
| 613 | unsigned int i; |
| 614 | |
| 615 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 616 | { |
| 617 | dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; |
| 618 | } |
| 619 | } |
| 620 | |
| 621 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) |
| 622 | { |
| 623 | unsigned int i; |
| 624 | |
| 625 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
| 626 | { |
| 627 | dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | |
| 632 | |
| 633 | /* FIXME: assume presence of expansion pack */ |
| 634 | #define MEMMASK 0x7fffff |
| 635 | |
| 636 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) |
| 637 | { |
| 638 | while (count != 0) |
| 639 | { |
| 640 | uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; |
| 641 | s <<= 8; |
| 642 | s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; |
| 643 | |
| 644 | *(dst++) = s; |
| 645 | |
| 646 | --count; |
| 647 | } |
| 648 | } |
| 649 | |
| 650 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) |
| 651 | { |
| 652 | while (count != 0) |
| 653 | { |
| 654 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
| 655 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
| 656 | |
| 657 | --count; |
| 658 | } |
| 659 | } |
| 660 | |
| 661 | static uint32_t rdram_read_u32(uint32_t address) |
| 662 | { |
| 663 | uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
| 664 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
| 665 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
| 666 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
| 667 | |
| 668 | return r; |
| 669 | } |
| 670 | |
| 671 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) |
| 672 | { |
| 673 | while (count != 0) |
| 674 | { |
| 675 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); |
| 676 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); |
| 677 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
| 678 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
| 679 | |
| 680 | --count; |
| 681 | } |
| 682 | } |
| 683 | |