X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=source%2Fmupen64plus-rsp-hle%2Fsrc%2Fjpeg.c;h=59004673fd7e69d362b019b292fb1e087bc9acdd;hb=df00ea1301b9e755242364def666d5b12d470762;hp=28fcc8bdd9223ed545b56023742c85b899b5c7bc;hpb=d9e74a6f3c9f96acfb0d64f4bda43f9b9c14984e;p=mupen64plus-pandora.git diff --git a/source/mupen64plus-rsp-hle/src/jpeg.c b/source/mupen64plus-rsp-hle/src/jpeg.c index 28fcc8b..5900467 100755 --- a/source/mupen64plus-rsp-hle/src/jpeg.c +++ b/source/mupen64plus-rsp-hle/src/jpeg.c @@ -21,34 +21,38 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#include #include #include -#include #define M64P_PLUGIN_PROTOTYPES 1 #include "m64p_types.h" #include "m64p_plugin.h" #include "hle.h" +#include "jpeg.h" #define SUBBLOCK_SIZE 64 typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address); -typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); +typedef void (*subblock_transform_t)(int16_t *dst, const int16_t *src); -/* rdram operations */ -// FIXME: these functions deserve their own module +/* rdram operations + * FIXME: these functions deserve their own module + */ static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count); static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count); static uint32_t rdram_read_u32(uint32_t address); static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count); /* standard jpeg ucode decoder */ -static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line); +static void jpeg_decode_std(const char *const version, + const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + const tile_line_emitter_t emit_line); /* helper functions */ static uint8_t clamp_u8(int16_t x); static int16_t clamp_s12(int16_t x); -static int16_t clamp_s16(int32_t x); static uint16_t clamp_RGBA_component(int16_t x); /* pixel conversion & foratting */ @@ -60,9 +64,12 @@ static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address); /* macroblocks operations */ -static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); -static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); -static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); +static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); +static void decode_macroblock_std(const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + int16_t *macroblock, + unsigned int subblock_count, + const int16_t qtables[3][SUBBLOCK_SIZE]); static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); @@ -73,14 +80,13 @@ static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); -static void InverseDCT1D(const float * const x, float *dst, unsigned int stride); +static void InverseDCT1D(const float *const x, float *dst, unsigned int stride); static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); static void RescaleYSubBlock(int16_t *dst, const int16_t *src); static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); /* transposed dequantization table */ -static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = -{ +static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = { 16, 12, 14, 14, 18, 24, 49, 72, 11, 12, 13, 17, 22, 35, 64, 92, 10, 14, 16, 22, 37, 55, 78, 95, @@ -92,8 +98,7 @@ static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = }; /* zig-zag indices */ -static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = -{ +static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = { 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, @@ -105,8 +110,7 @@ static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = }; /* transposition indices */ -static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = -{ +static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = { 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, 2, 10, 18, 26, 34, 42, 50, 58, @@ -123,18 +127,17 @@ static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = * Cn = alpha * cos(n * PI / 16) (alpha is chosen such as C4 = 1) */ static const float IDCT_C3 = 1.175875602f; static const float IDCT_C6 = 0.541196100f; -static const float IDCT_K[10] = -{ - 0.765366865f, /* C2-C6 */ - -1.847759065f, /* -C2-C6 */ - -0.390180644f, /* C5-C3 */ - -1.961570561f, /* -C5-C3 */ - 1.501321110f, /* C1+C3-C5-C7 */ - 2.053119869f, /* C1+C3-C5+C7 */ - 3.072711027f, /* C1+C3+C5-C7 */ - 0.298631336f, /* -C1+C3+C5-C7 */ - -0.899976223f, /* C7-C3 */ - -2.562915448f /* -C1-C3 */ +static const float IDCT_K[10] = { + 0.765366865f, /* C2-C6 */ + -1.847759065f, /* -C2-C6 */ + -0.390180644f, /* C5-C3 */ + -1.961570561f, /* -C5-C3 */ + 1.501321110f, /* C1+C3-C5-C7 */ + 2.053119869f, /* C1+C3-C5+C7 */ + 3.072711027f, /* C1+C3+C5-C7 */ + 0.298631336f, /* -C1+C3+C5-C7 */ + -0.899976223f, /* C7-C3 */ + -2.562915448f /* -C1-C3 */ }; @@ -143,24 +146,24 @@ static const float IDCT_K[10] = /*************************************************************************** * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. **************************************************************************/ -void jpeg_decode_PS0() +void jpeg_decode_PS0(void) { - jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine); + jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine); } /*************************************************************************** * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and * Pokemon Stadium 2. **************************************************************************/ -void jpeg_decode_PS() +void jpeg_decode_PS(void) { - jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine); + jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine); } /*************************************************************************** * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. **************************************************************************/ -void jpeg_decode_OB() +void jpeg_decode_OB(void) { int16_t qtable[SUBBLOCK_SIZE]; unsigned int mb; @@ -168,45 +171,42 @@ void jpeg_decode_OB() int32_t y_dc = 0; int32_t u_dc = 0; int32_t v_dc = 0; - - const OSTask_t * const task = get_task(); + + const OSTask_t *const task = get_task(); uint32_t address = task->data_ptr; const unsigned int macroblock_count = task->data_size; const int qscale = task->yield_data_size; DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", - address, - macroblock_count, - qscale); + address, + macroblock_count, + qscale); - if (qscale != 0) - { + if (qscale != 0) { if (qscale > 0) - { ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); - } else - { RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); - } } - for (mb = 0; mb < macroblock_count; ++mb) - { - int16_t macroblock[6*SUBBLOCK_SIZE]; + for (mb = 0; mb < macroblock_count; ++mb) { + int16_t macroblock[6 * SUBBLOCK_SIZE]; - rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE); - DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); + rdram_read_many_u16((uint16_t *)macroblock, address, 6 * SUBBLOCK_SIZE); + decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); EmitTilesMode2(EmitYUVTileLine, macroblock, address); - address += (2*6*SUBBLOCK_SIZE); + address += (2 * 6 * SUBBLOCK_SIZE); } } /* local functions */ -static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line) +static void jpeg_decode_std(const char *const version, + const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + const tile_line_emitter_t emit_line) { int16_t qtables[3][SUBBLOCK_SIZE]; unsigned int mb; @@ -218,11 +218,11 @@ static void jpeg_decode_std(const char * const version, const std_macroblock_dec uint32_t qtableV_ptr; unsigned int subblock_count; unsigned int macroblock_size; - int16_t *macroblock; - const OSTask_t * const task = get_task(); + /* macroblock contains at most 6 subblocks */ + int16_t macroblock[6 * SUBBLOCK_SIZE]; + const OSTask_t *const task = get_task(); - if (task->flags & 0x1) - { + if (task->flags & 0x1) { DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); return; } @@ -235,51 +235,38 @@ static void jpeg_decode_std(const char * const version, const std_macroblock_dec qtableV_ptr = rdram_read_u32(task->data_ptr + 20); DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", - version, - address, - macroblock_count, - mode, - qtableY_ptr, - qtableU_ptr, - qtableV_ptr); - - if (mode != 0 && mode != 2) - { + version, + address, + macroblock_count, + mode, + qtableY_ptr, + qtableU_ptr, + qtableV_ptr); + + if (mode != 0 && mode != 2) { DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); return; } - - subblock_count = mode + 4; - macroblock_size = 2*subblock_count*SUBBLOCK_SIZE; - rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); - rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); - rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); + subblock_count = mode + 4; + macroblock_size = subblock_count * SUBBLOCK_SIZE; - macroblock = malloc(sizeof(*macroblock) * macroblock_size); - if (!macroblock) - { - DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version); - return; - } + rdram_read_many_u16((uint16_t *)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); + rdram_read_many_u16((uint16_t *)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); + rdram_read_many_u16((uint16_t *)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); - for (mb = 0; mb < macroblock_count; ++mb) - { - rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1); - decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); + for (mb = 0; mb < macroblock_count; ++mb) { + rdram_read_many_u16((uint16_t *)macroblock, address, macroblock_size); + decode_macroblock_std(transform_luma, transform_chroma, + macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); if (mode == 0) - { EmitTilesMode0(emit_line, macroblock, address); - } else - { EmitTilesMode2(emit_line, macroblock, address); - } - address += macroblock_size; + address += 2 * macroblock_size; } - free(macroblock); } static uint8_t clamp_u8(int16_t x) @@ -289,28 +276,28 @@ static uint8_t clamp_u8(int16_t x) static int16_t clamp_s12(int16_t x) { - if (x < -0x800) { x = -0x800; } else if (x > 0x7f0) { x = 0x7f0; } - return x; -} - -static int16_t clamp_s16(int32_t x) -{ - if (x > 32767) { x = 32767; } else if (x < -32768) { x = -32768; } + if (x < -0x800) + x = -0x800; + else if (x > 0x7f0) + x = 0x7f0; return x; } static uint16_t clamp_RGBA_component(int16_t x) { - if (x > 0xff0) { x = 0xff0; } else if (x < 0) { x = 0; } + if (x > 0xff0) + x = 0xff0; + else if (x < 0) + x = 0; return (x & 0xf80); } static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v) { - return (uint32_t)clamp_u8(u) << 24 - | (uint32_t)clamp_u8(y1) << 16 - | (uint32_t)clamp_u8(v) << 8 - | (uint32_t)clamp_u8(y2); + return (uint32_t)clamp_u8(u) << 24 | + (uint32_t)clamp_u8(y1) << 16 | + (uint32_t)clamp_u8(v) << 8 | + (uint32_t)clamp_u8(y2); } static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v) @@ -319,9 +306,9 @@ static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v) const float fU = (float)u; const float fV = (float)v; - const uint16_t r = clamp_RGBA_component((int16_t)(fY + 1.4025*fV)); - const uint16_t g = clamp_RGBA_component((int16_t)(fY - 0.3443*fU - 0.7144*fV)); - const uint16_t b = clamp_RGBA_component((int16_t)(fY + 1.7729*fU )); + const uint16_t r = clamp_RGBA_component((int16_t)(fY + 1.4025 * fV)); + const uint16_t g = clamp_RGBA_component((int16_t)(fY - 0.3443 * fU - 0.7144 * fV)); + const uint16_t b = clamp_RGBA_component((int16_t)(fY + 1.7729 * fU)); return (r << 4) | (g >> 1) | (b >> 6) | 1; } @@ -330,8 +317,8 @@ static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address { uint32_t uyvy[8]; - const int16_t * const v = u + SUBBLOCK_SIZE; - const int16_t * const y2 = y + SUBBLOCK_SIZE; + const int16_t *const v = u + SUBBLOCK_SIZE; + const int16_t *const y2 = y + SUBBLOCK_SIZE; uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); @@ -349,8 +336,8 @@ static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t addres { uint16_t rgba[16]; - const int16_t * const v = u + SUBBLOCK_SIZE; - const int16_t * const y2 = y + SUBBLOCK_SIZE; + const int16_t *const v = u + SUBBLOCK_SIZE; + const int16_t *const y2 = y + SUBBLOCK_SIZE; rgba[0] = GetRGBA(y[0], u[0], v[0]); rgba[1] = GetRGBA(y[1], u[0], v[0]); @@ -377,10 +364,9 @@ static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *m unsigned int i; unsigned int y_offset = 0; - unsigned int u_offset = 2*SUBBLOCK_SIZE; + unsigned int u_offset = 2 * SUBBLOCK_SIZE; - for (i = 0; i < 8; ++i) - { + for (i = 0; i < 8; ++i) { emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); y_offset += 8; @@ -394,90 +380,81 @@ static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *m unsigned int i; unsigned int y_offset = 0; - unsigned int u_offset = 4*SUBBLOCK_SIZE; + unsigned int u_offset = 4 * SUBBLOCK_SIZE; - for (i = 0; i < 8; ++i) - { + for (i = 0; i < 8; ++i) { emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); - y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16; + y_offset += (i == 3) ? SUBBLOCK_SIZE + 16 : 16; u_offset += 8; address += 64; } } -static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) +static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) { int sb; - for (sb = 0; sb < 6; ++sb) - { + for (sb = 0; sb < 6; ++sb) { int16_t tmp_sb[SUBBLOCK_SIZE]; /* update DC */ int32_t dc = (int32_t)macroblock[0]; - switch(sb) - { - case 0: case 1: case 2: case 3: - *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break; - case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break; - case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break; + switch (sb) { + case 0: + case 1: + case 2: + case 3: + *y_dc += dc; + macroblock[0] = *y_dc & 0xffff; + break; + case 4: + *u_dc += dc; + macroblock[0] = *u_dc & 0xffff; + break; + case 5: + *v_dc += dc; + macroblock[0] = *v_dc & 0xffff; + break; } ZigZagSubBlock(tmp_sb, macroblock); - if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); } + if (qtable != NULL) + MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); TransposeSubBlock(macroblock, tmp_sb); InverseDCTSubBlock(macroblock, macroblock); - - macroblock += SUBBLOCK_SIZE; - } -} - -static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) -{ - unsigned int sb; - unsigned int q = 0; - - for (sb = 0; sb < subblock_count; ++sb) - { - int16_t tmp_sb[SUBBLOCK_SIZE]; - const int isChromaSubBlock = (subblock_count - sb <= 2); - - if (isChromaSubBlock) { ++q; } - - MultSubBlocks(macroblock, macroblock, qtables[q], 4); - ZigZagSubBlock(tmp_sb, macroblock); - InverseDCTSubBlock(macroblock, tmp_sb); macroblock += SUBBLOCK_SIZE; } - } -static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) +static void decode_macroblock_std(const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + int16_t *macroblock, + unsigned int subblock_count, + const int16_t qtables[3][SUBBLOCK_SIZE]) { unsigned int sb; unsigned int q = 0; - for (sb = 0; sb < subblock_count; ++sb) - { + for (sb = 0; sb < subblock_count; ++sb) { int16_t tmp_sb[SUBBLOCK_SIZE]; const int isChromaSubBlock = (subblock_count - sb <= 2); - if (isChromaSubBlock) { ++q; } + if (isChromaSubBlock) + ++q; MultSubBlocks(macroblock, macroblock, qtables[q], 4); ZigZagSubBlock(tmp_sb, macroblock); InverseDCTSubBlock(macroblock, tmp_sb); - if (isChromaSubBlock) - { - RescaleUVSubBlock(macroblock, macroblock); - } - else - { - RescaleYSubBlock(macroblock, macroblock); + if (isChromaSubBlock) { + if (transform_chroma != NULL) + transform_chroma(macroblock, macroblock); + } else { + if (transform_luma != NULL) + transform_luma(macroblock, macroblock); } macroblock += SUBBLOCK_SIZE; @@ -502,17 +479,14 @@ static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int assert(abs(dst - src) > SUBBLOCK_SIZE); for (i = 0; i < SUBBLOCK_SIZE; ++i) - { dst[i] = src[table[i]]; - } } static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) { unsigned int i; - for (i = 0; i < SUBBLOCK_SIZE; ++i) - { + for (i = 0; i < SUBBLOCK_SIZE; ++i) { int32_t v = src1[i] * src2[i]; dst[i] = clamp_s16(v) << shift; } @@ -522,8 +496,7 @@ static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) { unsigned int i; - for (i = 0; i < SUBBLOCK_SIZE; ++i) - { + for (i = 0; i < SUBBLOCK_SIZE; ++i) { int32_t v = src[i] * scale; dst[i] = clamp_s16(v); } @@ -534,9 +507,7 @@ static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) unsigned int i; for (i = 0; i < SUBBLOCK_SIZE; ++i) - { dst[i] = src[i] >> shift; - } } /*************************************************************************** @@ -545,7 +516,7 @@ static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) * Implementation based on Wikipedia : * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te **************************************************************************/ -static void InverseDCT1D(const float * const x, float *dst, unsigned int stride) +static void InverseDCT1D(const float *const x, float *dst, unsigned int stride) { float e[4]; float f[4]; @@ -560,22 +531,29 @@ static void InverseDCT1D(const float * const x, float *dst, unsigned int stride) f[0] = x[0] + x[4]; f[1] = x[0] - x[4]; - f[2] = x26 + IDCT_K[0]*x[2]; - f[3] = x26 + IDCT_K[1]*x[6]; - - e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17; - e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35; - e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35; - e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17; - - *dst = f[0] + f[2] + e[0]; dst += stride; - *dst = f[1] + f[3] + e[1]; dst += stride; - *dst = f[1] - f[3] + e[2]; dst += stride; - *dst = f[0] - f[2] + e[3]; dst += stride; - *dst = f[0] - f[2] - e[3]; dst += stride; - *dst = f[1] - f[3] - e[2]; dst += stride; - *dst = f[1] + f[3] - e[1]; dst += stride; - *dst = f[0] + f[2] - e[0]; dst += stride; + f[2] = x26 + IDCT_K[0] * x[2]; + f[3] = x26 + IDCT_K[1] * x[6]; + + e[0] = x1357 + x15 + IDCT_K[4] * x[1] + x17; + e[1] = x1357 + x37 + IDCT_K[6] * x[3] + x35; + e[2] = x1357 + x15 + IDCT_K[5] * x[5] + x35; + e[3] = x1357 + x37 + IDCT_K[7] * x[7] + x17; + + *dst = f[0] + f[2] + e[0]; + dst += stride; + *dst = f[1] + f[3] + e[1]; + dst += stride; + *dst = f[1] - f[3] + e[2]; + dst += stride; + *dst = f[0] - f[2] + e[3]; + dst += stride; + *dst = f[0] - f[2] - e[3]; + dst += stride; + *dst = f[1] - f[3] - e[2]; + dst += stride; + *dst = f[1] + f[3] - e[1]; + dst += stride; + *dst = f[0] + f[2] - e[0]; } static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) @@ -585,26 +563,20 @@ static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) unsigned int i, j; /* idct 1d on rows (+transposition) */ - for (i = 0; i < 8; ++i) - { + for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) - { - x[j] = (float)src[i*8+j]; - } + x[j] = (float)src[i * 8 + j]; InverseDCT1D(x, &block[i], 8); } /* idct 1d on columns (thanks to previous transposition) */ - for (i = 0; i < 8; ++i) - { - InverseDCT1D(&block[i*8], x, 1); + for (i = 0; i < 8; ++i) { + InverseDCT1D(&block[i * 8], x, 1); /* C4 = 1 normalization implies a division by 8 */ for (j = 0; j < 8; ++j) - { - dst[i+j*8] = (int16_t)x[j] >> 3; - } + dst[i + j * 8] = (int16_t)x[j] >> 3; } } @@ -613,9 +585,7 @@ static void RescaleYSubBlock(int16_t *dst, const int16_t *src) unsigned int i; for (i = 0; i < SUBBLOCK_SIZE; ++i) - { dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; - } } static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) @@ -623,9 +593,7 @@ static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) unsigned int i; for (i = 0; i < SUBBLOCK_SIZE; ++i) - { dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; - } } @@ -635,8 +603,7 @@ static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) { - while (count != 0) - { + while (count != 0) { uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; s <<= 8; s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; @@ -649,8 +616,7 @@ static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int co static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) { - while (count != 0) - { + while (count != 0) { rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); @@ -660,9 +626,12 @@ static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned static uint32_t rdram_read_u32(uint32_t address) { - uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; - r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; - r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; + uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; + r <<= 8; + r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; + r <<= 8; + r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; + r <<= 8; r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; return r; @@ -670,8 +639,7 @@ static uint32_t rdram_read_u32(uint32_t address) static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) { - while (count != 0) - { + while (count != 0) { rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);