RSP: Added musyx ucode support to HLE from mupen64plus-ae
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / jpeg.c
CommitLineData
d9e74a6f 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - jpeg.c *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2012 Bobby Smiles *
5 * Copyright (C) 2009 Richard Goedeken *
6 * Copyright (C) 2002 Hacktarux *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the *
20 * Free Software Foundation, Inc., *
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
22 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
0a8a0368 24#include <stdint.h>
d9e74a6f 25#include <assert.h>
26#include <stdlib.h>
d9e74a6f 27
28#define M64P_PLUGIN_PROTOTYPES 1
29#include "m64p_types.h"
30#include "m64p_plugin.h"
31#include "hle.h"
0a8a0368 32#include "jpeg.h"
d9e74a6f 33
34#define SUBBLOCK_SIZE 64
35
36typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
0a8a0368 37typedef void (*subblock_transform_t)(int16_t *dst, const int16_t *src);
d9e74a6f 38
0a8a0368 39/* rdram operations
40 * FIXME: these functions deserve their own module
41 */
d9e74a6f 42static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
43static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
44static uint32_t rdram_read_u32(uint32_t address);
45static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
46
47/* standard jpeg ucode decoder */
0a8a0368 48static void jpeg_decode_std(const char *const version,
49 const subblock_transform_t transform_luma,
50 const subblock_transform_t transform_chroma,
51 const tile_line_emitter_t emit_line);
d9e74a6f 52
53/* helper functions */
54static uint8_t clamp_u8(int16_t x);
55static int16_t clamp_s12(int16_t x);
d9e74a6f 56static uint16_t clamp_RGBA_component(int16_t x);
57
/* pixel conversion & formatting */
59static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
60static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);
61
62/* tile line emitters */
63static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
64static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);
65
66/* macroblocks operations */
2d262872 67static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
0a8a0368 68static void decode_macroblock_std(const subblock_transform_t transform_luma,
69 const subblock_transform_t transform_chroma,
70 int16_t *macroblock,
71 unsigned int subblock_count,
72 const int16_t qtables[3][SUBBLOCK_SIZE]);
d9e74a6f 73static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
74static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
75
76/* subblocks operations */
77static void TransposeSubBlock(int16_t *dst, const int16_t *src);
78static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
79static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
80static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
81static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
82static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
0a8a0368 83static void InverseDCT1D(const float *const x, float *dst, unsigned int stride);
d9e74a6f 84static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
85static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
86static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
87
88/* transposed dequantization table */
0a8a0368 89static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = {
d9e74a6f 90 16, 12, 14, 14, 18, 24, 49, 72,
91 11, 12, 13, 17, 22, 35, 64, 92,
92 10, 14, 16, 22, 37, 55, 78, 95,
93 16, 19, 24, 29, 56, 64, 87, 98,
94 24, 26, 40, 51, 68, 81, 103, 112,
95 40, 58, 57, 87, 109, 104, 121, 100,
96 51, 60, 69, 80, 103, 113, 120, 103,
97 61, 55, 56, 62, 77, 92, 101, 99
98};
99
100/* zig-zag indices */
0a8a0368 101static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = {
d9e74a6f 102 0, 1, 5, 6, 14, 15, 27, 28,
103 2, 4, 7, 13, 16, 26, 29, 42,
104 3, 8, 12, 17, 25, 30, 41, 43,
105 9, 11, 18, 24, 31, 40, 44, 53,
106 10, 19, 23, 32, 39, 45, 52, 54,
107 20, 22, 33, 38, 46, 51, 55, 60,
108 21, 34, 37, 47, 50, 56, 59, 61,
109 35, 36, 48, 49, 57, 58, 62, 63
110};
111
112/* transposition indices */
0a8a0368 113static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = {
d9e74a6f 114 0, 8, 16, 24, 32, 40, 48, 56,
115 1, 9, 17, 25, 33, 41, 49, 57,
116 2, 10, 18, 26, 34, 42, 50, 58,
117 3, 11, 19, 27, 35, 43, 51, 59,
118 4, 12, 20, 28, 36, 44, 52, 60,
119 5, 13, 21, 29, 37, 45, 53, 61,
120 6, 14, 22, 30, 38, 46, 54, 62,
121 7, 15, 23, 31, 39, 47, 55, 63
122};
123
124
125
/* Constants used by the 1D inverse DCT.
 * Cn = alpha * cos(n * PI / 16), with alpha chosen so that C4 = 1.
 * IDCT_K holds the pre-combined sums/differences noted beside each entry. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] = {
     0.765366865f,  /* [0]  C2 - C6            */
    -1.847759065f,  /* [1] -C2 - C6            */
    -0.390180644f,  /* [2]  C5 - C3            */
    -1.961570561f,  /* [3] -C5 - C3            */
     1.501321110f,  /* [4]  C1 + C3 - C5 - C7  */
     2.053119869f,  /* [5]  C1 + C3 - C5 + C7  */
     3.072711027f,  /* [6]  C1 + C3 + C5 - C7  */
     0.298631336f,  /* [7] -C1 + C3 + C5 - C7  */
    -0.899976223f,  /* [8]  C7 - C3            */
    -2.562915448f   /* [9] -C1 - C3            */
};
142
143
144/* global functions */
145
146/***************************************************************************
147 * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium.
148 **************************************************************************/
0a8a0368 149void jpeg_decode_PS0(void)
d9e74a6f 150{
2d262872 151 jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine);
d9e74a6f 152}
153
154/***************************************************************************
155 * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
156 * Pokemon Stadium 2.
157 **************************************************************************/
0a8a0368 158void jpeg_decode_PS(void)
d9e74a6f 159{
2d262872 160 jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine);
d9e74a6f 161}
162
163/***************************************************************************
164 * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
165 **************************************************************************/
0a8a0368 166void jpeg_decode_OB(void)
d9e74a6f 167{
168 int16_t qtable[SUBBLOCK_SIZE];
169 unsigned int mb;
170
171 int32_t y_dc = 0;
172 int32_t u_dc = 0;
173 int32_t v_dc = 0;
0a8a0368 174
175 const OSTask_t *const task = get_task();
d9e74a6f 176
177 uint32_t address = task->data_ptr;
178 const unsigned int macroblock_count = task->data_size;
179 const int qscale = task->yield_data_size;
180
181 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d",
0a8a0368 182 address,
183 macroblock_count,
184 qscale);
d9e74a6f 185
0a8a0368 186 if (qscale != 0) {
d9e74a6f 187 if (qscale > 0)
d9e74a6f 188 ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
d9e74a6f 189 else
d9e74a6f 190 RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
d9e74a6f 191 }
192
0a8a0368 193 for (mb = 0; mb < macroblock_count; ++mb) {
194 int16_t macroblock[6 * SUBBLOCK_SIZE];
d9e74a6f 195
0a8a0368 196 rdram_read_many_u16((uint16_t *)macroblock, address, 6 * SUBBLOCK_SIZE);
2d262872 197 decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
d9e74a6f 198 EmitTilesMode2(EmitYUVTileLine, macroblock, address);
199
0a8a0368 200 address += (2 * 6 * SUBBLOCK_SIZE);
d9e74a6f 201 }
202}
203
204
205/* local functions */
0a8a0368 206static void jpeg_decode_std(const char *const version,
207 const subblock_transform_t transform_luma,
208 const subblock_transform_t transform_chroma,
209 const tile_line_emitter_t emit_line)
d9e74a6f 210{
211 int16_t qtables[3][SUBBLOCK_SIZE];
212 unsigned int mb;
213 uint32_t address;
214 uint32_t macroblock_count;
215 uint32_t mode;
216 uint32_t qtableY_ptr;
217 uint32_t qtableU_ptr;
218 uint32_t qtableV_ptr;
219 unsigned int subblock_count;
220 unsigned int macroblock_size;
0a8a0368 221 /* macroblock contains at most 6 subblocks */
222 int16_t macroblock[6 * SUBBLOCK_SIZE];
223 const OSTask_t *const task = get_task();
d9e74a6f 224
0a8a0368 225 if (task->flags & 0x1) {
d9e74a6f 226 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version);
227 return;
228 }
229
230 address = rdram_read_u32(task->data_ptr);
231 macroblock_count = rdram_read_u32(task->data_ptr + 4);
232 mode = rdram_read_u32(task->data_ptr + 8);
233 qtableY_ptr = rdram_read_u32(task->data_ptr + 12);
234 qtableU_ptr = rdram_read_u32(task->data_ptr + 16);
235 qtableV_ptr = rdram_read_u32(task->data_ptr + 20);
236
237 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x",
0a8a0368 238 version,
239 address,
240 macroblock_count,
241 mode,
242 qtableY_ptr,
243 qtableU_ptr,
244 qtableV_ptr);
245
246 if (mode != 0 && mode != 2) {
d9e74a6f 247 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode);
248 return;
249 }
0a8a0368 250
d9e74a6f 251 subblock_count = mode + 4;
0a8a0368 252 macroblock_size = subblock_count * SUBBLOCK_SIZE;
d9e74a6f 253
0a8a0368 254 rdram_read_many_u16((uint16_t *)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
255 rdram_read_many_u16((uint16_t *)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
256 rdram_read_many_u16((uint16_t *)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
d9e74a6f 257
0a8a0368 258 for (mb = 0; mb < macroblock_count; ++mb) {
259 rdram_read_many_u16((uint16_t *)macroblock, address, macroblock_size);
2d262872 260 decode_macroblock_std(transform_luma, transform_chroma,
0a8a0368 261 macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
d9e74a6f 262
263 if (mode == 0)
d9e74a6f 264 EmitTilesMode0(emit_line, macroblock, address);
d9e74a6f 265 else
d9e74a6f 266 EmitTilesMode2(emit_line, macroblock, address);
d9e74a6f 267
0a8a0368 268 address += 2 * macroblock_size;
d9e74a6f 269 }
d9e74a6f 270}
271
/* Saturate a signed 16-bit value to the unsigned 8-bit range [0, 255].
 *
 * Written with plain comparisons: the previous bit trick returned 1 for
 * x == -32768 (the negation promotes to +32768) and relied on the
 * implementation-defined result of right-shifting a negative value. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
        return 0;
    if (x > 0xff)
        return 0xff;
    return (uint8_t)x;
}
276
/* Limit x to the range [-0x800, 0x7f0].
 * Note the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lowest = -0x800;
    const int16_t highest = 0x7f0;

    return (x < lowest) ? lowest
         : (x > highest) ? highest
         : x;
}
285
/* Limit x to [0, 0xff0], then keep only bits 7..11 (mask 0xf80). */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int level = x;

    if (level < 0)
        level = 0;
    if (level > 0xff0)
        level = 0xff0;

    return (uint16_t)(level & 0xf80);
}
294
/* Pack two luma samples and one chroma pair into a 32-bit word.
 * Byte order from most to least significant: U, Y1, V, Y2.
 * Each component is first clamped with clamp_u8. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
302
/* Convert one YUV sample to a 16-bit RGBA pixel.
 *
 * Luma is biased by +2048 before the colour matrix is applied. Each
 * channel goes through clamp_RGBA_component and the results are packed
 * as (r << 4) | (g >> 1) | (b >> 6) with the lowest bit always set.
 *
 * The matrix output is saturated to the int16_t range before the cast:
 * converting an out-of-range floating value to an integer type is
 * undefined behaviour, and y + 2048 alone can exceed 32767. In-range
 * values are converted exactly as before. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    double channel[3];
    uint16_t component[3];
    unsigned int c;

    channel[0] = fY + 1.4025 * fV;                /* red   */
    channel[1] = fY - 0.3443 * fU - 0.7144 * fV;  /* green */
    channel[2] = fY + 1.7729 * fU;                /* blue  */

    for (c = 0; c < 3; ++c) {
        double value = channel[c];

        if (value > 32767.0)
            value = 32767.0;
        else if (value < -32768.0)
            value = -32768.0;

        component[c] = clamp_RGBA_component((int16_t)value);
    }

    return (component[0] << 4) | (component[1] >> 1) | (component[2] >> 6) | 1;
}
315
316static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address)
317{
318 uint32_t uyvy[8];
319
0a8a0368 320 const int16_t *const v = u + SUBBLOCK_SIZE;
321 const int16_t *const y2 = y + SUBBLOCK_SIZE;
d9e74a6f 322
323 uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]);
324 uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]);
325 uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]);
326 uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]);
327 uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
328 uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
329 uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
330 uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
331
332 rdram_write_many_u32(uyvy, address, 8);
333}
334
335static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
336{
337 uint16_t rgba[16];
338
0a8a0368 339 const int16_t *const v = u + SUBBLOCK_SIZE;
340 const int16_t *const y2 = y + SUBBLOCK_SIZE;
d9e74a6f 341
342 rgba[0] = GetRGBA(y[0], u[0], v[0]);
343 rgba[1] = GetRGBA(y[1], u[0], v[0]);
344 rgba[2] = GetRGBA(y[2], u[1], v[1]);
345 rgba[3] = GetRGBA(y[3], u[1], v[1]);
346 rgba[4] = GetRGBA(y[4], u[2], v[2]);
347 rgba[5] = GetRGBA(y[5], u[2], v[2]);
348 rgba[6] = GetRGBA(y[6], u[3], v[3]);
349 rgba[7] = GetRGBA(y[7], u[3], v[3]);
350 rgba[8] = GetRGBA(y2[0], u[4], v[4]);
351 rgba[9] = GetRGBA(y2[1], u[4], v[4]);
352 rgba[10] = GetRGBA(y2[2], u[5], v[5]);
353 rgba[11] = GetRGBA(y2[3], u[5], v[5]);
354 rgba[12] = GetRGBA(y2[4], u[6], v[6]);
355 rgba[13] = GetRGBA(y2[5], u[6], v[6]);
356 rgba[14] = GetRGBA(y2[6], u[7], v[7]);
357 rgba[15] = GetRGBA(y2[7], u[7], v[7]);
358
359 rdram_write_many_u16(rgba, address, 16);
360}
361
362static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
363{
364 unsigned int i;
365
366 unsigned int y_offset = 0;
0a8a0368 367 unsigned int u_offset = 2 * SUBBLOCK_SIZE;
d9e74a6f 368
0a8a0368 369 for (i = 0; i < 8; ++i) {
d9e74a6f 370 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
371
372 y_offset += 8;
373 u_offset += 8;
374 address += 32;
375 }
376}
377
378static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
379{
380 unsigned int i;
381
382 unsigned int y_offset = 0;
0a8a0368 383 unsigned int u_offset = 4 * SUBBLOCK_SIZE;
d9e74a6f 384
0a8a0368 385 for (i = 0; i < 8; ++i) {
d9e74a6f 386 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
387 emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
388
0a8a0368 389 y_offset += (i == 3) ? SUBBLOCK_SIZE + 16 : 16;
d9e74a6f 390 u_offset += 8;
391 address += 64;
392 }
393}
394
2d262872 395static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
d9e74a6f 396{
397 int sb;
398
0a8a0368 399 for (sb = 0; sb < 6; ++sb) {
d9e74a6f 400 int16_t tmp_sb[SUBBLOCK_SIZE];
401
402 /* update DC */
403 int32_t dc = (int32_t)macroblock[0];
0a8a0368 404 switch (sb) {
405 case 0:
406 case 1:
407 case 2:
408 case 3:
409 *y_dc += dc;
410 macroblock[0] = *y_dc & 0xffff;
411 break;
412 case 4:
413 *u_dc += dc;
414 macroblock[0] = *u_dc & 0xffff;
415 break;
416 case 5:
417 *v_dc += dc;
418 macroblock[0] = *v_dc & 0xffff;
419 break;
d9e74a6f 420 }
421
422 ZigZagSubBlock(tmp_sb, macroblock);
0a8a0368 423 if (qtable != NULL)
424 MultSubBlocks(tmp_sb, tmp_sb, qtable, 0);
d9e74a6f 425 TransposeSubBlock(macroblock, tmp_sb);
426 InverseDCTSubBlock(macroblock, macroblock);
0a8a0368 427
d9e74a6f 428 macroblock += SUBBLOCK_SIZE;
429 }
430}
431
0a8a0368 432static void decode_macroblock_std(const subblock_transform_t transform_luma,
433 const subblock_transform_t transform_chroma,
434 int16_t *macroblock,
435 unsigned int subblock_count,
436 const int16_t qtables[3][SUBBLOCK_SIZE])
d9e74a6f 437{
438 unsigned int sb;
439 unsigned int q = 0;
440
0a8a0368 441 for (sb = 0; sb < subblock_count; ++sb) {
d9e74a6f 442 int16_t tmp_sb[SUBBLOCK_SIZE];
443 const int isChromaSubBlock = (subblock_count - sb <= 2);
444
0a8a0368 445 if (isChromaSubBlock)
446 ++q;
d9e74a6f 447
448 MultSubBlocks(macroblock, macroblock, qtables[q], 4);
449 ZigZagSubBlock(tmp_sb, macroblock);
450 InverseDCTSubBlock(macroblock, tmp_sb);
451
0a8a0368 452 if (isChromaSubBlock) {
2d262872 453 if (transform_chroma != NULL)
454 transform_chroma(macroblock, macroblock);
0a8a0368 455 } else {
2d262872 456 if (transform_luma != NULL)
457 transform_luma(macroblock, macroblock);
d9e74a6f 458 }
459
460 macroblock += SUBBLOCK_SIZE;
461 }
462}
463
464static void TransposeSubBlock(int16_t *dst, const int16_t *src)
465{
466 ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
467}
468
469static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
470{
471 ReorderSubBlock(dst, src, ZIGZAG_TABLE);
472}
473
474static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
475{
476 unsigned int i;
477
478 /* source and destination sublocks cannot overlap */
479 assert(abs(dst - src) > SUBBLOCK_SIZE);
480
481 for (i = 0; i < SUBBLOCK_SIZE; ++i)
d9e74a6f 482 dst[i] = src[table[i]];
d9e74a6f 483}
484
485static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
486{
487 unsigned int i;
488
0a8a0368 489 for (i = 0; i < SUBBLOCK_SIZE; ++i) {
d9e74a6f 490 int32_t v = src1[i] * src2[i];
491 dst[i] = clamp_s16(v) << shift;
492 }
493}
494
495static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
496{
497 unsigned int i;
498
0a8a0368 499 for (i = 0; i < SUBBLOCK_SIZE; ++i) {
d9e74a6f 500 int32_t v = src[i] * scale;
501 dst[i] = clamp_s16(v);
502 }
503}
504
505static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
506{
507 unsigned int i;
508
509 for (i = 0; i < SUBBLOCK_SIZE; ++i)
d9e74a6f 510 dst[i] = src[i] >> shift;
d9e74a6f 511}
512
513/***************************************************************************
514 * Fast 2D IDCT using separable formulation and normalization
515 * Computations use single precision floats
516 * Implementation based on Wikipedia :
517 * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
518 **************************************************************************/
0a8a0368 519static void InverseDCT1D(const float *const x, float *dst, unsigned int stride)
d9e74a6f 520{
521 float e[4];
522 float f[4];
523 float x26, x1357, x15, x37, x17, x35;
524
525 x15 = IDCT_K[2] * (x[1] + x[5]);
526 x37 = IDCT_K[3] * (x[3] + x[7]);
527 x17 = IDCT_K[8] * (x[1] + x[7]);
528 x35 = IDCT_K[9] * (x[3] + x[5]);
529 x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]);
530 x26 = IDCT_C6 * (x[2] + x[6]);
531
532 f[0] = x[0] + x[4];
533 f[1] = x[0] - x[4];
0a8a0368 534 f[2] = x26 + IDCT_K[0] * x[2];
535 f[3] = x26 + IDCT_K[1] * x[6];
536
537 e[0] = x1357 + x15 + IDCT_K[4] * x[1] + x17;
538 e[1] = x1357 + x37 + IDCT_K[6] * x[3] + x35;
539 e[2] = x1357 + x15 + IDCT_K[5] * x[5] + x35;
540 e[3] = x1357 + x37 + IDCT_K[7] * x[7] + x17;
541
542 *dst = f[0] + f[2] + e[0];
543 dst += stride;
544 *dst = f[1] + f[3] + e[1];
545 dst += stride;
546 *dst = f[1] - f[3] + e[2];
547 dst += stride;
548 *dst = f[0] - f[2] + e[3];
549 dst += stride;
550 *dst = f[0] - f[2] - e[3];
551 dst += stride;
552 *dst = f[1] - f[3] - e[2];
553 dst += stride;
554 *dst = f[1] + f[3] - e[1];
555 dst += stride;
556 *dst = f[0] + f[2] - e[0];
d9e74a6f 557}
558
559static void InverseDCTSubBlock(int16_t *dst, const int16_t *src)
560{
561 float x[8];
562 float block[SUBBLOCK_SIZE];
563 unsigned int i, j;
564
565 /* idct 1d on rows (+transposition) */
0a8a0368 566 for (i = 0; i < 8; ++i) {
d9e74a6f 567 for (j = 0; j < 8; ++j)
0a8a0368 568 x[j] = (float)src[i * 8 + j];
d9e74a6f 569
570 InverseDCT1D(x, &block[i], 8);
571 }
572
573 /* idct 1d on columns (thanks to previous transposition) */
0a8a0368 574 for (i = 0; i < 8; ++i) {
575 InverseDCT1D(&block[i * 8], x, 1);
d9e74a6f 576
577 /* C4 = 1 normalization implies a division by 8 */
578 for (j = 0; j < 8; ++j)
0a8a0368 579 dst[i + j * 8] = (int16_t)x[j] >> 3;
d9e74a6f 580 }
581}
582
583static void RescaleYSubBlock(int16_t *dst, const int16_t *src)
584{
585 unsigned int i;
586
587 for (i = 0; i < SUBBLOCK_SIZE; ++i)
d9e74a6f 588 dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10;
d9e74a6f 589}
590
591static void RescaleUVSubBlock(int16_t *dst, const int16_t *src)
592{
593 unsigned int i;
594
595 for (i = 0; i < SUBBLOCK_SIZE; ++i)
d9e74a6f 596 dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80;
d9e74a6f 597}
598
599
600
601/* FIXME: assume presence of expansion pack */
602#define MEMMASK 0x7fffff
603
604static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count)
605{
0a8a0368 606 while (count != 0) {
d9e74a6f 607 uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK];
608 s <<= 8;
609 s |= rsp.RDRAM[((address++)^S8) & MEMMASK];
610
611 *(dst++) = s;
612
613 --count;
614 }
615}
616
617static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count)
618{
0a8a0368 619 while (count != 0) {
d9e74a6f 620 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
621 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
622
623 --count;
624 }
625}
626
627static uint32_t rdram_read_u32(uint32_t address)
628{
0a8a0368 629 uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK];
630 r <<= 8;
631 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
632 r <<= 8;
633 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
634 r <<= 8;
d9e74a6f 635 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
636
637 return r;
638}
639
640static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count)
641{
0a8a0368 642 while (count != 0) {
d9e74a6f 643 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24);
644 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16);
645 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
646 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
647
648 --count;
649 }
650}
651