ALL: Huge upstream synch + PerRom DelaySI & CountPerOp parameters
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / jpeg.c
CommitLineData
d9e74a6f 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - jpeg.c *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2012 Bobby Smiles *
5 * Copyright (C) 2009 Richard Goedeken *
6 * Copyright (C) 2002 Hacktarux *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the *
20 * Free Software Foundation, Inc., *
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
22 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
24#include <assert.h>
25#include <stdlib.h>
26#include <stdint.h>
27
28#define M64P_PLUGIN_PROTOTYPES 1
29#include "m64p_types.h"
30#include "m64p_plugin.h"
31#include "hle.h"
32
33#define SUBBLOCK_SIZE 64
34
35typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
2d262872 36typedef void (*subblock_transform_t)(int16_t* dst, const int16_t* src);
d9e74a6f 37
38/* rdram operations */
39// FIXME: these functions deserve their own module
40static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
41static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
42static uint32_t rdram_read_u32(uint32_t address);
43static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
44
45/* standard jpeg ucode decoder */
2d262872 46static void jpeg_decode_std(const char * const version,
47 const subblock_transform_t transform_luma,
48 const subblock_transform_t transform_chroma,
49 const tile_line_emitter_t emit_line);
d9e74a6f 50
51/* helper functions */
52static uint8_t clamp_u8(int16_t x);
53static int16_t clamp_s12(int16_t x);
54static int16_t clamp_s16(int32_t x);
55static uint16_t clamp_RGBA_component(int16_t x);
56
57/* pixel conversion & formatting */
58static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
59static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);
60
61/* tile line emitters */
62static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
63static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);
64
65/* macroblocks operations */
2d262872 66static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
67static void decode_macroblock_std(
68 const subblock_transform_t transform_luma,
69 const subblock_transform_t transform_chroma,
70 int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
d9e74a6f 71static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
72static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
73
74/* subblocks operations */
75static void TransposeSubBlock(int16_t *dst, const int16_t *src);
76static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
77static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
78static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
79static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
80static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
81static void InverseDCT1D(const float * const x, float *dst, unsigned int stride);
82static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
83static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
84static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
85
86/* transposed dequantization table */
87static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] =
88{
89 16, 12, 14, 14, 18, 24, 49, 72,
90 11, 12, 13, 17, 22, 35, 64, 92,
91 10, 14, 16, 22, 37, 55, 78, 95,
92 16, 19, 24, 29, 56, 64, 87, 98,
93 24, 26, 40, 51, 68, 81, 103, 112,
94 40, 58, 57, 87, 109, 104, 121, 100,
95 51, 60, 69, 80, 103, 113, 120, 103,
96 61, 55, 56, 62, 77, 92, 101, 99
97};
98
99/* zig-zag indices */
100static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] =
101{
102 0, 1, 5, 6, 14, 15, 27, 28,
103 2, 4, 7, 13, 16, 26, 29, 42,
104 3, 8, 12, 17, 25, 30, 41, 43,
105 9, 11, 18, 24, 31, 40, 44, 53,
106 10, 19, 23, 32, 39, 45, 52, 54,
107 20, 22, 33, 38, 46, 51, 55, 60,
108 21, 34, 37, 47, 50, 56, 59, 61,
109 35, 36, 48, 49, 57, 58, 62, 63
110};
111
112/* transposition indices */
113static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] =
114{
115 0, 8, 16, 24, 32, 40, 48, 56,
116 1, 9, 17, 25, 33, 41, 49, 57,
117 2, 10, 18, 26, 34, 42, 50, 58,
118 3, 11, 19, 27, 35, 43, 51, 59,
119 4, 12, 20, 28, 36, 44, 52, 60,
120 5, 13, 21, 29, 37, 45, 53, 61,
121 6, 14, 22, 30, 38, 46, 54, 62,
122 7, 15, 23, 31, 39, 47, 55, 63
123};
124
125
126
/* IDCT related constants.
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen such that C4 = 1.
 * IDCT_K holds the pre-combined sums/differences of those Cn values that
 * InverseDCT1D multiplies its inputs by. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] =
{
     0.765366865f,  /* [0]  C2-C6        */
    -1.847759065f,  /* [1] -C2-C6        */
    -0.390180644f,  /* [2]  C5-C3        */
    -1.961570561f,  /* [3] -C5-C3        */
     1.501321110f,  /* [4]  C1+C3-C5-C7  */
     2.053119869f,  /* [5]  C1+C3-C5+C7  */
     3.072711027f,  /* [6]  C1+C3+C5-C7  */
     0.298631336f,  /* [7] -C1+C3+C5-C7  */
    -0.899976223f,  /* [8]  C7-C3        */
    -2.562915448f   /* [9] -C1-C3        */
};
144
145
146/* global functions */
147
148/***************************************************************************
149 * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium.
150 **************************************************************************/
151void jpeg_decode_PS0()
152{
2d262872 153 jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine);
d9e74a6f 154}
155
156/***************************************************************************
157 * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
158 * Pokemon Stadium 2.
159 **************************************************************************/
160void jpeg_decode_PS()
161{
2d262872 162 jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine);
d9e74a6f 163}
164
165/***************************************************************************
166 * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
167 **************************************************************************/
168void jpeg_decode_OB()
169{
170 int16_t qtable[SUBBLOCK_SIZE];
171 unsigned int mb;
172
173 int32_t y_dc = 0;
174 int32_t u_dc = 0;
175 int32_t v_dc = 0;
176
177 const OSTask_t * const task = get_task();
178
179 uint32_t address = task->data_ptr;
180 const unsigned int macroblock_count = task->data_size;
181 const int qscale = task->yield_data_size;
182
183 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d",
184 address,
185 macroblock_count,
186 qscale);
187
188 if (qscale != 0)
189 {
190 if (qscale > 0)
191 {
192 ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
193 }
194 else
195 {
196 RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
197 }
198 }
199
200 for (mb = 0; mb < macroblock_count; ++mb)
201 {
202 int16_t macroblock[6*SUBBLOCK_SIZE];
203
204 rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE);
2d262872 205 decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
d9e74a6f 206 EmitTilesMode2(EmitYUVTileLine, macroblock, address);
207
208 address += (2*6*SUBBLOCK_SIZE);
209 }
210}
211
212
213/* local functions */
2d262872 214static void jpeg_decode_std(const char * const version,
215 const subblock_transform_t transform_luma,
216 const subblock_transform_t transform_chroma,
217 const tile_line_emitter_t emit_line)
d9e74a6f 218{
219 int16_t qtables[3][SUBBLOCK_SIZE];
220 unsigned int mb;
221 uint32_t address;
222 uint32_t macroblock_count;
223 uint32_t mode;
224 uint32_t qtableY_ptr;
225 uint32_t qtableU_ptr;
226 uint32_t qtableV_ptr;
227 unsigned int subblock_count;
228 unsigned int macroblock_size;
2d262872 229 int16_t macroblock[6*SUBBLOCK_SIZE]; /* macroblock contains at most 6 subblobcks */
d9e74a6f 230 const OSTask_t * const task = get_task();
231
232 if (task->flags & 0x1)
233 {
234 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version);
235 return;
236 }
237
238 address = rdram_read_u32(task->data_ptr);
239 macroblock_count = rdram_read_u32(task->data_ptr + 4);
240 mode = rdram_read_u32(task->data_ptr + 8);
241 qtableY_ptr = rdram_read_u32(task->data_ptr + 12);
242 qtableU_ptr = rdram_read_u32(task->data_ptr + 16);
243 qtableV_ptr = rdram_read_u32(task->data_ptr + 20);
244
245 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x",
246 version,
247 address,
248 macroblock_count,
249 mode,
250 qtableY_ptr,
251 qtableU_ptr,
252 qtableV_ptr);
253
254 if (mode != 0 && mode != 2)
255 {
256 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode);
257 return;
258 }
259
260 subblock_count = mode + 4;
2d262872 261 macroblock_size = subblock_count*SUBBLOCK_SIZE;
d9e74a6f 262
263 rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
264 rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
265 rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
266
d9e74a6f 267 for (mb = 0; mb < macroblock_count; ++mb)
268 {
2d262872 269 rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size);
270 decode_macroblock_std(transform_luma, transform_chroma,
271 macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
d9e74a6f 272
273 if (mode == 0)
274 {
275 EmitTilesMode0(emit_line, macroblock, address);
276 }
277 else
278 {
279 EmitTilesMode2(emit_line, macroblock, address);
280 }
281
2d262872 282 address += 2*macroblock_size;
d9e74a6f 283 }
d9e74a6f 284}
285
/* Saturate a signed 16-bit value to the unsigned 8-bit range [0, 255].
 *
 * Uses explicit comparisons rather than a sign-propagating right shift of
 * the negated value: that trick depends on implementation-defined behaviour
 * and yields 1 instead of 0 for x == -32768. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
290
/* Clamp x to the range [-0x800, 0x7f0].
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lower = -0x800;
    const int16_t upper = 0x7f0;

    return (x < lower) ? lower
         : (x > upper) ? upper
         : x;
}
296
/* Saturate a 32-bit signed value to the signed 16-bit range [-32768, 32767]. */
static int16_t clamp_s16(int32_t x)
{
    if (x > 32767)
    {
        return 32767;
    }
    if (x < -32768)
    {
        return -32768;
    }
    return x;
}
302
/* Clamp a colour component to [0, 0xff0] and keep only bits 11..7
 * (mask 0xf80), i.e. its five most significant bits within a 12-bit value. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t bounded = x;

    if (bounded < 0)
    {
        bounded = 0;
    }
    else if (bounded > 0xff0)
    {
        bounded = 0xff0;
    }

    return (bounded & 0xf80);
}
308
/* Pack two luma samples and one chroma pair into a 32-bit UYVY word.
 * Each component is saturated to 8 bits; byte order from most to least
 * significant is U, Y1, V, Y2. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
316
/* Convert one YUV sample to a 16-bit RGBA pixel.
 *
 * Luma is biased by +2048 before conversion. Each colour component is
 * clamped by clamp_RGBA_component (which leaves its 5 significant bits in
 * bits 11..7), then packed as R in bits 15..11, G in bits 10..6, B in
 * bits 5..1, with bit 0 always set. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    /* colour-space conversion, truncated to 16-bit integers */
    const int16_t raw_r = (int16_t)(fY             + 1.4025*fV);
    const int16_t raw_g = (int16_t)(fY - 0.3443*fU - 0.7144*fV);
    const int16_t raw_b = (int16_t)(fY + 1.7729*fU            );

    const uint16_t r = clamp_RGBA_component(raw_r);
    const uint16_t g = clamp_RGBA_component(raw_g);
    const uint16_t b = clamp_RGBA_component(raw_b);

    return (r << 4) | (g >> 1) | (b >> 6) | 1;
}
329
330static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address)
331{
332 uint32_t uyvy[8];
333
334 const int16_t * const v = u + SUBBLOCK_SIZE;
335 const int16_t * const y2 = y + SUBBLOCK_SIZE;
336
337 uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]);
338 uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]);
339 uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]);
340 uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]);
341 uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
342 uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
343 uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
344 uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
345
346 rdram_write_many_u32(uyvy, address, 8);
347}
348
349static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
350{
351 uint16_t rgba[16];
352
353 const int16_t * const v = u + SUBBLOCK_SIZE;
354 const int16_t * const y2 = y + SUBBLOCK_SIZE;
355
356 rgba[0] = GetRGBA(y[0], u[0], v[0]);
357 rgba[1] = GetRGBA(y[1], u[0], v[0]);
358 rgba[2] = GetRGBA(y[2], u[1], v[1]);
359 rgba[3] = GetRGBA(y[3], u[1], v[1]);
360 rgba[4] = GetRGBA(y[4], u[2], v[2]);
361 rgba[5] = GetRGBA(y[5], u[2], v[2]);
362 rgba[6] = GetRGBA(y[6], u[3], v[3]);
363 rgba[7] = GetRGBA(y[7], u[3], v[3]);
364 rgba[8] = GetRGBA(y2[0], u[4], v[4]);
365 rgba[9] = GetRGBA(y2[1], u[4], v[4]);
366 rgba[10] = GetRGBA(y2[2], u[5], v[5]);
367 rgba[11] = GetRGBA(y2[3], u[5], v[5]);
368 rgba[12] = GetRGBA(y2[4], u[6], v[6]);
369 rgba[13] = GetRGBA(y2[5], u[6], v[6]);
370 rgba[14] = GetRGBA(y2[6], u[7], v[7]);
371 rgba[15] = GetRGBA(y2[7], u[7], v[7]);
372
373 rdram_write_many_u16(rgba, address, 16);
374}
375
376static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
377{
378 unsigned int i;
379
380 unsigned int y_offset = 0;
381 unsigned int u_offset = 2*SUBBLOCK_SIZE;
382
383 for (i = 0; i < 8; ++i)
384 {
385 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
386
387 y_offset += 8;
388 u_offset += 8;
389 address += 32;
390 }
391}
392
393static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
394{
395 unsigned int i;
396
397 unsigned int y_offset = 0;
398 unsigned int u_offset = 4*SUBBLOCK_SIZE;
399
400 for (i = 0; i < 8; ++i)
401 {
402 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
403 emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
404
405 y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16;
406 u_offset += 8;
407 address += 64;
408 }
409}
410
2d262872 411static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
d9e74a6f 412{
413 int sb;
414
415 for (sb = 0; sb < 6; ++sb)
416 {
417 int16_t tmp_sb[SUBBLOCK_SIZE];
418
419 /* update DC */
420 int32_t dc = (int32_t)macroblock[0];
421 switch(sb)
422 {
423 case 0: case 1: case 2: case 3:
424 *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break;
425 case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break;
426 case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break;
427 }
428
429 ZigZagSubBlock(tmp_sb, macroblock);
430 if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); }
431 TransposeSubBlock(macroblock, tmp_sb);
432 InverseDCTSubBlock(macroblock, macroblock);
433
434 macroblock += SUBBLOCK_SIZE;
435 }
436}
437
2d262872 438static void decode_macroblock_std(
439 const subblock_transform_t transform_luma,
440 const subblock_transform_t transform_chroma,
441 int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
d9e74a6f 442{
443 unsigned int sb;
444 unsigned int q = 0;
445
446 for (sb = 0; sb < subblock_count; ++sb)
447 {
448 int16_t tmp_sb[SUBBLOCK_SIZE];
449 const int isChromaSubBlock = (subblock_count - sb <= 2);
450
451 if (isChromaSubBlock) { ++q; }
452
453 MultSubBlocks(macroblock, macroblock, qtables[q], 4);
454 ZigZagSubBlock(tmp_sb, macroblock);
455 InverseDCTSubBlock(macroblock, tmp_sb);
456
457 if (isChromaSubBlock)
458 {
2d262872 459 if (transform_chroma != NULL)
460 transform_chroma(macroblock, macroblock);
d9e74a6f 461 }
462 else
463 {
2d262872 464 if (transform_luma != NULL)
465 transform_luma(macroblock, macroblock);
d9e74a6f 466 }
467
468 macroblock += SUBBLOCK_SIZE;
469 }
470}
471
472static void TransposeSubBlock(int16_t *dst, const int16_t *src)
473{
474 ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
475}
476
477static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
478{
479 ReorderSubBlock(dst, src, ZIGZAG_TABLE);
480}
481
482static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
483{
484 unsigned int i;
485
486 /* source and destination sublocks cannot overlap */
487 assert(abs(dst - src) > SUBBLOCK_SIZE);
488
489 for (i = 0; i < SUBBLOCK_SIZE; ++i)
490 {
491 dst[i] = src[table[i]];
492 }
493}
494
495static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
496{
497 unsigned int i;
498
499 for (i = 0; i < SUBBLOCK_SIZE; ++i)
500 {
501 int32_t v = src1[i] * src2[i];
502 dst[i] = clamp_s16(v) << shift;
503 }
504}
505
506static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
507{
508 unsigned int i;
509
510 for (i = 0; i < SUBBLOCK_SIZE; ++i)
511 {
512 int32_t v = src[i] * scale;
513 dst[i] = clamp_s16(v);
514 }
515}
516
517static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
518{
519 unsigned int i;
520
521 for (i = 0; i < SUBBLOCK_SIZE; ++i)
522 {
523 dst[i] = src[i] >> shift;
524 }
525}
526
527/***************************************************************************
528 * Fast 2D IDCT using separable formulation and normalization
529 * Computations use single precision floats
530 * Implementation based on Wikipedia :
531 * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
532 **************************************************************************/
533static void InverseDCT1D(const float * const x, float *dst, unsigned int stride)
534{
535 float e[4];
536 float f[4];
537 float x26, x1357, x15, x37, x17, x35;
538
539 x15 = IDCT_K[2] * (x[1] + x[5]);
540 x37 = IDCT_K[3] * (x[3] + x[7]);
541 x17 = IDCT_K[8] * (x[1] + x[7]);
542 x35 = IDCT_K[9] * (x[3] + x[5]);
543 x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]);
544 x26 = IDCT_C6 * (x[2] + x[6]);
545
546 f[0] = x[0] + x[4];
547 f[1] = x[0] - x[4];
548 f[2] = x26 + IDCT_K[0]*x[2];
549 f[3] = x26 + IDCT_K[1]*x[6];
550
551 e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17;
552 e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35;
553 e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35;
554 e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17;
555
556 *dst = f[0] + f[2] + e[0]; dst += stride;
557 *dst = f[1] + f[3] + e[1]; dst += stride;
558 *dst = f[1] - f[3] + e[2]; dst += stride;
559 *dst = f[0] - f[2] + e[3]; dst += stride;
560 *dst = f[0] - f[2] - e[3]; dst += stride;
561 *dst = f[1] - f[3] - e[2]; dst += stride;
562 *dst = f[1] + f[3] - e[1]; dst += stride;
563 *dst = f[0] + f[2] - e[0]; dst += stride;
564}
565
566static void InverseDCTSubBlock(int16_t *dst, const int16_t *src)
567{
568 float x[8];
569 float block[SUBBLOCK_SIZE];
570 unsigned int i, j;
571
572 /* idct 1d on rows (+transposition) */
573 for (i = 0; i < 8; ++i)
574 {
575 for (j = 0; j < 8; ++j)
576 {
577 x[j] = (float)src[i*8+j];
578 }
579
580 InverseDCT1D(x, &block[i], 8);
581 }
582
583 /* idct 1d on columns (thanks to previous transposition) */
584 for (i = 0; i < 8; ++i)
585 {
586 InverseDCT1D(&block[i*8], x, 1);
587
588 /* C4 = 1 normalization implies a division by 8 */
589 for (j = 0; j < 8; ++j)
590 {
591 dst[i+j*8] = (int16_t)x[j] >> 3;
592 }
593 }
594}
595
596static void RescaleYSubBlock(int16_t *dst, const int16_t *src)
597{
598 unsigned int i;
599
600 for (i = 0; i < SUBBLOCK_SIZE; ++i)
601 {
602 dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10;
603 }
604}
605
606static void RescaleUVSubBlock(int16_t *dst, const int16_t *src)
607{
608 unsigned int i;
609
610 for (i = 0; i < SUBBLOCK_SIZE; ++i)
611 {
612 dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80;
613 }
614}
615
616
617
618/* FIXME: assume presence of expansion pack */
619#define MEMMASK 0x7fffff
620
621static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count)
622{
623 while (count != 0)
624 {
625 uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK];
626 s <<= 8;
627 s |= rsp.RDRAM[((address++)^S8) & MEMMASK];
628
629 *(dst++) = s;
630
631 --count;
632 }
633}
634
635static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count)
636{
637 while (count != 0)
638 {
639 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
640 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
641
642 --count;
643 }
644}
645
646static uint32_t rdram_read_u32(uint32_t address)
647{
648 uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
649 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
650 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
651 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
652
653 return r;
654}
655
656static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count)
657{
658 while (count != 0)
659 {
660 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24);
661 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16);
662 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
663 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
664
665 --count;
666 }
667}
668