RSP HLE plugin. Compile and run on the OpenPandora
[mupen64plus-pandora.git] / source / mupen64plus-rsp-hle / src / jpeg.c
CommitLineData
d9e74a6f 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - jpeg.c *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2012 Bobby Smiles *
5 * Copyright (C) 2009 Richard Goedeken *
6 * Copyright (C) 2002 Hacktarux *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the *
20 * Free Software Foundation, Inc., *
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
22 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
24#include <assert.h>
25#include <stdlib.h>
26#include <stdint.h>
27
28#define M64P_PLUGIN_PROTOTYPES 1
29#include "m64p_types.h"
30#include "m64p_plugin.h"
31#include "hle.h"
32
/* number of 16-bit coefficients stored in one subblock (8 rows of 8) */
#define SUBBLOCK_SIZE 64

/* callback emitting one line of pixels at the given rdram address */
typedef void (*tile_line_emitter_t)(const int16_t *y,
                                    const int16_t *u,
                                    uint32_t address);

/* callback decoding in place the subblocks of one macroblock */
typedef void (*std_macroblock_decoder_t)(int16_t *macroblock,
                                         unsigned int subblock_count,
                                         const int16_t qtables[3][SUBBLOCK_SIZE]);

/* rdram accessors */
/* FIXME: these functions deserve their own module */
static void     rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
static void     rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
static uint32_t rdram_read_u32(uint32_t address);
static void     rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);

/* decoder shared by the standard jpeg ucodes */
static void jpeg_decode_std(const char * const version,
                            const std_macroblock_decoder_t decode_mb,
                            const tile_line_emitter_t emit_line);

/* saturation helpers */
static uint8_t  clamp_u8(int16_t x);
static int16_t  clamp_s12(int16_t x);
static int16_t  clamp_s16(int32_t x);
static uint16_t clamp_RGBA_component(int16_t x);

/* pixel conversion & formatting */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);

/* tile line emitters */
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);

/* operations on whole macroblocks */
static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);

/* operations on a single subblock */
static void TransposeSubBlock(int16_t *dst, const int16_t *src);
static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
static void InverseDCT1D(const float * const x, float *dst, unsigned int stride);
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
80
81/* transposed dequantization table */
82static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] =
83{
84 16, 12, 14, 14, 18, 24, 49, 72,
85 11, 12, 13, 17, 22, 35, 64, 92,
86 10, 14, 16, 22, 37, 55, 78, 95,
87 16, 19, 24, 29, 56, 64, 87, 98,
88 24, 26, 40, 51, 68, 81, 103, 112,
89 40, 58, 57, 87, 109, 104, 121, 100,
90 51, 60, 69, 80, 103, 113, 120, 103,
91 61, 55, 56, 62, 77, 92, 101, 99
92};
93
94/* zig-zag indices */
95static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] =
96{
97 0, 1, 5, 6, 14, 15, 27, 28,
98 2, 4, 7, 13, 16, 26, 29, 42,
99 3, 8, 12, 17, 25, 30, 41, 43,
100 9, 11, 18, 24, 31, 40, 44, 53,
101 10, 19, 23, 32, 39, 45, 52, 54,
102 20, 22, 33, 38, 46, 51, 55, 60,
103 21, 34, 37, 47, 50, 56, 59, 61,
104 35, 36, 48, 49, 57, 58, 62, 63
105};
106
107/* transposition indices */
108static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] =
109{
110 0, 8, 16, 24, 32, 40, 48, 56,
111 1, 9, 17, 25, 33, 41, 49, 57,
112 2, 10, 18, 26, 34, 42, 50, 58,
113 3, 11, 19, 27, 35, 43, 51, 59,
114 4, 12, 20, 28, 36, 44, 52, 60,
115 5, 13, 21, 29, 37, 45, 53, 61,
116 6, 14, 22, 30, 38, 46, 54, 62,
117 7, 15, 23, 31, 39, 47, 55, 63
118};
119
120
121
/* Constants used by the 1D inverse DCT.
 * Cn = alpha * cos(n * PI / 16), with alpha chosen so that C4 = 1.
 * IDCT_K holds the pre-combined sums and differences of those cosines. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] =
{
     0.765366865f,  /* [0]  C2-C6        */
    -1.847759065f,  /* [1] -C2-C6        */
    -0.390180644f,  /* [2]  C5-C3        */
    -1.961570561f,  /* [3] -C5-C3        */
     1.501321110f,  /* [4]  C1+C3-C5-C7  */
     2.053119869f,  /* [5]  C1+C3-C5+C7  */
     3.072711027f,  /* [6]  C1+C3+C5-C7  */
     0.298631336f,  /* [7] -C1+C3+C5-C7  */
    -0.899976223f,  /* [8]  C7-C3        */
    -2.562915448f   /* [9] -C1-C3        */
};
139
140
141/* global functions */
142
143/***************************************************************************
144 * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium.
145 **************************************************************************/
146void jpeg_decode_PS0()
147{
148 jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine);
149}
150
151/***************************************************************************
152 * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
153 * Pokemon Stadium 2.
154 **************************************************************************/
155void jpeg_decode_PS()
156{
157 jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine);
158}
159
160/***************************************************************************
161 * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
162 **************************************************************************/
163void jpeg_decode_OB()
164{
165 int16_t qtable[SUBBLOCK_SIZE];
166 unsigned int mb;
167
168 int32_t y_dc = 0;
169 int32_t u_dc = 0;
170 int32_t v_dc = 0;
171
172 const OSTask_t * const task = get_task();
173
174 uint32_t address = task->data_ptr;
175 const unsigned int macroblock_count = task->data_size;
176 const int qscale = task->yield_data_size;
177
178 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d",
179 address,
180 macroblock_count,
181 qscale);
182
183 if (qscale != 0)
184 {
185 if (qscale > 0)
186 {
187 ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
188 }
189 else
190 {
191 RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
192 }
193 }
194
195 for (mb = 0; mb < macroblock_count; ++mb)
196 {
197 int16_t macroblock[6*SUBBLOCK_SIZE];
198
199 rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE);
200 DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
201 EmitTilesMode2(EmitYUVTileLine, macroblock, address);
202
203 address += (2*6*SUBBLOCK_SIZE);
204 }
205}
206
207
208/* local functions */
209static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line)
210{
211 int16_t qtables[3][SUBBLOCK_SIZE];
212 unsigned int mb;
213 uint32_t address;
214 uint32_t macroblock_count;
215 uint32_t mode;
216 uint32_t qtableY_ptr;
217 uint32_t qtableU_ptr;
218 uint32_t qtableV_ptr;
219 unsigned int subblock_count;
220 unsigned int macroblock_size;
221 int16_t *macroblock;
222 const OSTask_t * const task = get_task();
223
224 if (task->flags & 0x1)
225 {
226 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version);
227 return;
228 }
229
230 address = rdram_read_u32(task->data_ptr);
231 macroblock_count = rdram_read_u32(task->data_ptr + 4);
232 mode = rdram_read_u32(task->data_ptr + 8);
233 qtableY_ptr = rdram_read_u32(task->data_ptr + 12);
234 qtableU_ptr = rdram_read_u32(task->data_ptr + 16);
235 qtableV_ptr = rdram_read_u32(task->data_ptr + 20);
236
237 DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x",
238 version,
239 address,
240 macroblock_count,
241 mode,
242 qtableY_ptr,
243 qtableU_ptr,
244 qtableV_ptr);
245
246 if (mode != 0 && mode != 2)
247 {
248 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode);
249 return;
250 }
251
252 subblock_count = mode + 4;
253 macroblock_size = 2*subblock_count*SUBBLOCK_SIZE;
254
255 rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
256 rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
257 rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
258
259 macroblock = malloc(sizeof(*macroblock) * macroblock_size);
260 if (!macroblock)
261 {
262 DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version);
263 return;
264 }
265
266 for (mb = 0; mb < macroblock_count; ++mb)
267 {
268 rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1);
269 decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
270
271 if (mode == 0)
272 {
273 EmitTilesMode0(emit_line, macroblock, address);
274 }
275 else
276 {
277 EmitTilesMode2(emit_line, macroblock, address);
278 }
279
280 address += macroblock_size;
281 }
282 free(macroblock);
283}
284
/* Saturate a signed 16-bit value to the [0, 255] range.
 * Plain comparisons are used so that every input, including -32768,
 * is handled without relying on right shifts of negative values. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
289
/* Saturate x to the [-0x800, 0x7f0] range.
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lowest = -0x800;
    const int16_t highest = 0x7f0;

    return (x < lowest) ? lowest
         : (x > highest) ? highest
         : x;
}
295
/* Saturate a 32-bit value to the signed 16-bit range [-32768, 32767]. */
static int16_t clamp_s16(int32_t x)
{
    if (x < -32768)
    {
        return -32768;
    }
    if (x > 32767)
    {
        return 32767;
    }
    return (int16_t)x;
}
301
/* Saturate x to [0, 0xff0] and keep only bits 7..11 of the result
 * (mask 0xf80), i.e. the five bits later packed into a colour channel. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int bounded = x;

    if (bounded < 0)
    {
        bounded = 0;
    }
    else if (bounded > 0xff0)
    {
        bounded = 0xff0;
    }

    return (uint16_t)(bounded & 0xf80);
}
307
/* Pack two luma samples and one chroma pair into a 32-bit word laid out,
 * from most to least significant byte, as U, Y1, V, Y2.
 * Every component is saturated to 8 bits first. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
315
/* Convert one YUV sample to a 16-bit pixel with 5 bits per colour channel
 * (red in bits 11..15, green in bits 6..10, blue in bits 1..5) and
 * bit 0 always set. Luma is offset by 2048 before conversion. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float luma = (float)y + 2048.0f;
    const float cb = (float)u;
    const float cr = (float)v;

    /* each channel comes back already masked to bits 7..11 */
    const uint16_t red   = clamp_RGBA_component((int16_t)(luma + 1.4025*cr));
    const uint16_t green = clamp_RGBA_component((int16_t)(luma - 0.3443*cb - 0.7144*cr));
    const uint16_t blue  = clamp_RGBA_component((int16_t)(luma + 1.7729*cb));

    return (red << 4) | (green >> 1) | (blue >> 6) | 1;
}
328
329static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address)
330{
331 uint32_t uyvy[8];
332
333 const int16_t * const v = u + SUBBLOCK_SIZE;
334 const int16_t * const y2 = y + SUBBLOCK_SIZE;
335
336 uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]);
337 uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]);
338 uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]);
339 uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]);
340 uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
341 uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
342 uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
343 uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
344
345 rdram_write_many_u32(uyvy, address, 8);
346}
347
348static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
349{
350 uint16_t rgba[16];
351
352 const int16_t * const v = u + SUBBLOCK_SIZE;
353 const int16_t * const y2 = y + SUBBLOCK_SIZE;
354
355 rgba[0] = GetRGBA(y[0], u[0], v[0]);
356 rgba[1] = GetRGBA(y[1], u[0], v[0]);
357 rgba[2] = GetRGBA(y[2], u[1], v[1]);
358 rgba[3] = GetRGBA(y[3], u[1], v[1]);
359 rgba[4] = GetRGBA(y[4], u[2], v[2]);
360 rgba[5] = GetRGBA(y[5], u[2], v[2]);
361 rgba[6] = GetRGBA(y[6], u[3], v[3]);
362 rgba[7] = GetRGBA(y[7], u[3], v[3]);
363 rgba[8] = GetRGBA(y2[0], u[4], v[4]);
364 rgba[9] = GetRGBA(y2[1], u[4], v[4]);
365 rgba[10] = GetRGBA(y2[2], u[5], v[5]);
366 rgba[11] = GetRGBA(y2[3], u[5], v[5]);
367 rgba[12] = GetRGBA(y2[4], u[6], v[6]);
368 rgba[13] = GetRGBA(y2[5], u[6], v[6]);
369 rgba[14] = GetRGBA(y2[6], u[7], v[7]);
370 rgba[15] = GetRGBA(y2[7], u[7], v[7]);
371
372 rdram_write_many_u16(rgba, address, 16);
373}
374
375static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
376{
377 unsigned int i;
378
379 unsigned int y_offset = 0;
380 unsigned int u_offset = 2*SUBBLOCK_SIZE;
381
382 for (i = 0; i < 8; ++i)
383 {
384 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
385
386 y_offset += 8;
387 u_offset += 8;
388 address += 32;
389 }
390}
391
392static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
393{
394 unsigned int i;
395
396 unsigned int y_offset = 0;
397 unsigned int u_offset = 4*SUBBLOCK_SIZE;
398
399 for (i = 0; i < 8; ++i)
400 {
401 emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
402 emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
403
404 y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16;
405 u_offset += 8;
406 address += 64;
407 }
408}
409
410static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
411{
412 int sb;
413
414 for (sb = 0; sb < 6; ++sb)
415 {
416 int16_t tmp_sb[SUBBLOCK_SIZE];
417
418 /* update DC */
419 int32_t dc = (int32_t)macroblock[0];
420 switch(sb)
421 {
422 case 0: case 1: case 2: case 3:
423 *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break;
424 case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break;
425 case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break;
426 }
427
428 ZigZagSubBlock(tmp_sb, macroblock);
429 if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); }
430 TransposeSubBlock(macroblock, tmp_sb);
431 InverseDCTSubBlock(macroblock, macroblock);
432
433 macroblock += SUBBLOCK_SIZE;
434 }
435}
436
437static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
438{
439 unsigned int sb;
440 unsigned int q = 0;
441
442 for (sb = 0; sb < subblock_count; ++sb)
443 {
444 int16_t tmp_sb[SUBBLOCK_SIZE];
445 const int isChromaSubBlock = (subblock_count - sb <= 2);
446
447 if (isChromaSubBlock) { ++q; }
448
449 MultSubBlocks(macroblock, macroblock, qtables[q], 4);
450 ZigZagSubBlock(tmp_sb, macroblock);
451 InverseDCTSubBlock(macroblock, tmp_sb);
452
453 macroblock += SUBBLOCK_SIZE;
454 }
455
456}
457
458static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
459{
460 unsigned int sb;
461 unsigned int q = 0;
462
463 for (sb = 0; sb < subblock_count; ++sb)
464 {
465 int16_t tmp_sb[SUBBLOCK_SIZE];
466 const int isChromaSubBlock = (subblock_count - sb <= 2);
467
468 if (isChromaSubBlock) { ++q; }
469
470 MultSubBlocks(macroblock, macroblock, qtables[q], 4);
471 ZigZagSubBlock(tmp_sb, macroblock);
472 InverseDCTSubBlock(macroblock, tmp_sb);
473
474 if (isChromaSubBlock)
475 {
476 RescaleUVSubBlock(macroblock, macroblock);
477 }
478 else
479 {
480 RescaleYSubBlock(macroblock, macroblock);
481 }
482
483 macroblock += SUBBLOCK_SIZE;
484 }
485}
486
487static void TransposeSubBlock(int16_t *dst, const int16_t *src)
488{
489 ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
490}
491
492static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
493{
494 ReorderSubBlock(dst, src, ZIGZAG_TABLE);
495}
496
497static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
498{
499 unsigned int i;
500
501 /* source and destination sublocks cannot overlap */
502 assert(abs(dst - src) > SUBBLOCK_SIZE);
503
504 for (i = 0; i < SUBBLOCK_SIZE; ++i)
505 {
506 dst[i] = src[table[i]];
507 }
508}
509
510static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
511{
512 unsigned int i;
513
514 for (i = 0; i < SUBBLOCK_SIZE; ++i)
515 {
516 int32_t v = src1[i] * src2[i];
517 dst[i] = clamp_s16(v) << shift;
518 }
519}
520
521static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
522{
523 unsigned int i;
524
525 for (i = 0; i < SUBBLOCK_SIZE; ++i)
526 {
527 int32_t v = src[i] * scale;
528 dst[i] = clamp_s16(v);
529 }
530}
531
532static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
533{
534 unsigned int i;
535
536 for (i = 0; i < SUBBLOCK_SIZE; ++i)
537 {
538 dst[i] = src[i] >> shift;
539 }
540}
541
542/***************************************************************************
543 * Fast 2D IDCT using separable formulation and normalization
544 * Computations use single precision floats
545 * Implementation based on Wikipedia :
546 * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
547 **************************************************************************/
548static void InverseDCT1D(const float * const x, float *dst, unsigned int stride)
549{
550 float e[4];
551 float f[4];
552 float x26, x1357, x15, x37, x17, x35;
553
554 x15 = IDCT_K[2] * (x[1] + x[5]);
555 x37 = IDCT_K[3] * (x[3] + x[7]);
556 x17 = IDCT_K[8] * (x[1] + x[7]);
557 x35 = IDCT_K[9] * (x[3] + x[5]);
558 x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]);
559 x26 = IDCT_C6 * (x[2] + x[6]);
560
561 f[0] = x[0] + x[4];
562 f[1] = x[0] - x[4];
563 f[2] = x26 + IDCT_K[0]*x[2];
564 f[3] = x26 + IDCT_K[1]*x[6];
565
566 e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17;
567 e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35;
568 e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35;
569 e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17;
570
571 *dst = f[0] + f[2] + e[0]; dst += stride;
572 *dst = f[1] + f[3] + e[1]; dst += stride;
573 *dst = f[1] - f[3] + e[2]; dst += stride;
574 *dst = f[0] - f[2] + e[3]; dst += stride;
575 *dst = f[0] - f[2] - e[3]; dst += stride;
576 *dst = f[1] - f[3] - e[2]; dst += stride;
577 *dst = f[1] + f[3] - e[1]; dst += stride;
578 *dst = f[0] + f[2] - e[0]; dst += stride;
579}
580
581static void InverseDCTSubBlock(int16_t *dst, const int16_t *src)
582{
583 float x[8];
584 float block[SUBBLOCK_SIZE];
585 unsigned int i, j;
586
587 /* idct 1d on rows (+transposition) */
588 for (i = 0; i < 8; ++i)
589 {
590 for (j = 0; j < 8; ++j)
591 {
592 x[j] = (float)src[i*8+j];
593 }
594
595 InverseDCT1D(x, &block[i], 8);
596 }
597
598 /* idct 1d on columns (thanks to previous transposition) */
599 for (i = 0; i < 8; ++i)
600 {
601 InverseDCT1D(&block[i*8], x, 1);
602
603 /* C4 = 1 normalization implies a division by 8 */
604 for (j = 0; j < 8; ++j)
605 {
606 dst[i+j*8] = (int16_t)x[j] >> 3;
607 }
608 }
609}
610
611static void RescaleYSubBlock(int16_t *dst, const int16_t *src)
612{
613 unsigned int i;
614
615 for (i = 0; i < SUBBLOCK_SIZE; ++i)
616 {
617 dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10;
618 }
619}
620
621static void RescaleUVSubBlock(int16_t *dst, const int16_t *src)
622{
623 unsigned int i;
624
625 for (i = 0; i < SUBBLOCK_SIZE; ++i)
626 {
627 dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80;
628 }
629}
630
631
632
633/* FIXME: assume presence of expansion pack */
634#define MEMMASK 0x7fffff
635
636static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count)
637{
638 while (count != 0)
639 {
640 uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK];
641 s <<= 8;
642 s |= rsp.RDRAM[((address++)^S8) & MEMMASK];
643
644 *(dst++) = s;
645
646 --count;
647 }
648}
649
650static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count)
651{
652 while (count != 0)
653 {
654 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
655 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
656
657 --count;
658 }
659}
660
661static uint32_t rdram_read_u32(uint32_t address)
662{
663 uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
664 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
665 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
666 r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
667
668 return r;
669}
670
671static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count)
672{
673 while (count != 0)
674 {
675 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24);
676 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16);
677 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
678 rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
679
680 --count;
681 }
682}
683