d9e74a6f |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus-rsp-hle - jpeg.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2012 Bobby Smiles * |
5 | * Copyright (C) 2009 Richard Goedeken * |
6 | * Copyright (C) 2002 Hacktarux * |
7 | * * |
8 | * This program is free software; you can redistribute it and/or modify * |
9 | * it under the terms of the GNU General Public License as published by * |
10 | * the Free Software Foundation; either version 2 of the License, or * |
11 | * (at your option) any later version. * |
12 | * * |
13 | * This program is distributed in the hope that it will be useful, * |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
16 | * GNU General Public License for more details. * |
17 | * * |
18 | * You should have received a copy of the GNU General Public License * |
19 | * along with this program; if not, write to the * |
20 | * Free Software Foundation, Inc., * |
21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
22 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
23 | |
24 | #include <assert.h> |
25 | #include <stdlib.h> |
26 | #include <stdint.h> |
27 | |
28 | #define M64P_PLUGIN_PROTOTYPES 1 |
29 | #include "m64p_types.h" |
30 | #include "m64p_plugin.h" |
31 | #include "hle.h" |
32 | |
33 | #define SUBBLOCK_SIZE 64 |
34 | |
35 | typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address); |
36 | typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
37 | |
38 | /* rdram operations */ |
39 | // FIXME: these functions deserve their own module |
40 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count); |
41 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count); |
42 | static uint32_t rdram_read_u32(uint32_t address); |
43 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count); |
44 | |
45 | /* standard jpeg ucode decoder */ |
46 | static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line); |
47 | |
48 | /* helper functions */ |
49 | static uint8_t clamp_u8(int16_t x); |
50 | static int16_t clamp_s12(int16_t x); |
51 | static int16_t clamp_s16(int32_t x); |
52 | static uint16_t clamp_RGBA_component(int16_t x); |
53 | |
54 | /* pixel conversion & formatting */ |
55 | static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v); |
56 | static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v); |
57 | |
58 | /* tile line emitters */ |
59 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address); |
60 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address); |
61 | |
62 | /* macroblocks operations */ |
63 | static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); |
64 | static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
65 | static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
66 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
67 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
68 | |
69 | /* subblocks operations */ |
70 | static void TransposeSubBlock(int16_t *dst, const int16_t *src); |
71 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src); |
72 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table); |
73 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); |
74 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); |
75 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); |
76 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride); |
77 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); |
78 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src); |
79 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); |
80 | |
81 | /* transposed dequantization table */ |
82 | static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = |
83 | { |
84 | 16, 12, 14, 14, 18, 24, 49, 72, |
85 | 11, 12, 13, 17, 22, 35, 64, 92, |
86 | 10, 14, 16, 22, 37, 55, 78, 95, |
87 | 16, 19, 24, 29, 56, 64, 87, 98, |
88 | 24, 26, 40, 51, 68, 81, 103, 112, |
89 | 40, 58, 57, 87, 109, 104, 121, 100, |
90 | 51, 60, 69, 80, 103, 113, 120, 103, |
91 | 61, 55, 56, 62, 77, 92, 101, 99 |
92 | }; |
93 | |
94 | /* zig-zag indices */ |
95 | static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = |
96 | { |
97 | 0, 1, 5, 6, 14, 15, 27, 28, |
98 | 2, 4, 7, 13, 16, 26, 29, 42, |
99 | 3, 8, 12, 17, 25, 30, 41, 43, |
100 | 9, 11, 18, 24, 31, 40, 44, 53, |
101 | 10, 19, 23, 32, 39, 45, 52, 54, |
102 | 20, 22, 33, 38, 46, 51, 55, 60, |
103 | 21, 34, 37, 47, 50, 56, 59, 61, |
104 | 35, 36, 48, 49, 57, 58, 62, 63 |
105 | }; |
106 | |
107 | /* transposition indices */ |
108 | static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = |
109 | { |
110 | 0, 8, 16, 24, 32, 40, 48, 56, |
111 | 1, 9, 17, 25, 33, 41, 49, 57, |
112 | 2, 10, 18, 26, 34, 42, 50, 58, |
113 | 3, 11, 19, 27, 35, 43, 51, 59, |
114 | 4, 12, 20, 28, 36, 44, 52, 60, |
115 | 5, 13, 21, 29, 37, 45, 53, 61, |
116 | 6, 14, 22, 30, 38, 46, 54, 62, |
117 | 7, 15, 23, 31, 39, 47, 55, 63 |
118 | }; |
119 | |
120 | |
121 | |
/* Constants used by the inverse DCT.
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen such that C4 = 1.
 * IDCT_K holds the combinations of those Cn consumed by InverseDCT1D. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] = {
     0.765366865f,   /* [0]  C2-C6        */
    -1.847759065f,   /* [1] -C2-C6        */
    -0.390180644f,   /* [2]  C5-C3        */
    -1.961570561f,   /* [3] -C5-C3        */
     1.501321110f,   /* [4]  C1+C3-C5-C7  */
     2.053119869f,   /* [5]  C1+C3-C5+C7  */
     3.072711027f,   /* [6]  C1+C3+C5-C7  */
     0.298631336f,   /* [7] -C1+C3+C5-C7  */
    -0.899976223f,   /* [8]  C7-C3        */
    -2.562915448f    /* [9] -C1-C3        */
};
139 | |
140 | |
141 | /* global functions */ |
142 | |
143 | /*************************************************************************** |
144 | * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. |
145 | **************************************************************************/ |
146 | void jpeg_decode_PS0() |
147 | { |
148 | jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine); |
149 | } |
150 | |
151 | /*************************************************************************** |
152 | * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and |
153 | * Pokemon Stadium 2. |
154 | **************************************************************************/ |
155 | void jpeg_decode_PS() |
156 | { |
157 | jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine); |
158 | } |
159 | |
160 | /*************************************************************************** |
161 | * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. |
162 | **************************************************************************/ |
163 | void jpeg_decode_OB() |
164 | { |
165 | int16_t qtable[SUBBLOCK_SIZE]; |
166 | unsigned int mb; |
167 | |
168 | int32_t y_dc = 0; |
169 | int32_t u_dc = 0; |
170 | int32_t v_dc = 0; |
171 | |
172 | const OSTask_t * const task = get_task(); |
173 | |
174 | uint32_t address = task->data_ptr; |
175 | const unsigned int macroblock_count = task->data_size; |
176 | const int qscale = task->yield_data_size; |
177 | |
178 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", |
179 | address, |
180 | macroblock_count, |
181 | qscale); |
182 | |
183 | if (qscale != 0) |
184 | { |
185 | if (qscale > 0) |
186 | { |
187 | ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); |
188 | } |
189 | else |
190 | { |
191 | RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); |
192 | } |
193 | } |
194 | |
195 | for (mb = 0; mb < macroblock_count; ++mb) |
196 | { |
197 | int16_t macroblock[6*SUBBLOCK_SIZE]; |
198 | |
199 | rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE); |
200 | DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); |
201 | EmitTilesMode2(EmitYUVTileLine, macroblock, address); |
202 | |
203 | address += (2*6*SUBBLOCK_SIZE); |
204 | } |
205 | } |
206 | |
207 | |
208 | /* local functions */ |
209 | static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line) |
210 | { |
211 | int16_t qtables[3][SUBBLOCK_SIZE]; |
212 | unsigned int mb; |
213 | uint32_t address; |
214 | uint32_t macroblock_count; |
215 | uint32_t mode; |
216 | uint32_t qtableY_ptr; |
217 | uint32_t qtableU_ptr; |
218 | uint32_t qtableV_ptr; |
219 | unsigned int subblock_count; |
220 | unsigned int macroblock_size; |
221 | int16_t *macroblock; |
222 | const OSTask_t * const task = get_task(); |
223 | |
224 | if (task->flags & 0x1) |
225 | { |
226 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); |
227 | return; |
228 | } |
229 | |
230 | address = rdram_read_u32(task->data_ptr); |
231 | macroblock_count = rdram_read_u32(task->data_ptr + 4); |
232 | mode = rdram_read_u32(task->data_ptr + 8); |
233 | qtableY_ptr = rdram_read_u32(task->data_ptr + 12); |
234 | qtableU_ptr = rdram_read_u32(task->data_ptr + 16); |
235 | qtableV_ptr = rdram_read_u32(task->data_ptr + 20); |
236 | |
237 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", |
238 | version, |
239 | address, |
240 | macroblock_count, |
241 | mode, |
242 | qtableY_ptr, |
243 | qtableU_ptr, |
244 | qtableV_ptr); |
245 | |
246 | if (mode != 0 && mode != 2) |
247 | { |
248 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); |
249 | return; |
250 | } |
251 | |
252 | subblock_count = mode + 4; |
253 | macroblock_size = 2*subblock_count*SUBBLOCK_SIZE; |
254 | |
255 | rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); |
256 | rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); |
257 | rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); |
258 | |
259 | macroblock = malloc(sizeof(*macroblock) * macroblock_size); |
260 | if (!macroblock) |
261 | { |
262 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version); |
263 | return; |
264 | } |
265 | |
266 | for (mb = 0; mb < macroblock_count; ++mb) |
267 | { |
268 | rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1); |
269 | decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); |
270 | |
271 | if (mode == 0) |
272 | { |
273 | EmitTilesMode0(emit_line, macroblock, address); |
274 | } |
275 | else |
276 | { |
277 | EmitTilesMode2(emit_line, macroblock, address); |
278 | } |
279 | |
280 | address += macroblock_size; |
281 | } |
282 | free(macroblock); |
283 | } |
284 | |
/* Saturates x to the [0, 255] range.
 * Plain comparisons are used so that every negative input, INT16_MIN
 * included, maps to 0 without right-shifting a negative value. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }

    return (x > 0xff) ? 0xff : (uint8_t)x;
}
289 | |
/* Restricts x to the [-0x800, 0x7f0] range. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lowest  = -0x800;
    const int16_t highest = 0x7f0;

    return (x < lowest) ? lowest
         : (x > highest) ? highest
         : x;
}
295 | |
/* Saturates a 32-bit value to the range of a signed 16-bit integer. */
static int16_t clamp_s16(int32_t x)
{
    if (x < INT16_MIN)
    {
        return INT16_MIN;
    }
    if (x > INT16_MAX)
    {
        return INT16_MAX;
    }

    return (int16_t)x;
}
301 | |
/* Restricts x to [0, 0xff0], then keeps only bits 7 to 11 of the result. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t bounded = x;

    if (bounded < 0)
    {
        bounded = 0;
    }
    else if (bounded > 0xff0)
    {
        bounded = 0xff0;
    }

    return (uint16_t)(bounded & 0xf80);
}
307 | |
/* Packs two luma samples and one chroma pair into a 32-bit word.
 * Every component is clamped with clamp_u8; from the most significant byte
 * to the least significant one the word holds u, y1, v, y2. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    uint32_t word = clamp_u8(u);

    word = (word << 8) | clamp_u8(y1);
    word = (word << 8) | clamp_u8(v);
    word = (word << 8) | clamp_u8(y2);

    return word;
}
315 | |
/* Converts one (y, u, v) sample to a 16-bit pixel.
 * The luma is offset by 2048 before the colour conversion. Each component
 * goes through clamp_RGBA_component, which leaves 5 significant bits; the
 * pixel holds r in bits 15..11, g in bits 10..6, b in bits 5..1 and has
 * bit 0 always set. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float luma     = (float)y + 2048.0f;
    const float chroma_u = (float)u;
    const float chroma_v = (float)v;

    /* the unsuffixed constants keep these computations in double precision */
    const int16_t red   = (int16_t)(luma + 1.4025*chroma_v);
    const int16_t green = (int16_t)(luma - 0.3443*chroma_u - 0.7144*chroma_v);
    const int16_t blue  = (int16_t)(luma + 1.7729*chroma_u);

    uint16_t pixel = 1;

    pixel |= clamp_RGBA_component(red) << 4;
    pixel |= clamp_RGBA_component(green) >> 1;
    pixel |= clamp_RGBA_component(blue) >> 6;

    return pixel;
}
328 | |
329 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address) |
330 | { |
331 | uint32_t uyvy[8]; |
332 | |
333 | const int16_t * const v = u + SUBBLOCK_SIZE; |
334 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
335 | |
336 | uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); |
337 | uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); |
338 | uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); |
339 | uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); |
340 | uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); |
341 | uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); |
342 | uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); |
343 | uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); |
344 | |
345 | rdram_write_many_u32(uyvy, address, 8); |
346 | } |
347 | |
348 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address) |
349 | { |
350 | uint16_t rgba[16]; |
351 | |
352 | const int16_t * const v = u + SUBBLOCK_SIZE; |
353 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
354 | |
355 | rgba[0] = GetRGBA(y[0], u[0], v[0]); |
356 | rgba[1] = GetRGBA(y[1], u[0], v[0]); |
357 | rgba[2] = GetRGBA(y[2], u[1], v[1]); |
358 | rgba[3] = GetRGBA(y[3], u[1], v[1]); |
359 | rgba[4] = GetRGBA(y[4], u[2], v[2]); |
360 | rgba[5] = GetRGBA(y[5], u[2], v[2]); |
361 | rgba[6] = GetRGBA(y[6], u[3], v[3]); |
362 | rgba[7] = GetRGBA(y[7], u[3], v[3]); |
363 | rgba[8] = GetRGBA(y2[0], u[4], v[4]); |
364 | rgba[9] = GetRGBA(y2[1], u[4], v[4]); |
365 | rgba[10] = GetRGBA(y2[2], u[5], v[5]); |
366 | rgba[11] = GetRGBA(y2[3], u[5], v[5]); |
367 | rgba[12] = GetRGBA(y2[4], u[6], v[6]); |
368 | rgba[13] = GetRGBA(y2[5], u[6], v[6]); |
369 | rgba[14] = GetRGBA(y2[6], u[7], v[7]); |
370 | rgba[15] = GetRGBA(y2[7], u[7], v[7]); |
371 | |
372 | rdram_write_many_u16(rgba, address, 16); |
373 | } |
374 | |
375 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
376 | { |
377 | unsigned int i; |
378 | |
379 | unsigned int y_offset = 0; |
380 | unsigned int u_offset = 2*SUBBLOCK_SIZE; |
381 | |
382 | for (i = 0; i < 8; ++i) |
383 | { |
384 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
385 | |
386 | y_offset += 8; |
387 | u_offset += 8; |
388 | address += 32; |
389 | } |
390 | } |
391 | |
392 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
393 | { |
394 | unsigned int i; |
395 | |
396 | unsigned int y_offset = 0; |
397 | unsigned int u_offset = 4*SUBBLOCK_SIZE; |
398 | |
399 | for (i = 0; i < 8; ++i) |
400 | { |
401 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
402 | emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); |
403 | |
404 | y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16; |
405 | u_offset += 8; |
406 | address += 64; |
407 | } |
408 | } |
409 | |
410 | static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) |
411 | { |
412 | int sb; |
413 | |
414 | for (sb = 0; sb < 6; ++sb) |
415 | { |
416 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
417 | |
418 | /* update DC */ |
419 | int32_t dc = (int32_t)macroblock[0]; |
420 | switch(sb) |
421 | { |
422 | case 0: case 1: case 2: case 3: |
423 | *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break; |
424 | case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break; |
425 | case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break; |
426 | } |
427 | |
428 | ZigZagSubBlock(tmp_sb, macroblock); |
429 | if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); } |
430 | TransposeSubBlock(macroblock, tmp_sb); |
431 | InverseDCTSubBlock(macroblock, macroblock); |
432 | |
433 | macroblock += SUBBLOCK_SIZE; |
434 | } |
435 | } |
436 | |
437 | static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) |
438 | { |
439 | unsigned int sb; |
440 | unsigned int q = 0; |
441 | |
442 | for (sb = 0; sb < subblock_count; ++sb) |
443 | { |
444 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
445 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
446 | |
447 | if (isChromaSubBlock) { ++q; } |
448 | |
449 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
450 | ZigZagSubBlock(tmp_sb, macroblock); |
451 | InverseDCTSubBlock(macroblock, tmp_sb); |
452 | |
453 | macroblock += SUBBLOCK_SIZE; |
454 | } |
455 | |
456 | } |
457 | |
458 | static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) |
459 | { |
460 | unsigned int sb; |
461 | unsigned int q = 0; |
462 | |
463 | for (sb = 0; sb < subblock_count; ++sb) |
464 | { |
465 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
466 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
467 | |
468 | if (isChromaSubBlock) { ++q; } |
469 | |
470 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
471 | ZigZagSubBlock(tmp_sb, macroblock); |
472 | InverseDCTSubBlock(macroblock, tmp_sb); |
473 | |
474 | if (isChromaSubBlock) |
475 | { |
476 | RescaleUVSubBlock(macroblock, macroblock); |
477 | } |
478 | else |
479 | { |
480 | RescaleYSubBlock(macroblock, macroblock); |
481 | } |
482 | |
483 | macroblock += SUBBLOCK_SIZE; |
484 | } |
485 | } |
486 | |
487 | static void TransposeSubBlock(int16_t *dst, const int16_t *src) |
488 | { |
489 | ReorderSubBlock(dst, src, TRANSPOSE_TABLE); |
490 | } |
491 | |
492 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src) |
493 | { |
494 | ReorderSubBlock(dst, src, ZIGZAG_TABLE); |
495 | } |
496 | |
497 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table) |
498 | { |
499 | unsigned int i; |
500 | |
501 | /* source and destination sublocks cannot overlap */ |
502 | assert(abs(dst - src) > SUBBLOCK_SIZE); |
503 | |
504 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
505 | { |
506 | dst[i] = src[table[i]]; |
507 | } |
508 | } |
509 | |
510 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) |
511 | { |
512 | unsigned int i; |
513 | |
514 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
515 | { |
516 | int32_t v = src1[i] * src2[i]; |
517 | dst[i] = clamp_s16(v) << shift; |
518 | } |
519 | } |
520 | |
521 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) |
522 | { |
523 | unsigned int i; |
524 | |
525 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
526 | { |
527 | int32_t v = src[i] * scale; |
528 | dst[i] = clamp_s16(v); |
529 | } |
530 | } |
531 | |
532 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) |
533 | { |
534 | unsigned int i; |
535 | |
536 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
537 | { |
538 | dst[i] = src[i] >> shift; |
539 | } |
540 | } |
541 | |
542 | /*************************************************************************** |
543 | * Fast 2D IDCT using separable formulation and normalization |
544 | * Computations use single precision floats |
545 | * Implementation based on Wikipedia : |
546 | * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te |
547 | **************************************************************************/ |
548 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride) |
549 | { |
550 | float e[4]; |
551 | float f[4]; |
552 | float x26, x1357, x15, x37, x17, x35; |
553 | |
554 | x15 = IDCT_K[2] * (x[1] + x[5]); |
555 | x37 = IDCT_K[3] * (x[3] + x[7]); |
556 | x17 = IDCT_K[8] * (x[1] + x[7]); |
557 | x35 = IDCT_K[9] * (x[3] + x[5]); |
558 | x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]); |
559 | x26 = IDCT_C6 * (x[2] + x[6]); |
560 | |
561 | f[0] = x[0] + x[4]; |
562 | f[1] = x[0] - x[4]; |
563 | f[2] = x26 + IDCT_K[0]*x[2]; |
564 | f[3] = x26 + IDCT_K[1]*x[6]; |
565 | |
566 | e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17; |
567 | e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35; |
568 | e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35; |
569 | e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17; |
570 | |
571 | *dst = f[0] + f[2] + e[0]; dst += stride; |
572 | *dst = f[1] + f[3] + e[1]; dst += stride; |
573 | *dst = f[1] - f[3] + e[2]; dst += stride; |
574 | *dst = f[0] - f[2] + e[3]; dst += stride; |
575 | *dst = f[0] - f[2] - e[3]; dst += stride; |
576 | *dst = f[1] - f[3] - e[2]; dst += stride; |
577 | *dst = f[1] + f[3] - e[1]; dst += stride; |
578 | *dst = f[0] + f[2] - e[0]; dst += stride; |
579 | } |
580 | |
581 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) |
582 | { |
583 | float x[8]; |
584 | float block[SUBBLOCK_SIZE]; |
585 | unsigned int i, j; |
586 | |
587 | /* idct 1d on rows (+transposition) */ |
588 | for (i = 0; i < 8; ++i) |
589 | { |
590 | for (j = 0; j < 8; ++j) |
591 | { |
592 | x[j] = (float)src[i*8+j]; |
593 | } |
594 | |
595 | InverseDCT1D(x, &block[i], 8); |
596 | } |
597 | |
598 | /* idct 1d on columns (thanks to previous transposition) */ |
599 | for (i = 0; i < 8; ++i) |
600 | { |
601 | InverseDCT1D(&block[i*8], x, 1); |
602 | |
603 | /* C4 = 1 normalization implies a division by 8 */ |
604 | for (j = 0; j < 8; ++j) |
605 | { |
606 | dst[i+j*8] = (int16_t)x[j] >> 3; |
607 | } |
608 | } |
609 | } |
610 | |
611 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src) |
612 | { |
613 | unsigned int i; |
614 | |
615 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
616 | { |
617 | dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; |
618 | } |
619 | } |
620 | |
621 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) |
622 | { |
623 | unsigned int i; |
624 | |
625 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
626 | { |
627 | dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; |
628 | } |
629 | } |
630 | |
631 | |
632 | |
633 | /* FIXME: this mask assumes the expansion pack is present (8 MB of RDRAM) */ |
634 | #define MEMMASK 0x7fffff |
635 | |
636 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) |
637 | { |
638 | while (count != 0) |
639 | { |
640 | uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; |
641 | s <<= 8; |
642 | s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; |
643 | |
644 | *(dst++) = s; |
645 | |
646 | --count; |
647 | } |
648 | } |
649 | |
650 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) |
651 | { |
652 | while (count != 0) |
653 | { |
654 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
655 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
656 | |
657 | --count; |
658 | } |
659 | } |
660 | |
661 | static uint32_t rdram_read_u32(uint32_t address) |
662 | { |
663 | uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
664 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
665 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
666 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
667 | |
668 | return r; |
669 | } |
670 | |
671 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) |
672 | { |
673 | while (count != 0) |
674 | { |
675 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); |
676 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); |
677 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
678 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
679 | |
680 | --count; |
681 | } |
682 | } |
683 | |