d9e74a6f |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus-rsp-hle - jpeg.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2012 Bobby Smiles * |
5 | * Copyright (C) 2009 Richard Goedeken * |
6 | * Copyright (C) 2002 Hacktarux * |
7 | * * |
8 | * This program is free software; you can redistribute it and/or modify * |
9 | * it under the terms of the GNU General Public License as published by * |
10 | * the Free Software Foundation; either version 2 of the License, or * |
11 | * (at your option) any later version. * |
12 | * * |
13 | * This program is distributed in the hope that it will be useful, * |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
16 | * GNU General Public License for more details. * |
17 | * * |
18 | * You should have received a copy of the GNU General Public License * |
19 | * along with this program; if not, write to the * |
20 | * Free Software Foundation, Inc., * |
21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
22 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
23 | |
24 | #include <assert.h> |
25 | #include <stdlib.h> |
26 | #include <stdint.h> |
27 | |
28 | #define M64P_PLUGIN_PROTOTYPES 1 |
29 | #include "m64p_types.h" |
30 | #include "m64p_plugin.h" |
31 | #include "hle.h" |
32 | |
33 | #define SUBBLOCK_SIZE 64 |
34 | |
35 | typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address); |
2d262872 |
36 | typedef void (*subblock_transform_t)(int16_t* dst, const int16_t* src); |
d9e74a6f |
37 | |
38 | /* rdram operations */ |
39 | // FIXME: these functions deserve their own module |
40 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count); |
41 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count); |
42 | static uint32_t rdram_read_u32(uint32_t address); |
43 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count); |
44 | |
45 | /* standard jpeg ucode decoder */ |
2d262872 |
46 | static void jpeg_decode_std(const char * const version, |
47 | const subblock_transform_t transform_luma, |
48 | const subblock_transform_t transform_chroma, |
49 | const tile_line_emitter_t emit_line); |
d9e74a6f |
50 | |
51 | /* helper functions */ |
52 | static uint8_t clamp_u8(int16_t x); |
53 | static int16_t clamp_s12(int16_t x); |
54 | static int16_t clamp_s16(int32_t x); |
55 | static uint16_t clamp_RGBA_component(int16_t x); |
56 | |
57 | /* pixel conversion & formatting */ |
58 | static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v); |
59 | static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v); |
60 | |
61 | /* tile line emitters */ |
62 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address); |
63 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address); |
64 | |
65 | /* macroblocks operations */ |
2d262872 |
66 | static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); |
67 | static void decode_macroblock_std( |
68 | const subblock_transform_t transform_luma, |
69 | const subblock_transform_t transform_chroma, |
70 | int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]); |
d9e74a6f |
71 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
72 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); |
73 | |
74 | /* subblocks operations */ |
75 | static void TransposeSubBlock(int16_t *dst, const int16_t *src); |
76 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src); |
77 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table); |
78 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); |
79 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); |
80 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); |
81 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride); |
82 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); |
83 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src); |
84 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); |
85 | |
86 | /* transposed dequantization table */ |
87 | static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = |
88 | { |
89 | 16, 12, 14, 14, 18, 24, 49, 72, |
90 | 11, 12, 13, 17, 22, 35, 64, 92, |
91 | 10, 14, 16, 22, 37, 55, 78, 95, |
92 | 16, 19, 24, 29, 56, 64, 87, 98, |
93 | 24, 26, 40, 51, 68, 81, 103, 112, |
94 | 40, 58, 57, 87, 109, 104, 121, 100, |
95 | 51, 60, 69, 80, 103, 113, 120, 103, |
96 | 61, 55, 56, 62, 77, 92, 101, 99 |
97 | }; |
98 | |
99 | /* zig-zag indices */ |
100 | static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = |
101 | { |
102 | 0, 1, 5, 6, 14, 15, 27, 28, |
103 | 2, 4, 7, 13, 16, 26, 29, 42, |
104 | 3, 8, 12, 17, 25, 30, 41, 43, |
105 | 9, 11, 18, 24, 31, 40, 44, 53, |
106 | 10, 19, 23, 32, 39, 45, 52, 54, |
107 | 20, 22, 33, 38, 46, 51, 55, 60, |
108 | 21, 34, 37, 47, 50, 56, 59, 61, |
109 | 35, 36, 48, 49, 57, 58, 62, 63 |
110 | }; |
111 | |
112 | /* transposition indices */ |
113 | static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = |
114 | { |
115 | 0, 8, 16, 24, 32, 40, 48, 56, |
116 | 1, 9, 17, 25, 33, 41, 49, 57, |
117 | 2, 10, 18, 26, 34, 42, 50, 58, |
118 | 3, 11, 19, 27, 35, 43, 51, 59, |
119 | 4, 12, 20, 28, 36, 44, 52, 60, |
120 | 5, 13, 21, 29, 37, 45, 53, 61, |
121 | 6, 14, 22, 30, 38, 46, 54, 62, |
122 | 7, 15, 23, 31, 39, 47, 55, 63 |
123 | }; |
124 | |
125 | |
126 | |
/* Constants used by InverseDCT1D.
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen so that C4 = 1.
 * IDCT_K holds the precomputed sums/differences of those Cn terms. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] =
{
    /* [0]  C2-C6        */  0.765366865f,
    /* [1] -C2-C6        */ -1.847759065f,
    /* [2]  C5-C3        */ -0.390180644f,
    /* [3] -C5-C3        */ -1.961570561f,
    /* [4]  C1+C3-C5-C7  */  1.501321110f,
    /* [5]  C1+C3-C5+C7  */  2.053119869f,
    /* [6]  C1+C3+C5-C7  */  3.072711027f,
    /* [7] -C1+C3+C5-C7  */  0.298631336f,
    /* [8]  C7-C3        */ -0.899976223f,
    /* [9] -C1-C3        */ -2.562915448f
};
144 | |
145 | |
146 | /* global functions */ |
147 | |
148 | /*************************************************************************** |
149 | * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. |
150 | **************************************************************************/ |
151 | void jpeg_decode_PS0() |
152 | { |
2d262872 |
153 | jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine); |
d9e74a6f |
154 | } |
155 | |
156 | /*************************************************************************** |
157 | * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and |
158 | * Pokemon Stadium 2. |
159 | **************************************************************************/ |
160 | void jpeg_decode_PS() |
161 | { |
2d262872 |
162 | jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine); |
d9e74a6f |
163 | } |
164 | |
165 | /*************************************************************************** |
166 | * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. |
167 | **************************************************************************/ |
168 | void jpeg_decode_OB() |
169 | { |
170 | int16_t qtable[SUBBLOCK_SIZE]; |
171 | unsigned int mb; |
172 | |
173 | int32_t y_dc = 0; |
174 | int32_t u_dc = 0; |
175 | int32_t v_dc = 0; |
176 | |
177 | const OSTask_t * const task = get_task(); |
178 | |
179 | uint32_t address = task->data_ptr; |
180 | const unsigned int macroblock_count = task->data_size; |
181 | const int qscale = task->yield_data_size; |
182 | |
183 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", |
184 | address, |
185 | macroblock_count, |
186 | qscale); |
187 | |
188 | if (qscale != 0) |
189 | { |
190 | if (qscale > 0) |
191 | { |
192 | ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); |
193 | } |
194 | else |
195 | { |
196 | RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); |
197 | } |
198 | } |
199 | |
200 | for (mb = 0; mb < macroblock_count; ++mb) |
201 | { |
202 | int16_t macroblock[6*SUBBLOCK_SIZE]; |
203 | |
204 | rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE); |
2d262872 |
205 | decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); |
d9e74a6f |
206 | EmitTilesMode2(EmitYUVTileLine, macroblock, address); |
207 | |
208 | address += (2*6*SUBBLOCK_SIZE); |
209 | } |
210 | } |
211 | |
212 | |
213 | /* local functions */ |
2d262872 |
214 | static void jpeg_decode_std(const char * const version, |
215 | const subblock_transform_t transform_luma, |
216 | const subblock_transform_t transform_chroma, |
217 | const tile_line_emitter_t emit_line) |
d9e74a6f |
218 | { |
219 | int16_t qtables[3][SUBBLOCK_SIZE]; |
220 | unsigned int mb; |
221 | uint32_t address; |
222 | uint32_t macroblock_count; |
223 | uint32_t mode; |
224 | uint32_t qtableY_ptr; |
225 | uint32_t qtableU_ptr; |
226 | uint32_t qtableV_ptr; |
227 | unsigned int subblock_count; |
228 | unsigned int macroblock_size; |
2d262872 |
229 | int16_t macroblock[6*SUBBLOCK_SIZE]; /* macroblock contains at most 6 subblobcks */ |
d9e74a6f |
230 | const OSTask_t * const task = get_task(); |
231 | |
232 | if (task->flags & 0x1) |
233 | { |
234 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); |
235 | return; |
236 | } |
237 | |
238 | address = rdram_read_u32(task->data_ptr); |
239 | macroblock_count = rdram_read_u32(task->data_ptr + 4); |
240 | mode = rdram_read_u32(task->data_ptr + 8); |
241 | qtableY_ptr = rdram_read_u32(task->data_ptr + 12); |
242 | qtableU_ptr = rdram_read_u32(task->data_ptr + 16); |
243 | qtableV_ptr = rdram_read_u32(task->data_ptr + 20); |
244 | |
245 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", |
246 | version, |
247 | address, |
248 | macroblock_count, |
249 | mode, |
250 | qtableY_ptr, |
251 | qtableU_ptr, |
252 | qtableV_ptr); |
253 | |
254 | if (mode != 0 && mode != 2) |
255 | { |
256 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); |
257 | return; |
258 | } |
259 | |
260 | subblock_count = mode + 4; |
2d262872 |
261 | macroblock_size = subblock_count*SUBBLOCK_SIZE; |
d9e74a6f |
262 | |
263 | rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); |
264 | rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); |
265 | rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); |
266 | |
d9e74a6f |
267 | for (mb = 0; mb < macroblock_count; ++mb) |
268 | { |
2d262872 |
269 | rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size); |
270 | decode_macroblock_std(transform_luma, transform_chroma, |
271 | macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); |
d9e74a6f |
272 | |
273 | if (mode == 0) |
274 | { |
275 | EmitTilesMode0(emit_line, macroblock, address); |
276 | } |
277 | else |
278 | { |
279 | EmitTilesMode2(emit_line, macroblock, address); |
280 | } |
281 | |
2d262872 |
282 | address += 2*macroblock_size; |
d9e74a6f |
283 | } |
d9e74a6f |
284 | } |
285 | |
/* Saturate a signed 16-bit value to the unsigned 8-bit range [0, 255].
 *
 * Written with explicit comparisons so that every input, including
 * INT16_MIN, yields a well-defined result without depending on how the
 * compiler right-shifts negative values. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
290 | |
/* Clamp x to the range [-0x800, 0x7f0].
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lowest  = -0x800;
    const int16_t highest = 0x7f0;

    return (x < lowest)  ? lowest
         : (x > highest) ? highest
         : x;
}
296 | |
/* Saturate a 32-bit value to the signed 16-bit range [-32768, 32767]. */
static int16_t clamp_s16(int32_t x)
{
    if (x > 32767)
    {
        return 32767;
    }
    if (x < -32768)
    {
        return -32768;
    }
    return x;
}
302 | |
/* Clamp x to [0, 0xff0], then keep only bits 7..11 (mask 0xf80), i.e. the
 * five most significant bits of the 12-bit component. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t bounded = x;

    if (bounded > 0xff0)
    {
        bounded = 0xff0;
    }
    else if (bounded < 0)
    {
        bounded = 0;
    }

    return (bounded & 0xf80);
}
308 | |
/* Pack two luma samples and one chroma pair into a 32-bit word.
 * From most to least significant byte: U, Y1, V, Y2, each saturated to
 * 8 bits with clamp_u8. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    uint32_t word;

    word = clamp_u8(u);
    word = (word << 8) | clamp_u8(y1);
    word = (word << 8) | clamp_u8(v);
    word = (word << 8) | clamp_u8(y2);

    return word;
}
316 | |
/* Convert a floating-point value to int16_t, saturating at the type's
 * limits. In-range values are truncated toward zero exactly like a plain
 * cast; out-of-range values (for which a plain cast is undefined) are
 * pinned to the nearest limit. */
static int16_t saturate_to_s16(double x)
{
    if (x >= 32767.0)
    {
        return 32767;
    }
    if (x <= -32768.0)
    {
        return -32768;
    }
    return (int16_t)x;
}

/* Convert one YUV sample to a packed 16-bit RGBA pixel.
 *
 * y is biased by +2048 before conversion; u and v are used as signed
 * chroma offsets. Each component is clamped and reduced to 5 bits by
 * clamp_RGBA_component, then placed as follows:
 *   red   -> bits 11..15
 *   green -> bits  6..10
 *   blue  -> bits  1..5
 *   bit 0 is always set.
 */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    /* saturate first: extreme chroma values can push the sums outside the
     * int16_t range, where a direct cast would be undefined */
    const uint16_t r = clamp_RGBA_component(saturate_to_s16(fY             + 1.4025*fV));
    const uint16_t g = clamp_RGBA_component(saturate_to_s16(fY - 0.3443*fU - 0.7144*fV));
    const uint16_t b = clamp_RGBA_component(saturate_to_s16(fY + 1.7729*fU            ));

    return (r << 4) | (g >> 1) | (b >> 6) | 1;
}
329 | |
330 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address) |
331 | { |
332 | uint32_t uyvy[8]; |
333 | |
334 | const int16_t * const v = u + SUBBLOCK_SIZE; |
335 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
336 | |
337 | uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); |
338 | uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); |
339 | uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); |
340 | uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); |
341 | uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); |
342 | uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); |
343 | uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); |
344 | uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); |
345 | |
346 | rdram_write_many_u32(uyvy, address, 8); |
347 | } |
348 | |
349 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address) |
350 | { |
351 | uint16_t rgba[16]; |
352 | |
353 | const int16_t * const v = u + SUBBLOCK_SIZE; |
354 | const int16_t * const y2 = y + SUBBLOCK_SIZE; |
355 | |
356 | rgba[0] = GetRGBA(y[0], u[0], v[0]); |
357 | rgba[1] = GetRGBA(y[1], u[0], v[0]); |
358 | rgba[2] = GetRGBA(y[2], u[1], v[1]); |
359 | rgba[3] = GetRGBA(y[3], u[1], v[1]); |
360 | rgba[4] = GetRGBA(y[4], u[2], v[2]); |
361 | rgba[5] = GetRGBA(y[5], u[2], v[2]); |
362 | rgba[6] = GetRGBA(y[6], u[3], v[3]); |
363 | rgba[7] = GetRGBA(y[7], u[3], v[3]); |
364 | rgba[8] = GetRGBA(y2[0], u[4], v[4]); |
365 | rgba[9] = GetRGBA(y2[1], u[4], v[4]); |
366 | rgba[10] = GetRGBA(y2[2], u[5], v[5]); |
367 | rgba[11] = GetRGBA(y2[3], u[5], v[5]); |
368 | rgba[12] = GetRGBA(y2[4], u[6], v[6]); |
369 | rgba[13] = GetRGBA(y2[5], u[6], v[6]); |
370 | rgba[14] = GetRGBA(y2[6], u[7], v[7]); |
371 | rgba[15] = GetRGBA(y2[7], u[7], v[7]); |
372 | |
373 | rdram_write_many_u16(rgba, address, 16); |
374 | } |
375 | |
376 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
377 | { |
378 | unsigned int i; |
379 | |
380 | unsigned int y_offset = 0; |
381 | unsigned int u_offset = 2*SUBBLOCK_SIZE; |
382 | |
383 | for (i = 0; i < 8; ++i) |
384 | { |
385 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
386 | |
387 | y_offset += 8; |
388 | u_offset += 8; |
389 | address += 32; |
390 | } |
391 | } |
392 | |
393 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
394 | { |
395 | unsigned int i; |
396 | |
397 | unsigned int y_offset = 0; |
398 | unsigned int u_offset = 4*SUBBLOCK_SIZE; |
399 | |
400 | for (i = 0; i < 8; ++i) |
401 | { |
402 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
403 | emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); |
404 | |
405 | y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16; |
406 | u_offset += 8; |
407 | address += 64; |
408 | } |
409 | } |
410 | |
2d262872 |
411 | static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) |
d9e74a6f |
412 | { |
413 | int sb; |
414 | |
415 | for (sb = 0; sb < 6; ++sb) |
416 | { |
417 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
418 | |
419 | /* update DC */ |
420 | int32_t dc = (int32_t)macroblock[0]; |
421 | switch(sb) |
422 | { |
423 | case 0: case 1: case 2: case 3: |
424 | *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break; |
425 | case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break; |
426 | case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break; |
427 | } |
428 | |
429 | ZigZagSubBlock(tmp_sb, macroblock); |
430 | if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); } |
431 | TransposeSubBlock(macroblock, tmp_sb); |
432 | InverseDCTSubBlock(macroblock, macroblock); |
433 | |
434 | macroblock += SUBBLOCK_SIZE; |
435 | } |
436 | } |
437 | |
2d262872 |
438 | static void decode_macroblock_std( |
439 | const subblock_transform_t transform_luma, |
440 | const subblock_transform_t transform_chroma, |
441 | int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]) |
d9e74a6f |
442 | { |
443 | unsigned int sb; |
444 | unsigned int q = 0; |
445 | |
446 | for (sb = 0; sb < subblock_count; ++sb) |
447 | { |
448 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
449 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
450 | |
451 | if (isChromaSubBlock) { ++q; } |
452 | |
453 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
454 | ZigZagSubBlock(tmp_sb, macroblock); |
455 | InverseDCTSubBlock(macroblock, tmp_sb); |
456 | |
457 | if (isChromaSubBlock) |
458 | { |
2d262872 |
459 | if (transform_chroma != NULL) |
460 | transform_chroma(macroblock, macroblock); |
d9e74a6f |
461 | } |
462 | else |
463 | { |
2d262872 |
464 | if (transform_luma != NULL) |
465 | transform_luma(macroblock, macroblock); |
d9e74a6f |
466 | } |
467 | |
468 | macroblock += SUBBLOCK_SIZE; |
469 | } |
470 | } |
471 | |
472 | static void TransposeSubBlock(int16_t *dst, const int16_t *src) |
473 | { |
474 | ReorderSubBlock(dst, src, TRANSPOSE_TABLE); |
475 | } |
476 | |
477 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src) |
478 | { |
479 | ReorderSubBlock(dst, src, ZIGZAG_TABLE); |
480 | } |
481 | |
482 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table) |
483 | { |
484 | unsigned int i; |
485 | |
486 | /* source and destination sublocks cannot overlap */ |
487 | assert(abs(dst - src) > SUBBLOCK_SIZE); |
488 | |
489 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
490 | { |
491 | dst[i] = src[table[i]]; |
492 | } |
493 | } |
494 | |
495 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) |
496 | { |
497 | unsigned int i; |
498 | |
499 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
500 | { |
501 | int32_t v = src1[i] * src2[i]; |
502 | dst[i] = clamp_s16(v) << shift; |
503 | } |
504 | } |
505 | |
506 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) |
507 | { |
508 | unsigned int i; |
509 | |
510 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
511 | { |
512 | int32_t v = src[i] * scale; |
513 | dst[i] = clamp_s16(v); |
514 | } |
515 | } |
516 | |
517 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) |
518 | { |
519 | unsigned int i; |
520 | |
521 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
522 | { |
523 | dst[i] = src[i] >> shift; |
524 | } |
525 | } |
526 | |
527 | /*************************************************************************** |
528 | * Fast 2D IDCT using separable formulation and normalization |
529 | * Computations use single precision floats |
530 | * Implementation based on Wikipedia : |
531 | * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te |
532 | **************************************************************************/ |
533 | static void InverseDCT1D(const float * const x, float *dst, unsigned int stride) |
534 | { |
535 | float e[4]; |
536 | float f[4]; |
537 | float x26, x1357, x15, x37, x17, x35; |
538 | |
539 | x15 = IDCT_K[2] * (x[1] + x[5]); |
540 | x37 = IDCT_K[3] * (x[3] + x[7]); |
541 | x17 = IDCT_K[8] * (x[1] + x[7]); |
542 | x35 = IDCT_K[9] * (x[3] + x[5]); |
543 | x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]); |
544 | x26 = IDCT_C6 * (x[2] + x[6]); |
545 | |
546 | f[0] = x[0] + x[4]; |
547 | f[1] = x[0] - x[4]; |
548 | f[2] = x26 + IDCT_K[0]*x[2]; |
549 | f[3] = x26 + IDCT_K[1]*x[6]; |
550 | |
551 | e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17; |
552 | e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35; |
553 | e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35; |
554 | e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17; |
555 | |
556 | *dst = f[0] + f[2] + e[0]; dst += stride; |
557 | *dst = f[1] + f[3] + e[1]; dst += stride; |
558 | *dst = f[1] - f[3] + e[2]; dst += stride; |
559 | *dst = f[0] - f[2] + e[3]; dst += stride; |
560 | *dst = f[0] - f[2] - e[3]; dst += stride; |
561 | *dst = f[1] - f[3] - e[2]; dst += stride; |
562 | *dst = f[1] + f[3] - e[1]; dst += stride; |
563 | *dst = f[0] + f[2] - e[0]; dst += stride; |
564 | } |
565 | |
566 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) |
567 | { |
568 | float x[8]; |
569 | float block[SUBBLOCK_SIZE]; |
570 | unsigned int i, j; |
571 | |
572 | /* idct 1d on rows (+transposition) */ |
573 | for (i = 0; i < 8; ++i) |
574 | { |
575 | for (j = 0; j < 8; ++j) |
576 | { |
577 | x[j] = (float)src[i*8+j]; |
578 | } |
579 | |
580 | InverseDCT1D(x, &block[i], 8); |
581 | } |
582 | |
583 | /* idct 1d on columns (thanks to previous transposition) */ |
584 | for (i = 0; i < 8; ++i) |
585 | { |
586 | InverseDCT1D(&block[i*8], x, 1); |
587 | |
588 | /* C4 = 1 normalization implies a division by 8 */ |
589 | for (j = 0; j < 8; ++j) |
590 | { |
591 | dst[i+j*8] = (int16_t)x[j] >> 3; |
592 | } |
593 | } |
594 | } |
595 | |
596 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src) |
597 | { |
598 | unsigned int i; |
599 | |
600 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
601 | { |
602 | dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; |
603 | } |
604 | } |
605 | |
606 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) |
607 | { |
608 | unsigned int i; |
609 | |
610 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
611 | { |
612 | dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; |
613 | } |
614 | } |
615 | |
616 | |
617 | |
618 | /* FIXME: assume presence of expansion pack */ |
619 | #define MEMMASK 0x7fffff |
620 | |
621 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) |
622 | { |
623 | while (count != 0) |
624 | { |
625 | uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; |
626 | s <<= 8; |
627 | s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; |
628 | |
629 | *(dst++) = s; |
630 | |
631 | --count; |
632 | } |
633 | } |
634 | |
635 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) |
636 | { |
637 | while (count != 0) |
638 | { |
639 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
640 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
641 | |
642 | --count; |
643 | } |
644 | } |
645 | |
646 | static uint32_t rdram_read_u32(uint32_t address) |
647 | { |
648 | uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
649 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
650 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8; |
651 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
652 | |
653 | return r; |
654 | } |
655 | |
656 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) |
657 | { |
658 | while (count != 0) |
659 | { |
660 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); |
661 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); |
662 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
663 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
664 | |
665 | --count; |
666 | } |
667 | } |
668 | |