d9e74a6f |
1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | * Mupen64plus-rsp-hle - jpeg.c * |
3 | * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * |
4 | * Copyright (C) 2012 Bobby Smiles * |
5 | * Copyright (C) 2009 Richard Goedeken * |
6 | * Copyright (C) 2002 Hacktarux * |
7 | * * |
8 | * This program is free software; you can redistribute it and/or modify * |
9 | * it under the terms of the GNU General Public License as published by * |
10 | * the Free Software Foundation; either version 2 of the License, or * |
11 | * (at your option) any later version. * |
12 | * * |
13 | * This program is distributed in the hope that it will be useful, * |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
16 | * GNU General Public License for more details. * |
17 | * * |
18 | * You should have received a copy of the GNU General Public License * |
19 | * along with this program; if not, write to the * |
20 | * Free Software Foundation, Inc., * |
21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
22 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
23 | |
0a8a0368 |
24 | #include <stdint.h> |
d9e74a6f |
25 | #include <assert.h> |
26 | #include <stdlib.h> |
d9e74a6f |
27 | |
28 | #define M64P_PLUGIN_PROTOTYPES 1 |
29 | #include "m64p_types.h" |
30 | #include "m64p_plugin.h" |
31 | #include "hle.h" |
0a8a0368 |
32 | #include "jpeg.h" |
d9e74a6f |
33 | |
/* number of coefficients in one 8x8 subblock */
#define SUBBLOCK_SIZE 64

/* callback emitting one line of pixels to rdram at `address`, built from the
 * luma samples at `y` and the chroma samples at `u` */
typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);

/* callback post-processing one subblock from `src` into `dst` */
typedef void (*subblock_transform_t)(int16_t *dst, const int16_t *src);

/* rdram operations
 * FIXME: these functions deserve their own module
 */
static void     rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
static void     rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
static uint32_t rdram_read_u32(uint32_t address);
static void     rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);

/* standard jpeg ucode decoder */
static void jpeg_decode_std(const char *const version,
                            const subblock_transform_t transform_luma,
                            const subblock_transform_t transform_chroma,
                            const tile_line_emitter_t emit_line);

/* helper functions */
static uint8_t  clamp_u8(int16_t x);
static int16_t  clamp_s12(int16_t x);
static uint16_t clamp_RGBA_component(int16_t x);

/* pixel conversion & formatting */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);

/* tile line emitters */
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);

/* macroblocks operations */
static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
static void decode_macroblock_std(const subblock_transform_t transform_luma,
                                  const subblock_transform_t transform_chroma,
                                  int16_t *macroblock,
                                  unsigned int subblock_count,
                                  const int16_t qtables[3][SUBBLOCK_SIZE]);
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);

/* subblocks operations */
static void TransposeSubBlock(int16_t *dst, const int16_t *src);
static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
static void InverseDCT1D(const float *const x, float *dst, unsigned int stride);
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
87 | |
88 | /* transposed dequantization table */ |
0a8a0368 |
89 | static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = { |
d9e74a6f |
90 | 16, 12, 14, 14, 18, 24, 49, 72, |
91 | 11, 12, 13, 17, 22, 35, 64, 92, |
92 | 10, 14, 16, 22, 37, 55, 78, 95, |
93 | 16, 19, 24, 29, 56, 64, 87, 98, |
94 | 24, 26, 40, 51, 68, 81, 103, 112, |
95 | 40, 58, 57, 87, 109, 104, 121, 100, |
96 | 51, 60, 69, 80, 103, 113, 120, 103, |
97 | 61, 55, 56, 62, 77, 92, 101, 99 |
98 | }; |
99 | |
100 | /* zig-zag indices */ |
0a8a0368 |
101 | static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = { |
d9e74a6f |
102 | 0, 1, 5, 6, 14, 15, 27, 28, |
103 | 2, 4, 7, 13, 16, 26, 29, 42, |
104 | 3, 8, 12, 17, 25, 30, 41, 43, |
105 | 9, 11, 18, 24, 31, 40, 44, 53, |
106 | 10, 19, 23, 32, 39, 45, 52, 54, |
107 | 20, 22, 33, 38, 46, 51, 55, 60, |
108 | 21, 34, 37, 47, 50, 56, 59, 61, |
109 | 35, 36, 48, 49, 57, 58, 62, 63 |
110 | }; |
111 | |
112 | /* transposition indices */ |
0a8a0368 |
113 | static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = { |
d9e74a6f |
114 | 0, 8, 16, 24, 32, 40, 48, 56, |
115 | 1, 9, 17, 25, 33, 41, 49, 57, |
116 | 2, 10, 18, 26, 34, 42, 50, 58, |
117 | 3, 11, 19, 27, 35, 43, 51, 59, |
118 | 4, 12, 20, 28, 36, 44, 52, 60, |
119 | 5, 13, 21, 29, 37, 45, 53, 61, |
120 | 6, 14, 22, 30, 38, 46, 54, 62, |
121 | 7, 15, 23, 31, 39, 47, 55, 63 |
122 | }; |
123 | |
124 | |
125 | |
/* IDCT related constants
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen such that C4 = 1 */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;

/* linear combinations of the Cn constants consumed by InverseDCT1D */
static const float IDCT_K[10] = {
    /* [0] */  0.765366865f,   /*  C2-C6         */
    /* [1] */ -1.847759065f,   /* -C2-C6         */
    /* [2] */ -0.390180644f,   /*  C5-C3         */
    /* [3] */ -1.961570561f,   /* -C5-C3         */
    /* [4] */  1.501321110f,   /*  C1+C3-C5-C7   */
    /* [5] */  2.053119869f,   /*  C1+C3-C5+C7   */
    /* [6] */  3.072711027f,   /*  C1+C3+C5-C7   */
    /* [7] */  0.298631336f,   /* -C1+C3+C5-C7   */
    /* [8] */ -0.899976223f,   /*  C7-C3         */
    /* [9] */ -2.562915448f    /* -C1-C3         */
};
142 | |
143 | |
144 | /* global functions */ |
145 | |
146 | /*************************************************************************** |
147 | * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. |
148 | **************************************************************************/ |
0a8a0368 |
149 | void jpeg_decode_PS0(void) |
d9e74a6f |
150 | { |
2d262872 |
151 | jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine); |
d9e74a6f |
152 | } |
153 | |
154 | /*************************************************************************** |
155 | * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and |
156 | * Pokemon Stadium 2. |
157 | **************************************************************************/ |
0a8a0368 |
158 | void jpeg_decode_PS(void) |
d9e74a6f |
159 | { |
2d262872 |
160 | jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine); |
d9e74a6f |
161 | } |
162 | |
163 | /*************************************************************************** |
164 | * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. |
165 | **************************************************************************/ |
0a8a0368 |
166 | void jpeg_decode_OB(void) |
d9e74a6f |
167 | { |
168 | int16_t qtable[SUBBLOCK_SIZE]; |
169 | unsigned int mb; |
170 | |
171 | int32_t y_dc = 0; |
172 | int32_t u_dc = 0; |
173 | int32_t v_dc = 0; |
0a8a0368 |
174 | |
175 | const OSTask_t *const task = get_task(); |
d9e74a6f |
176 | |
177 | uint32_t address = task->data_ptr; |
178 | const unsigned int macroblock_count = task->data_size; |
179 | const int qscale = task->yield_data_size; |
180 | |
181 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", |
0a8a0368 |
182 | address, |
183 | macroblock_count, |
184 | qscale); |
d9e74a6f |
185 | |
0a8a0368 |
186 | if (qscale != 0) { |
d9e74a6f |
187 | if (qscale > 0) |
d9e74a6f |
188 | ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); |
d9e74a6f |
189 | else |
d9e74a6f |
190 | RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); |
d9e74a6f |
191 | } |
192 | |
0a8a0368 |
193 | for (mb = 0; mb < macroblock_count; ++mb) { |
194 | int16_t macroblock[6 * SUBBLOCK_SIZE]; |
d9e74a6f |
195 | |
0a8a0368 |
196 | rdram_read_many_u16((uint16_t *)macroblock, address, 6 * SUBBLOCK_SIZE); |
2d262872 |
197 | decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL); |
d9e74a6f |
198 | EmitTilesMode2(EmitYUVTileLine, macroblock, address); |
199 | |
0a8a0368 |
200 | address += (2 * 6 * SUBBLOCK_SIZE); |
d9e74a6f |
201 | } |
202 | } |
203 | |
204 | |
205 | /* local functions */ |
0a8a0368 |
206 | static void jpeg_decode_std(const char *const version, |
207 | const subblock_transform_t transform_luma, |
208 | const subblock_transform_t transform_chroma, |
209 | const tile_line_emitter_t emit_line) |
d9e74a6f |
210 | { |
211 | int16_t qtables[3][SUBBLOCK_SIZE]; |
212 | unsigned int mb; |
213 | uint32_t address; |
214 | uint32_t macroblock_count; |
215 | uint32_t mode; |
216 | uint32_t qtableY_ptr; |
217 | uint32_t qtableU_ptr; |
218 | uint32_t qtableV_ptr; |
219 | unsigned int subblock_count; |
220 | unsigned int macroblock_size; |
0a8a0368 |
221 | /* macroblock contains at most 6 subblocks */ |
222 | int16_t macroblock[6 * SUBBLOCK_SIZE]; |
223 | const OSTask_t *const task = get_task(); |
d9e74a6f |
224 | |
0a8a0368 |
225 | if (task->flags & 0x1) { |
d9e74a6f |
226 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version); |
227 | return; |
228 | } |
229 | |
230 | address = rdram_read_u32(task->data_ptr); |
231 | macroblock_count = rdram_read_u32(task->data_ptr + 4); |
232 | mode = rdram_read_u32(task->data_ptr + 8); |
233 | qtableY_ptr = rdram_read_u32(task->data_ptr + 12); |
234 | qtableU_ptr = rdram_read_u32(task->data_ptr + 16); |
235 | qtableV_ptr = rdram_read_u32(task->data_ptr + 20); |
236 | |
237 | DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", |
0a8a0368 |
238 | version, |
239 | address, |
240 | macroblock_count, |
241 | mode, |
242 | qtableY_ptr, |
243 | qtableU_ptr, |
244 | qtableV_ptr); |
245 | |
246 | if (mode != 0 && mode != 2) { |
d9e74a6f |
247 | DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode); |
248 | return; |
249 | } |
0a8a0368 |
250 | |
d9e74a6f |
251 | subblock_count = mode + 4; |
0a8a0368 |
252 | macroblock_size = subblock_count * SUBBLOCK_SIZE; |
d9e74a6f |
253 | |
0a8a0368 |
254 | rdram_read_many_u16((uint16_t *)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); |
255 | rdram_read_many_u16((uint16_t *)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); |
256 | rdram_read_many_u16((uint16_t *)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); |
d9e74a6f |
257 | |
0a8a0368 |
258 | for (mb = 0; mb < macroblock_count; ++mb) { |
259 | rdram_read_many_u16((uint16_t *)macroblock, address, macroblock_size); |
2d262872 |
260 | decode_macroblock_std(transform_luma, transform_chroma, |
0a8a0368 |
261 | macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables); |
d9e74a6f |
262 | |
263 | if (mode == 0) |
d9e74a6f |
264 | EmitTilesMode0(emit_line, macroblock, address); |
d9e74a6f |
265 | else |
d9e74a6f |
266 | EmitTilesMode2(emit_line, macroblock, address); |
d9e74a6f |
267 | |
0a8a0368 |
268 | address += 2 * macroblock_size; |
d9e74a6f |
269 | } |
d9e74a6f |
270 | } |
271 | |
/* Saturate a signed 16-bit value to the range [0, 255].
 *
 * Written with explicit comparisons: the previous bit trick returned 1 for
 * x == -32768 and depended on the implementation-defined result of
 * right-shifting a negative value. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
        return 0;
    if (x > 0xff)
        return 0xff;
    return (uint8_t)x;
}
276 | |
/* Clamp x to [-0x800, 0x7f0].
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lower = -0x800;
    const int16_t upper = 0x7f0;

    return (x < lower) ? lower
         : (x > upper) ? upper
         : x;
}
285 | |
/* Clamp x to [0, 0xff0] and keep only bits 7..11 (mask 0xf80), i.e. the
 * five bits GetRGBA later shifts into each colour field. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t component = x;

    if (component < 0)
        component = 0;
    else if (component > 0xff0)
        component = 0xff0;

    return (uint16_t)(component & 0xf80);
}
294 | |
/* Pack two luma samples and one chroma pair into a 32-bit word laid out,
 * from most to least significant byte, as U, Y1, V, Y2.
 * Every component is saturated to 8 bits first. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
302 | |
/* Convert a floating-point value to int16_t, saturating at the type's
 * limits. A plain cast of an out-of-range value is undefined behaviour. */
static int16_t saturate_to_s16(double value)
{
    if (value >= 32767.0)
        return 32767;
    if (value <= -32768.0)
        return -32768;
    return (int16_t)value;
}

/* Convert one YUV sample to a 16-bit RGBA pixel.
 *
 * y is offset by 2048 before conversion. Each colour component is clamped
 * by clamp_RGBA_component, then red is placed in bits 11..15, green in bits
 * 6..10, blue in bits 1..5, and bit 0 is always set.
 *
 * The intermediate colour values can exceed the int16_t range (e.g. y near
 * 32767), so they are saturated before narrowing; in-range values are
 * truncated exactly as before. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    const uint16_t r = clamp_RGBA_component(saturate_to_s16(fY + 1.4025 * fV));
    const uint16_t g = clamp_RGBA_component(saturate_to_s16(fY - 0.3443 * fU - 0.7144 * fV));
    const uint16_t b = clamp_RGBA_component(saturate_to_s16(fY + 1.7729 * fU));

    return (r << 4) | (g >> 1) | (b >> 6) | 1;
}
315 | |
316 | static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address) |
317 | { |
318 | uint32_t uyvy[8]; |
319 | |
0a8a0368 |
320 | const int16_t *const v = u + SUBBLOCK_SIZE; |
321 | const int16_t *const y2 = y + SUBBLOCK_SIZE; |
d9e74a6f |
322 | |
323 | uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); |
324 | uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); |
325 | uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); |
326 | uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); |
327 | uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); |
328 | uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); |
329 | uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); |
330 | uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); |
331 | |
332 | rdram_write_many_u32(uyvy, address, 8); |
333 | } |
334 | |
335 | static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address) |
336 | { |
337 | uint16_t rgba[16]; |
338 | |
0a8a0368 |
339 | const int16_t *const v = u + SUBBLOCK_SIZE; |
340 | const int16_t *const y2 = y + SUBBLOCK_SIZE; |
d9e74a6f |
341 | |
342 | rgba[0] = GetRGBA(y[0], u[0], v[0]); |
343 | rgba[1] = GetRGBA(y[1], u[0], v[0]); |
344 | rgba[2] = GetRGBA(y[2], u[1], v[1]); |
345 | rgba[3] = GetRGBA(y[3], u[1], v[1]); |
346 | rgba[4] = GetRGBA(y[4], u[2], v[2]); |
347 | rgba[5] = GetRGBA(y[5], u[2], v[2]); |
348 | rgba[6] = GetRGBA(y[6], u[3], v[3]); |
349 | rgba[7] = GetRGBA(y[7], u[3], v[3]); |
350 | rgba[8] = GetRGBA(y2[0], u[4], v[4]); |
351 | rgba[9] = GetRGBA(y2[1], u[4], v[4]); |
352 | rgba[10] = GetRGBA(y2[2], u[5], v[5]); |
353 | rgba[11] = GetRGBA(y2[3], u[5], v[5]); |
354 | rgba[12] = GetRGBA(y2[4], u[6], v[6]); |
355 | rgba[13] = GetRGBA(y2[5], u[6], v[6]); |
356 | rgba[14] = GetRGBA(y2[6], u[7], v[7]); |
357 | rgba[15] = GetRGBA(y2[7], u[7], v[7]); |
358 | |
359 | rdram_write_many_u16(rgba, address, 16); |
360 | } |
361 | |
362 | static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
363 | { |
364 | unsigned int i; |
365 | |
366 | unsigned int y_offset = 0; |
0a8a0368 |
367 | unsigned int u_offset = 2 * SUBBLOCK_SIZE; |
d9e74a6f |
368 | |
0a8a0368 |
369 | for (i = 0; i < 8; ++i) { |
d9e74a6f |
370 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
371 | |
372 | y_offset += 8; |
373 | u_offset += 8; |
374 | address += 32; |
375 | } |
376 | } |
377 | |
378 | static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) |
379 | { |
380 | unsigned int i; |
381 | |
382 | unsigned int y_offset = 0; |
0a8a0368 |
383 | unsigned int u_offset = 4 * SUBBLOCK_SIZE; |
d9e74a6f |
384 | |
0a8a0368 |
385 | for (i = 0; i < 8; ++i) { |
d9e74a6f |
386 | emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); |
387 | emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); |
388 | |
0a8a0368 |
389 | y_offset += (i == 3) ? SUBBLOCK_SIZE + 16 : 16; |
d9e74a6f |
390 | u_offset += 8; |
391 | address += 64; |
392 | } |
393 | } |
394 | |
2d262872 |
395 | static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) |
d9e74a6f |
396 | { |
397 | int sb; |
398 | |
0a8a0368 |
399 | for (sb = 0; sb < 6; ++sb) { |
d9e74a6f |
400 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
401 | |
402 | /* update DC */ |
403 | int32_t dc = (int32_t)macroblock[0]; |
0a8a0368 |
404 | switch (sb) { |
405 | case 0: |
406 | case 1: |
407 | case 2: |
408 | case 3: |
409 | *y_dc += dc; |
410 | macroblock[0] = *y_dc & 0xffff; |
411 | break; |
412 | case 4: |
413 | *u_dc += dc; |
414 | macroblock[0] = *u_dc & 0xffff; |
415 | break; |
416 | case 5: |
417 | *v_dc += dc; |
418 | macroblock[0] = *v_dc & 0xffff; |
419 | break; |
d9e74a6f |
420 | } |
421 | |
422 | ZigZagSubBlock(tmp_sb, macroblock); |
0a8a0368 |
423 | if (qtable != NULL) |
424 | MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); |
d9e74a6f |
425 | TransposeSubBlock(macroblock, tmp_sb); |
426 | InverseDCTSubBlock(macroblock, macroblock); |
0a8a0368 |
427 | |
d9e74a6f |
428 | macroblock += SUBBLOCK_SIZE; |
429 | } |
430 | } |
431 | |
0a8a0368 |
432 | static void decode_macroblock_std(const subblock_transform_t transform_luma, |
433 | const subblock_transform_t transform_chroma, |
434 | int16_t *macroblock, |
435 | unsigned int subblock_count, |
436 | const int16_t qtables[3][SUBBLOCK_SIZE]) |
d9e74a6f |
437 | { |
438 | unsigned int sb; |
439 | unsigned int q = 0; |
440 | |
0a8a0368 |
441 | for (sb = 0; sb < subblock_count; ++sb) { |
d9e74a6f |
442 | int16_t tmp_sb[SUBBLOCK_SIZE]; |
443 | const int isChromaSubBlock = (subblock_count - sb <= 2); |
444 | |
0a8a0368 |
445 | if (isChromaSubBlock) |
446 | ++q; |
d9e74a6f |
447 | |
448 | MultSubBlocks(macroblock, macroblock, qtables[q], 4); |
449 | ZigZagSubBlock(tmp_sb, macroblock); |
450 | InverseDCTSubBlock(macroblock, tmp_sb); |
451 | |
0a8a0368 |
452 | if (isChromaSubBlock) { |
2d262872 |
453 | if (transform_chroma != NULL) |
454 | transform_chroma(macroblock, macroblock); |
0a8a0368 |
455 | } else { |
2d262872 |
456 | if (transform_luma != NULL) |
457 | transform_luma(macroblock, macroblock); |
d9e74a6f |
458 | } |
459 | |
460 | macroblock += SUBBLOCK_SIZE; |
461 | } |
462 | } |
463 | |
464 | static void TransposeSubBlock(int16_t *dst, const int16_t *src) |
465 | { |
466 | ReorderSubBlock(dst, src, TRANSPOSE_TABLE); |
467 | } |
468 | |
469 | static void ZigZagSubBlock(int16_t *dst, const int16_t *src) |
470 | { |
471 | ReorderSubBlock(dst, src, ZIGZAG_TABLE); |
472 | } |
473 | |
474 | static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table) |
475 | { |
476 | unsigned int i; |
477 | |
478 | /* source and destination sublocks cannot overlap */ |
479 | assert(abs(dst - src) > SUBBLOCK_SIZE); |
480 | |
481 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
d9e74a6f |
482 | dst[i] = src[table[i]]; |
d9e74a6f |
483 | } |
484 | |
485 | static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) |
486 | { |
487 | unsigned int i; |
488 | |
0a8a0368 |
489 | for (i = 0; i < SUBBLOCK_SIZE; ++i) { |
d9e74a6f |
490 | int32_t v = src1[i] * src2[i]; |
491 | dst[i] = clamp_s16(v) << shift; |
492 | } |
493 | } |
494 | |
495 | static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) |
496 | { |
497 | unsigned int i; |
498 | |
0a8a0368 |
499 | for (i = 0; i < SUBBLOCK_SIZE; ++i) { |
d9e74a6f |
500 | int32_t v = src[i] * scale; |
501 | dst[i] = clamp_s16(v); |
502 | } |
503 | } |
504 | |
505 | static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) |
506 | { |
507 | unsigned int i; |
508 | |
509 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
d9e74a6f |
510 | dst[i] = src[i] >> shift; |
d9e74a6f |
511 | } |
512 | |
513 | /*************************************************************************** |
514 | * Fast 2D IDCT using separable formulation and normalization |
515 | * Computations use single precision floats |
516 | * Implementation based on Wikipedia : |
517 | * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te |
518 | **************************************************************************/ |
0a8a0368 |
519 | static void InverseDCT1D(const float *const x, float *dst, unsigned int stride) |
d9e74a6f |
520 | { |
521 | float e[4]; |
522 | float f[4]; |
523 | float x26, x1357, x15, x37, x17, x35; |
524 | |
525 | x15 = IDCT_K[2] * (x[1] + x[5]); |
526 | x37 = IDCT_K[3] * (x[3] + x[7]); |
527 | x17 = IDCT_K[8] * (x[1] + x[7]); |
528 | x35 = IDCT_K[9] * (x[3] + x[5]); |
529 | x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]); |
530 | x26 = IDCT_C6 * (x[2] + x[6]); |
531 | |
532 | f[0] = x[0] + x[4]; |
533 | f[1] = x[0] - x[4]; |
0a8a0368 |
534 | f[2] = x26 + IDCT_K[0] * x[2]; |
535 | f[3] = x26 + IDCT_K[1] * x[6]; |
536 | |
537 | e[0] = x1357 + x15 + IDCT_K[4] * x[1] + x17; |
538 | e[1] = x1357 + x37 + IDCT_K[6] * x[3] + x35; |
539 | e[2] = x1357 + x15 + IDCT_K[5] * x[5] + x35; |
540 | e[3] = x1357 + x37 + IDCT_K[7] * x[7] + x17; |
541 | |
542 | *dst = f[0] + f[2] + e[0]; |
543 | dst += stride; |
544 | *dst = f[1] + f[3] + e[1]; |
545 | dst += stride; |
546 | *dst = f[1] - f[3] + e[2]; |
547 | dst += stride; |
548 | *dst = f[0] - f[2] + e[3]; |
549 | dst += stride; |
550 | *dst = f[0] - f[2] - e[3]; |
551 | dst += stride; |
552 | *dst = f[1] - f[3] - e[2]; |
553 | dst += stride; |
554 | *dst = f[1] + f[3] - e[1]; |
555 | dst += stride; |
556 | *dst = f[0] + f[2] - e[0]; |
d9e74a6f |
557 | } |
558 | |
559 | static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) |
560 | { |
561 | float x[8]; |
562 | float block[SUBBLOCK_SIZE]; |
563 | unsigned int i, j; |
564 | |
565 | /* idct 1d on rows (+transposition) */ |
0a8a0368 |
566 | for (i = 0; i < 8; ++i) { |
d9e74a6f |
567 | for (j = 0; j < 8; ++j) |
0a8a0368 |
568 | x[j] = (float)src[i * 8 + j]; |
d9e74a6f |
569 | |
570 | InverseDCT1D(x, &block[i], 8); |
571 | } |
572 | |
573 | /* idct 1d on columns (thanks to previous transposition) */ |
0a8a0368 |
574 | for (i = 0; i < 8; ++i) { |
575 | InverseDCT1D(&block[i * 8], x, 1); |
d9e74a6f |
576 | |
577 | /* C4 = 1 normalization implies a division by 8 */ |
578 | for (j = 0; j < 8; ++j) |
0a8a0368 |
579 | dst[i + j * 8] = (int16_t)x[j] >> 3; |
d9e74a6f |
580 | } |
581 | } |
582 | |
583 | static void RescaleYSubBlock(int16_t *dst, const int16_t *src) |
584 | { |
585 | unsigned int i; |
586 | |
587 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
d9e74a6f |
588 | dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; |
d9e74a6f |
589 | } |
590 | |
591 | static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) |
592 | { |
593 | unsigned int i; |
594 | |
595 | for (i = 0; i < SUBBLOCK_SIZE; ++i) |
d9e74a6f |
596 | dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; |
d9e74a6f |
597 | } |
598 | |
599 | |
600 | |
/* FIXME: assumes the expansion pack is present.
 * Every rdram access below is wrapped into a 0x800000-byte (8 MiB) window. */
#define MEMMASK (0x800000 - 1)
603 | |
604 | static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count) |
605 | { |
0a8a0368 |
606 | while (count != 0) { |
d9e74a6f |
607 | uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK]; |
608 | s <<= 8; |
609 | s |= rsp.RDRAM[((address++)^S8) & MEMMASK]; |
610 | |
611 | *(dst++) = s; |
612 | |
613 | --count; |
614 | } |
615 | } |
616 | |
617 | static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count) |
618 | { |
0a8a0368 |
619 | while (count != 0) { |
d9e74a6f |
620 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
621 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
622 | |
623 | --count; |
624 | } |
625 | } |
626 | |
627 | static uint32_t rdram_read_u32(uint32_t address) |
628 | { |
0a8a0368 |
629 | uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
630 | r <<= 8; |
631 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
632 | r <<= 8; |
633 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
634 | r <<= 8; |
d9e74a6f |
635 | r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; |
636 | |
637 | return r; |
638 | } |
639 | |
640 | static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count) |
641 | { |
0a8a0368 |
642 | while (count != 0) { |
d9e74a6f |
643 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24); |
644 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16); |
645 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8); |
646 | rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff); |
647 | |
648 | --count; |
649 | } |
650 | } |
651 | |