gpu_unai: fix inline asm statements
[pcsx_rearmed.git] / libpcsxcore / mdec.c
CommitLineData
ef79bbde
P
1/***************************************************************************
2 * Copyright (C) 2010 Gabriele Gorla *
3 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
20
21#include "mdec.h"
22
/*
 * Pacing of the MDEC output DMA.
 *
 * Memory speed is modelled as one byte per MDEC_BIAS PSX clock cycles,
 * which gives a throughput of (PSXCLK / MDEC_BIAS) bytes per second:
 *   MDEC_BIAS = 2.0 => ~16 MB/s
 *   MDEC_BIAS = 3.0 => ~11 MB/s
 *   and so on.
 *
 * Bandwidth needed by full-screen video, guessing 50 images at 50Hz
 * (it could be only 25 images):
 *   320x240x24 @ 50Hz => 11.52  MB/s
 *   320x240x24 @ 60Hz => 13.824 MB/s
 *   320x240x16 @ 50Hz =>  7.68  MB/s
 *   320x240x16 @ 60Hz =>  9.216 MB/s
 * so any value from 2.0 to 4.0 should be fine.
 */
#define MDEC_BIAS 2.0f
36
ef79bbde
P
/* A block is DSIZE x DSIZE coefficients, stored row by row. */
#define DSIZE 8
#define DSIZE2 (DSIZE * DSIZE)

/* Right shift by n bits: SCALE truncates, SCALER adds half an LSB first. */
#define SCALE(x, n) ((x) >> (n))
#define SCALER(x, n) (((x) + ((1 << (n)) >> 1)) >> (n))

/* Number of fractional bits kept in the IDCT constants and prescale table. */
#define AAN_CONST_BITS 12
#define AAN_PRESCALE_BITS 16

/* The literal IDCT constants below carry AAN_CONST_SIZE fractional bits. */
#define AAN_CONST_SIZE 24
#define AAN_CONST_SCALE (AAN_CONST_SIZE - AAN_CONST_BITS)

/* The aanscales[] table entries carry AAN_PRESCALE_SIZE fractional bits. */
#define AAN_PRESCALE_SIZE 20
#define AAN_PRESCALE_SCALE (AAN_PRESCALE_SIZE-AAN_PRESCALE_BITS)
#define AAN_EXTRA 12

/* IDCT multipliers, reduced to AAN_CONST_BITS fractional bits. */
#define FIX_1_082392200 SCALER(18159528, AAN_CONST_SCALE) // B6
#define FIX_1_414213562 SCALER(23726566, AAN_CONST_SCALE) // A4
#define FIX_1_847759065 SCALER(31000253, AAN_CONST_SCALE) // A2
#define FIX_2_613125930 SCALER(43840978, AAN_CONST_SCALE) // B2

/* Fixed-point multiply of var by one of the FIX_* constants. */
#define MULS(var, c) (SCALE((var) * (c), AAN_CONST_BITS))

/*
 * Fields of one 16-bit run-length word: the upper 6 bits (RLE_RUN) and the
 * lower 10 bits taken as a signed value (RLE_VAL).
 *
 * RLE_VAL sign-extends with mask/xor/subtract instead of shifting the value
 * into the sign bit of an int, which is undefined behaviour in C.
 */
#define RLE_RUN(a) ((a) >> 10)
#define RLE_VAL(a) ((((int)(a) & 0x3ff) ^ 0x200) - 0x200)
62
#if 0
/* Debug helper (compiled out): print DSIZE2 bytes, eight values per line. */
static void printmatrixu8(u8 *m) {
	int idx;

	for (idx = 0; idx < DSIZE2; idx++) {
		printf("%3d ", m[idx]);
		if (idx % 8 == 7)
			printf("\n");
	}
}
#endif
72
73static inline void fillcol(int *blk, int val) {
74 blk[0 * DSIZE] = blk[1 * DSIZE] = blk[2 * DSIZE] = blk[3 * DSIZE]
75 = blk[4 * DSIZE] = blk[5 * DSIZE] = blk[6 * DSIZE] = blk[7 * DSIZE] = val;
76}
77
/* Store val into the eight consecutive ints starting at blk. */
static inline void fillrow(int *blk, int val) {
	int col;

	for (col = 0; col < 8; col++)
		blk[col] = val;
}
82
528ad661 83static void idct(int *block,int used_col) {
ef79bbde
P
84 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
85 int z5, z10, z11, z12, z13;
86 int *ptr;
87 int i;
88
89 // the block has only the DC coefficient
90 if (used_col == -1) {
91 int v = block[0];
92 for (i = 0; i < DSIZE2; i++) block[i] = v;
93 return;
94 }
95
96 // last_col keeps track of the highest column with non zero coefficients
97 ptr = block;
98 for (i = 0; i < DSIZE; i++, ptr++) {
99 if ((used_col & (1 << i)) == 0) {
100 // the column is empty or has only the DC coefficient
101 if (ptr[DSIZE * 0]) {
102 fillcol(ptr, ptr[0]);
103 used_col |= (1 << i);
104 }
105 continue;
106 }
107
108 // further optimization could be made by keeping track of
109 // last_row in rl2blk
110 z10 = ptr[DSIZE * 0] + ptr[DSIZE * 4]; // s04
111 z11 = ptr[DSIZE * 0] - ptr[DSIZE * 4]; // d04
112 z13 = ptr[DSIZE * 2] + ptr[DSIZE * 6]; // s26
113 z12 = MULS(ptr[DSIZE * 2] - ptr[DSIZE * 6], FIX_1_414213562) - z13;
114 //^^^^ d26=d26*2*A4-s26
115
116 tmp0 = z10 + z13; // os07 = s04 + s26
117 tmp3 = z10 - z13; // os34 = s04 - s26
118 tmp1 = z11 + z12; // os16 = d04 + d26
119 tmp2 = z11 - z12; // os25 = d04 - d26
120
121 z13 = ptr[DSIZE * 3] + ptr[DSIZE * 5]; //s53
122 z10 = ptr[DSIZE * 3] - ptr[DSIZE * 5]; //-d53
123 z11 = ptr[DSIZE * 1] + ptr[DSIZE * 7]; //s17
124 z12 = ptr[DSIZE * 1] - ptr[DSIZE * 7]; //d17
125
126 tmp7 = z11 + z13; // od07 = s17 + s53
127
128 z5 = (z12 - z10) * (FIX_1_847759065);
129 tmp6 = SCALE(z10*(FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
130 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
131 tmp4 = SCALE(z12*(FIX_1_082392200) - z5, AAN_CONST_BITS) + tmp5;
132
133 // path #1
134 //z5 = (z12 - z10)* FIX_1_847759065;
135 // tmp0 = (d17 + d53) * 2*A2
136
137 //tmp6 = DESCALE(z10*FIX_2_613125930 + z5, CONST_BITS) - tmp7;
138 // od16 = (d53*-2*B2 + tmp0) - od07
139
140 //tmp4 = DESCALE(z12*FIX_1_082392200 - z5, CONST_BITS) + tmp5;
141 // od34 = (d17*2*B6 - tmp0) + od25
142
143 // path #2
144
145 // od34 = d17*2*(B6-A2) - d53*2*A2
146 // od16 = d53*2*(A2-B2) + d17*2*A2
147
148 // end
149
150 // tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
151 // od25 = (s17 - s53)*2*A4 - od16
152
153 ptr[DSIZE * 0] = (tmp0 + tmp7); // os07 + od07
154 ptr[DSIZE * 7] = (tmp0 - tmp7); // os07 - od07
155 ptr[DSIZE * 1] = (tmp1 + tmp6); // os16 + od16
156 ptr[DSIZE * 6] = (tmp1 - tmp6); // os16 - od16
157 ptr[DSIZE * 2] = (tmp2 + tmp5); // os25 + od25
158 ptr[DSIZE * 5] = (tmp2 - tmp5); // os25 - od25
159 ptr[DSIZE * 4] = (tmp3 + tmp4); // os34 + od34
160 ptr[DSIZE * 3] = (tmp3 - tmp4); // os34 - od34
161 }
162
163 ptr = block;
164 if (used_col == 1) {
165 for (i = 0; i < DSIZE; i++)
166 fillrow(block + DSIZE * i, block[DSIZE * i]);
167 } else {
168 for (i = 0; i < DSIZE; i++, ptr += DSIZE) {
169 z10 = ptr[0] + ptr[4];
170 z11 = ptr[0] - ptr[4];
171 z13 = ptr[2] + ptr[6];
172 z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
173
174 tmp0 = z10 + z13;
175 tmp3 = z10 - z13;
176 tmp1 = z11 + z12;
177 tmp2 = z11 - z12;
178
179 z13 = ptr[3] + ptr[5];
180 z10 = ptr[3] - ptr[5];
181 z11 = ptr[1] + ptr[7];
182 z12 = ptr[1] - ptr[7];
183
184 tmp7 = z11 + z13;
185 z5 = (z12 - z10) * FIX_1_847759065;
186 tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
187 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
188 tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
189
190 ptr[0] = tmp0 + tmp7;
191
192 ptr[7] = tmp0 - tmp7;
193 ptr[1] = tmp1 + tmp6;
194 ptr[6] = tmp1 - tmp6;
195 ptr[2] = tmp2 + tmp5;
196 ptr[5] = tmp2 - tmp5;
197 ptr[4] = tmp3 + tmp4;
198 ptr[3] = tmp3 - tmp4;
199 }
200 }
201}
202
// mdec0: command register
#define MDEC0_STP       0x02000000 // 15-bit pixels are written with bit 0x8000 set
#define MDEC0_RGB24     0x08000000 // when set, psxDma1 produces 16-bit (2 bytes/px) blocks
#define MDEC0_SIZE_MASK 0x0000FFFF

// mdec1: status register
#define MDEC1_BUSY      0x20000000 // set by a decode command, cleared in mdec1Interrupt
#define MDEC1_DREQ      0x18000000
#define MDEC1_FIFO      0xc0000000
#define MDEC1_RGB24     0x02000000
#define MDEC1_STP       0x00800000 // set in psxDma0, cleared in mdec1Interrupt
#define MDEC1_RESET     0x80000000 // writing this bit to mdec1 resets the decoder
215
528ad661 216struct _pending_dma1 {
217 u32 adr;
218 u32 bcr;
219 u32 chcr;
220};
221
c62b43c9 222static struct {
528ad661 223 u32 reg0;
224 u32 reg1;
225 u16 * rl;
226 u16 * rl_end;
227 u8 * block_buffer_pos;
228 u8 block_buffer[16*16*3];
229 struct _pending_dma1 pending_dma1;
ef79bbde
P
230} mdec;
231
232static int iq_y[DSIZE2], iq_uv[DSIZE2];
233
234static int zscan[DSIZE2] = {
235 0 , 1 , 8 , 16, 9 , 2 , 3 , 10,
236 17, 24, 32, 25, 18, 11, 4 , 5 ,
237 12, 19, 26, 33, 40, 48, 41, 34,
238 27, 20, 13, 6 , 7 , 14, 21, 28,
239 35, 42, 49, 56, 57, 50, 43, 36,
240 29, 22, 15, 23, 30, 37, 44, 51,
241 58, 59, 52, 45, 38, 31, 39, 46,
242 53, 60, 61, 54, 47, 55, 62, 63
243};
244
245static int aanscales[DSIZE2] = {
246 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
247 1454417, 2017334, 1900287, 1710213, 1454417, 1142728, 787125, 401273,
248 1370031, 1900287, 1790031, 1610986, 1370031, 1076426, 741455, 377991,
249 1232995, 1710213, 1610986, 1449849, 1232995, 968758, 667292, 340183,
250 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
251 823861, 1142728, 1076426, 968758, 823861, 647303, 445870, 227303,
252 567485, 787125, 741455, 667292, 567485, 445870, 307121, 156569,
253 289301, 401273, 377991, 340183, 289301, 227303, 156569, 79818
254};
255
256static void iqtab_init(int *iqtab, unsigned char *iq_y) {
257 int i;
258
259 for (i = 0; i < DSIZE2; i++) {
260 iqtab[i] = (iq_y[i] * SCALER(aanscales[zscan[i]], AAN_PRESCALE_SCALE));
261 }
262}
263
264#define MDEC_END_OF_DATA 0xfe00
265
528ad661 266static unsigned short *rl2blk(int *blk, unsigned short *mdec_rl) {
ef79bbde
P
267 int i, k, q_scale, rl, used_col;
268 int *iqtab;
269
270 memset(blk, 0, 6 * DSIZE2 * sizeof(int));
271 iqtab = iq_uv;
272 for (i = 0; i < 6; i++) {
273 // decode blocks (Cr,Cb,Y1,Y2,Y3,Y4)
274 if (i == 2) iqtab = iq_y;
275
276 rl = SWAP16(*mdec_rl); mdec_rl++;
277 q_scale = RLE_RUN(rl);
278 blk[0] = SCALER(iqtab[0] * RLE_VAL(rl), AAN_EXTRA - 3);
279 for (k = 0, used_col = 0;;) {
280 rl = SWAP16(*mdec_rl); mdec_rl++;
281 if (rl == MDEC_END_OF_DATA) break;
282 k += RLE_RUN(rl) + 1; // skip zero-coefficients
283
284 if (k > 63) {
285 // printf("run lenght exceeded 64 enties\n");
286 break;
287 }
288
289 // zigzag transformation
290 blk[zscan[k]] = SCALER(RLE_VAL(rl) * iqtab[k] * q_scale, AAN_EXTRA);
291 // keep track of used columns to speed up the idtc
292 used_col |= (zscan[k] > 7) ? 1 << (zscan[k] & 7) : 0;
293 }
294
295 if (k == 0) used_col = -1;
296 // used_col is -1 for blocks with only the DC coefficient
297 // any other value is a bitmask of the columns that have
298 // at least one non zero cofficient in the rows 1-7
299 // single coefficients in row 0 are treted specially
300 // in the idtc function
301 idct(blk, used_col);
302 blk += DSIZE2;
303 }
304 return mdec_rl;
305}
306
// full scale (JPEG)
// Y/Cb/Cr[0...255] -> R/G/B[0...255]
// R = 1.000 * (Y) + 1.400 * (Cr - 128)
// G = 1.000 * (Y) - 0.343 * (Cb - 128) - 0.711 * (Cr - 128)
// B = 1.000 * (Y) + 1.765 * (Cb - 128)
//
// The factors below are the coefficients above multiplied by 1024.
#define MULR(a) ((1434 * (a)))
#define MULB(a) ((1807 * (a)))
#define MULG2(a, b) ((-351 * (a) - 728 * (b)))
// Luma can be negative, so multiply rather than left-shift: shifting a
// negative int left is undefined behaviour.
#define MULY(a) ((a) * 1024)

// Pack three 5-bit channels plus the mask bit a into one 16-bit pixel.
#define MAKERGB15(r, g, b, a) (SWAP16((a) | ((b) << 10) | ((g) << 5) | (r)))
// Rounding shifts that reduce a MULY-scaled sum to an 8-bit or 5-bit channel.
#define SCALE8(c) SCALER(c, 20)
#define SCALE5(c) SCALER(c, 23)

// Add the mid-level offset (16 or 128) and saturate to 0..31 / 0..255.
#define CLAMP5(c) ( ((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)) )
#define CLAMP8(c) ( ((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)) )

#define CLAMP_SCALE8(a) (CLAMP8(SCALE8(a)))
#define CLAMP_SCALE5(a) (CLAMP5(SCALE5(a)))
326
528ad661 327static inline void putlinebw15(u16 *image, int *Yblk) {
ef79bbde
P
328 int i;
329 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
330
331 for (i = 0; i < 8; i++, Yblk++) {
332 int Y = *Yblk;
333 // missing rounding
334 image[i] = SWAP16((CLAMP5(Y >> 3) * 0x421) | A);
335 }
336}
337
528ad661 338static inline void putquadrgb15(u16 *image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
339 int Y, R, G, B;
340 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
341 R = MULR(Cr);
342 G = MULG2(Cb, Cr);
343 B = MULB(Cb);
344
345 // added transparency
346 Y = MULY(Yblk[0]);
347 image[0] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
348 Y = MULY(Yblk[1]);
349 image[1] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
350 Y = MULY(Yblk[8]);
351 image[16] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
352 Y = MULY(Yblk[9]);
353 image[17] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
354}
355
528ad661 356static inline void yuv2rgb15(int *blk, unsigned short *image) {
ef79bbde
P
357 int x, y;
358 int *Yblk = blk + DSIZE2 * 2;
359 int *Crblk = blk;
360 int *Cbblk = blk + DSIZE2;
361
362 if (!Config.Mdec) {
363 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24) {
364 if (y == 8) Yblk += DSIZE2;
365 for (x = 0; x < 4; x++, image += 2, Crblk++, Cbblk++, Yblk += 2) {
366 putquadrgb15(image, Yblk, *Crblk, *Cbblk);
367 putquadrgb15(image + 8, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
368 }
369 }
370 } else {
371 for (y = 0; y < 16; y++, Yblk += 8, image += 16) {
372 if (y == 8) Yblk += DSIZE2;
373 putlinebw15(image, Yblk);
374 putlinebw15(image + 8, Yblk + DSIZE2);
375 }
376 }
377}
378
528ad661 379static inline void putlinebw24(u8 * image, int *Yblk) {
ef79bbde
P
380 int i;
381 unsigned char Y;
382 for (i = 0; i < 8 * 3; i += 3, Yblk++) {
383 Y = CLAMP8(*Yblk);
384 image[i + 0] = Y;
385 image[i + 1] = Y;
386 image[i + 2] = Y;
387 }
388}
389
528ad661 390static inline void putquadrgb24(u8 * image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
391 int Y, R, G, B;
392
393 R = MULR(Cr);
394 G = MULG2(Cb,Cr);
395 B = MULB(Cb);
396
397 Y = MULY(Yblk[0]);
398 image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
399 image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
400 image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
401 Y = MULY(Yblk[1]);
402 image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
403 image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
404 image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
405 Y = MULY(Yblk[8]);
406 image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
407 image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
408 image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
409 Y = MULY(Yblk[9]);
410 image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
411 image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
412 image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
413}
414
528ad661 415static void yuv2rgb24(int *blk, u8 *image) {
ef79bbde
P
416 int x, y;
417 int *Yblk = blk + DSIZE2 * 2;
418 int *Crblk = blk;
419 int *Cbblk = blk + DSIZE2;
420
421 if (!Config.Mdec) {
528ad661 422 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 8 * 3 * 3) {
ef79bbde
P
423 if (y == 8) Yblk += DSIZE2;
424 for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2) {
425 putquadrgb24(image, Yblk, *Crblk, *Cbblk);
426 putquadrgb24(image + 8 * 3, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
427 }
428 }
429 } else {
430 for (y = 0; y < 16; y++, Yblk += 8, image += 16 * 3) {
431 if (y == 8) Yblk += DSIZE2;
432 putlinebw24(image, Yblk);
433 putlinebw24(image + 8 * 3, Yblk + DSIZE2);
434 }
435 }
436}
437
438void mdecInit(void) {
c62b43c9 439 memset(&mdec, 0, sizeof(mdec));
440 memset(iq_y, 0, sizeof(iq_y));
441 memset(iq_uv, 0, sizeof(iq_uv));
ef79bbde 442 mdec.rl = (u16 *)&psxM[0x100000];
ef79bbde
P
443}
444
445// command register
446void mdecWrite0(u32 data) {
ef79bbde
P
447 mdec.reg0 = data;
448}
449
450u32 mdecRead0(void) {
ef79bbde
P
451 return mdec.reg0;
452}
453
454// status register
455void mdecWrite1(u32 data) {
ef79bbde
P
456 if (data & MDEC1_RESET) { // mdec reset
457 mdec.reg0 = 0;
458 mdec.reg1 = 0;
528ad661 459 mdec.pending_dma1.adr = 0;
460 mdec.block_buffer_pos = 0;
ef79bbde
P
461 }
462}
463
464u32 mdecRead1(void) {
465 u32 v = mdec.reg1;
ef79bbde
P
466 return v;
467}
468
469void psxDma0(u32 adr, u32 bcr, u32 chcr) {
470 int cmd = mdec.reg0;
471 int size;
ef79bbde
P
472
473 if (chcr != 0x01000201) {
ef79bbde
P
474 return;
475 }
476
528ad661 477 /* mdec is STP till dma0 is released */
478 mdec.reg1 |= MDEC1_STP;
479
ef79bbde
P
480 size = (bcr >> 16) * (bcr & 0xffff);
481
482 switch (cmd >> 28) {
483 case 0x3: // decode
528ad661 484 mdec.rl = (u16 *) PSXM(adr);
485 /* now the mdec is busy till all data are decoded */
486 mdec.reg1 |= MDEC1_BUSY;
487 /* detect the end of decoding */
488 mdec.rl_end = mdec.rl + (size * 2);
489
490 /* sanity check */
491 if(mdec.rl_end <= mdec.rl) {
492 MDECINDMA_INT( size / 4 );
493 return;
494 }
495
496 /* process the pending dma1 */
497 if(mdec.pending_dma1.adr){
498 psxDma1(mdec.pending_dma1.adr, mdec.pending_dma1.bcr, mdec.pending_dma1.chcr);
499 }
500 mdec.pending_dma1.adr = 0;
501 return;
502
ef79bbde
P
503
504 case 0x4: // quantization table upload
505 {
506 u8 *p = (u8 *)PSXM(adr);
507 // printf("uploading new quantization table\n");
508 // printmatrixu8(p);
509 // printmatrixu8(p + 64);
510 iqtab_init(iq_y, p);
511 iqtab_init(iq_uv, p + 64);
512 }
528ad661 513
514 MDECINDMA_INT( size / 4 );
515 return;
ef79bbde
P
516
517 case 0x6: // cosine table
518 // printf("mdec cosine table\n");
528ad661 519
520 MDECINDMA_INT( size / 4 );
521 return;
ef79bbde
P
522
523 default:
524 // printf("mdec unknown command\n");
525 break;
526 }
527
528 HW_DMA0_CHCR &= SWAP32(~0x01000000);
529 DMA_INTERRUPT(0);
530}
531
528ad661 532void mdec0Interrupt()
533{
ad418c19 534 if (HW_DMA0_CHCR & SWAP32(0x01000000))
535 {
536 HW_DMA0_CHCR &= SWAP32(~0x01000000);
537 DMA_INTERRUPT(0);
538 }
528ad661 539}
540
/* Bytes in one decoded 16x16 pixel block at 3 and at 2 bytes per pixel. */
#define SIZE_OF_24B_BLOCK (16*16*3)
#define SIZE_OF_16B_BLOCK (16*16*2)
543
ef79bbde
P
544void psxDma1(u32 adr, u32 bcr, u32 chcr) {
545 int blk[DSIZE2 * 6];
528ad661 546 u8 * image;
ef79bbde 547 int size;
528ad661 548 int dmacnt;
ef79bbde
P
549
550 if (chcr != 0x01000200) return;
551
552 size = (bcr >> 16) * (bcr & 0xffff);
528ad661 553 /* size in byte */
554 size *= 4;
555 /* I guess the memory speed is limitating */
556 dmacnt = size;
557
558 if (!(mdec.reg1 & MDEC1_BUSY)) {
559 /* add to pending */
560 mdec.pending_dma1.adr = adr;
561 mdec.pending_dma1.bcr = bcr;
562 mdec.pending_dma1.chcr = chcr;
563 /* do not free the dma */
564 } else {
ef79bbde 565
528ad661 566 image = (u8 *)PSXM(adr);
567
568 if (mdec.reg0 & MDEC0_RGB24) {
569 /* 16 bits decoding
570 * block are 16 px * 16 px, each px are 2 byte
571 */
572
573 /* there is some partial block pending ? */
574 if(mdec.block_buffer_pos != 0) {
575 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_16B_BLOCK;
576 /* TODO: check if partial block do not larger than size */
577 memcpy(image, mdec.block_buffer_pos, n);
578 image += n;
579 size -= n;
580 mdec.block_buffer_pos = 0;
581 }
ef79bbde 582
528ad661 583 while(size >= SIZE_OF_16B_BLOCK) {
ef79bbde 584 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 585 yuv2rgb15(blk, (u16 *)image);
586 image += SIZE_OF_16B_BLOCK;
587 size -= SIZE_OF_16B_BLOCK;
ef79bbde 588 }
528ad661 589
590 if(size != 0) {
ef79bbde 591 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 592 yuv2rgb15(blk, (u16 *)mdec.block_buffer);
593 memcpy(image, mdec.block_buffer, size);
594 mdec.block_buffer_pos = mdec.block_buffer + size;
595 }
596
597 } else {
598 /* 24 bits decoding
599 * block are 16 px * 16 px, each px are 3 byte
600 */
601
602 /* there is some partial block pending ? */
603 if(mdec.block_buffer_pos != 0) {
604 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_24B_BLOCK;
605 /* TODO: check if partial block do not larger than size */
606 memcpy(image, mdec.block_buffer_pos, n);
607 image += n;
608 size -= n;
609 mdec.block_buffer_pos = 0;
610 }
611
612 while(size >= SIZE_OF_24B_BLOCK) {
613 mdec.rl = rl2blk(blk, mdec.rl);
614 yuv2rgb24(blk, image);
615 image += SIZE_OF_24B_BLOCK;
616 size -= SIZE_OF_24B_BLOCK;
ef79bbde 617 }
ef79bbde 618
528ad661 619 if(size != 0) {
620 mdec.rl = rl2blk(blk, mdec.rl);
621 yuv2rgb24(blk, mdec.block_buffer);
622 memcpy(image, mdec.block_buffer, size);
623 mdec.block_buffer_pos = mdec.block_buffer + size;
624 }
625 }
626
627 /* define the power of mdec */
628 MDECOUTDMA_INT((int) ((dmacnt* MDEC_BIAS)));
629 }
ef79bbde
P
630}
631
632void mdec1Interrupt() {
528ad661 633 /* Author : gschwind
634 *
635 * in that case we have done all decoding stuff
636 * Note that : each block end with 0xfe00 flags
637 * the list of blocks end with the same 0xfe00 flags
638 * data loock like :
639 *
640 * data block ...
641 * 0xfe00
642 * data block ...
643 * 0xfe00
644 * a lost of block ..
645 *
646 * 0xfe00
647 * the last block
648 * 0xfe00
649 * 0xfe00
650 *
651 * OR
652 *
653 * if the 0xfe00 is not present the data size is important.
654 *
655 */
656
ad418c19 657 /* MDEC_END_OF_DATA avoids read outside memory */
658 if (mdec.rl >= mdec.rl_end || SWAP16(*(mdec.rl)) == MDEC_END_OF_DATA) {
659 mdec.reg1 &= ~(MDEC1_STP|MDEC1_BUSY);
660 if (HW_DMA0_CHCR & SWAP32(0x01000000))
661 {
662 HW_DMA0_CHCR &= SWAP32(~0x01000000);
663 DMA_INTERRUPT(0);
664 }
ef79bbde 665 }
528ad661 666
ad418c19 667 if (HW_DMA1_CHCR & SWAP32(0x01000000))
668 {
669 HW_DMA1_CHCR &= SWAP32(~0x01000000);
670 DMA_INTERRUPT(1);
671 }
ef79bbde
P
672}
673
674int mdecFreeze(gzFile f, int Mode) {
675 gzfreeze(&mdec, sizeof(mdec));
676 gzfreeze(iq_y, sizeof(iq_y));
677 gzfreeze(iq_uv, sizeof(iq_uv));
678
679 return 0;
680}