update gpu DMA related code to pcsxr-svn Nov 28
[pcsx_rearmed.git] / libpcsxcore / mdec.c
CommitLineData
ef79bbde
P
1/***************************************************************************
2 * Copyright (C) 2010 Gabriele Gorla *
3 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
20
21#include "mdec.h"
22
/*
 * MDEC output throttling.
 *
 * Memory speed is modelled as one byte per MDEC_BIAS PSX clock cycles,
 * which gives a throughput of (PSXCLK / MDEC_BIAS) bytes per second:
 *   MDEC_BIAS = 2.0 => ~16 MB/s
 *   MDEC_BIAS = 3.0 => ~11 MB/s
 *   and so on ...
 *
 * Bandwidth needed, assuming 50 images per second at 50Hz (it could
 * actually be 25 images per second):
 *   320x240x24@50Hz => 11.52  MB/s
 *   320x240x24@60Hz => 13.824 MB/s
 *   320x240x16@50Hz =>  7.68  MB/s
 *   320x240x16@60Hz =>  9.216 MB/s
 * so any value from 2.0 to 4.0 should be fine.
 */
#define MDEC_BIAS 2.0f
36
ef79bbde
P
/* Blocks are DSIZE x DSIZE coefficients. */
#define DSIZE			8
#define DSIZE2			(DSIZE * DSIZE)

/* Fixed-point down-scaling: SCALE truncates, SCALER rounds to nearest. */
#define SCALE(x, n)		((x) >> (n))
#define SCALER(x, n)	(((x) + ((1 << (n)) >> 1)) >> (n))

#define AAN_CONST_BITS		12
#define AAN_PRESCALE_BITS	16

#define AAN_CONST_SIZE		24
#define AAN_CONST_SCALE		(AAN_CONST_SIZE - AAN_CONST_BITS)

#define AAN_PRESCALE_SIZE	20
#define AAN_PRESCALE_SCALE	(AAN_PRESCALE_SIZE-AAN_PRESCALE_BITS)
#define AAN_EXTRA			12

/* IDCT constants, given with AAN_CONST_SIZE fractional bits and rounded
 * down to AAN_CONST_BITS fractional bits. */
#define FIX_1_082392200		SCALER(18159528, AAN_CONST_SCALE) // B6
#define FIX_1_414213562		SCALER(23726566, AAN_CONST_SCALE) // A4
#define FIX_1_847759065		SCALER(31000253, AAN_CONST_SCALE) // A2
#define FIX_2_613125930		SCALER(43840978, AAN_CONST_SCALE) // B2

/* Multiply by one of the FIX_* constants and drop the fractional bits. */
#define MULS(var, const)	(SCALE((var) * (const), AAN_CONST_BITS))

/*
 * A run-length word packs a 6-bit field in bits 15..10 and a signed
 * 10-bit value in bits 9..0.
 *
 * RLE_VAL sign-extends the low 10 bits with mask/xor/subtract.  The
 * previous "shift left, then shift right" form shifted a signed int
 * into (or past) its sign bit, which is undefined behaviour in C.
 * The result is the same value in the range -512..511.
 */
#define RLE_RUN(a)	((a) >> 10)
#define RLE_VAL(a)	((((int)((a) & 0x3ff)) ^ 0x200) - 0x200)
62
#if 0
/* Debug helper (compiled out): print the DSIZE2 bytes at `m` as decimal
 * numbers, eight per output line. */
static void printmatrixu8(u8 *m) {
	int idx = 0;

	while (idx < DSIZE2) {
		printf("%3d ",m[idx]);
		idx++;
		if (idx % 8 == 0)
			printf("\n");
	}
}
#endif
72
73static inline void fillcol(int *blk, int val) {
74 blk[0 * DSIZE] = blk[1 * DSIZE] = blk[2 * DSIZE] = blk[3 * DSIZE]
75 = blk[4 * DSIZE] = blk[5 * DSIZE] = blk[6 * DSIZE] = blk[7 * DSIZE] = val;
76}
77
/* Store `val` in the eight consecutive ints starting at `blk`. */
static inline void fillrow(int *blk, int val) {
	int col;

	for (col = 0; col < 8; col++)
		blk[col] = val;
}
82
528ad661 83static void idct(int *block,int used_col) {
ef79bbde
P
84 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
85 int z5, z10, z11, z12, z13;
86 int *ptr;
87 int i;
88
89 // the block has only the DC coefficient
90 if (used_col == -1) {
91 int v = block[0];
92 for (i = 0; i < DSIZE2; i++) block[i] = v;
93 return;
94 }
95
96 // last_col keeps track of the highest column with non zero coefficients
97 ptr = block;
98 for (i = 0; i < DSIZE; i++, ptr++) {
99 if ((used_col & (1 << i)) == 0) {
100 // the column is empty or has only the DC coefficient
101 if (ptr[DSIZE * 0]) {
102 fillcol(ptr, ptr[0]);
103 used_col |= (1 << i);
104 }
105 continue;
106 }
107
108 // further optimization could be made by keeping track of
109 // last_row in rl2blk
110 z10 = ptr[DSIZE * 0] + ptr[DSIZE * 4]; // s04
111 z11 = ptr[DSIZE * 0] - ptr[DSIZE * 4]; // d04
112 z13 = ptr[DSIZE * 2] + ptr[DSIZE * 6]; // s26
113 z12 = MULS(ptr[DSIZE * 2] - ptr[DSIZE * 6], FIX_1_414213562) - z13;
114 //^^^^ d26=d26*2*A4-s26
115
116 tmp0 = z10 + z13; // os07 = s04 + s26
117 tmp3 = z10 - z13; // os34 = s04 - s26
118 tmp1 = z11 + z12; // os16 = d04 + d26
119 tmp2 = z11 - z12; // os25 = d04 - d26
120
121 z13 = ptr[DSIZE * 3] + ptr[DSIZE * 5]; //s53
122 z10 = ptr[DSIZE * 3] - ptr[DSIZE * 5]; //-d53
123 z11 = ptr[DSIZE * 1] + ptr[DSIZE * 7]; //s17
124 z12 = ptr[DSIZE * 1] - ptr[DSIZE * 7]; //d17
125
126 tmp7 = z11 + z13; // od07 = s17 + s53
127
128 z5 = (z12 - z10) * (FIX_1_847759065);
129 tmp6 = SCALE(z10*(FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
130 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
131 tmp4 = SCALE(z12*(FIX_1_082392200) - z5, AAN_CONST_BITS) + tmp5;
132
133 // path #1
134 //z5 = (z12 - z10)* FIX_1_847759065;
135 // tmp0 = (d17 + d53) * 2*A2
136
137 //tmp6 = DESCALE(z10*FIX_2_613125930 + z5, CONST_BITS) - tmp7;
138 // od16 = (d53*-2*B2 + tmp0) - od07
139
140 //tmp4 = DESCALE(z12*FIX_1_082392200 - z5, CONST_BITS) + tmp5;
141 // od34 = (d17*2*B6 - tmp0) + od25
142
143 // path #2
144
145 // od34 = d17*2*(B6-A2) - d53*2*A2
146 // od16 = d53*2*(A2-B2) + d17*2*A2
147
148 // end
149
150 // tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
151 // od25 = (s17 - s53)*2*A4 - od16
152
153 ptr[DSIZE * 0] = (tmp0 + tmp7); // os07 + od07
154 ptr[DSIZE * 7] = (tmp0 - tmp7); // os07 - od07
155 ptr[DSIZE * 1] = (tmp1 + tmp6); // os16 + od16
156 ptr[DSIZE * 6] = (tmp1 - tmp6); // os16 - od16
157 ptr[DSIZE * 2] = (tmp2 + tmp5); // os25 + od25
158 ptr[DSIZE * 5] = (tmp2 - tmp5); // os25 - od25
159 ptr[DSIZE * 4] = (tmp3 + tmp4); // os34 + od34
160 ptr[DSIZE * 3] = (tmp3 - tmp4); // os34 - od34
161 }
162
163 ptr = block;
164 if (used_col == 1) {
165 for (i = 0; i < DSIZE; i++)
166 fillrow(block + DSIZE * i, block[DSIZE * i]);
167 } else {
168 for (i = 0; i < DSIZE; i++, ptr += DSIZE) {
169 z10 = ptr[0] + ptr[4];
170 z11 = ptr[0] - ptr[4];
171 z13 = ptr[2] + ptr[6];
172 z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
173
174 tmp0 = z10 + z13;
175 tmp3 = z10 - z13;
176 tmp1 = z11 + z12;
177 tmp2 = z11 - z12;
178
179 z13 = ptr[3] + ptr[5];
180 z10 = ptr[3] - ptr[5];
181 z11 = ptr[1] + ptr[7];
182 z12 = ptr[1] - ptr[7];
183
184 tmp7 = z11 + z13;
185 z5 = (z12 - z10) * FIX_1_847759065;
186 tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
187 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
188 tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
189
190 ptr[0] = tmp0 + tmp7;
191
192 ptr[7] = tmp0 - tmp7;
193 ptr[1] = tmp1 + tmp6;
194 ptr[6] = tmp1 - tmp6;
195 ptr[2] = tmp2 + tmp5;
196 ptr[5] = tmp2 - tmp5;
197 ptr[4] = tmp3 + tmp4;
198 ptr[3] = tmp3 - tmp4;
199 }
200 }
201}
202
// mdec0: command register
#define MDEC0_STP			0x02000000	// when set, 15-bit pixels get bit 15 (0x8000) set
#define MDEC0_RGB24			0x08000000	// NOTE: psxDma1 emits 2-byte pixels when this bit is SET
#define MDEC0_SIZE_MASK		0x0000FFFF

// mdec1: status register
#define MDEC1_BUSY			0x20000000	// set by psxDma0 (decode), cleared in mdec1Interrupt
#define MDEC1_DREQ			0x18000000
#define MDEC1_FIFO			0xc0000000
#define MDEC1_RGB24			0x02000000
#define MDEC1_STP			0x00800000	// set by psxDma0, cleared in mdec1Interrupt
#define MDEC1_RESET			0x80000000	// writing this bit to mdec1 resets the decoder state
215
528ad661 216struct _pending_dma1 {
217 u32 adr;
218 u32 bcr;
219 u32 chcr;
220};
221
ef79bbde 222struct {
528ad661 223 u32 reg0;
224 u32 reg1;
225 u16 * rl;
226 u16 * rl_end;
227 u8 * block_buffer_pos;
228 u8 block_buffer[16*16*3];
229 struct _pending_dma1 pending_dma1;
ef79bbde
P
230} mdec;
231
232static int iq_y[DSIZE2], iq_uv[DSIZE2];
233
234static int zscan[DSIZE2] = {
235 0 , 1 , 8 , 16, 9 , 2 , 3 , 10,
236 17, 24, 32, 25, 18, 11, 4 , 5 ,
237 12, 19, 26, 33, 40, 48, 41, 34,
238 27, 20, 13, 6 , 7 , 14, 21, 28,
239 35, 42, 49, 56, 57, 50, 43, 36,
240 29, 22, 15, 23, 30, 37, 44, 51,
241 58, 59, 52, 45, 38, 31, 39, 46,
242 53, 60, 61, 54, 47, 55, 62, 63
243};
244
245static int aanscales[DSIZE2] = {
246 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
247 1454417, 2017334, 1900287, 1710213, 1454417, 1142728, 787125, 401273,
248 1370031, 1900287, 1790031, 1610986, 1370031, 1076426, 741455, 377991,
249 1232995, 1710213, 1610986, 1449849, 1232995, 968758, 667292, 340183,
250 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
251 823861, 1142728, 1076426, 968758, 823861, 647303, 445870, 227303,
252 567485, 787125, 741455, 667292, 567485, 445870, 307121, 156569,
253 289301, 401273, 377991, 340183, 289301, 227303, 156569, 79818
254};
255
256static void iqtab_init(int *iqtab, unsigned char *iq_y) {
257 int i;
258
259 for (i = 0; i < DSIZE2; i++) {
260 iqtab[i] = (iq_y[i] * SCALER(aanscales[zscan[i]], AAN_PRESCALE_SCALE));
261 }
262}
263
264#define MDEC_END_OF_DATA 0xfe00
265
528ad661 266static unsigned short *rl2blk(int *blk, unsigned short *mdec_rl) {
ef79bbde
P
267 int i, k, q_scale, rl, used_col;
268 int *iqtab;
269
270 memset(blk, 0, 6 * DSIZE2 * sizeof(int));
271 iqtab = iq_uv;
272 for (i = 0; i < 6; i++) {
273 // decode blocks (Cr,Cb,Y1,Y2,Y3,Y4)
274 if (i == 2) iqtab = iq_y;
275
276 rl = SWAP16(*mdec_rl); mdec_rl++;
277 q_scale = RLE_RUN(rl);
278 blk[0] = SCALER(iqtab[0] * RLE_VAL(rl), AAN_EXTRA - 3);
279 for (k = 0, used_col = 0;;) {
280 rl = SWAP16(*mdec_rl); mdec_rl++;
281 if (rl == MDEC_END_OF_DATA) break;
282 k += RLE_RUN(rl) + 1; // skip zero-coefficients
283
284 if (k > 63) {
285 // printf("run lenght exceeded 64 enties\n");
286 break;
287 }
288
289 // zigzag transformation
290 blk[zscan[k]] = SCALER(RLE_VAL(rl) * iqtab[k] * q_scale, AAN_EXTRA);
291 // keep track of used columns to speed up the idtc
292 used_col |= (zscan[k] > 7) ? 1 << (zscan[k] & 7) : 0;
293 }
294
295 if (k == 0) used_col = -1;
296 // used_col is -1 for blocks with only the DC coefficient
297 // any other value is a bitmask of the columns that have
298 // at least one non zero cofficient in the rows 1-7
299 // single coefficients in row 0 are treted specially
300 // in the idtc function
301 idct(blk, used_col);
302 blk += DSIZE2;
303 }
304 return mdec_rl;
305}
306
// full scale (JPEG)
// Y/Cb/Cr[0...255] -> R/G/B[0...255]
// R = 1.000 * (Y) + 1.400 * (Cr - 128)
// G = 1.000 * (Y) - 0.343 * (Cb - 128) - 0.711 (Cr - 128)
// B = 1.000 * (Y) + 1.765 * (Cb - 128)
//
// The factors below are the coefficients above with 10 fractional bits.
#define MULR(a)			((1434 * (a)))
#define MULB(a)			((1807 * (a)))
#define MULG2(a, b)		((-351 * (a) - 728 * (b)))
// Luma can be negative here (the clamps below add the offset back), and
// left-shifting a negative int is undefined behaviour, so multiply.
#define MULY(a)			((a) * 1024)

// Pack 5-bit components and the mask bit into a 15-bit pixel.  Every
// argument, including a, is parenthesized so that any expression can be
// passed safely.
#define MAKERGB15(r, g, b, a)	(SWAP16((a) | ((b) << 10) | ((g) << 5) | (r)))
#define SCALE8(c)		SCALER(c, 20)
#define SCALE5(c)		SCALER(c, 23)

// Add the offset (16 resp. 128) and clamp to 0..31 resp. 0..255.
#define CLAMP5(c)		( ((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)) )
#define CLAMP8(c)		( ((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)) )

#define CLAMP_SCALE8(a)	(CLAMP8(SCALE8(a)))
#define CLAMP_SCALE5(a)	(CLAMP5(SCALE5(a)))
326
528ad661 327static inline void putlinebw15(u16 *image, int *Yblk) {
ef79bbde
P
328 int i;
329 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
330
331 for (i = 0; i < 8; i++, Yblk++) {
332 int Y = *Yblk;
333 // missing rounding
334 image[i] = SWAP16((CLAMP5(Y >> 3) * 0x421) | A);
335 }
336}
337
528ad661 338static inline void putquadrgb15(u16 *image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
339 int Y, R, G, B;
340 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
341 R = MULR(Cr);
342 G = MULG2(Cb, Cr);
343 B = MULB(Cb);
344
345 // added transparency
346 Y = MULY(Yblk[0]);
347 image[0] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
348 Y = MULY(Yblk[1]);
349 image[1] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
350 Y = MULY(Yblk[8]);
351 image[16] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
352 Y = MULY(Yblk[9]);
353 image[17] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
354}
355
528ad661 356static inline void yuv2rgb15(int *blk, unsigned short *image) {
ef79bbde
P
357 int x, y;
358 int *Yblk = blk + DSIZE2 * 2;
359 int *Crblk = blk;
360 int *Cbblk = blk + DSIZE2;
361
362 if (!Config.Mdec) {
363 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24) {
364 if (y == 8) Yblk += DSIZE2;
365 for (x = 0; x < 4; x++, image += 2, Crblk++, Cbblk++, Yblk += 2) {
366 putquadrgb15(image, Yblk, *Crblk, *Cbblk);
367 putquadrgb15(image + 8, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
368 }
369 }
370 } else {
371 for (y = 0; y < 16; y++, Yblk += 8, image += 16) {
372 if (y == 8) Yblk += DSIZE2;
373 putlinebw15(image, Yblk);
374 putlinebw15(image + 8, Yblk + DSIZE2);
375 }
376 }
377}
378
528ad661 379static inline void putlinebw24(u8 * image, int *Yblk) {
ef79bbde
P
380 int i;
381 unsigned char Y;
382 for (i = 0; i < 8 * 3; i += 3, Yblk++) {
383 Y = CLAMP8(*Yblk);
384 image[i + 0] = Y;
385 image[i + 1] = Y;
386 image[i + 2] = Y;
387 }
388}
389
528ad661 390static inline void putquadrgb24(u8 * image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
391 int Y, R, G, B;
392
393 R = MULR(Cr);
394 G = MULG2(Cb,Cr);
395 B = MULB(Cb);
396
397 Y = MULY(Yblk[0]);
398 image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
399 image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
400 image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
401 Y = MULY(Yblk[1]);
402 image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
403 image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
404 image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
405 Y = MULY(Yblk[8]);
406 image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
407 image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
408 image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
409 Y = MULY(Yblk[9]);
410 image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
411 image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
412 image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
413}
414
528ad661 415static void yuv2rgb24(int *blk, u8 *image) {
ef79bbde
P
416 int x, y;
417 int *Yblk = blk + DSIZE2 * 2;
418 int *Crblk = blk;
419 int *Cbblk = blk + DSIZE2;
420
421 if (!Config.Mdec) {
528ad661 422 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 8 * 3 * 3) {
ef79bbde
P
423 if (y == 8) Yblk += DSIZE2;
424 for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2) {
425 putquadrgb24(image, Yblk, *Crblk, *Cbblk);
426 putquadrgb24(image + 8 * 3, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
427 }
428 }
429 } else {
430 for (y = 0; y < 16; y++, Yblk += 8, image += 16 * 3) {
431 if (y == 8) Yblk += DSIZE2;
432 putlinebw24(image, Yblk);
433 putlinebw24(image + 8 * 3, Yblk + DSIZE2);
434 }
435 }
436}
437
438void mdecInit(void) {
439 mdec.rl = (u16 *)&psxM[0x100000];
440 mdec.reg0 = 0;
441 mdec.reg1 = 0;
528ad661 442 mdec.pending_dma1.adr = 0;
443 mdec.block_buffer_pos = 0;
ef79bbde
P
444}
445
446// command register
447void mdecWrite0(u32 data) {
ef79bbde
P
448 mdec.reg0 = data;
449}
450
451u32 mdecRead0(void) {
ef79bbde
P
452 return mdec.reg0;
453}
454
455// status register
456void mdecWrite1(u32 data) {
ef79bbde
P
457 if (data & MDEC1_RESET) { // mdec reset
458 mdec.reg0 = 0;
459 mdec.reg1 = 0;
528ad661 460 mdec.pending_dma1.adr = 0;
461 mdec.block_buffer_pos = 0;
ef79bbde
P
462 }
463}
464
465u32 mdecRead1(void) {
466 u32 v = mdec.reg1;
ef79bbde
P
467 return v;
468}
469
470void psxDma0(u32 adr, u32 bcr, u32 chcr) {
471 int cmd = mdec.reg0;
472 int size;
ef79bbde
P
473
474 if (chcr != 0x01000201) {
ef79bbde
P
475 return;
476 }
477
528ad661 478 /* mdec is STP till dma0 is released */
479 mdec.reg1 |= MDEC1_STP;
480
ef79bbde
P
481 size = (bcr >> 16) * (bcr & 0xffff);
482
483 switch (cmd >> 28) {
484 case 0x3: // decode
528ad661 485 mdec.rl = (u16 *) PSXM(adr);
486 /* now the mdec is busy till all data are decoded */
487 mdec.reg1 |= MDEC1_BUSY;
488 /* detect the end of decoding */
489 mdec.rl_end = mdec.rl + (size * 2);
490
491 /* sanity check */
492 if(mdec.rl_end <= mdec.rl) {
493 MDECINDMA_INT( size / 4 );
494 return;
495 }
496
497 /* process the pending dma1 */
498 if(mdec.pending_dma1.adr){
499 psxDma1(mdec.pending_dma1.adr, mdec.pending_dma1.bcr, mdec.pending_dma1.chcr);
500 }
501 mdec.pending_dma1.adr = 0;
502 return;
503
ef79bbde
P
504
505 case 0x4: // quantization table upload
506 {
507 u8 *p = (u8 *)PSXM(adr);
508 // printf("uploading new quantization table\n");
509 // printmatrixu8(p);
510 // printmatrixu8(p + 64);
511 iqtab_init(iq_y, p);
512 iqtab_init(iq_uv, p + 64);
513 }
528ad661 514
515 MDECINDMA_INT( size / 4 );
516 return;
ef79bbde
P
517
518 case 0x6: // cosine table
519 // printf("mdec cosine table\n");
528ad661 520
521 MDECINDMA_INT( size / 4 );
522 return;
ef79bbde
P
523
524 default:
525 // printf("mdec unknown command\n");
526 break;
527 }
528
529 HW_DMA0_CHCR &= SWAP32(~0x01000000);
530 DMA_INTERRUPT(0);
531}
532
528ad661 533void mdec0Interrupt()
534{
535 HW_DMA0_CHCR &= SWAP32(~0x01000000);
536 DMA_INTERRUPT(0);
537}
538
539#define SIZE_OF_24B_BLOCK (16*16*3)
540#define SIZE_OF_16B_BLOCK (16*16*2)
541
ef79bbde
P
542void psxDma1(u32 adr, u32 bcr, u32 chcr) {
543 int blk[DSIZE2 * 6];
528ad661 544 u8 * image;
ef79bbde 545 int size;
528ad661 546 int dmacnt;
ef79bbde
P
547
548 if (chcr != 0x01000200) return;
549
550 size = (bcr >> 16) * (bcr & 0xffff);
528ad661 551 /* size in byte */
552 size *= 4;
553 /* I guess the memory speed is limitating */
554 dmacnt = size;
555
556 if (!(mdec.reg1 & MDEC1_BUSY)) {
557 /* add to pending */
558 mdec.pending_dma1.adr = adr;
559 mdec.pending_dma1.bcr = bcr;
560 mdec.pending_dma1.chcr = chcr;
561 /* do not free the dma */
562 } else {
ef79bbde 563
528ad661 564 image = (u8 *)PSXM(adr);
565
566 if (mdec.reg0 & MDEC0_RGB24) {
567 /* 16 bits decoding
568 * block are 16 px * 16 px, each px are 2 byte
569 */
570
571 /* there is some partial block pending ? */
572 if(mdec.block_buffer_pos != 0) {
573 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_16B_BLOCK;
574 /* TODO: check if partial block do not larger than size */
575 memcpy(image, mdec.block_buffer_pos, n);
576 image += n;
577 size -= n;
578 mdec.block_buffer_pos = 0;
579 }
ef79bbde 580
528ad661 581 while(size >= SIZE_OF_16B_BLOCK) {
ef79bbde 582 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 583 yuv2rgb15(blk, (u16 *)image);
584 image += SIZE_OF_16B_BLOCK;
585 size -= SIZE_OF_16B_BLOCK;
ef79bbde 586 }
528ad661 587
588 if(size != 0) {
ef79bbde 589 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 590 yuv2rgb15(blk, (u16 *)mdec.block_buffer);
591 memcpy(image, mdec.block_buffer, size);
592 mdec.block_buffer_pos = mdec.block_buffer + size;
593 }
594
595 } else {
596 /* 24 bits decoding
597 * block are 16 px * 16 px, each px are 3 byte
598 */
599
600 /* there is some partial block pending ? */
601 if(mdec.block_buffer_pos != 0) {
602 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_24B_BLOCK;
603 /* TODO: check if partial block do not larger than size */
604 memcpy(image, mdec.block_buffer_pos, n);
605 image += n;
606 size -= n;
607 mdec.block_buffer_pos = 0;
608 }
609
610 while(size >= SIZE_OF_24B_BLOCK) {
611 mdec.rl = rl2blk(blk, mdec.rl);
612 yuv2rgb24(blk, image);
613 image += SIZE_OF_24B_BLOCK;
614 size -= SIZE_OF_24B_BLOCK;
ef79bbde 615 }
ef79bbde 616
528ad661 617 if(size != 0) {
618 mdec.rl = rl2blk(blk, mdec.rl);
619 yuv2rgb24(blk, mdec.block_buffer);
620 memcpy(image, mdec.block_buffer, size);
621 mdec.block_buffer_pos = mdec.block_buffer + size;
622 }
623 }
624
625 /* define the power of mdec */
626 MDECOUTDMA_INT((int) ((dmacnt* MDEC_BIAS)));
627 }
ef79bbde
P
628}
629
630void mdec1Interrupt() {
528ad661 631 /* Author : gschwind
632 *
633 * in that case we have done all decoding stuff
634 * Note that : each block end with 0xfe00 flags
635 * the list of blocks end with the same 0xfe00 flags
636 * data loock like :
637 *
638 * data block ...
639 * 0xfe00
640 * data block ...
641 * 0xfe00
642 * a lost of block ..
643 *
644 * 0xfe00
645 * the last block
646 * 0xfe00
647 * 0xfe00
648 *
649 * OR
650 *
651 * if the 0xfe00 is not present the data size is important.
652 *
653 */
654
655 /* this else if avoid to read outside memory */
656 if(mdec.rl >= mdec.rl_end) {
657 mdec.reg1 &= ~MDEC1_STP;
658 HW_DMA0_CHCR &= SWAP32(~0x01000000);
659 DMA_INTERRUPT(0);
660 mdec.reg1 &= ~MDEC1_BUSY;
661 } else if (SWAP16(*(mdec.rl)) == MDEC_END_OF_DATA) {
662 mdec.reg1 &= ~MDEC1_STP;
663 HW_DMA0_CHCR &= SWAP32(~0x01000000);
664 DMA_INTERRUPT(0);
ef79bbde
P
665 mdec.reg1 &= ~MDEC1_BUSY;
666 }
528ad661 667
668 HW_DMA1_CHCR &= SWAP32(~0x01000000);
669 DMA_INTERRUPT(1);
670 return;
ef79bbde
P
671}
672
673int mdecFreeze(gzFile f, int Mode) {
674 gzfreeze(&mdec, sizeof(mdec));
675 gzfreeze(iq_y, sizeof(iq_y));
676 gzfreeze(iq_uv, sizeof(iq_uv));
677
678 return 0;
679}