frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / libpcsxcore / mdec.c
... / ...
CommitLineData
1/***************************************************************************
2 * Copyright (C) 2010 Gabriele Gorla *
3 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
20
21#include "mdec.h"
22
/* memory speed is 1 byte per MDEC_BIAS psx clock
 * That means (PSXCLK / MDEC_BIAS) B/s
 * MDEC_BIAS = 2.0 => ~16MB/s
 * MDEC_BIAS = 3.0 => ~11MB/s
 * and so on ...
 * I guess there are 50 images at 50Hz ... (could it be 25 images?)
 * 320x240x24@50Hz => 11.52 MB/s
 * 320x240x24@60Hz => 13.824 MB/s
 * 320x240x16@50Hz => 7.68 MB/s
 * 320x240x16@60Hz => 9.216 MB/s
 * so 2.0 to 4.0 should be fine.
 */
35
36/*
37 * >= 14 for Sol Divide
38 * <= 18 for "Disney's Treasure Planet"
39 * Psychic Detective may break on *any* change
40 */
/* output DMA timing: cycles charged per transferred word, plus a fixed delay */
#define MDEC_BIAS 14
#define MDEC_DELAY 1024

/* a block is DSIZE x DSIZE coefficients */
#define DSIZE 8
#define DSIZE2 (DSIZE * DSIZE)

/* arithmetic right shift by n: truncating (SCALE) or with rounding (SCALER) */
#define SCALE(x, n) ((x) >> (n))
#define SCALER(x, n) (((x) + ((1 << (n)) >> 1)) >> (n))

/* number of fractional bits kept by the IDCT constants / the prescale table */
#define AAN_CONST_BITS 12
#define AAN_PRESCALE_BITS 16

/* fractional bits the literal constants below are written with */
#define AAN_CONST_SIZE 24
#define AAN_CONST_SCALE (AAN_CONST_SIZE - AAN_CONST_BITS)

#define AAN_PRESCALE_SIZE 20
#define AAN_PRESCALE_SCALE (AAN_PRESCALE_SIZE - AAN_PRESCALE_BITS)
#define AAN_EXTRA 12

/* IDCT multipliers: value * 2^AAN_CONST_SIZE, rounded down to AAN_CONST_BITS */
#define FIX_1_082392200 SCALER(18159528, AAN_CONST_SCALE) // B6
#define FIX_1_414213562 SCALER(23726566, AAN_CONST_SCALE) // A4
#define FIX_1_847759065 SCALER(31000253, AAN_CONST_SCALE) // A2
#define FIX_2_613125930 SCALER(43840978, AAN_CONST_SCALE) // B2

/* multiply by a FIX_* constant and drop the fractional bits again */
#define MULS(var, k) (SCALE((var) * (k), AAN_CONST_BITS))

/* a run-level word: upper 6 bits (run / q-scale), lower 10 bits (signed value) */
#define RLE_RUN(a) ((a) >> 10)
#define RLE_VAL(a) (((int)(a) << (sizeof(int) * 8 - 10)) >> (sizeof(int) * 8 - 10))
69
#if 0
/* debug helper (compiled out): print a DSIZE2-entry byte table, 8 values per line */
static void printmatrixu8(u8 *m) {
	int n;

	for (n = 0; n < DSIZE2; n++) {
		printf("%3d ",m[n]);
		if (n % 8 == 7)
			printf("\n");
	}
}
#endif
79
80static inline void fillcol(int *blk, int val) {
81 blk[0 * DSIZE] = blk[1 * DSIZE] = blk[2 * DSIZE] = blk[3 * DSIZE]
82 = blk[4 * DSIZE] = blk[5 * DSIZE] = blk[6 * DSIZE] = blk[7 * DSIZE] = val;
83}
84
/* Store val into the eight consecutive entries starting at blk. */
static inline void fillrow(int *blk, int val) {
	int col;

	for (col = 0; col < 8; col++)
		blk[col] = val;
}
89
90static void idct(int *block,int used_col) {
91 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
92 int z5, z10, z11, z12, z13;
93 int *ptr;
94 int i;
95
96 // the block has only the DC coefficient
97 if (used_col == -1) {
98 int v = block[0];
99 for (i = 0; i < DSIZE2; i++) block[i] = v;
100 return;
101 }
102
103 // last_col keeps track of the highest column with non zero coefficients
104 ptr = block;
105 for (i = 0; i < DSIZE; i++, ptr++) {
106 if ((used_col & (1 << i)) == 0) {
107 // the column is empty or has only the DC coefficient
108 if (ptr[DSIZE * 0]) {
109 fillcol(ptr, ptr[0]);
110 used_col |= (1 << i);
111 }
112 continue;
113 }
114
115 // further optimization could be made by keeping track of
116 // last_row in rl2blk
117 z10 = ptr[DSIZE * 0] + ptr[DSIZE * 4]; // s04
118 z11 = ptr[DSIZE * 0] - ptr[DSIZE * 4]; // d04
119 z13 = ptr[DSIZE * 2] + ptr[DSIZE * 6]; // s26
120 z12 = MULS(ptr[DSIZE * 2] - ptr[DSIZE * 6], FIX_1_414213562) - z13;
121 //^^^^ d26=d26*2*A4-s26
122
123 tmp0 = z10 + z13; // os07 = s04 + s26
124 tmp3 = z10 - z13; // os34 = s04 - s26
125 tmp1 = z11 + z12; // os16 = d04 + d26
126 tmp2 = z11 - z12; // os25 = d04 - d26
127
128 z13 = ptr[DSIZE * 3] + ptr[DSIZE * 5]; //s53
129 z10 = ptr[DSIZE * 3] - ptr[DSIZE * 5]; //-d53
130 z11 = ptr[DSIZE * 1] + ptr[DSIZE * 7]; //s17
131 z12 = ptr[DSIZE * 1] - ptr[DSIZE * 7]; //d17
132
133 tmp7 = z11 + z13; // od07 = s17 + s53
134
135 z5 = (z12 - z10) * (FIX_1_847759065);
136 tmp6 = SCALE(z10*(FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
137 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
138 tmp4 = SCALE(z12*(FIX_1_082392200) - z5, AAN_CONST_BITS) + tmp5;
139
140 // path #1
141 //z5 = (z12 - z10)* FIX_1_847759065;
142 // tmp0 = (d17 + d53) * 2*A2
143
144 //tmp6 = DESCALE(z10*FIX_2_613125930 + z5, CONST_BITS) - tmp7;
145 // od16 = (d53*-2*B2 + tmp0) - od07
146
147 //tmp4 = DESCALE(z12*FIX_1_082392200 - z5, CONST_BITS) + tmp5;
148 // od34 = (d17*2*B6 - tmp0) + od25
149
150 // path #2
151
152 // od34 = d17*2*(B6-A2) - d53*2*A2
153 // od16 = d53*2*(A2-B2) + d17*2*A2
154
155 // end
156
157 // tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
158 // od25 = (s17 - s53)*2*A4 - od16
159
160 ptr[DSIZE * 0] = (tmp0 + tmp7); // os07 + od07
161 ptr[DSIZE * 7] = (tmp0 - tmp7); // os07 - od07
162 ptr[DSIZE * 1] = (tmp1 + tmp6); // os16 + od16
163 ptr[DSIZE * 6] = (tmp1 - tmp6); // os16 - od16
164 ptr[DSIZE * 2] = (tmp2 + tmp5); // os25 + od25
165 ptr[DSIZE * 5] = (tmp2 - tmp5); // os25 - od25
166 ptr[DSIZE * 4] = (tmp3 + tmp4); // os34 + od34
167 ptr[DSIZE * 3] = (tmp3 - tmp4); // os34 - od34
168 }
169
170 ptr = block;
171 if (used_col == 1) {
172 for (i = 0; i < DSIZE; i++)
173 fillrow(block + DSIZE * i, block[DSIZE * i]);
174 } else {
175 for (i = 0; i < DSIZE; i++, ptr += DSIZE) {
176 z10 = ptr[0] + ptr[4];
177 z11 = ptr[0] - ptr[4];
178 z13 = ptr[2] + ptr[6];
179 z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
180
181 tmp0 = z10 + z13;
182 tmp3 = z10 - z13;
183 tmp1 = z11 + z12;
184 tmp2 = z11 - z12;
185
186 z13 = ptr[3] + ptr[5];
187 z10 = ptr[3] - ptr[5];
188 z11 = ptr[1] + ptr[7];
189 z12 = ptr[1] - ptr[7];
190
191 tmp7 = z11 + z13;
192 z5 = (z12 - z10) * FIX_1_847759065;
193 tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
194 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
195 tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
196
197 ptr[0] = tmp0 + tmp7;
198
199 ptr[7] = tmp0 - tmp7;
200 ptr[1] = tmp1 + tmp6;
201 ptr[6] = tmp1 - tmp6;
202 ptr[2] = tmp2 + tmp5;
203 ptr[5] = tmp2 - tmp5;
204 ptr[4] = tmp3 + tmp4;
205 ptr[3] = tmp3 - tmp4;
206 }
207 }
208}
209
/* mdec0: command register bits */
#define MDEC0_STP 0x02000000       /* when set, 15-bit pixels get bit 15 (0x8000) */
#define MDEC0_RGB24 0x08000000     /* tested by psxDma1 to choose the output path */
#define MDEC0_SIZE_MASK 0x0000FFFF

/* mdec1: status register bits (read) / control bits (write) */
#define MDEC1_BUSY 0x20000000      /* set by psxDma0, cleared in mdec1Interrupt */
#define MDEC1_DREQ 0x18000000
#define MDEC1_FIFO 0xc0000000
#define MDEC1_RGB24 0x02000000
#define MDEC1_STP 0x00800000       /* set by psxDma0, cleared in mdec1Interrupt */
#define MDEC1_RESET 0x80000000     /* written to mdec1: reset the decoder */
222
223struct _pending_dma1 {
224 u32 adr;
225 u32 bcr;
226 u32 chcr;
227};
228
229static struct {
230 u32 reg0;
231 u32 reg1;
232 const u16 * rl;
233 const u16 * rl_end;
234 u8 * block_buffer_pos;
235 u8 block_buffer[16*16*3];
236 struct _pending_dma1 pending_dma1;
237} mdec;
238
239static int iq_y[DSIZE2], iq_uv[DSIZE2];
240
/* zigzag scan order: zscan[k] is the block index of the k-th coded coefficient */
static int zscan[] = {
	 0,  1,  8, 16,  9,  2,  3, 10,
	17, 24, 32, 25, 18, 11,  4,  5,
	12, 19, 26, 33, 40, 48, 41, 34,
	27, 20, 13,  6,  7, 14, 21, 28,
	35, 42, 49, 56, 57, 50, 43, 36,
	29, 22, 15, 23, 30, 37, 44, 51,
	58, 59, 52, 45, 38, 31, 39, 46,
	53, 60, 61, 54, 47, 55, 62, 63
};
251
/* per-coefficient prescale factors in block order; entry 0 is 1 << 20 */
static int aanscales[] = {
	/* row 0 */ 1048576, 1454417, 1370031, 1232995, 1048576,  823861, 567485, 289301,
	/* row 1 */ 1454417, 2017334, 1900287, 1710213, 1454417, 1142728, 787125, 401273,
	/* row 2 */ 1370031, 1900287, 1790031, 1610986, 1370031, 1076426, 741455, 377991,
	/* row 3 */ 1232995, 1710213, 1610986, 1449849, 1232995,  968758, 667292, 340183,
	/* row 4 */ 1048576, 1454417, 1370031, 1232995, 1048576,  823861, 567485, 289301,
	/* row 5 */  823861, 1142728, 1076426,  968758,  823861,  647303, 445870, 227303,
	/* row 6 */  567485,  787125,  741455,  667292,  567485,  445870, 307121, 156569,
	/* row 7 */  289301,  401273,  377991,  340183,  289301,  227303, 156569,  79818
};
262
263static void iqtab_init(int *iqtab, const unsigned char *iq_y) {
264 int i;
265
266 for (i = 0; i < DSIZE2; i++) {
267 iqtab[i] = (iq_y[i] * SCALER(aanscales[zscan[i]], AAN_PRESCALE_SCALE));
268 }
269}
270
271#define MDEC_END_OF_DATA 0xfe00
272
273static const unsigned short *rl2blk(int *blk, const unsigned short *mdec_rl) {
274 int i, k, q_scale, rl, used_col;
275 int *iqtab;
276
277 memset(blk, 0, 6 * DSIZE2 * sizeof(int));
278 iqtab = iq_uv;
279 for (i = 0; i < 6; i++) {
280 // decode blocks (Cr,Cb,Y1,Y2,Y3,Y4)
281 if (i == 2) iqtab = iq_y;
282
283 rl = SWAP16(*mdec_rl); mdec_rl++;
284 q_scale = RLE_RUN(rl);
285 blk[0] = SCALER(iqtab[0] * RLE_VAL(rl), AAN_EXTRA - 3);
286 for (k = 0, used_col = 0;;) {
287 rl = SWAP16(*mdec_rl); mdec_rl++;
288 if (rl == MDEC_END_OF_DATA) break;
289 k += RLE_RUN(rl) + 1; // skip zero-coefficients
290
291 if (k > 63) {
292 // printf("run lenght exceeded 64 enties\n");
293 break;
294 }
295
296 // zigzag transformation
297 blk[zscan[k]] = SCALER(RLE_VAL(rl) * iqtab[k] * q_scale, AAN_EXTRA);
298 // keep track of used columns to speed up the idtc
299 used_col |= (zscan[k] > 7) ? 1 << (zscan[k] & 7) : 0;
300 }
301
302 if (k == 0) used_col = -1;
303 // used_col is -1 for blocks with only the DC coefficient
304 // any other value is a bitmask of the columns that have
305 // at least one non zero cofficient in the rows 1-7
306 // single coefficients in row 0 are treted specially
307 // in the idtc function
308 idct(blk, used_col);
309 blk += DSIZE2;
310 }
311 return mdec_rl;
312}
313
// full scale (JPEG)
// Y/Cb/Cr[0...255] -> R/G/B[0...255]
// R = 1.000 * (Y) + 1.400 * (Cr - 128)
// G = 1.000 * (Y) - 0.343 * (Cb - 128) - 0.711 * (Cr - 128)
// B = 1.000 * (Y) + 1.765 * (Cb - 128)
//
// The coefficients below are the ones above scaled by 1024 (see MULY).
#define MULR(a) ((1434 * (a)))
#define MULB(a) ((1807 * (a)))
#define MULG2(a, b) ((-351 * (a) - 728 * (b)))
#define MULY(a) ((a) << 10)

// pack 5-bit r/g/b and the flag a into one 16-bit pixel
#define MAKERGB15(r, g, b, a) (SWAP16(a | ((b) << 10) | ((g) << 5) | (r)))
// rounding shifts used to bring a colour sum down to 8 or 5 bits
#define SCALE8(c) SCALER(c, 20)
#define SCALE5(c) SCALER(c, 23)
327
/* Re-centre a signed value by +16 and clamp it to the 5-bit range 0..31. */
static inline int clamp5(int v)
{
	const int shifted = v + 16;

	if (shifted < 0)
		return 0;
	if (shifted > 31)
		return 31;
	return shifted;
}
334
/* Re-centre a signed value by +128 and clamp it to the 8-bit range 0..255. */
static inline int clamp8(int v)
{
	const int shifted = v + 128;

	if (shifted < 0)
		return 0;
	if (shifted > 255)
		return 255;
	return shifted;
}

/* scale a colour sum down, then clamp it to 8 or 5 bits */
#define CLAMP_SCALE8(a) (clamp8(SCALE8(a)))
#define CLAMP_SCALE5(a) (clamp5(SCALE5(a)))
344
345static inline void putlinebw15(u16 *image, int *Yblk) {
346 int i;
347 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
348
349 for (i = 0; i < 8; i++, Yblk++) {
350 int Y = *Yblk;
351 // missing rounding
352 image[i] = SWAP16((clamp5(Y >> 3) * 0x421) | A);
353 }
354}
355
356static inline void putquadrgb15(u16 *image, int *Yblk, int Cr, int Cb) {
357 int Y, R, G, B;
358 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
359 R = MULR(Cr);
360 G = MULG2(Cb, Cr);
361 B = MULB(Cb);
362
363 // added transparency
364 Y = MULY(Yblk[0]);
365 image[0] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
366 Y = MULY(Yblk[1]);
367 image[1] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
368 Y = MULY(Yblk[8]);
369 image[16] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
370 Y = MULY(Yblk[9]);
371 image[17] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
372}
373
374static inline void yuv2rgb15(int *blk, unsigned short *image) {
375 int x, y;
376 int *Yblk = blk + DSIZE2 * 2;
377 int *Crblk = blk;
378 int *Cbblk = blk + DSIZE2;
379
380 if (!Config.Mdec) {
381 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24) {
382 if (y == 8) Yblk += DSIZE2;
383 for (x = 0; x < 4; x++, image += 2, Crblk++, Cbblk++, Yblk += 2) {
384 putquadrgb15(image, Yblk, *Crblk, *Cbblk);
385 putquadrgb15(image + 8, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
386 }
387 }
388 } else {
389 for (y = 0; y < 16; y++, Yblk += 8, image += 16) {
390 if (y == 8) Yblk += DSIZE2;
391 putlinebw15(image, Yblk);
392 putlinebw15(image + 8, Yblk + DSIZE2);
393 }
394 }
395}
396
397static inline void putlinebw24(u8 * image, int *Yblk) {
398 int i;
399 unsigned char Y;
400 for (i = 0; i < 8 * 3; i += 3, Yblk++) {
401 Y = clamp8(*Yblk);
402 image[i + 0] = Y;
403 image[i + 1] = Y;
404 image[i + 2] = Y;
405 }
406}
407
408static inline void putquadrgb24(u8 * image, int *Yblk, int Cr, int Cb) {
409 int Y, R, G, B;
410
411 R = MULR(Cr);
412 G = MULG2(Cb,Cr);
413 B = MULB(Cb);
414
415 Y = MULY(Yblk[0]);
416 image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
417 image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
418 image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
419 Y = MULY(Yblk[1]);
420 image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
421 image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
422 image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
423 Y = MULY(Yblk[8]);
424 image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
425 image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
426 image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
427 Y = MULY(Yblk[9]);
428 image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
429 image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
430 image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
431}
432
433static void yuv2rgb24(int *blk, u8 *image) {
434 int x, y;
435 int *Yblk = blk + DSIZE2 * 2;
436 int *Crblk = blk;
437 int *Cbblk = blk + DSIZE2;
438
439 if (!Config.Mdec) {
440 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 8 * 3 * 3) {
441 if (y == 8) Yblk += DSIZE2;
442 for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2) {
443 putquadrgb24(image, Yblk, *Crblk, *Cbblk);
444 putquadrgb24(image + 8 * 3, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
445 }
446 }
447 } else {
448 for (y = 0; y < 16; y++, Yblk += 8, image += 16 * 3) {
449 if (y == 8) Yblk += DSIZE2;
450 putlinebw24(image, Yblk);
451 putlinebw24(image + 8 * 3, Yblk + DSIZE2);
452 }
453 }
454}
455
456void mdecInit(void) {
457 memset(&mdec, 0, sizeof(mdec));
458 memset(iq_y, 0, sizeof(iq_y));
459 memset(iq_uv, 0, sizeof(iq_uv));
460 mdec.rl = (u16 *)&psxM[0x100000];
461}
462
463// command register
464void mdecWrite0(u32 data) {
465 mdec.reg0 = data;
466}
467
468u32 mdecRead0(void) {
469 return mdec.reg0;
470}
471
472// status register
473void mdecWrite1(u32 data) {
474 if (data & MDEC1_RESET) { // mdec reset
475 mdec.reg0 = 0;
476 mdec.reg1 = 0;
477 mdec.pending_dma1.adr = 0;
478 mdec.block_buffer_pos = 0;
479 }
480}
481
482u32 mdecRead1(void) {
483 u32 v = mdec.reg1;
484 return v;
485}
486
487void psxDma0(u32 adr, u32 bcr, u32 chcr) {
488 u32 cmd = mdec.reg0, words_max = 0;
489 const void *mem;
490 int size;
491
492 if (chcr != 0x01000201) {
493 log_unhandled("mdec0: invalid dma %08x\n", chcr);
494 return;
495 }
496
497 /* mdec is STP till dma0 is released */
498 mdec.reg1 |= MDEC1_STP;
499
500 size = (bcr >> 16) * (bcr & 0xffff);
501
502 adr &= ~3;
503 mem = getDmaRam(adr, &words_max);
504 if (mem == INVALID_PTR || size > words_max) {
505 log_unhandled("bad dma0 madr %x\n", adr);
506 HW_DMA0_CHCR &= SWAP32(~0x01000000);
507 return;
508 }
509
510 switch (cmd >> 28) {
511 case 0x3: // decode 15/24bpp
512 mdec.rl = mem;
513 /* now the mdec is busy till all data are decoded */
514 mdec.reg1 |= MDEC1_BUSY;
515 /* detect the end of decoding */
516 mdec.rl_end = mdec.rl + (size * 2);
517
518 /* sanity check */
519 if(mdec.rl_end <= mdec.rl)
520 break;
521
522 /* process the pending dma1 */
523 if(mdec.pending_dma1.adr){
524 psxDma1(mdec.pending_dma1.adr, mdec.pending_dma1.bcr, mdec.pending_dma1.chcr);
525 }
526 mdec.pending_dma1.adr = 0;
527 return;
528
529
530 case 0x4: // quantization table upload
531 {
532 const u8 *p = mem;
533 // printf("uploading new quantization table\n");
534 // printmatrixu8(p);
535 // printmatrixu8(p + 64);
536 iqtab_init(iq_y, p);
537 iqtab_init(iq_uv, p + 64);
538 }
539 break;
540
541 case 0x6: // cosine table
542 // printf("mdec cosine table\n");
543 break;
544
545 default:
546 log_unhandled("mdec: unknown command %08x\n", cmd);
547 break;
548 }
549
550 set_event(PSXINT_MDECINDMA, size);
551}
552
553void mdec0Interrupt()
554{
555 if (HW_DMA0_CHCR & SWAP32(0x01000000))
556 {
557 HW_DMA0_CHCR &= SWAP32(~0x01000000);
558 DMA_INTERRUPT(0);
559 }
560}
561
562#define SIZE_OF_24B_BLOCK (16*16*3)
563#define SIZE_OF_16B_BLOCK (16*16*2)
564
565void psxDma1(u32 adr, u32 bcr, u32 chcr) {
566 u32 words, words_max = 0;
567 int blk[DSIZE2 * 6];
568 u8 * image;
569 int size;
570
571 if (chcr != 0x01000200) {
572 log_unhandled("mdec1: invalid dma %08x\n", chcr);
573 return;
574 }
575
576 words = (bcr >> 16) * (bcr & 0xffff);
577 /* size in byte */
578 size = words * 4;
579
580 if (!(mdec.reg1 & MDEC1_BUSY)) {
581 /* add to pending */
582 mdec.pending_dma1.adr = adr;
583 mdec.pending_dma1.bcr = bcr;
584 mdec.pending_dma1.chcr = chcr;
585 /* do not free the dma */
586 return;
587 }
588
589 adr &= ~3;
590 image = getDmaRam(adr, &words_max);
591 if (image == INVALID_PTR || words > words_max) {
592 log_unhandled("bad dma1 madr %x\n", adr);
593 HW_DMA1_CHCR &= SWAP32(~0x01000000);
594 return;
595 }
596
597 if (mdec.reg0 & MDEC0_RGB24) {
598 /* 16 bits decoding
599 * block are 16 px * 16 px, each px are 2 byte
600 */
601
602 /* there is some partial block pending ? */
603 if(mdec.block_buffer_pos != 0) {
604 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_16B_BLOCK;
605 /* TODO: check if partial block do not larger than size */
606 memcpy(image, mdec.block_buffer_pos, n);
607 image += n;
608 size -= n;
609 mdec.block_buffer_pos = 0;
610 }
611
612 while(size >= SIZE_OF_16B_BLOCK) {
613 mdec.rl = rl2blk(blk, mdec.rl);
614 yuv2rgb15(blk, (u16 *)image);
615 image += SIZE_OF_16B_BLOCK;
616 size -= SIZE_OF_16B_BLOCK;
617 }
618
619 if(size != 0) {
620 mdec.rl = rl2blk(blk, mdec.rl);
621 yuv2rgb15(blk, (u16 *)mdec.block_buffer);
622 memcpy(image, mdec.block_buffer, size);
623 mdec.block_buffer_pos = mdec.block_buffer + size;
624 }
625
626 } else {
627 /* 24 bits decoding
628 * block are 16 px * 16 px, each px are 3 byte
629 */
630
631 /* there is some partial block pending ? */
632 if(mdec.block_buffer_pos != 0) {
633 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_24B_BLOCK;
634 /* TODO: check if partial block do not larger than size */
635 memcpy(image, mdec.block_buffer_pos, n);
636 image += n;
637 size -= n;
638 mdec.block_buffer_pos = 0;
639 }
640
641 while(size >= SIZE_OF_24B_BLOCK) {
642 mdec.rl = rl2blk(blk, mdec.rl);
643 yuv2rgb24(blk, image);
644 image += SIZE_OF_24B_BLOCK;
645 size -= SIZE_OF_24B_BLOCK;
646 }
647
648 if(size != 0) {
649 mdec.rl = rl2blk(blk, mdec.rl);
650 yuv2rgb24(blk, mdec.block_buffer);
651 memcpy(image, mdec.block_buffer, size);
652 mdec.block_buffer_pos = mdec.block_buffer + size;
653 }
654 }
655 if (size < 0)
656 log_unhandled("mdec: bork\n");
657
658 /* define the power of mdec */
659 set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS + MDEC_DELAY);
660 /* some CPU stalling */
661 psxRegs.cycle += words * MDEC_BIAS / 4;
662}
663
664void mdec1Interrupt() {
665 /* Author : gschwind
666 *
667 * in that case we have done all decoding stuff
668 * Note that : each block end with 0xfe00 flags
669 * the list of blocks end with the same 0xfe00 flags
670 * data loock like :
671 *
672 * data block ...
673 * 0xfe00
674 * data block ...
675 * 0xfe00
676 * a lost of block ..
677 *
678 * 0xfe00
679 * the last block
680 * 0xfe00
681 * 0xfe00
682 *
683 * OR
684 *
685 * if the 0xfe00 is not present the data size is important.
686 *
687 */
688
689 /* MDEC_END_OF_DATA avoids read outside memory */
690 //printf("mdec left %zd, v=%04x\n", mdec.rl_end - mdec.rl, *(mdec.rl));
691 if (mdec.rl >= mdec.rl_end || SWAP16(*(mdec.rl)) == MDEC_END_OF_DATA) {
692 mdec.reg1 &= ~(MDEC1_STP|MDEC1_BUSY);
693 if (HW_DMA0_CHCR & SWAP32(0x01000000))
694 {
695 HW_DMA0_CHCR &= SWAP32(~0x01000000);
696 DMA_INTERRUPT(0);
697 }
698 }
699
700 if (HW_DMA1_CHCR & SWAP32(0x01000000))
701 {
702 HW_DMA1_CHCR &= SWAP32(~0x01000000);
703 DMA_INTERRUPT(1);
704 }
705}
706
707int mdecFreeze(void *f, int Mode) {
708 u8 *base = (u8 *)psxM;
709 u32 v;
710
711 gzfreeze(&mdec.reg0, sizeof(mdec.reg0));
712 gzfreeze(&mdec.reg1, sizeof(mdec.reg1));
713
714 v = (u8 *)mdec.rl - base;
715 gzfreeze(&v, sizeof(v));
716 mdec.rl = (u16 *)(base + (v & 0x1ffffe));
717 v = (u8 *)mdec.rl_end - base;
718 gzfreeze(&v, sizeof(v));
719 mdec.rl_end = (u16 *)(base + (v & 0x1ffffe));
720
721 v = 0;
722 if (mdec.block_buffer_pos)
723 v = mdec.block_buffer_pos - mdec.block_buffer;
724 gzfreeze(&v, sizeof(v));
725 mdec.block_buffer_pos = 0;
726 if (v && v < sizeof(mdec.block_buffer))
727 mdec.block_buffer_pos = mdec.block_buffer;
728
729 gzfreeze(&mdec.block_buffer, sizeof(mdec.block_buffer));
730 gzfreeze(&mdec.pending_dma1, sizeof(mdec.pending_dma1));
731 gzfreeze(iq_y, sizeof(iq_y));
732 gzfreeze(iq_uv, sizeof(iq_uv));
733
734 return 0;
735}