frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / libpcsxcore / mdec.c
CommitLineData
ef79bbde
P
1/***************************************************************************
2 * Copyright (C) 2010 Gabriele Gorla *
3 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
20
21#include "mdec.h"
22
/*
 * Memory speed is modelled as 1 byte per MDEC_BIAS psx clocks,
 * which means (PSXCLK / MDEC_BIAS) B/s:
 *   MDEC_BIAS = 2.0 => ~16MB/s
 *   MDEC_BIAS = 3.0 => ~11MB/s
 *   and so on ...
 * Assuming 50 images at 50Hz (it could be 25 images?):
 *   320x240x24@50Hz => 11.52 MB/s
 *   320x240x24@60Hz => 13.824 MB/s
 *   320x240x16@50Hz => 7.68 MB/s
 *   320x240x16@60Hz => 9.216 MB/s
 * so 2.0 to 4.0 should be fine.
 */

/*
 * Per-title constraints on MDEC_BIAS:
 *   >= 14 for Sol Divide
 *   <= 18 for "Disney's Treasure Planet"
 *   Psychic Detective may break on *any* change
 */
#define MDEC_BIAS 14
/* Constant added to the output-DMA event delay in psxDma1(). */
#define MDEC_DELAY 1024
528ad661 43
ef79bbde
P
/* A block is DSIZE x DSIZE coefficients (DSIZE2 in total). */
#define DSIZE 8
#define DSIZE2 (DSIZE * DSIZE)

/* Right shift by n bits; SCALER adds half of the dropped range first. */
#define SCALE(x, n) ((x) >> (n))
#define SCALER(x, n) (((x) + ((1 << (n)) >> 1)) >> (n))

/* Fractional bits kept for the IDCT constants and the prescale table. */
#define AAN_CONST_BITS 12
#define AAN_PRESCALE_BITS 16

/* The literals below carry AAN_CONST_SIZE fractional bits. */
#define AAN_CONST_SIZE 24
#define AAN_CONST_SCALE (AAN_CONST_SIZE - AAN_CONST_BITS)

/* The aanscales[] table carries AAN_PRESCALE_SIZE fractional bits. */
#define AAN_PRESCALE_SIZE 20
#define AAN_PRESCALE_SCALE (AAN_PRESCALE_SIZE-AAN_PRESCALE_BITS)
#define AAN_EXTRA 12

/* IDCT multipliers, rounded down to AAN_CONST_BITS fractional bits. */
#define FIX_1_082392200 SCALER(18159528, AAN_CONST_SCALE) // B6
#define FIX_1_414213562 SCALER(23726566, AAN_CONST_SCALE) // A4
#define FIX_1_847759065 SCALER(31000253, AAN_CONST_SCALE) // A2
#define FIX_2_613125930 SCALER(43840978, AAN_CONST_SCALE) // B2

/* Multiply by one of the FIX_* constants and drop its fractional bits. */
#define MULS(var, c) (SCALE((var) * (c), AAN_CONST_BITS))
66
/*
 * Fields of a 16-bit run-level word as decoded by rl2blk():
 *   RLE_RUN - the bits above bit 9 (zero-run length; in the first word of
 *             a block rl2blk() uses this field as the quantizer scale)
 *   RLE_VAL - the low 10 bits, sign-extended to int
 *
 * RLE_VAL sign-extends with mask/xor/subtract rather than shifting the value
 * up into the sign bit and back down: left-shifting a signed int so that it
 * overflows is undefined behaviour, and the old form also depended on
 * sizeof(int).
 */
#define RLE_RUN(a) ((a) >> 10)
#define RLE_VAL(a) ((int)((((unsigned)(a)) & 0x3ffu) ^ 0x200u) - 0x200)
69
#if 0
/* Debug helper (compiled out): dump DSIZE2 bytes, eight values per line. */
static void printmatrixu8(u8 *m) {
	int idx = 0;

	while (idx < DSIZE2) {
		printf("%3d ",m[idx]);
		idx++;
		if (idx % 8 == 0) printf("\n");
	}
}
#endif
79
80static inline void fillcol(int *blk, int val) {
81 blk[0 * DSIZE] = blk[1 * DSIZE] = blk[2 * DSIZE] = blk[3 * DSIZE]
82 = blk[4 * DSIZE] = blk[5 * DSIZE] = blk[6 * DSIZE] = blk[7 * DSIZE] = val;
83}
84
/* Store val into the eight consecutive entries blk[0..7]. */
static inline void fillrow(int *blk, int val) {
	int col;

	for (col = 0; col < 8; col++)
		blk[col] = val;
}
89
528ad661 90static void idct(int *block,int used_col) {
ef79bbde
P
91 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
92 int z5, z10, z11, z12, z13;
93 int *ptr;
94 int i;
95
96 // the block has only the DC coefficient
97 if (used_col == -1) {
98 int v = block[0];
99 for (i = 0; i < DSIZE2; i++) block[i] = v;
100 return;
101 }
102
103 // last_col keeps track of the highest column with non zero coefficients
104 ptr = block;
105 for (i = 0; i < DSIZE; i++, ptr++) {
106 if ((used_col & (1 << i)) == 0) {
107 // the column is empty or has only the DC coefficient
108 if (ptr[DSIZE * 0]) {
109 fillcol(ptr, ptr[0]);
110 used_col |= (1 << i);
111 }
112 continue;
113 }
114
115 // further optimization could be made by keeping track of
116 // last_row in rl2blk
117 z10 = ptr[DSIZE * 0] + ptr[DSIZE * 4]; // s04
118 z11 = ptr[DSIZE * 0] - ptr[DSIZE * 4]; // d04
119 z13 = ptr[DSIZE * 2] + ptr[DSIZE * 6]; // s26
120 z12 = MULS(ptr[DSIZE * 2] - ptr[DSIZE * 6], FIX_1_414213562) - z13;
121 //^^^^ d26=d26*2*A4-s26
122
123 tmp0 = z10 + z13; // os07 = s04 + s26
124 tmp3 = z10 - z13; // os34 = s04 - s26
125 tmp1 = z11 + z12; // os16 = d04 + d26
126 tmp2 = z11 - z12; // os25 = d04 - d26
127
128 z13 = ptr[DSIZE * 3] + ptr[DSIZE * 5]; //s53
129 z10 = ptr[DSIZE * 3] - ptr[DSIZE * 5]; //-d53
130 z11 = ptr[DSIZE * 1] + ptr[DSIZE * 7]; //s17
131 z12 = ptr[DSIZE * 1] - ptr[DSIZE * 7]; //d17
132
133 tmp7 = z11 + z13; // od07 = s17 + s53
134
135 z5 = (z12 - z10) * (FIX_1_847759065);
136 tmp6 = SCALE(z10*(FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
137 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
138 tmp4 = SCALE(z12*(FIX_1_082392200) - z5, AAN_CONST_BITS) + tmp5;
139
140 // path #1
141 //z5 = (z12 - z10)* FIX_1_847759065;
142 // tmp0 = (d17 + d53) * 2*A2
143
144 //tmp6 = DESCALE(z10*FIX_2_613125930 + z5, CONST_BITS) - tmp7;
145 // od16 = (d53*-2*B2 + tmp0) - od07
146
147 //tmp4 = DESCALE(z12*FIX_1_082392200 - z5, CONST_BITS) + tmp5;
148 // od34 = (d17*2*B6 - tmp0) + od25
149
150 // path #2
151
152 // od34 = d17*2*(B6-A2) - d53*2*A2
153 // od16 = d53*2*(A2-B2) + d17*2*A2
154
155 // end
156
157 // tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
158 // od25 = (s17 - s53)*2*A4 - od16
159
160 ptr[DSIZE * 0] = (tmp0 + tmp7); // os07 + od07
161 ptr[DSIZE * 7] = (tmp0 - tmp7); // os07 - od07
162 ptr[DSIZE * 1] = (tmp1 + tmp6); // os16 + od16
163 ptr[DSIZE * 6] = (tmp1 - tmp6); // os16 - od16
164 ptr[DSIZE * 2] = (tmp2 + tmp5); // os25 + od25
165 ptr[DSIZE * 5] = (tmp2 - tmp5); // os25 - od25
166 ptr[DSIZE * 4] = (tmp3 + tmp4); // os34 + od34
167 ptr[DSIZE * 3] = (tmp3 - tmp4); // os34 - od34
168 }
169
170 ptr = block;
171 if (used_col == 1) {
172 for (i = 0; i < DSIZE; i++)
173 fillrow(block + DSIZE * i, block[DSIZE * i]);
174 } else {
175 for (i = 0; i < DSIZE; i++, ptr += DSIZE) {
176 z10 = ptr[0] + ptr[4];
177 z11 = ptr[0] - ptr[4];
178 z13 = ptr[2] + ptr[6];
179 z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
180
181 tmp0 = z10 + z13;
182 tmp3 = z10 - z13;
183 tmp1 = z11 + z12;
184 tmp2 = z11 - z12;
185
186 z13 = ptr[3] + ptr[5];
187 z10 = ptr[3] - ptr[5];
188 z11 = ptr[1] + ptr[7];
189 z12 = ptr[1] - ptr[7];
190
191 tmp7 = z11 + z13;
192 z5 = (z12 - z10) * FIX_1_847759065;
193 tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
194 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
195 tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
196
197 ptr[0] = tmp0 + tmp7;
198
199 ptr[7] = tmp0 - tmp7;
200 ptr[1] = tmp1 + tmp6;
201 ptr[6] = tmp1 - tmp6;
202 ptr[2] = tmp2 + tmp5;
203 ptr[5] = tmp2 - tmp5;
204 ptr[4] = tmp3 + tmp4;
205 ptr[3] = tmp3 - tmp4;
206 }
207 }
208}
209
// mdec0: command register
// MDEC0_STP: when set, the 15-bit output paths set bit 15 of every pixel.
// MDEC0_RGB24: despite its name, when this bit is set psxDma1() takes the
// 2-bytes-per-pixel path; when clear it takes the 3-bytes-per-pixel path.
// MDEC0_SIZE_MASK is not referenced by the code in this file.
#define MDEC0_STP 0x02000000
#define MDEC0_RGB24 0x08000000
#define MDEC0_SIZE_MASK 0x0000FFFF

// mdec1: status register
// BUSY and STP are set by psxDma0() and cleared by mdec1Interrupt();
// RESET is the bit tested by mdecWrite1().
// DREQ, FIFO and RGB24 are not referenced by the code in this file.
#define MDEC1_BUSY 0x20000000
#define MDEC1_DREQ 0x18000000
#define MDEC1_FIFO 0xc0000000
#define MDEC1_RGB24 0x02000000
#define MDEC1_STP 0x00800000
#define MDEC1_RESET 0x80000000
222
528ad661 223struct _pending_dma1 {
224 u32 adr;
225 u32 bcr;
226 u32 chcr;
227};
228
c62b43c9 229static struct {
528ad661 230 u32 reg0;
231 u32 reg1;
b54a1ac7 232 const u16 * rl;
233 const u16 * rl_end;
528ad661 234 u8 * block_buffer_pos;
235 u8 block_buffer[16*16*3];
236 struct _pending_dma1 pending_dma1;
ef79bbde
P
237} mdec;
238
239static int iq_y[DSIZE2], iq_uv[DSIZE2];
240
241static int zscan[DSIZE2] = {
242 0 , 1 , 8 , 16, 9 , 2 , 3 , 10,
243 17, 24, 32, 25, 18, 11, 4 , 5 ,
244 12, 19, 26, 33, 40, 48, 41, 34,
245 27, 20, 13, 6 , 7 , 14, 21, 28,
246 35, 42, 49, 56, 57, 50, 43, 36,
247 29, 22, 15, 23, 30, 37, 44, 51,
248 58, 59, 52, 45, 38, 31, 39, 46,
249 53, 60, 61, 54, 47, 55, 62, 63
250};
251
252static int aanscales[DSIZE2] = {
253 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
254 1454417, 2017334, 1900287, 1710213, 1454417, 1142728, 787125, 401273,
255 1370031, 1900287, 1790031, 1610986, 1370031, 1076426, 741455, 377991,
256 1232995, 1710213, 1610986, 1449849, 1232995, 968758, 667292, 340183,
257 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
258 823861, 1142728, 1076426, 968758, 823861, 647303, 445870, 227303,
259 567485, 787125, 741455, 667292, 567485, 445870, 307121, 156569,
260 289301, 401273, 377991, 340183, 289301, 227303, 156569, 79818
261};
262
b54a1ac7 263static void iqtab_init(int *iqtab, const unsigned char *iq_y) {
ef79bbde
P
264 int i;
265
266 for (i = 0; i < DSIZE2; i++) {
267 iqtab[i] = (iq_y[i] * SCALER(aanscales[zscan[i]], AAN_PRESCALE_SCALE));
268 }
269}
270
271#define MDEC_END_OF_DATA 0xfe00
272
b54a1ac7 273static const unsigned short *rl2blk(int *blk, const unsigned short *mdec_rl) {
ef79bbde
P
274 int i, k, q_scale, rl, used_col;
275 int *iqtab;
276
277 memset(blk, 0, 6 * DSIZE2 * sizeof(int));
278 iqtab = iq_uv;
279 for (i = 0; i < 6; i++) {
280 // decode blocks (Cr,Cb,Y1,Y2,Y3,Y4)
281 if (i == 2) iqtab = iq_y;
282
283 rl = SWAP16(*mdec_rl); mdec_rl++;
284 q_scale = RLE_RUN(rl);
285 blk[0] = SCALER(iqtab[0] * RLE_VAL(rl), AAN_EXTRA - 3);
286 for (k = 0, used_col = 0;;) {
287 rl = SWAP16(*mdec_rl); mdec_rl++;
288 if (rl == MDEC_END_OF_DATA) break;
289 k += RLE_RUN(rl) + 1; // skip zero-coefficients
290
291 if (k > 63) {
292 // printf("run lenght exceeded 64 enties\n");
293 break;
294 }
295
296 // zigzag transformation
297 blk[zscan[k]] = SCALER(RLE_VAL(rl) * iqtab[k] * q_scale, AAN_EXTRA);
298 // keep track of used columns to speed up the idtc
299 used_col |= (zscan[k] > 7) ? 1 << (zscan[k] & 7) : 0;
300 }
301
302 if (k == 0) used_col = -1;
303 // used_col is -1 for blocks with only the DC coefficient
304 // any other value is a bitmask of the columns that have
305 // at least one non zero cofficient in the rows 1-7
306 // single coefficients in row 0 are treted specially
307 // in the idtc function
308 idct(blk, used_col);
309 blk += DSIZE2;
310 }
311 return mdec_rl;
312}
313
// full scale (JPEG)
// Y/Cb/Cr[0...255] -> R/G/B[0...255]
// R = 1.000 * (Y) + 1.400 * (Cr - 128)
// G = 1.000 * (Y) - 0.343 * (Cb - 128) - 0.711 * (Cr - 128)
// B = 1.000 * (Y) + 1.765 * (Cb - 128)
//
// The integer factors below are those coefficients times 1024.
// MULY multiplies instead of shifting left, because the luma value may be
// negative and left-shifting a negative int is undefined behaviour.
#define MULR(a) ((1434 * (a)))
#define MULB(a) ((1807 * (a)))
#define MULG2(a, b) ((-351 * (a) - 728 * (b)))
#define MULY(a) ((a) * 1024)

// Pack 5-bit r/g/b plus the flag bits in a into one 16-bit pixel.
#define MAKERGB15(r, g, b, a) (SWAP16((a) | ((b) << 10) | ((g) << 5) | (r)))
// Rounding shifts applied to the sums above for 8-bit and 5-bit channels.
#define SCALE8(c) SCALER(c, 20)
#define SCALE5(c) SCALER(c, 23)
327
/* Add 16 to v and clamp the result to the range 0..31. */
static inline int clamp5(int v)
{
	const int biased = v + 16;

	if (biased < 0)
		return 0;
	if (biased > 31)
		return 31;
	return biased;
}
334
/* Add 128 to v and clamp the result to the range 0..255. */
static inline int clamp8(int v)
{
	const int biased = v + 128;

	if (biased < 0)
		return 0;
	if (biased > 255)
		return 255;
	return biased;
}
ef79bbde 341
/* Scale a colour sum down to channel width, then bias and clamp it. */
#define CLAMP_SCALE8(c) (clamp8(SCALE8(c)))
#define CLAMP_SCALE5(c) (clamp5(SCALE5(c)))
ef79bbde 344
528ad661 345static inline void putlinebw15(u16 *image, int *Yblk) {
ef79bbde
P
346 int i;
347 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
348
349 for (i = 0; i < 8; i++, Yblk++) {
350 int Y = *Yblk;
351 // missing rounding
3c3a80b2 352 image[i] = SWAP16((clamp5(Y >> 3) * 0x421) | A);
ef79bbde
P
353 }
354}
355
528ad661 356static inline void putquadrgb15(u16 *image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
357 int Y, R, G, B;
358 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
359 R = MULR(Cr);
360 G = MULG2(Cb, Cr);
361 B = MULB(Cb);
362
363 // added transparency
364 Y = MULY(Yblk[0]);
365 image[0] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
366 Y = MULY(Yblk[1]);
367 image[1] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
368 Y = MULY(Yblk[8]);
369 image[16] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
370 Y = MULY(Yblk[9]);
371 image[17] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
372}
373
528ad661 374static inline void yuv2rgb15(int *blk, unsigned short *image) {
ef79bbde
P
375 int x, y;
376 int *Yblk = blk + DSIZE2 * 2;
377 int *Crblk = blk;
378 int *Cbblk = blk + DSIZE2;
379
380 if (!Config.Mdec) {
381 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24) {
382 if (y == 8) Yblk += DSIZE2;
383 for (x = 0; x < 4; x++, image += 2, Crblk++, Cbblk++, Yblk += 2) {
384 putquadrgb15(image, Yblk, *Crblk, *Cbblk);
385 putquadrgb15(image + 8, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
386 }
387 }
388 } else {
389 for (y = 0; y < 16; y++, Yblk += 8, image += 16) {
390 if (y == 8) Yblk += DSIZE2;
391 putlinebw15(image, Yblk);
392 putlinebw15(image + 8, Yblk + DSIZE2);
393 }
394 }
395}
396
528ad661 397static inline void putlinebw24(u8 * image, int *Yblk) {
ef79bbde
P
398 int i;
399 unsigned char Y;
400 for (i = 0; i < 8 * 3; i += 3, Yblk++) {
3c3a80b2 401 Y = clamp8(*Yblk);
ef79bbde
P
402 image[i + 0] = Y;
403 image[i + 1] = Y;
404 image[i + 2] = Y;
405 }
406}
407
528ad661 408static inline void putquadrgb24(u8 * image, int *Yblk, int Cr, int Cb) {
ef79bbde
P
409 int Y, R, G, B;
410
411 R = MULR(Cr);
412 G = MULG2(Cb,Cr);
413 B = MULB(Cb);
414
415 Y = MULY(Yblk[0]);
416 image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
417 image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
418 image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
419 Y = MULY(Yblk[1]);
420 image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
421 image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
422 image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
423 Y = MULY(Yblk[8]);
424 image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
425 image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
426 image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
427 Y = MULY(Yblk[9]);
428 image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
429 image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
430 image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
431}
432
528ad661 433static void yuv2rgb24(int *blk, u8 *image) {
ef79bbde
P
434 int x, y;
435 int *Yblk = blk + DSIZE2 * 2;
436 int *Crblk = blk;
437 int *Cbblk = blk + DSIZE2;
438
439 if (!Config.Mdec) {
528ad661 440 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 8 * 3 * 3) {
ef79bbde
P
441 if (y == 8) Yblk += DSIZE2;
442 for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2) {
443 putquadrgb24(image, Yblk, *Crblk, *Cbblk);
444 putquadrgb24(image + 8 * 3, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
445 }
446 }
447 } else {
448 for (y = 0; y < 16; y++, Yblk += 8, image += 16 * 3) {
449 if (y == 8) Yblk += DSIZE2;
450 putlinebw24(image, Yblk);
451 putlinebw24(image + 8 * 3, Yblk + DSIZE2);
452 }
453 }
454}
455
456void mdecInit(void) {
c62b43c9 457 memset(&mdec, 0, sizeof(mdec));
458 memset(iq_y, 0, sizeof(iq_y));
459 memset(iq_uv, 0, sizeof(iq_uv));
ef79bbde 460 mdec.rl = (u16 *)&psxM[0x100000];
ef79bbde
P
461}
462
463// command register
464void mdecWrite0(u32 data) {
ef79bbde
P
465 mdec.reg0 = data;
466}
467
468u32 mdecRead0(void) {
ef79bbde
P
469 return mdec.reg0;
470}
471
472// status register
473void mdecWrite1(u32 data) {
ef79bbde
P
474 if (data & MDEC1_RESET) { // mdec reset
475 mdec.reg0 = 0;
476 mdec.reg1 = 0;
528ad661 477 mdec.pending_dma1.adr = 0;
478 mdec.block_buffer_pos = 0;
ef79bbde
P
479 }
480}
481
482u32 mdecRead1(void) {
483 u32 v = mdec.reg1;
ef79bbde
P
484 return v;
485}
486
487void psxDma0(u32 adr, u32 bcr, u32 chcr) {
b54a1ac7 488 u32 cmd = mdec.reg0, words_max = 0;
489 const void *mem;
ef79bbde 490 int size;
ef79bbde
P
491
492 if (chcr != 0x01000201) {
8c84ba5f 493 log_unhandled("mdec0: invalid dma %08x\n", chcr);
ef79bbde
P
494 return;
495 }
496
528ad661 497 /* mdec is STP till dma0 is released */
498 mdec.reg1 |= MDEC1_STP;
499
ef79bbde
P
500 size = (bcr >> 16) * (bcr & 0xffff);
501
b54a1ac7 502 adr &= ~3;
503 mem = getDmaRam(adr, &words_max);
504 if (mem == INVALID_PTR || size > words_max) {
505 log_unhandled("bad dma0 madr %x\n", adr);
506 HW_DMA0_CHCR &= SWAP32(~0x01000000);
507 return;
508 }
509
ef79bbde 510 switch (cmd >> 28) {
4f329f16 511 case 0x3: // decode 15/24bpp
b54a1ac7 512 mdec.rl = mem;
528ad661 513 /* now the mdec is busy till all data are decoded */
514 mdec.reg1 |= MDEC1_BUSY;
515 /* detect the end of decoding */
516 mdec.rl_end = mdec.rl + (size * 2);
517
518 /* sanity check */
4f329f16 519 if(mdec.rl_end <= mdec.rl)
520 break;
528ad661 521
522 /* process the pending dma1 */
523 if(mdec.pending_dma1.adr){
524 psxDma1(mdec.pending_dma1.adr, mdec.pending_dma1.bcr, mdec.pending_dma1.chcr);
525 }
526 mdec.pending_dma1.adr = 0;
527 return;
528
ef79bbde
P
529
530 case 0x4: // quantization table upload
531 {
b54a1ac7 532 const u8 *p = mem;
ef79bbde
P
533 // printf("uploading new quantization table\n");
534 // printmatrixu8(p);
535 // printmatrixu8(p + 64);
536 iqtab_init(iq_y, p);
537 iqtab_init(iq_uv, p + 64);
538 }
4f329f16 539 break;
ef79bbde
P
540
541 case 0x6: // cosine table
542 // printf("mdec cosine table\n");
4f329f16 543 break;
ef79bbde
P
544
545 default:
6c9db47c 546 log_unhandled("mdec: unknown command %08x\n", cmd);
ef79bbde
P
547 break;
548 }
549
9a0a61d2 550 set_event(PSXINT_MDECINDMA, size);
ef79bbde
P
551}
552
528ad661 553void mdec0Interrupt()
554{
ad418c19 555 if (HW_DMA0_CHCR & SWAP32(0x01000000))
556 {
557 HW_DMA0_CHCR &= SWAP32(~0x01000000);
558 DMA_INTERRUPT(0);
559 }
528ad661 560}
561
562#define SIZE_OF_24B_BLOCK (16*16*3)
563#define SIZE_OF_16B_BLOCK (16*16*2)
564
ef79bbde 565void psxDma1(u32 adr, u32 bcr, u32 chcr) {
b54a1ac7 566 u32 words, words_max = 0;
ef79bbde 567 int blk[DSIZE2 * 6];
528ad661 568 u8 * image;
ef79bbde 569 int size;
ef79bbde 570
8c84ba5f 571 if (chcr != 0x01000200) {
572 log_unhandled("mdec1: invalid dma %08x\n", chcr);
573 return;
574 }
ef79bbde 575
fc4803bd 576 words = (bcr >> 16) * (bcr & 0xffff);
528ad661 577 /* size in byte */
fc4803bd 578 size = words * 4;
528ad661 579
580 if (!(mdec.reg1 & MDEC1_BUSY)) {
581 /* add to pending */
582 mdec.pending_dma1.adr = adr;
583 mdec.pending_dma1.bcr = bcr;
584 mdec.pending_dma1.chcr = chcr;
585 /* do not free the dma */
b54a1ac7 586 return;
587 }
ef79bbde 588
b54a1ac7 589 adr &= ~3;
590 image = getDmaRam(adr, &words_max);
591 if (image == INVALID_PTR || words > words_max) {
592 log_unhandled("bad dma1 madr %x\n", adr);
593 HW_DMA1_CHCR &= SWAP32(~0x01000000);
594 return;
595 }
528ad661 596
597 if (mdec.reg0 & MDEC0_RGB24) {
598 /* 16 bits decoding
599 * block are 16 px * 16 px, each px are 2 byte
600 */
601
602 /* there is some partial block pending ? */
603 if(mdec.block_buffer_pos != 0) {
604 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_16B_BLOCK;
605 /* TODO: check if partial block do not larger than size */
606 memcpy(image, mdec.block_buffer_pos, n);
607 image += n;
608 size -= n;
609 mdec.block_buffer_pos = 0;
610 }
ef79bbde 611
528ad661 612 while(size >= SIZE_OF_16B_BLOCK) {
ef79bbde 613 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 614 yuv2rgb15(blk, (u16 *)image);
615 image += SIZE_OF_16B_BLOCK;
616 size -= SIZE_OF_16B_BLOCK;
ef79bbde 617 }
528ad661 618
619 if(size != 0) {
ef79bbde 620 mdec.rl = rl2blk(blk, mdec.rl);
528ad661 621 yuv2rgb15(blk, (u16 *)mdec.block_buffer);
622 memcpy(image, mdec.block_buffer, size);
623 mdec.block_buffer_pos = mdec.block_buffer + size;
624 }
625
626 } else {
627 /* 24 bits decoding
628 * block are 16 px * 16 px, each px are 3 byte
629 */
630
631 /* there is some partial block pending ? */
632 if(mdec.block_buffer_pos != 0) {
633 int n = mdec.block_buffer - mdec.block_buffer_pos + SIZE_OF_24B_BLOCK;
634 /* TODO: check if partial block do not larger than size */
635 memcpy(image, mdec.block_buffer_pos, n);
636 image += n;
637 size -= n;
638 mdec.block_buffer_pos = 0;
639 }
640
641 while(size >= SIZE_OF_24B_BLOCK) {
642 mdec.rl = rl2blk(blk, mdec.rl);
643 yuv2rgb24(blk, image);
644 image += SIZE_OF_24B_BLOCK;
645 size -= SIZE_OF_24B_BLOCK;
ef79bbde 646 }
ef79bbde 647
528ad661 648 if(size != 0) {
649 mdec.rl = rl2blk(blk, mdec.rl);
650 yuv2rgb24(blk, mdec.block_buffer);
651 memcpy(image, mdec.block_buffer, size);
652 mdec.block_buffer_pos = mdec.block_buffer + size;
653 }
654 }
b54a1ac7 655 if (size < 0)
656 log_unhandled("mdec: bork\n");
528ad661 657
b54a1ac7 658 /* define the power of mdec */
9cbce417 659 set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS + MDEC_DELAY);
b54a1ac7 660 /* some CPU stalling */
9cbce417 661 psxRegs.cycle += words * MDEC_BIAS / 4;
ef79bbde
P
662}
663
664void mdec1Interrupt() {
528ad661 665 /* Author : gschwind
666 *
667 * in that case we have done all decoding stuff
668 * Note that : each block end with 0xfe00 flags
669 * the list of blocks end with the same 0xfe00 flags
670 * data loock like :
671 *
672 * data block ...
673 * 0xfe00
674 * data block ...
675 * 0xfe00
676 * a lost of block ..
677 *
678 * 0xfe00
679 * the last block
680 * 0xfe00
681 * 0xfe00
682 *
683 * OR
684 *
685 * if the 0xfe00 is not present the data size is important.
686 *
687 */
688
ad418c19 689 /* MDEC_END_OF_DATA avoids read outside memory */
b54a1ac7 690 //printf("mdec left %zd, v=%04x\n", mdec.rl_end - mdec.rl, *(mdec.rl));
ad418c19 691 if (mdec.rl >= mdec.rl_end || SWAP16(*(mdec.rl)) == MDEC_END_OF_DATA) {
692 mdec.reg1 &= ~(MDEC1_STP|MDEC1_BUSY);
693 if (HW_DMA0_CHCR & SWAP32(0x01000000))
694 {
695 HW_DMA0_CHCR &= SWAP32(~0x01000000);
696 DMA_INTERRUPT(0);
697 }
ef79bbde 698 }
528ad661 699
ad418c19 700 if (HW_DMA1_CHCR & SWAP32(0x01000000))
701 {
702 HW_DMA1_CHCR &= SWAP32(~0x01000000);
703 DMA_INTERRUPT(1);
704 }
ef79bbde
P
705}
706
496d88d4 707int mdecFreeze(void *f, int Mode) {
b34d6a80 708 u8 *base = (u8 *)psxM;
8e1040b6 709 u32 v;
710
711 gzfreeze(&mdec.reg0, sizeof(mdec.reg0));
712 gzfreeze(&mdec.reg1, sizeof(mdec.reg1));
713
8e1040b6 714 v = (u8 *)mdec.rl - base;
715 gzfreeze(&v, sizeof(v));
b34d6a80 716 mdec.rl = (u16 *)(base + (v & 0x1ffffe));
8e1040b6 717 v = (u8 *)mdec.rl_end - base;
718 gzfreeze(&v, sizeof(v));
b34d6a80 719 mdec.rl_end = (u16 *)(base + (v & 0x1ffffe));
8e1040b6 720
721 v = 0;
722 if (mdec.block_buffer_pos)
b34d6a80 723 v = mdec.block_buffer_pos - mdec.block_buffer;
8e1040b6 724 gzfreeze(&v, sizeof(v));
725 mdec.block_buffer_pos = 0;
b34d6a80 726 if (v && v < sizeof(mdec.block_buffer))
727 mdec.block_buffer_pos = mdec.block_buffer;
8e1040b6 728
729 gzfreeze(&mdec.block_buffer, sizeof(mdec.block_buffer));
730 gzfreeze(&mdec.pending_dma1, sizeof(mdec.pending_dma1));
ef79bbde
P
731 gzfreeze(iq_y, sizeof(iq_y));
732 gzfreeze(iq_uv, sizeof(iq_uv));
733
734 return 0;
735}