1 /***************************************************************************
2 * Copyright (C) 2010 Gabriele Gorla *
3 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
24 #define DSIZE2 (DSIZE * DSIZE)
26 #define SCALE(x, n) ((x) >> (n))
27 #define SCALER(x, n) (((x) + ((1 << (n)) >> 1)) >> (n))
29 #define AAN_CONST_BITS 12
30 #define AAN_PRESCALE_BITS 16
32 #define AAN_CONST_SIZE 24
33 #define AAN_CONST_SCALE (AAN_CONST_SIZE - AAN_CONST_BITS)
35 #define AAN_PRESCALE_SIZE 20
36 #define AAN_PRESCALE_SCALE (AAN_PRESCALE_SIZE-AAN_PRESCALE_BITS)
39 #define FIX_1_082392200 SCALER(18159528, AAN_CONST_SCALE) // B6
40 #define FIX_1_414213562 SCALER(23726566, AAN_CONST_SCALE) // A4
41 #define FIX_1_847759065 SCALER(31000253, AAN_CONST_SCALE) // A2
42 #define FIX_2_613125930 SCALER(43840978, AAN_CONST_SCALE) // B2
44 #define MULS(var, const) (SCALE((var) * (const), AAN_CONST_BITS))
46 #define RLE_RUN(a) ((a) >> 10)
47 #define RLE_VAL(a) (((int)(a) << (sizeof(int) * 8 - 10)) >> (sizeof(int) * 8 - 10))
50 static void printmatrixu8(u8 *m) {
52 for(i = 0; i < DSIZE2; i++) {
54 if((i+1) % 8 == 0) printf("\n");
59 static inline void fillcol(int *blk, int val) {
60 blk[0 * DSIZE] = blk[1 * DSIZE] = blk[2 * DSIZE] = blk[3 * DSIZE]
61 = blk[4 * DSIZE] = blk[5 * DSIZE] = blk[6 * DSIZE] = blk[7 * DSIZE] = val;
64 static inline void fillrow(int *blk, int val) {
65 blk[0] = blk[1] = blk[2] = blk[3]
66 = blk[4] = blk[5] = blk[6] = blk[7] = val;
69 void idct(int *block,int used_col) {
70 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
71 int z5, z10, z11, z12, z13;
75 // the block has only the DC coefficient
78 for (i = 0; i < DSIZE2; i++) block[i] = v;
82 // last_col keeps track of the highest column with non zero coefficients
84 for (i = 0; i < DSIZE; i++, ptr++) {
85 if ((used_col & (1 << i)) == 0) {
86 // the column is empty or has only the DC coefficient
94 // further optimization could be made by keeping track of
96 z10 = ptr[DSIZE * 0] + ptr[DSIZE * 4]; // s04
97 z11 = ptr[DSIZE * 0] - ptr[DSIZE * 4]; // d04
98 z13 = ptr[DSIZE * 2] + ptr[DSIZE * 6]; // s26
99 z12 = MULS(ptr[DSIZE * 2] - ptr[DSIZE * 6], FIX_1_414213562) - z13;
100 //^^^^ d26=d26*2*A4-s26
102 tmp0 = z10 + z13; // os07 = s04 + s26
103 tmp3 = z10 - z13; // os34 = s04 - s26
104 tmp1 = z11 + z12; // os16 = d04 + d26
105 tmp2 = z11 - z12; // os25 = d04 - d26
107 z13 = ptr[DSIZE * 3] + ptr[DSIZE * 5]; //s53
108 z10 = ptr[DSIZE * 3] - ptr[DSIZE * 5]; //-d53
109 z11 = ptr[DSIZE * 1] + ptr[DSIZE * 7]; //s17
110 z12 = ptr[DSIZE * 1] - ptr[DSIZE * 7]; //d17
112 tmp7 = z11 + z13; // od07 = s17 + s53
114 z5 = (z12 - z10) * (FIX_1_847759065);
115 tmp6 = SCALE(z10*(FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
116 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
117 tmp4 = SCALE(z12*(FIX_1_082392200) - z5, AAN_CONST_BITS) + tmp5;
120 //z5 = (z12 - z10)* FIX_1_847759065;
121 // tmp0 = (d17 + d53) * 2*A2
123 //tmp6 = DESCALE(z10*FIX_2_613125930 + z5, CONST_BITS) - tmp7;
124 // od16 = (d53*-2*B2 + tmp0) - od07
126 //tmp4 = DESCALE(z12*FIX_1_082392200 - z5, CONST_BITS) + tmp5;
127 // od34 = (d17*2*B6 - tmp0) + od25
131 // od34 = d17*2*(B6-A2) - d53*2*A2
132 // od16 = d53*2*(A2-B2) + d17*2*A2
136 // tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
137 // od25 = (s17 - s53)*2*A4 - od16
139 ptr[DSIZE * 0] = (tmp0 + tmp7); // os07 + od07
140 ptr[DSIZE * 7] = (tmp0 - tmp7); // os07 - od07
141 ptr[DSIZE * 1] = (tmp1 + tmp6); // os16 + od16
142 ptr[DSIZE * 6] = (tmp1 - tmp6); // os16 - od16
143 ptr[DSIZE * 2] = (tmp2 + tmp5); // os25 + od25
144 ptr[DSIZE * 5] = (tmp2 - tmp5); // os25 - od25
145 ptr[DSIZE * 4] = (tmp3 + tmp4); // os34 + od34
146 ptr[DSIZE * 3] = (tmp3 - tmp4); // os34 - od34
151 for (i = 0; i < DSIZE; i++)
152 fillrow(block + DSIZE * i, block[DSIZE * i]);
154 for (i = 0; i < DSIZE; i++, ptr += DSIZE) {
155 z10 = ptr[0] + ptr[4];
156 z11 = ptr[0] - ptr[4];
157 z13 = ptr[2] + ptr[6];
158 z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
165 z13 = ptr[3] + ptr[5];
166 z10 = ptr[3] - ptr[5];
167 z11 = ptr[1] + ptr[7];
168 z12 = ptr[1] - ptr[7];
171 z5 = (z12 - z10) * FIX_1_847759065;
172 tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
173 tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
174 tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
176 ptr[0] = tmp0 + tmp7;
178 ptr[7] = tmp0 - tmp7;
179 ptr[1] = tmp1 + tmp6;
180 ptr[6] = tmp1 - tmp6;
181 ptr[2] = tmp2 + tmp5;
182 ptr[5] = tmp2 - tmp5;
183 ptr[4] = tmp3 + tmp4;
184 ptr[3] = tmp3 - tmp4;
189 // mdec0: command register
190 #define MDEC0_STP 0x02000000
191 #define MDEC0_RGB24 0x08000000
192 #define MDEC0_SIZE_MASK 0xFFFF
194 // mdec1: status register
195 #define MDEC1_BUSY 0x20000000
196 #define MDEC1_DREQ 0x18000000
197 #define MDEC1_FIFO 0xc0000000
198 #define MDEC1_RGB24 0x02000000
199 #define MDEC1_STP 0x00800000
200 #define MDEC1_RESET 0x80000000
209 static int iq_y[DSIZE2], iq_uv[DSIZE2];
211 static int zscan[DSIZE2] = {
212 0 , 1 , 8 , 16, 9 , 2 , 3 , 10,
213 17, 24, 32, 25, 18, 11, 4 , 5 ,
214 12, 19, 26, 33, 40, 48, 41, 34,
215 27, 20, 13, 6 , 7 , 14, 21, 28,
216 35, 42, 49, 56, 57, 50, 43, 36,
217 29, 22, 15, 23, 30, 37, 44, 51,
218 58, 59, 52, 45, 38, 31, 39, 46,
219 53, 60, 61, 54, 47, 55, 62, 63
222 static int aanscales[DSIZE2] = {
223 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
224 1454417, 2017334, 1900287, 1710213, 1454417, 1142728, 787125, 401273,
225 1370031, 1900287, 1790031, 1610986, 1370031, 1076426, 741455, 377991,
226 1232995, 1710213, 1610986, 1449849, 1232995, 968758, 667292, 340183,
227 1048576, 1454417, 1370031, 1232995, 1048576, 823861, 567485, 289301,
228 823861, 1142728, 1076426, 968758, 823861, 647303, 445870, 227303,
229 567485, 787125, 741455, 667292, 567485, 445870, 307121, 156569,
230 289301, 401273, 377991, 340183, 289301, 227303, 156569, 79818
233 static void iqtab_init(int *iqtab, unsigned char *iq_y) {
236 for (i = 0; i < DSIZE2; i++) {
237 iqtab[i] = (iq_y[i] * SCALER(aanscales[zscan[i]], AAN_PRESCALE_SCALE));
241 #define MDEC_END_OF_DATA 0xfe00
243 unsigned short *rl2blk(int *blk, unsigned short *mdec_rl) {
244 int i, k, q_scale, rl, used_col;
247 memset(blk, 0, 6 * DSIZE2 * sizeof(int));
249 for (i = 0; i < 6; i++) {
250 // decode blocks (Cr,Cb,Y1,Y2,Y3,Y4)
251 if (i == 2) iqtab = iq_y;
253 rl = SWAP16(*mdec_rl); mdec_rl++;
254 q_scale = RLE_RUN(rl);
255 blk[0] = SCALER(iqtab[0] * RLE_VAL(rl), AAN_EXTRA - 3);
256 for (k = 0, used_col = 0;;) {
257 rl = SWAP16(*mdec_rl); mdec_rl++;
258 if (rl == MDEC_END_OF_DATA) break;
259 k += RLE_RUN(rl) + 1; // skip zero-coefficients
262 // printf("run lenght exceeded 64 enties\n");
266 // zigzag transformation
267 blk[zscan[k]] = SCALER(RLE_VAL(rl) * iqtab[k] * q_scale, AAN_EXTRA);
268 // keep track of used columns to speed up the idtc
269 used_col |= (zscan[k] > 7) ? 1 << (zscan[k] & 7) : 0;
272 if (k == 0) used_col = -1;
273 // used_col is -1 for blocks with only the DC coefficient
274 // any other value is a bitmask of the columns that have
275 // at least one non zero cofficient in the rows 1-7
276 // single coefficients in row 0 are treted specially
277 // in the idtc function
285 // Y/Cb/Cr[0...255] -> R/G/B[0...255]
286 // R = 1.000 * (Y) + 1.400 * (Cr - 128)
287 // G = 1.000 * (Y) - 0.343 * (Cb - 128) - 0.711 (Cr - 128)
288 // B = 1.000 * (Y) + 1.765 * (Cb - 128)
289 #define MULR(a) ((1434 * (a)))
290 #define MULB(a) ((1807 * (a)))
291 #define MULG2(a, b) ((-351 * (a) - 728 * (b)))
292 #define MULY(a) ((a) << 10)
294 #define MAKERGB15(r, g, b, a) (SWAP16(a | ((b) << 10) | ((g) << 5) | (r)))
295 #define SCALE8(c) SCALER(c, 20)
296 #define SCALE5(c) SCALER(c, 23)
298 #define CLAMP5(c) ( ((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)) )
299 #define CLAMP8(c) ( ((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)) )
301 #define CLAMP_SCALE8(a) (CLAMP8(SCALE8(a)))
302 #define CLAMP_SCALE5(a) (CLAMP5(SCALE5(a)))
304 static inline void putlinebw15(unsigned short *image, int *Yblk) {
306 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
308 for (i = 0; i < 8; i++, Yblk++) {
311 image[i] = SWAP16((CLAMP5(Y >> 3) * 0x421) | A);
315 static void putquadrgb15(unsigned short *image, int *Yblk, int Cr, int Cb) {
317 int A = (mdec.reg0 & MDEC0_STP) ? 0x8000 : 0;
322 // added transparency
324 image[0] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
326 image[1] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
328 image[16] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
330 image[17] = MAKERGB15(CLAMP_SCALE5(Y + R), CLAMP_SCALE5(Y + G), CLAMP_SCALE5(Y + B), A);
333 static void yuv2rgb15(int *blk, unsigned short *image) {
335 int *Yblk = blk + DSIZE2 * 2;
337 int *Cbblk = blk + DSIZE2;
340 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24) {
341 if (y == 8) Yblk += DSIZE2;
342 for (x = 0; x < 4; x++, image += 2, Crblk++, Cbblk++, Yblk += 2) {
343 putquadrgb15(image, Yblk, *Crblk, *Cbblk);
344 putquadrgb15(image + 8, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
348 for (y = 0; y < 16; y++, Yblk += 8, image += 16) {
349 if (y == 8) Yblk += DSIZE2;
350 putlinebw15(image, Yblk);
351 putlinebw15(image + 8, Yblk + DSIZE2);
356 static inline void putlinebw24(unsigned char *image, int *Yblk) {
359 for (i = 0; i < 8 * 3; i += 3, Yblk++) {
367 static void putquadrgb24(unsigned char *image, int *Yblk, int Cr, int Cb) {
375 image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
376 image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
377 image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
379 image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
380 image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
381 image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
383 image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
384 image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
385 image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
387 image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
388 image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
389 image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
392 static void yuv2rgb24(int *blk, unsigned char *image) {
394 int *Yblk = blk + DSIZE2 * 2;
396 int *Cbblk = blk + DSIZE2;
399 for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 24 * 3) {
400 if (y == 8) Yblk += DSIZE2;
401 for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2) {
402 putquadrgb24(image, Yblk, *Crblk, *Cbblk);
403 putquadrgb24(image + 8 * 3, Yblk + DSIZE2, *(Crblk + 4), *(Cbblk + 4));
407 for (y = 0; y < 16; y++, Yblk += 8, image += 16 * 3) {
408 if (y == 8) Yblk += DSIZE2;
409 putlinebw24(image, Yblk);
410 putlinebw24(image + 8 * 3, Yblk + DSIZE2);
415 void mdecInit(void) {
416 mdec.rl = (u16 *)&psxM[0x100000];
422 void mdecWrite0(u32 data) {
424 CDR_LOG("mdec0 write %08x\n", data);
429 u32 mdecRead0(void) {
431 CDR_LOG("mdec0 read %08x\n", mdec.reg0);
433 // mame is returning 0
438 void mdecWrite1(u32 data) {
440 CDR_LOG("mdec1 write %08x\n", data);
442 if (data & MDEC1_RESET) { // mdec reset
448 u32 mdecRead1(void) {
450 v |= (mdec.reg0 & MDEC0_STP) ? MDEC1_STP : 0;
451 v |= (mdec.reg0 & MDEC0_RGB24) ? MDEC1_RGB24 : 0;
453 CDR_LOG("mdec1 read %08x\n", v);
458 void psxDma0(u32 adr, u32 bcr, u32 chcr) {
463 CDR_LOG("DMA0 %08x %08x %08x\n", adr, bcr, chcr);
466 if (chcr != 0x01000201) {
467 // printf("chcr != 0x01000201\n");
471 size = (bcr >> 16) * (bcr & 0xffff);
475 mdec.rl = (u16 *)PSXM(adr);
476 mdec.rlsize = mdec.reg0 & MDEC0_SIZE_MASK;
479 case 0x4: // quantization table upload
481 u8 *p = (u8 *)PSXM(adr);
482 // printf("uploading new quantization table\n");
484 // printmatrixu8(p + 64);
486 iqtab_init(iq_uv, p + 64);
490 case 0x6: // cosine table
491 // printf("mdec cosine table\n");
495 // printf("mdec unknown command\n");
499 HW_DMA0_CHCR &= SWAP32(~0x01000000);
503 void psxDma1(u32 adr, u32 bcr, u32 chcr) {
505 unsigned short *image;
509 CDR_LOG("DMA1 %08x %08x %08x (cmd = %08x)\n", adr, bcr, chcr, mdec.reg0);
512 if (chcr != 0x01000200) return;
514 size = (bcr >> 16) * (bcr & 0xffff);
516 image = (u16 *)PSXM(adr);
518 if (mdec.reg0 & MDEC0_RGB24) { // 15-b decoding
519 // MDECOUTDMA_INT(((size * (1000000 / 9000)) / 4) /** 4*/);
520 MDECOUTDMA_INT(size / 4);
521 size = size / ((16 * 16) / 2);
522 for (; size > 0; size--, image += (16 * 16)) {
523 mdec.rl = rl2blk(blk, mdec.rl);
524 yuv2rgb15(blk, image);
526 } else { // 24-b decoding
527 // MDECOUTDMA_INT(((size * (1000000 / 9000)) / 4) /** 4*/);
528 MDECOUTDMA_INT(size / 4);
529 size = size / ((24 * 16) / 2);
530 for (; size > 0; size--, image += (24 * 16)) {
531 mdec.rl = rl2blk(blk, mdec.rl);
532 yuv2rgb24(blk, (u8 *)image);
536 mdec.reg1 |= MDEC1_BUSY;
539 void mdec1Interrupt() {
541 CDR_LOG("mdec1Interrupt\n");
543 if (HW_DMA1_CHCR & SWAP32(0x01000000)) {
544 // Set a fixed value totaly arbitrarie another sound value is
545 // PSXCLK / 60 or PSXCLK / 50 since the bug happened at end of frame.
546 // PSXCLK / 1000 seems good for FF9. (for FF9 need < ~28000)
547 // CAUTION: commented interrupt-handling may lead to problems, keep an eye ;-)
548 MDECOUTDMA_INT(PSXCLK / 1000 * BIAS);
549 // psxRegs.interrupt |= 0x02000000;
550 // psxRegs.intCycle[5 + 24 + 1] *= 8;
551 // psxRegs.intCycle[5 + 24] = psxRegs.cycle;
552 HW_DMA1_CHCR &= SWAP32(~0x01000000);
555 mdec.reg1 &= ~MDEC1_BUSY;
559 int mdecFreeze(gzFile f, int Mode) {
560 gzfreeze(&mdec, sizeof(mdec));
561 gzfreeze(iq_y, sizeof(iq_y));
562 gzfreeze(iq_uv, sizeof(iq_uv));