[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // pass-through so 'if (unlikely(...))' still compiles
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
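
/*
 * Worked example for update_width() above (editorial; illustrative
 * values): with hres bits ((status >> 16) & 7) == 2 the mode is 320
 * pixels wide with hdiv 8; for the common NTSC range x1=0x260 (608)
 * and x2=0xc60 (3168), sw is 2560 GPU clocks, so sw/hdiv gives 320
 * visible pixels and x = (608 - 608) / 8 = 0.
 */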

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
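
/*
 * Example for update_height() above (editorial; illustrative values):
 * y1=16, y2=240 on NTSC gives sh = 224 and y = 16 - 16 = 0; with
 * DHEIGHT set, y, sh, vres and the centering tolerance all double,
 * yielding a 448-line picture within a 480-line frame.
 */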

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
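
/*
 * The unsigned compares above fold two range checks into one: since x
 * and src_x are unsigned, (x - src_x) >= w holds both when x < src_x
 * (the subtraction wraps to a huge value) and when x >= src_x + w,
 * i.e. whenever the draw area origin lies outside the displayed
 * rectangle.
 */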

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler that gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero platform (MIPS), align to 8192 bytes (1 TLB entry)
  // to reduce the number of fills. (Will change this value if it ever
  // gets large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif

static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN - 1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN - 1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
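
/*
 * Note (editorial): the allocation is oversized by VRAM_ALIGN-1 bytes
 * so the pointer can be rounded up without running out of space, and
 * the extra 4 KB included in VRAM_SIZE becomes a guard area in front
 * of the buffer for small negative overdraw.
 */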

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

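// Number of parameter words that follow each GP0 command word, i.e. a
// command occupies 1 + cmd_lengths[cmd] words in the stream.  Example:
// 0x28 (monochrome quad) takes 4 vertex words, hence its entry is 4.
// Poly-lines (0x48..0x5f) hold only their minimum size and get extended
// at parse time by scanning for the 0x5xxx5xxx terminator.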
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
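
/*
 * Example for do_vram_io() above (editorial; hypothetical numbers):
 * for a 16x2-pixel write at (0,0) fed 12 words (24 pixels), the main
 * loop stores one full 16-pixel line, the tail stores 8 pixels of the
 * second line, and dma.offset is left at 8 so the next call resumes
 * mid-line.
 */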

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *)VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
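
/*
 * The ((size - 1) & mask) + 1 form above makes a zero size wrap to the
 * maximum: a size_word of 0 yields a 1024x512 transfer, which matches
 * the masked-size behaviour of the real GPU.
 */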

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
}

static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
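
/*
 * The buffered path in do_vram_copy() covers what a plain row memcpy
 * cannot: rows that overlap forward (sx < dx < sx+w), horizontal
 * wrap-around past x=1023, and the mask-set bit (e6 bit 0, shifted
 * into msb) that must be ORed into every destination pixel.
 */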

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
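
/*
 * Poly-line packets (0x48..0x5f) have no fixed length, so the scans
 * above look for the terminator word, detected as
 * (word & 0xf000f000) == 0x50005000; the shaded variants step by 2
 * words because every second word is a colour.
 */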

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
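
/*
 * GPUSTAT mirrors draw state: bits 0-10 come from the e1 texture-page
 * command and bits 11-12 from the e6 mask settings, which is why
 * do_cmd_buffer() rebuilds them from ex_regs after every pass.
 */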

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
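
/*
 * Single-word writes are only queued here; execution happens once the
 * buffer fills or another entry point (status write, DMA chain, VRAM
 * read) flushes it, giving multi-word commands a chance to arrive
 * completely.
 */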

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
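
/*
 * Loop protection in GPUdmaChain(): after LD_THRESHOLD headers the
 * walker starts setting bit 23 in each visited list header (an address
 * bit that causes a DMA error on real hardware, so games never set
 * it); reaching a tagged header terminates the (addr & 0x800000) walk,
 * and the markers are cleared again afterwards.
 */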

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
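
/*
 * Layout note: ulControl holds the GP1 register file at the start and
 * the shadowed e0-e7 draw-state words at offset 0xe0, matching their
 * command numbers; the memcpy offsets in GPUfreeze() below rely on
 * this.
 */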

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}
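
/*
 * About the xor trick in the load path: GPUwriteStatus() skips writes
 * whose value already matches gpu.regs[cmd], so each restored register
 * is first flipped in the shadow copy to guarantee the replayed write
 * is applied.
 */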

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab