libretro: drop the Frame Duping option
plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // fallback so `if (unlikely(...))` still compiles
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

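/* Derive the visible width from the horizontal display range (GP1 0x06)
 * and the resolution selected by status bits 16-18: hres_all[] holds the
 * nominal pixel width, hdivs[] the matching dotclock divider used to
 * convert the range registers to pixels. The 608/656 base offsets and
 * the auto-centering thresholds below are empirical. */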
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

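/* Frameskip bookkeeping, run once per display-address flip (GP1 0x05).
 * cnt counts consecutively skipped frames against the user-set limit in
 * .set; .force and .advice are flags the frontend raises when emulation
 * falls behind. A fill (0x02) seen while skipping is kept in
 * pending_fill and replayed once rendering resumes, so the next drawn
 * frame starts from the cleared background the game expected. */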
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

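/* GP1 0x10..0x1f: latch "GPU info" into the GP0 read port. The low
 * nibble selects what ends up in gp0: 0x02 texture window (e2),
 * 0x03/0x04 draw area corners (e3/e4), 0x05 draw offset (e5), 0x07 a
 * hardcoded GPU type of 2; other values leave gp0 unchanged. */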
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of fills. (Will change this value if it ever gets large page
  // support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

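/* GP1 control writes: command in the top byte, argument below. Writes
 * that repeat the current register value are dropped early, except for
 * 0x00/0x01 (resets) and 0x05 (display address, which does its own
 * change check below). */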
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

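// Number of extra parameter words following each GP0 command byte. The
// variable-length cases are handled elsewhere: polylines (0x48-0x5f) are
// scanned for their terminator word (matched via the 0xf000f000 mask
// below), and VRAM transfers (0xa0/0xc0) are followed by a pixel payload
// sized by their size word.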
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

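/* Stream data between the guest and the VRAM rectangle set up by
 * start_vram_transfer(). Data arrives in arbitrarily sized word bursts,
 * so gpu.dma keeps the cursor (current row, rows left, offset into the
 * row) across calls; the msb path forces the mask bit on writes when
 * ex_regs[6] requests it. Returns how many input words were consumed. */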
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

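/* VRAM-to-VRAM copy (GP0 0x80 class). The fast path is a straight
 * per-row memcpy; the slow path goes through a small line buffer
 * whenever the rows could overlap, either rectangle wraps at x=1024,
 * or the mask bit must be set on the destination pixels. */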
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

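/* Parse a command stream without rendering it (frameskip path). State
 * still has to be tracked: texture page bits from textured primitives
 * and the 0xe1-0xe7 settings are latched into ex_regs, fills (0x02) are
 * either executed anyway (if they clear a large area) or saved in
 * pending_fill, and 0xe3 (draw area) may cancel skipping via
 * decide_frameskip_allow(). Returns the number of words consumed, with
 * the last command byte in *last_cmd (-1 if it was incomplete). */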
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

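/* do_cmd_buffer() returns the number of words it could not consume
 * (a trailing incomplete command); keep those at the start of
 * gpu.cmd_buffer so later writes can complete them. */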
static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

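/* Walk a GPU DMA linked list: each node is a header word whose top byte
 * is the payload length in words and whose low 24 bits point at the
 * next node, terminated by an address with bit 23 set. Since games can
 * build circular lists, after LD_THRESHOLD nodes the walker starts
 * marking visited headers with that same bit 23 and unmarks them when
 * done; the cpu_cycles cost accumulated per node is a rough timing
 * approximation. */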
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

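/* Savestate block exchanged with the emu core. The layout (version,
 * status, 256 control words, full 2 MB VRAM image) appears to follow the
 * long-standing PCSX GPU plugin freeze interface, which is why gpulib
 * stashes its ex_regs in the ulControl[0xe0..0xe7] slots. */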
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab