gpulib: add a "borderless" option to restore old behavior
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
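
// Worked example (a sketch using the common NTSC default display range,
// not taken from any particular game): with GP1(06h) x1=0x260 (608),
// x2=0xC60 (3168) and a 320-wide mode (hdiv=8):
//   x  = (608 - 608) / 8 = 0
//   sw = (3168 - 608) / 8 = 320; (320 + 2) & ~3 = 320
// so the full 320-pixel width is shown at offset 0. Games that program a
// nonstandard x1 end up with a nonzero x, which the auto-centering above
// may snap back to x_auto when it is only slightly off.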

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
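
// Example: an NTSC game programming y1=16, y2=256 gives y = 16 - 16 = 0 and
// sh = 240, i.e. a full, exactly centered 240-line picture. The 16/39 line
// offsets above are empirical per-game tuning (see "39 for spyro"), not
// values from official documentation.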

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
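
// The single unsigned compare (uint32_t)(x - src_x) >= w above covers both
// "x < src_x" and "x >= src_x + w" at once: a negative difference wraps to
// a huge unsigned value. E.g. with src_x=0, w=320, a draw area at x=0x3ff
// gives 1023 >= 320, so it is off-screen and skipping remains allowed.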

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// VRAM must be at least 16-byte aligned for the pixel-skipping
// renderer/downscaler that gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Change this if it ever gets large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
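
// Rough memory layout after a successful map_vram(), assuming the raw
// allocation starts at some address A:
//   A .. A+4095 (at least)          : guard zone for small negative overdraw
//   align_up(A + 4096, VRAM_ALIGN)  : gpu.vram, the 1024x512 16bpp VRAM
// The doubled VRAM_SIZE above likewise guards against overdraw past the end.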

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
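
// Example GP1(08h) decoding (a sketch; values follow from the shifts above):
// writing 0x08000001 sets status bit 17, so update_width() indexes
// hres_all[]/hdivs[] with 2 and picks the 320-pixel mode (hdiv 8), while
// 0x08000040 routes data bit 6 to status bit 16 via the (data & 0x40) << 10
// shuffle and selects a 368-wide mode instead.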

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
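
// Length lookup example: a flat triangle (cmd 0x20) has cmd_lengths[] 3, so
// the parsers consume 1 + 3 = 4 words (command word plus 3 vertices). For
// the 0x48-0x5f polylines the table only holds the minimum length; the real
// end is found by scanning for the 0x5xxx5xxx terminator word (see
// do_cmd_list_skip() below).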

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
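
// Transfer bookkeeping example (hypothetical numbers): a 16x4 write fed
// 20 words (40 pixels) in one packet copies two full 16-pixel rows, then
// stores the 8 leftover pixels and leaves dma.offset=8, dma.h=2, so the
// next packet resumes mid-row via the gpu.dma.offset branch above.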

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
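
// The lbuf staging above handles the tricky cases in 128-pixel chunks:
// forward-overlapping rows (sx < dx < sx + w), horizontal wraparound past
// x=1023, and forced mask bits (msb != 0). All remaining cases are safe for
// the plain per-row memcpy() fast path.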

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
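
// Loop detection sketch: once count passes LD_THRESHOLD, the walker above
// starts OR-ing bit 23 into each visited link header. Revisiting a marked
// node makes (addr & 0x800000) nonzero and terminates the walk (real
// hardware raises a DMA error for such addresses, so games never set that
// bit themselves). The cleanup pass then strips the markers again.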

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should always be 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
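
// Caller-side sketch (hypothetical emu-core usage, not part of this file):
//
//   struct GPUFreeze *f = calloc(1, sizeof(*f));
//   f->ulFreezeVersion = 1;
//   GPUfreeze(1, f);  // save: snapshot vram/regs/status into *f
//   /* ... later ... */
//   GPUfreeze(0, f);  // load: restore and replay GP1 regs
//   free(f);
//
// On load, the regs[i] ^= 1 trick above defeats the "same value written
// again" early-out in GPUwriteStatus() so every register is reapplied.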

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.hres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab