/*
 * gpulib: add a "borderless" option to restore old behavior
 * plugins/gpulib/gpu.c
 */
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include "gpu.h"
15#include "../../libpcsxcore/gpu.h" // meh
16#include "../../frontend/plugin_lib.h"
17
18#ifndef ARRAY_SIZE
19#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
20#endif
21#ifdef __GNUC__
22#define unlikely(x) __builtin_expect((x), 0)
23#define preload __builtin_prefetch
24#define noinline __attribute__((noinline))
25#else
26#define unlikely(x)
27#define preload(...)
28#define noinline
29#endif
30
31//#define log_io gpu_log
32#define log_io(...)
33
34struct psx_gpu gpu;
35
36static noinline int do_cmd_buffer(uint32_t *data, int count);
37static void finish_vram_transfer(int is_read);
38
// GP1(0x01)-style command reset: drain any buffered GP0 words and close an
// unfinished vram transfer so the command/DMA state is clean afterwards.
static noinline void do_cmd_reset(void)
{
  // process what is queued first, then discard any leftover tail
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // an in-flight vram transfer must be finished (renderer notified),
  // not just dropped
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
49
50static noinline void do_reset(void)
51{
52 unsigned int i;
53
54 do_cmd_reset();
55
56 memset(gpu.regs, 0, sizeof(gpu.regs));
57 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
58 gpu.ex_regs[i] = (0xe0 + i) << 24;
59 gpu.status = 0x14802000;
60 gpu.gp0 = 0;
61 gpu.regs[3] = 1;
62 gpu.screen.hres = gpu.screen.w = 256;
63 gpu.screen.vres = gpu.screen.h = 240;
64 gpu.screen.x = gpu.screen.y = 0;
65 renderer_sync_ecmds(gpu.ex_regs);
66 renderer_notify_res_change();
67}
68
// Recompute the visible horizontal area (gpu.screen.x/w/hres) from the
// display mode bits in gpu.status and the h-range regs x1/x2, applying
// the user's screen centering preference.
static noinline void update_width(void)
{
  // output widths and dotclock dividers for the 8 hres mode combinations
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1; // display width in dotclock units
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    // clamp so the visible rect stays inside the mode's width
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
113
// Recompute the visible vertical area (gpu.screen.y/h/vres) from the
// v-range regs y1/y2, the PAL/NTSC bit and the double-height bit,
// applying the user's screen centering preference.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // double-height mode: all vertical quantities double
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    // clamp so the visible rect stays inside vres
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
154
// Advance the frameskip state machine (called on a "flip", i.e. display
// address change) and decide whether the upcoming frame gets skipped.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // activate when the frontend advises it (auto mode), or keep skipping
  // while fewer than .set consecutive frames have been skipped
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    // a fill deferred by do_cmd_list_skip() must run now that we draw again
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
177
178static noinline int decide_frameskip_allow(uint32_t cmd_e3)
179{
180 // no frameskip if it decides to draw to display area,
181 // but not for interlace since it'll most likely always do that
182 uint32_t x = cmd_e3 & 0x3ff;
183 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
184 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
185 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
186 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
187 return gpu.frameskip.allow;
188}
189
190static void flush_cmd_buffer(void);
191
// GP1(0x10) "get GPU info": latch the requested internal value into
// gpu.gp0, where the next GPUreadData() will pick it up.
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02: // e2: texture window
  case 0x03: // e3: draw area top-left
  case 0x04: // e4: draw area bottom-right
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05: // e5: draw offset
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07: // GPU type/version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
213
214// double, for overdraw guard
215#define VRAM_SIZE (1024 * 512 * 2 * 2)
216
217static int map_vram(void)
218{
219 gpu.vram = gpu.mmap(VRAM_SIZE);
220 if (gpu.vram != NULL) {
221 gpu.vram += 4096 / 2;
222 return 0;
223 }
224 else {
225 fprintf(stderr, "could not map vram, expect crashes\n");
226 return -1;
227 }
228}
229
// Plugin API: initialize video output, renderer and GPU state.
// Returns 0 on success, non-zero/-1 on (partial) failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point the counter pointers at a dummy until the frontend supplies
  // real ones via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // gpu.mmap may not be set yet; mapping is retried in GPUrearmedCallbacks()
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
250
// Plugin API: tear down renderer/vout and release the vram mapping.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2; // undo the offset applied in map_vram()
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
265
// Plugin API: GP1 control register write (command in bits 24-31).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // filter out same-value writes, except for commands with side
    // effects: 0x00/0x01 (resets) and 0x05 (drives frameskip below)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00: // reset GPU
    do_reset();
    break;
  case 0x01: // reset command buffer
    do_cmd_reset();
    break;
  case 0x03: // display enable/disable
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04: // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05: // start of display area (scanout position in vram)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        // a scanout change counts as a "flip" for frameskip pacing
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06: // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07: // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08: // display mode (hres/vres/PAL/interlace bits -> status)
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10) // 0x10..0x1f: get GPU info
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
339
// Number of parameter words following each GP0 command byte (the command
// word itself is not counted). Variable-length poly-lines (0x48+) get
// their terminator-scanned tail added on top of this in the parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11, // 30
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 50
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, // 70
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80 - vram->vram copy
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 90
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0 - cpu->vram write
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // b0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0 - vram->cpu read
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0 - settings
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // f0
};
359
360#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
361
// Copy l 16bpp pixels from src to dst, ORing the given mask bit value
// (msb, either 0x8000 or 0) into every pixel written.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
368
// Transfer one horizontal run of l pixels between vram at (x, y) and mem.
// On writes, a non-zero msb (mask-set bit) is ORed into every pixel.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
380
// Continue the active vram transfer (set up by start_vram_transfer) with
// `count` words of data. The transfer state in gpu.dma is updated so it
// can be resumed across calls. Returns the number of words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit from GP0(0xe6)
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset; // pixels already done in the current line
  int l;
  count *= 2; // operate in 16bpp pixels

  // first, complete a partially transferred line from the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l; // line still not finished
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // then whole lines, wrapping y at the 512-line vram height
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      // a trailing partial line; remember how far we got for next time
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // leftover pixels (transfer completed mid-buffer) are reported back
  // to the caller as unconsumed words
  return count_initial - count / 2;
}
431
// Set up a new cpu->vram (GP0 0xa0) or vram->cpu (GP0 0xc0) image transfer.
// pos_word packs x | y<<16, size_word packs w | h<<16 (0 means max size).
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1; // 0 wraps to the maximum
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  // keep the initial rect; gpu.dma is consumed as the transfer progresses
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // pre-latch the first word so an immediate GPUreadData() sees data
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
458
459static void finish_vram_transfer(int is_read)
460{
461 if (is_read)
462 gpu.status &= ~PSX_GPU_STATUS_IMG;
463 else {
464 gpu.state.fb_dirty = 1;
465 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
466 gpu.dma_start.w, gpu.dma_start.h, 0);
467 }
468 if (gpu.gpu_state_change)
469 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
470}
471
// GP0(0x80): vram-to-vram rectangle copy.
// params: [0] src pos, [1] dst pos, [2] size - each packing x | y<<16.
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1; // 0 wraps to max
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit from GP0(0xe6)
  uint16_t lbuf[128];
  uint32_t x, y;

  // copying onto itself without setting the mask bit changes nothing
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // slow path: rows overlap, the copy wraps horizontally, or the mask
  // bit is set - go through a small line buffer with per-pixel wrapping
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: plain row-by-row memcpy, only y needs wrapping
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
516
// Parse a GP0 command list while frameskip is active: commands are
// consumed and state-affecting ones are tracked (texpage, e-regs, fills),
// but nothing is drawn. Stops when an 0xe3 (draw area) command disallows
// skipping, or at image i/o commands. Returns words consumed; *last_cmd
// receives the last command byte, or -1 if a command was cut short.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02: // fill rectangle
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        // small fill: defer until skipping stops (see decide_frameskip)
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27: // textured polygons: keep the texpage reg current
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F: // poly-line: scan for the 0x5xxx5xxx terminator
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F: // shaded poly-line: 2 words per vertex
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3) // draw area change may end the skip
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0) // e0..e7: mirror into the shadow regs
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
582
// Core GP0 stream processor: consume `count` words from `data`,
// dispatching vram transfers, vram copies and draw lists. Returns the
// number of words NOT consumed (0 when everything was processed).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      // an ongoing cpu->vram transfer eats the data first
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) { // vram write (0xa0) / read (0xc0)
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) { // vram->vram copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the shadow e-regs (texpage, mask bits) into GPUSTAT
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area changed: re-evaluate whether skipping is still allowed
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
647
// Feed the accumulated command words to do_cmd_buffer() and keep any
// incomplete trailing command at the start of the buffer for later.
static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    // move the unconsumed tail to the front of the buffer
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed; notify unless a vram transfer took over
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
659
660void GPUwriteDataMem(uint32_t *mem, int count)
661{
662 int left;
663
664 log_io("gpu_dma_write %p %d\n", mem, count);
665
666 if (unlikely(gpu.cmd_len > 0))
667 flush_cmd_buffer();
668
669 left = do_cmd_buffer(mem, count);
670 if (left)
671 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
672}
673
674void GPUwriteData(uint32_t data)
675{
676 log_io("gpu_write %08x\n", data);
677 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
678 if (gpu.cmd_len >= CMD_BUFFER_LEN)
679 flush_cmd_buffer();
680}
681
// Plugin API: walk a GP0 DMA linked list starting at start_addr in psx
// ram. Each link is a header word (next address in the low 24 bits,
// payload length in the top 8) followed by payload words. Returns an
// approximate cycle cost; when progress_addr is given, only one packet is
// processed and *progress_addr receives the next link for resuming.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit 23 set marks the list terminator (and our loop-detection marker)
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover words from an earlier packet: append this payload to
      // the buffer and retry as one stream
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // stash the unconsumed tail for the next call
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  // record when/where the last list was processed (used by the frontend)
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
764
// Plugin API: read `count` words of an active vram->cpu transfer into mem.
// When no transfer is active, mem is left untouched.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
775
776uint32_t GPUreadData(void)
777{
778 uint32_t ret;
779
780 if (unlikely(gpu.cmd_len > 0))
781 flush_cmd_buffer();
782
783 ret = gpu.gp0;
784 if (gpu.dma.h) {
785 ret = HTOLE32(ret);
786 do_vram_io(&ret, 1, 1);
787 ret = LE32TOH(ret);
788 }
789
790 log_io("gpu_read %08x\n", ret);
791 return ret;
792}
793
794uint32_t GPUreadStatus(void)
795{
796 uint32_t ret;
797
798 if (unlikely(gpu.cmd_len > 0))
799 flush_cmd_buffer();
800
801 ret = gpu.status;
802 log_io("gpu_read_status %08x\n", ret);
803 return ret;
804}
805
// Savestate image exchanged with the emulator core; the layout is part of
// the plugin savestate ABI - do not reorder or resize fields.
struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
813
// Plugin API: save (type 1) or load (type 0) the GPU savestate image.
// Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay GP1 regs 1..8 to rebuild derived state (screen geometry
    // etc.); the xor defeats the same-value filter in GPUwriteStatus()
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
844
// Plugin API: called once per emulated vblank. Decides whether a frame
// should actually be presented (blanking, dirty tracking, frameskip
// pacing) and hands it to the video output.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // display disabled: blank the output once, then stay idle
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  // nothing changed since the last presented frame
  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no completed frame yet: wait up to ~9 frames for the game to
      // flip before force-disabling the skip
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement renderer just became active: rescan the whole vram
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
881
// Plugin API: vblank notification. lcf is the current field (odd/even
// line flag); is_vblank itself is unused here.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  // update the renderer while interlacing or on an on/off transition
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
903
// Plugin API: report the display y offset and a base resolution to the
// frontend.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  // NOTE(review): this hands out the *vertical* resolution (and halves
  // it in double-height mode, consistent with a vertical value) even
  // though the parameter is named base_hres - confirm against the
  // callers whether gpu.screen.hres was intended instead
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
911
// Frontend hook: pull the current configuration and callbacks from the
// frontend's rearmed_cbs structure and propagate relevant changes.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // recompute the screen rect only when a centering setting changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap (the mmap callback may not have been available
  // at GPUinit() time)
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
945
946// vim:shiftwidth=2:expandtab