gpu: improve timings of clipped sprites
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* for calloc */
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

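// Display width is selected by status bits 16-18: bit 16 picks the
// 368-pixel mode, bits 17-18 pick 256/320/512/640 (see hres_all below).
// Each mode has a fixed dot clock divider (hdivs: 10/8/5/4, or 7 for 368,
// per the nocash docs), used to convert the x1/x2 display range from
// video clock ticks into pixels of the current mode.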
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
      case C_INGAME:
        break;
      case C_MANUAL:
        x = gpu.state.screen_centering_x;
        break;
      default:
        // correct if slightly miscentered
        x_auto = (hres - sw) / 2 & ~3;
        if ((uint32_t)x_auto <= 8u && abs(x) < 24)
          x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

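// Display height: y1/y2 are scanline numbers. The first visible line is
// taken as 16 for NTSC and 39 for PAL (the latter tuned for Spyro, per
// the comment below). In interlaced "double height" mode everything,
// including the centering tolerance, is doubled.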
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
      case C_INGAME:
        break;
      case C_BORDERLESS:
        y = 0;
        break;
      case C_MANUAL:
        y = gpu.state.screen_centering_y;
        break;
      default:
        // correct if slightly miscentered
        if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
          y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to
  // reduce the number of TLB fills. (Will change this value if it ever
  // gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
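// VRAM buffer layout (see VRAM_SIZE above): a 4 KB guard area in front,
// the 1024x512 16bpp VRAM itself, then another VRAM-sized area acting as
// an overdraw guard for renderers that can run past the end. map_vram()
// leaves gpu.vram pointing past the front guard, VRAM_ALIGN-aligned.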
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    // don't leave MAP_FAILED in the pointers, so that shutdown
    // and the delayed-map retry don't act on an invalid address
    gpu.vram = vram_ptr_orig = NULL;
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

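// Number of additional 32-bit words that follow the first word of each
// GP0 command, indexed by the command byte; parsers below compute the
// total as 1 + cmd_lengths[cmd]. Line commands (0x48+) are
// variable-length and their real length is found by scanning for the
// 0x5xxx5xxx terminator.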
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// copy with the mask (msb) bit forced on
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

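// Feed or drain an ongoing CPU<->VRAM transfer in units of VRAM rows.
// A transfer set up by start_vram_transfer() can span several calls, so
// gpu.dma tracks the current row (y), remaining rows (h) and the offset
// into a partially done row. Returns the number of 32-bit words consumed.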
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

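// VRAM->VRAM rectangle copy (GP0 0x80). The fast path is a per-row
// memcpy; the buffered path handles x-overlapping source/destination,
// horizontal wraparound at x=1024 and forced mask bit setting
// (gpu.ex_regs[6]) by staging each run through a small line buffer.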
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

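// Parse commands without drawing while a frame is being skipped: fills
// are deferred into pending_fill, texpage state from textured prims and
// 0xex commands is still tracked in ex_regs, and an e3 (draw area)
// command may cancel skipping through decide_frameskip_allow(). Stops at
// image i/o or an incomplete command; returns the words consumed.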
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

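// Main GP0 dispatcher: feeds an active VRAM transfer, starts new VRAM
// read/write/copy commands and forwards everything else to the
// renderer's do_cmd_list() (or do_cmd_list_skip() while frameskipping).
// Returns how many of 'count' words could not be consumed yet.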
static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

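// Walk a GP0 DMA linked list: each node has a header word (next address
// in the low 24 bits, payload word count in the high 8) followed by the
// payload. Bit 23 of an address would raise a DMA error on real hardware,
// so it's borrowed as a "visited" marker to catch looping lists (see
// LD_THRESHOLD below). Returns the estimated cycle cost of the chain.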
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d %u\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8 * 1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab