gpu: improve timings of clipped sprites
pcsx_rearmed.git: plugins/gpulib/gpu.c

/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
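  // status bits 16-18 index both tables: the dotclock divider selects the
  // horizontal resolution, divider 7 giving the "wide" 368-pixel mode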
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
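  // the unsigned compares below double as range checks:
  // (uint32_t)(x - src_x) < w  <=>  src_x <= x < src_x + w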
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
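    // leave a 4k guard area in front of the buffer, presumably so small
    // negative overdraw doesn't fault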
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

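// number of extra 32-bit parameter words following each GP0 command word;
// variable-length commands (polylines, image i/o) are special-cased by users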
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
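  // GP0(e6h) bit0 "set mask while drawing" forces bit15 on written pixels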
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
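  // sizes are encoded with 0 meaning maximum (1024x512)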
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

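  // copies that overlap, wrap at the vram edge or set the mask bit must go
  // through a line buffer; anything else can use a plain memcpy per line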
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
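      // variable-length polylines: scan ahead for the 0x5xxx5xxx terminator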
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

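  // mirror the ex_regs state into the status word:
  // bits 0-10 from e1 (texpage), bits 11-12 from e6 (mask)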
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

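  // prefetch the first packet header while any leftover commands are flushed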
  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d %u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
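      // replay the control regs so derived state (screen size, dma mode) is
      // rebuilt; the xor makes GPUwriteStatus see a "changed" value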
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
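      // no frame became ready for a while; after ~9 frames give up skipping
      // so the display doesn't appear frozen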
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab