gpu_neon: rework buffering to reduce flickering
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 * - GNU GPL, version 2 or later.
 * - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// fallbacks for other compilers; unlikely() must still yield its argument
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

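// display width, from GP1(08h) display mode: status bits 16-18 select
// both the nominal hres and the dot clock divider (bit 16 forces 368);
// hres_all/hdivs below are indexed by those 3 bits. x1/x2 from GP1(06h)
// are in dot clock units, so the visible width is roughly (x2 - x1) / hdiv.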
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

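// frameskip state machine, run once per flip (display start change):
// while skipping, count frames until frameskip.set is reached, then
// render one again; a fill (cmd 02h) seen while skipping is kept in
// pending_fill and replayed here so the next drawn frame doesn't
// inherit a stale background.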
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

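// vram is mapped at twice its real size as an overdraw guard for the
// rasterizers (see VRAM_SIZE above); the pointer is also advanced by
// one 4096 byte page, presumably so that small negative offsets from
// imprecise rasterization can't underrun the mapping either.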
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

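// GP1 (control) writes. 00h reset, 01h reset command buffer / vram
// transfer, 03h display blanking, 04h dma direction, 05h display start
// (vram scanout position), 06h/07h horizontal/vertical display range,
// 08h display mode, 10h..1fh gpu info; repeated writes with an
// unchanged value are dropped early for everything except 00h, 01h
// and 05h.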
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

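// number of parameter words following each GP0 command word, i.e. a
// complete packet is 1 + cmd_lengths[cmd] words. Variable-length
// packets (polylines 48h..5Fh, vram transfers A0h..DFh) only list
// their fixed part here and are special-cased by the parsers.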
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

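// transfer up to count words between main ram and the active vram
// rectangle, in three steps: finish a line left half-done by the
// previous call, then whole lines, then stash the remainder of a
// partial line in dma.offset. Returns the number of words consumed.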
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

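// vram-to-vram copy (GP0 80h). The buffered slow path is taken when
// the rows overlap left-to-right, wrap around the 1024 pixel line, or
// the mask bit needs to be set; otherwise whole rows are memcpy'd.
// Vertical wrap is handled by masking y in both paths.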
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

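// parse a command list without drawing it (frameskip path): ex_regs
// still have to be tracked, large fills are rendered anyway, small
// ones deferred to pending_fill, and polylines are scanned for their
// 55555555h-style terminator (matched loosely as 5xxx5xxxh) to find
// the packet length. Stops at image i/o or an incomplete packet.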
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

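// top-level GP0 parser: feeds an ongoing vram write first, handles
// vram transfer setup (A0h/C0h) and vram copy (80h) packets inline,
// and passes everything else to the renderer (or to the skip parser
// while frameskipping). Returns how many words at the end of the
// buffer could not be consumed yet (incomplete packet).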
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

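// walk a dma chain (ordering table): each node starts with a header
// word holding the next node address in bits 0-23 and the payload
// length in bits 24-31, terminated by an address with bit 23 set
// (ffffffh). Since a broken list may loop forever, after LD_THRESHOLD
// nodes bit 23 is set on visited headers as a loop marker and removed
// again once the walk ends.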
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

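// savestate hook. On load, control registers are replayed through
// GPUwriteStatus() to rebuild all derived state; regs[i] is flipped
// first so the unchanged-value early-out at the top of
// GPUwriteStatus() can't swallow the replayed write.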
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab