gpulib: handle vram copy in gpulib
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // plain evaluation, no branch hint
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

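// GP1(01h) (also part of GP1(00h)): flush whatever command words are
// buffered, then drop the rest and force-finish any in-flight VRAM transfer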
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

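// derive the visible width from the status h-res bits and the x1/x2
// display range; hdivs[] holds the dotclock divider of each mode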
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
      case 1:
        break;
      case 2:
        x = gpu.state.screen_centering_x;
        break;
      default:
        // correct if slightly miscentered
        x_auto = (hres - sw) / 2 & ~3;
        if ((uint32_t)x_auto <= 8u && abs(x) < 24)
          x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
      case 1:
        break;
      case 2:
        y = gpu.state.screen_centering_y;
        break;
      default:
        // correct if slightly miscentered
        if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
          y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

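// frameskip state machine, advanced once per flip: counts skipped
// frames, honors external advice, and replays a postponed fill
// command once skipping stops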
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't allow skipping when the game draws to the visible display
  // area, except in interlace mode, where that is almost always the case
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
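    // start 4k into the mapping so there is guard space in front of
    // the buffer as well (vram is uint16_t *, hence 4096 / 2)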
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

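// GP1 control port write; most commands are skipped when rewritten
// with an unchanged value, but 00h, 01h and 05h always take effect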
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      gpu.screen.src_x = data & 0x3ff;
      gpu.screen.src_y = (data >> 10) & 0x1ff;
      renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

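// number of extra parameter words for each GP0 command; variable-length
// poly-lines (48h-5Fh) start from this and are extended at parse time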
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

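// scanline copy helpers for VRAM i/o; msb is the mask bit derived from
// GP0(E6h), forced on every pixel written to VRAM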
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

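// move data between the command stream and VRAM for an active
// GP0(A0h/C0h) transfer; resumes mid-row via gpu.dma.offset and
// returns the number of 32-bit words consumed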
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

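// set up a GP0(A0h/C0h) VRAM transfer; gpu.dma_start keeps a copy of
// the rectangle so finish_vram_transfer() can flush the right area
// after gpu.dma itself has been advanced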
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
}

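// GP0(80h) VRAM-to-VRAM copy, handled in gpulib as of this commit: the
// slow path bounces rows through a small line buffer to cope with
// overlap, wrap-around at the 1024-pixel edge and the mask bit;
// non-overlapping in-bounds copies go through plain row memcpy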
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

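// consume commands while skipping a frame: shadow state (e-regs,
// texture page) is still tracked without drawing, and skipping stops
// as soon as a command may touch the displayed area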
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F: // poly-line: scan for the 5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F: // shaded poly-line, 2 words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

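// main GP0 dispatcher: feeds an in-progress VRAM transfer first, peels
// off image i/o and vram-copy setup commands itself, and hands the
// rest to the renderer (or the skip path); returns the number of
// words left unconsumed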
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || (cmd & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

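// walk the DMA linked list in PSX RAM: each node is a header word
// (payload length in the top byte, next-node address in the low 24
// bits) followed by the payload; the terminator has bit 23 set
// (nominally ffffffh)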
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
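      // replay the control regs so derived state (screen size, DMA
      // mode, status bits) is rebuilt; the XOR below defeats the
      // same-value early-out in GPUwriteStatus()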
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 1);
      break;
  }

  return 1;
}

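// per-frame presentation hook: flush pending work, then show the frame
// unless the display is blanked, unchanged, or being skipped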
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab