libretro: drop the Frame Duping option
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 * - GNU GPL, version 2 or later.
 * - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

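// Shared by GP1(00h) reset and GP1(01h) "reset command buffer": flush any
// buffered GP0 words and force an unfinished VRAM transfer to completion
// before the related state is cleared.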
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

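// Derive the displayed width from the GP1(06h) horizontal display range and
// the dot clock divider in status bits 16-18, then apply the configured
// screen centering mode; vout_update() consumes the resulting gpu.screen.*.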
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

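// Same for height: derive the visible line count from the GP1(07h) vertical
// display range, doubling everything when the 480i double-height bit is set,
// and re-center pictures that are only slightly misaligned.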
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

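// Frameskip bookkeeping, run when the display start address changes (a
// "flip"): either mark the new frame ready for output or count it as
// skipped, and replay a fill queued by do_cmd_list_skip() so the next
// drawn frame starts from the correct background.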
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

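// Number of parameter words for each GP0 command, not counting the command
// word itself. Variable-length commands - polylines (48h-5Fh) and image
// transfers (A0h-DFh) - are special-cased by the parsers below.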
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

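// Stream words of an ongoing GP0(A0h/C0h) transfer into or out of VRAM.
// A rectangle may arrive split across several DMA bursts, so progress is
// carried in gpu.dma between calls; returns the number of words consumed.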
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

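// GP0(80h) VRAM-to-VRAM copy. The plain per-row memcpy is only used when
// the rows cannot overlap, nothing wraps the 1024-pixel line, and the mask
// bit is clear; otherwise each chunk is staged through lbuf so wrapping and
// the msb OR can be handled.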
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

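// GP0 parser used while a frame is being skipped: draws nothing, but still
// tracks state that outlives the frame - the E1h-E7h registers, a pending
// fill, and frameskip.allow via E3h (drawing area change) - so that later
// rendered frames stay correct.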
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

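// Main GP0 word consumer: feeds an active VRAM transfer, starts image
// transfers (A0h-DFh) and VRAM copies (80h-9Fh), and passes everything else
// to do_cmd_list() or, when skipping, do_cmd_list_skip(). Returns how many
// words were left unprocessed due to an incomplete trailing command.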
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

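// Walk the DMA linked list in emulated RAM. A broken or malicious list can
// form a cycle, so past LD_THRESHOLD nodes every visited node gets bit 23
// of its link set (that bit causes a DMA error on real hardware, so games
// won't use it); reaching a marked node ends the walk, and the markers are
// stripped again below.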
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

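// Savestate block in the layout common to PCSX GPU plugins: a fixed-size
// snapshot of VRAM, the GP1 control registers and the status word. The
// ex_regs are stashed in the otherwise unused 0xe0+ part of ulControl.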
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

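// Per-vblank presentation: flush anything still queued, handle display
// blanking, honor the frameskip decision, and call vout_update() only when
// the frame buffer actually changed (fb_dirty).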
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab