pcsx_rearmed: plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

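/* Note on the tables in update_width() below: GPUSTAT bits 16-18 select
 * the horizontal mode (256/320/512/640 pixels, or 368 whenever bit 16 is
 * set), and hdivs[] holds the matching dotclock dividers. A worked
 * example, assuming the common NTSC display-range defaults x1=0x260 (608)
 * and x2=0xc60 (3168) in 320-pixel mode: sw = 3168 - 608 = 2560
 * dotclocks, and 2560 / 8 = 320 visible pixels. */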
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash

    if (gpu.state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu.state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
  //  gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
}

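/* Vertical counterpart: y1/y2 come from GP1(0x07). The offsets subtracted
 * below (16 for NTSC, 39 for PAL, per the Spyro note) approximate the
 * first visible scanline, and in interlaced double-height mode everything
 * is simply doubled. */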
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

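/* The casts above use a common range-check trick: with unsigned values,
 * (uint32_t)(x - src_x) >= w is a single comparison that is true both
 * when x < src_x (the subtraction wraps to a huge value) and when
 * x >= src_x + w, i.e. whenever x lies outside [src_x, src_x + w). */
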
static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

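/* map_vram() maps twice the real 1 MiB of VRAM (see VRAM_SIZE above) and
 * then advances the pointer by one 4 KiB page (4096/2 halfwords). The
 * extra space acts as an overdraw guard, presumably so rasterizers that
 * run slightly past the buffer end (or, thanks to the page offset,
 * slightly before its start) still land inside the mapping. */
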
long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17)
      | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

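/* cmd_lengths[] above gives the number of extra parameter words that
 * follow each GP0 command word. For example, 0x28 (monochrome quad)
 * takes 4 vertex words after the color/command word, so
 * cmd_lengths[0x28] == 4. The entries for polylines (0x48-0x4f,
 * 0x58-0x5f) are only minimums: do_cmd_list_skip() below extends the
 * length until it finds the 0x50005000 terminator word. */
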
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

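/* The msb parameter above carries GP0(0xe6) bit 0 shifted into pixel
 * bit 15: while "set mask while drawing" is active, every halfword
 * stored to VRAM gets its mask bit forced on (see cpy_msb()). */
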
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

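/* Worked example for do_vram_io(), assuming a 10x3-pixel transfer fed
 * 16 words (32 pixels) at once: the row loop stores three full 10-pixel
 * lines, h reaches 0 so finish_vram_transfer() runs, and 2 pixels remain
 * unconsumed; the function then reports 16 - 2/2 = 15 words taken. */
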
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

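/* do_vram_copy() takes the slow, line-buffered path whenever the source
 * and destination rows overlap in the copy direction, the rectangle
 * wraps past x=1024, or mask-bit setting is active; otherwise each row
 * is a straight memcpy(). */
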
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w
          || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

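/* While a frame is being skipped, do_cmd_list_skip() above walks the
 * command stream without rendering. It still updates the 0xe1-0xe7 state
 * words and the texture page bits, executes clears larger than the
 * visible screen immediately, and stashes smaller fills in pending_fill
 * so they can be replayed when skipping ends (see decide_frameskip()). */
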
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]),
          (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active
        && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

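/* After each buffer pass, the GPUSTAT mirror is rebuilt from the shadow
 * registers: bits 0-10 come from GP0(0xe1) (texture page, dithering,
 * draw-to-display) and bits 11-12 from GP0(0xe6) (mask handling). */
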
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

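/* The ld_addr/ld_count pair in GPUdmaChain() is power-of-two
 * checkpointing (essentially Brent's cycle detection): the saved address
 * is refreshed at nodes 32, 64, 128, ..., so a malformed self-linking
 * list is caught within one cycle length instead of hanging the loop. */
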
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

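/* On load, GPUfreeze() replays control registers 8..1 through
 * GPUwriteStatus(). Because GPUwriteStatus() ignores writes whose value
 * matches the stored register, each regs[i] is first XORed with 1 so
 * that the replayed (original) value is guaranteed to differ and get
 * reprocessed. */
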
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab