gpu: rework dma vs busy timing
plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// the fallback must still yield the expression, or if (unlikely(...)) breaks
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
  int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
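
/*
 * Worked example for update_width(), assuming a common NTSC 320-wide mode
 * (status bits 18:16 = 2, so hres = 320 and hdiv = 8) with the standard
 * display range x1 = 0x260 (608) and x2 = 0xc60 (3168):
 *   x  = (608 - 608) / 8 = 0
 *   sw = (3168 - 608) / 8 = 320  ->  (320 + 2) & ~3 = 320
 * so the full 320-pixel line is shown with no left offset. These values
 * are illustrative only; games program all sorts of ranges, which is why
 * the centering correction below exists.
 */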

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
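
/*
 * Note the unsigned comparison idiom above: casting (x - src_x) to
 * uint32_t folds the "x < src_x" and "x >= src_x + w" checks into a
 * single test, since a negative difference wraps to a huge unsigned
 * value that always fails the < w comparison.
 */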

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
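
/*
 * Layout sketch of the mapping above (our reading of the code, for
 * reference): the PSX has 1 MiB of VRAM (1024x512 halfwords), but twice
 * that is mapped and the working pointer is advanced by 4096 bytes
 * (4096 / 2 halfwords), so rasterizers that run slightly out of bounds
 * land in guard space instead of unmapped memory:
 *
 *   mmap base                                          end of mapping
 *   | 4 KiB guard | 1 MiB nominal VRAM | ~1 MiB overdraw guard |
 *                 ^ gpu.vram points here
 *
 * GPUshutdown() undoes the +4096/2 offset before munmap'ing.
 */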

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
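
/*
 * GP1 word format handled above, for reference: the command number sits
 * in bits 31:24 and the payload in the low bits. For example, with
 * illustrative values, setting the display start to VRAM coordinates
 * (320, 256) would look like:
 *
 *   GPUwriteStatus((0x05 << 24) | (256 << 10) | 320);
 *
 * which case 0x05 above decodes with the same masks (data & 0x3ff for x,
 * (data >> 10) & 0x1ff for y).
 */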

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
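
/*
 * The table stores additional parameter words per GP0 command, so a full
 * packet is 1 + cmd_lengths[cmd] words. Example: a textured four-point
 * polygon (cmd 0x2c) has cmd_lengths[0x2c] = 8, giving 9 words total -
 * the command/color word plus four (vertex, texcoord) pairs.
 * Variable-length commands (polylines 0x48-0x5f and the VRAM transfers
 * 0xa0-0xdf) are special-cased in do_cmd_list_skip() and do_cmd_buffer()
 * below rather than driven purely by this table.
 */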

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
  int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
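
/*
 * VRAM is addressed as a 1024x512 array of 16bpp halfwords, so
 * VRAM_MEM_XY(x, y) is simply &gpu.vram[y * 1024 + x]; for instance the
 * pixel at (256, 100) lives at halfword index 100 * 1024 + 256. The msb
 * argument carries the mask-set bit from the GP0 0xe6 command
 * (gpu.ex_regs[6] bit 0, shifted up to bit 15) and is OR-ed into every
 * written pixel on the slow path.
 */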

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
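
/*
 * Header format of the VRAM write/read commands (0xa0/0xc0) decoded
 * above, with illustrative values: a 64x32 write to VRAM (320, 128)
 * arrives as three words,
 *
 *   0xa0000000              // command
 *   (128 << 16) | 320       // pos_word:  Y in bits 31:16, X in 15:0
 *   ( 32 << 16) |  64       // size_word: H in bits 31:16, W in 15:0
 *
 * followed by W*H halfwords of pixel data. The -1 / +1 dance on w and h
 * maps the hardware's 0 encoding to the maximum sizes (1024 and 512).
 */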

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
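
/*
 * do_vram_copy() receives the three parameter words of the GP0 0x80
 * (VRAM-to-VRAM copy) packet: params[0] = source YX, params[1] =
 * destination YX, params[2] = size HW, all in the same 16+16 bit layout
 * as the VRAM transfer header above. The slow path with the lbuf[]
 * staging buffer handles what a plain per-row memcpy cannot: overlapping
 * source/destination rows, wrap-around at x = 1024, and forced mask bits.
 */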

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
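
/*
 * The polyline scans above rely on the hardware's terminator rule: a
 * coordinate word whose bits masked with 0xf000f000 equal 0x50005000
 * (e.g. the conventional 0x55555555) ends the line strip, so the real
 * packet length is only known once that word is found - the table value
 * only covers the minimum-size packet.
 */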

static noinline int do_cmd_buffer(uint32_t *data, int count,
  int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
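
/*
 * Contract of do_cmd_buffer(), as used by the callers below: the return
 * value is the number of words it could not consume (a trailing
 * incomplete command, or unfinished VRAM transfer data). For example, if
 * a 9-word textured quad packet arrives split as 6 + 3 words, the first
 * call returns 6 and the caller must keep those words and re-present
 * them together with the rest, which is what flush_cmd_buffer() and
 * GPUdmaChain() do via gpu.cmd_buffer / gpu.cmd_len.
 */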

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
  uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
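
/*
 * DMA chain node layout walked above, with made-up addresses for
 * illustration: each node is one header word, (len << 24) | next_addr,
 * followed by len GP0 command words, and the chain ends at an address
 * with bit 23 set (conventionally 0xffffff):
 *
 *   0x80100: 0x03080200  // 3 command words follow, next node at 0x80200
 *            ...3 words...
 *   0x80200: 0x00ffffff  // 0 words, terminator
 *
 * The same bit 23 doubles as the loop detection marker planted after
 * LD_THRESHOLD nodes, since real hardware would error out on it anyway;
 * revisiting a marked node ends the walk via the loop condition.
 */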

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
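
/*
 * Save state layout note (our reading): ulControl[] mirrors the command
 * numbering, so the GP1 control registers (gpu.regs) go at index 0 and
 * the GP0 0xe0-0xe7 state words (gpu.ex_regs) at index 0xe0, which is
 * why the load path can replay them through GPUwriteStatus() and
 * renderer_sync_ecmds() above.
 */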

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab