misc: allow slow-booting to cdda or whatever
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

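// Drain any buffered command words and wind down a half-done VRAM
// transfer; used by GP1(0x00) full reset and GP1(0x01) command reset.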
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

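// Recompute the displayed width: status bits 16-18 select the video
// mode (hdivs[] holds the matching dot clock dividers for the
// 256/320/368/512/640 modes), the GP1(0x06) x1/x2 range gives the
// on-screen span, and the centering mode decides where it lands.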
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

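// Recompute the displayed height from the GP1(0x07) y1/y2 range; the
// first visible scanline differs between NTSC (16) and PAL (39), and
// the "double height" (interlace) bit doubles everything.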
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

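// Advance the frameskip state machine on each flip: a frame is either
// rendered (frame_ready) or skipped, steered by the frontend's advice
// flag or a fixed skip-N setting; a fill deferred while skipping is
// executed once skipping ends.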
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

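// GP1(0x10) "get GPU info": latch the requested value into gp0 so it
// can be read through GPUreadData() - texture window / draw area /
// draw offset state from the E2-E5 shadow regs, or 2 (GPU version)
// for sub-command 7.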
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

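// Map the VRAM buffer through the frontend-supplied allocator. The
// buffer is twice the real 1MB of VRAM (overdraw guard, see VRAM_SIZE)
// and the base pointer is advanced by 2048 pixels, presumably so that
// small negative offsets from overdrawing renderers stay mapped too.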
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

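// Number of 32-bit parameter words that follow each GP0 command byte.
// Poly-lines (0x48-0x5f) and image transfers (0xa0-0xdf) are variable
// length; the values here only cover their fixed header part.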
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

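// Move data between the command stream and VRAM for an active image
// transfer: finish a partially written row first (dma.offset), then
// copy whole rows with Y wrapping at 512, and record how far the
// transfer got. Returns the number of words consumed.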
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

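// Begin a GP0(0xa0/0xc0) image transfer: latch the VRAM rectangle in
// gpu.dma, and for reads prime gp0 with the first word and raise the
// "ready to send image" status bit.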
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

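// GP0(0x80) VRAM-to-VRAM copy. The slow path goes through a small line
// buffer when the X ranges overlap or wrap at 1024, or when the mask
// bit must be set on the destination; otherwise whole rows are memcpy'd.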
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

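// Parse (but do not render) commands while a frame is being skipped:
// fills are deferred (or executed if large), texpage/ex-reg state is
// still tracked, and parsing stops once an E3 command says skipping
// must end or an image transfer begins.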
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

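// Top-level GP0 word consumer: feeds an active VRAM transfer first,
// then decodes image-transfer and VRAM-copy headers itself and hands
// everything else to the renderer (or the skip parser above). Returns
// the number of words it could not consume (incomplete trailing cmd).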
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

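// Walk a GP0 DMA linked list: each node is a header word (next address
// in the low 24 bits, payload length in the top 8) followed by command
// words. ld_addr/ld_count implement power-of-two checkpointing (a
// Brent-style cycle check) so a looped list cannot hang the emulator.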
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

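// Save/load the whole GPU state. On load the control regs are replayed
// through GPUwriteStatus(); each one is first XORed so the redundant-
// write check in GPUwriteStatus() cannot filter the replay out.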
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

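// Per-frame presentation hook: flush pending work, honor blanking, and
// only push a frame to the video backend when VRAM actually changed
// (fb_dirty) and frameskip agrees.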
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

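// Frontend configuration entry point: pick up frameskip/centering
// settings and host memory callbacks, and do the VRAM mapping here if
// GPUinit() ran before the mmap callback was available.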
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab