// plugins/gpulib/gpu.c (pcsx_rearmed)
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // fallback must still yield the condition, not expand to nothing
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

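// Display width: status bits 16-18 select the horizontal mode.
// hres_all[] is the nominal pixel width and hdivs[] the matching
// dotclock divider (256->10, 320->8, 512->5, 640->4, 368->7);
// the odd-indexed entries cover the 368-pixel "hres2" override bit.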
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

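// Called on a display flip: counts consecutively skipped frames and
// decides whether the next frame gets rendered or skipped, based on
// the fixed skip setting or the frontend's dynamic advice.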
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't frameskip if the game draws to the displayed area,
  // except in interlace mode where that is mostly unavoidable
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

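// Map VRAM through the frontend-provided mmap. The returned pointer is
// advanced by 4096 bytes, presumably to leave a guard area before the
// nominal VRAM start as well (the doubled size guards overdraw past
// the end).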
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

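// GP1 (control port) write. Commands handled here: 0x00 reset,
// 0x01 command buffer reset, 0x03 display enable, 0x04 DMA direction,
// 0x05 display start address, 0x06/0x07 horizontal/vertical display
// range, 0x08 display mode, 0x10-0x1f GPU info reads.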
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

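// Number of parameter words following each GP0 command byte (total
// packet length is 1 + cmd_lengths[cmd]). Variable-length commands
// (polylines 0x48-0x5f, image transfers 0xa0-0xdf) are only partially
// described here and get special-cased by the parsers below.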
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

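// copy a line while forcing the mask bit (bit 15) on every pixel;
// msb comes from the GP0(0xe6) "set mask while drawing" setting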
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

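// Stream count words of a VRAM read/write. Works in 16bpp halfwords:
// first finishes any partially transferred row (dma.offset), then moves
// whole rows, wrapping y at 512 lines; leftover state is written back
// to gpu.dma so the transfer can resume with the next DMA block.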
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

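// GP0(0x80) VRAM-to-VRAM copy. The fast path is a plain per-row memcpy;
// the slow path goes through a small line buffer when the rectangles
// overlap horizontally, wrap around the 1024-pixel VRAM width, or the
// mask bit needs to be set on the copied pixels.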
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

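// Parse commands while a frame is being skipped: nothing is drawn, but
// state that outlives the frame is still applied (e0-e7 registers, the
// texture page bits of textured polys, large fills), and polylines are
// walked to their terminator so the stream stays in sync.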
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

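// Main GP0 dispatcher: routes image transfers (0xa0-0xdf) to the VRAM
// I/O path, VRAM copies (0x80-0x9f) to do_vram_copy, and everything
// else to the renderer's do_cmd_list (or the skip parser while
// frameskipping). Returns the number of words not yet consumable
// because the last command is incomplete.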
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

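// Walk a GP0 linked-list DMA chain (ordering table): each node starts
// with a header word holding the 24-bit address of the next node and
// the number of command words that follow. Loops are detected with a
// power-of-two backtracking scheme (Brent's algorithm): ld_addr is
// re-sampled every time count reaches ld_count, which then doubles.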
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

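// Save-state interface. A frontend round-trip looks roughly like this
// (hypothetical caller, sketch only; the real emu core sets
// ulFreezeVersion and manages the allocation):
//
//   struct GPUFreeze *f = malloc(sizeof(*f));
//   f->ulFreezeVersion = 1;
//   GPUfreeze(1, f);   // type 1: save GPU state into *f
//   /* ... later ... */
//   GPUfreeze(0, f);   // type 0: load GPU state back from *f
//   free(f);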
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // flip stored value to defeat the same-value check
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1)); // then replay the original
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

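// Per-frame presentation, called by the frontend once per vblank/flip:
// flushes pending work, handles display blanking, honors frameskip
// (a skipped frame is force-shown if none was presented for 9 frames),
// and only re-blits when something actually changed (fb_dirty).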
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab