gpulib: don't lose a fill in frameskip mode
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

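// GP1(0x01) command buffer reset: before clearing state, flush whatever
// commands are still queued and close any in-flight VRAM transfer so that
// already-received data isn't silently dropped.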
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

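// GP1(0x00) full reset. 0x14802000 appears to be the power-on status value
// (ready flags set, display disabled); regs[3] = 1 mirrors the display
// starting out disabled.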
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

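// The horizontal display range regs (x1, x2) are in GPU clock ticks; a full
// scanline spans about 2560 ticks, so the visible width in pixels is the
// tick span scaled by the current horizontal resolution.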
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

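// Called on each display address change (i.e. frame flip). If a skipped
// frame contained a screen-clearing fill, do_cmd_list_skip() below has
// saved it in pending_fill; replay it as soon as skipping stops, so the
// clear isn't lost and stale frame contents don't remain on screen.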
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

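// Unsigned compare trick: (uint32_t)(x - start) >= width is true both when
// x < start (the subtraction wraps around) and when x >= start + width,
// i.e. it tests "outside the displayed area" with a single comparison.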
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

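// GP1(0x10) "get GPU info": returns latched draw state (texture window,
// draw area, draw offset) through GP0; case 7 presumably reports the GPU
// version (2).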
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
  case 0x05:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x06:
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    gpu.gp0 = 0;
    break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

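// GP1 control writes. Most registers are cached so that rewriting the same
// value is a no-op; resets (0x00, 0x01) and the display address (0x05,
// which also drives the frameskip decision below) are always processed.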
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

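// Number of parameter words following each GP0 command word. Zero entries
// are either parameterless commands or ones handled specially; note that
// 0xa0/0xc0 (image write/read) only count their 2-word position/size
// header here - the pixel data that follows goes through the VRAM
// transfer path instead.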
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

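// Move pixel data between the command stream and VRAM in up to three steps:
// finish a previously started partial line, copy whole lines, then stash
// any trailing partial line in dma.offset for the next call. Returns the
// number of 32-bit words consumed.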
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

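// Begin a GP0(0xa0/0xc0) image transfer. Position and size are masked to
// the 1024x512 halfword grid of VRAM; for reads, the first two pixels are
// prefetched into gp0 so GPUreadData() has something to return immediately.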
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

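// Walk a command list without rendering it. State that outlives the frame
// still has to be tracked: texpage bits carried by textured primitives,
// the 0xe0-0xe7 setup commands, and screen-clearing fills (0x02), which
// are stashed in pending_fill so decide_frameskip() can replay them later.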
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  // XXX: polylines are not properly handled
  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    if (cmd == 0x02) {
      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
    }
    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (cmd == 0xa0 || cmd == 0xc0)
      break; // image i/o
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

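// Main GP0 dispatch loop: continue any active VRAM write first, consume
// image transfer commands, and hand everything else either to the renderer
// or, while a frame is being skipped, to do_cmd_list_skip(). Returns how
// many words were left over due to an incomplete command.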
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

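// Walk a DMA linked list: each node's header word holds the address of the
// next node in the low 24 bits and the payload length in the top 8. Bit 23
// of the address is used as a "visited" marker for loop detection (as noted
// below, setting it causes a DMA error on real hardware, so games won't).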
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

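// Savestate support. On load, control registers are replayed through
// GPUwriteStatus() to rebuild derived state; each reg is XORed first so
// the written value differs from the cached one and the same-value check
// in GPUwriteStatus() doesn't filter the write out.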
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}

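// Per-frame display update. When frameskip is on, a frame without new
// output is normally not presented, but after ~9 frames without a flip the
// skip is forced off so the picture can't stay frozen indefinitely.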
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab