gpulib: yet another frameskip hack
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 * - GNU GPL, version 2 or later.
 * - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

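// (the 2048-byte alignment is presumably for the benefit of the optimized
// asm/NEON renderers that access this struct directly)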
struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

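// decide whether upcoming frames should be skipped: with frameskip.set == N,
// up to N frames in a row are skipped before one is allowed through; the
// frontend can also start a skip run through the shared *advice flag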
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't allow frameskip if the game draws to the displayed area,
  // except under interlace, where games will almost always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
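  // note: each unsigned subtract-and-compare folds both range checks into
  // one test; x < gpu.screen.x wraps to a large value and fails it too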
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

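// GP1(0x10) "get GPU info": latch the requested value into GP0 to be picked
// up by a following GPUreadData; query 0x07 is the GPU version, 2 here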
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
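    // GP1(0x08) display mode: bits 0-5 of the command move to status bits
    // 17-22, bit 6 to status bit 16; hres/vres are then decoded from status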
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

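// number of extra parameter words following each GP0 command word
// (total packet length is 1 + cmd_lengths[cmd])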
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

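// transfer pixels between the data stream and the VRAM rectangle described
// by gpu.dma: finish any partially transferred row first, then copy whole
// rows (wrapping y at 512), then begin the next partial row; returns the
// number of input words consumed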
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

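// scan commands without rendering while a frame is being skipped: e0-e7
// state words are still latched and synced to the renderer, large clears
// are drawn anyway, and an e3 (draw area) change may end the skip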
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy;
  int skip = 1;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    if (cmd == 0x02) {
      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(data + pos, 3, &dummy);
    }
    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (cmd == 0xa0 || cmd == 0xc0)
      break; // image i/o
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

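// run a buffer of GP0 words through the renderer or the skip scanner,
// handling embedded vram transfers; returns how many trailing words were
// left over as an incomplete command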
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // FF7 sends its main list twice per frame; if the previous list came in
  // on the same frame only a moment ago and was expensive, pre-mark its
  // head with the loop detection bit so the repeat terminates immediately
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
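    // rough DMA timing estimate: a fixed cost per chain link plus a
    // per-word cost for the packet payload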
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
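      // replay the control regs through GPUwriteStatus; flip the stored
      // copies first so its regs[cmd] == data early-out can't skip the write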
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

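  // if skipping has produced no ready frame for ~9 frames (e.g. the game
  // stopped flipping), force skipping off and show what we have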
  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab