gpulib: clear fb when display is blanked
plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

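// GP1(0x01) command buffer reset: execute whatever command words are still
// queued and close any in-flight VRAM transfer so no stale state survives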
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

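// full GP1(0x00) reset: clear the control registers, reinit the 0xe0-0xe7
// rendering attributes, and restore the power-on status word and default
// 256x240 display mode (regs[3] = 1 leaves the display blanked)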
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

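// derive the on-screen size in pixels from the display range registers;
// the horizontal range is in video clock ticks, where 2560 appears to
// span a full-width line, and the line count doubles in dual-height
// (interlaced 480-line) mode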
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

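// called once per detected flip: while skipping, count the dropped
// frames; otherwise mark the frame as ready for output. Skipping then
// continues on external advice, or until 'set' frames have been dropped.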
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

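// decide whether the list whose 0xe3 (draw area top-left) word is cmd_e3
// may be skipped; the unsigned compares fold the "< start" and
// ">= start + size" range checks into a single test each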
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

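// GP1(0x10) GPU info request: answer through the GP0 read latch -
// texture window / draw area / draw offset from ex_regs (cases 2-6)
// and what is presumably the GPU type for case 7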
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

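// GP1 control port write; repeated writes of an unchanged value are
// dropped early, except for the reset commands (0x00, 0x01) and display
// address (0x05), which must always take effect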
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

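// number of extra parameter words each GP0 command takes after the
// command word itself; 0 for unknown or parameterless opcodes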
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

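// pump words between the command stream and the transfer rectangle set
// up by start_vram_transfer: finish a partially done row first, then
// whole rows, then a trailing partial row; returns the words consumed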
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

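// set up a GP0(0xa0) VRAM write / GP0(0xc0) VRAM read; position and size
// are masked/wrapped to the 1024x512 VRAM, and for reads the first pixels
// are latched into gp0 so an immediate data read returns valid data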
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

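// frameskip walker: scan the list without drawing, but keep renderer
// state consistent - texpage bits from textured prims, the 0xe0-0xe7
// attribute commands, and 0x02 fills too large to go unnoticed are
// still processed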
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy;
  int skip = 1;

  // XXX: polylines are not properly handled
  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    if (cmd == 0x02) {
      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(data + pos, 3, &dummy);
    }
    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (cmd == 0xa0 || cmd == 0xc0)
      break; // image i/o
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

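// main GP0 processor: feed data to an active VRAM transfer, start image
// transfers on 0xa0/0xc0, and pass everything else to the renderer (or
// to the skip walker while frameskipping); returns the number of words
// left over from an incomplete trailing command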
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

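// walk a DMA2 linked list: each node has a header word with the payload
// size in the top byte and the next address in the low 24 bits. Visited
// nodes are tagged with bit23 (normally invalid in a RAM address) so
// looped lists terminate; the tags are stripped again afterwards. The
// returned cycle count is a rough per-node timing estimate.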
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (a set bit23 causes a DMA error on the real machine, so
    // games are unlikely to ever set it themselves)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

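// savestate handling; on load the control registers are replayed through
// GPUwriteStatus (with the reg-change filter defeated by the XOR below)
// so that all derived screen state gets rebuilt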
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

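// per-frame output: when the display gets blanked, clear the visible
// frame once via vout_blank() and mark the fb dirty so the picture is
// redrawn on unblank; otherwise present the frame unless frameskip
// decided to drop it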
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

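// vblank hook for interlace handling; "auto" mode (allow_interlace == 2)
// disables interlacing for games that haven't read VRAM back recently,
// since interlaced output tends to look worse on progressive displays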
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

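// pull the frontend's runtime configuration: frameskip mode and advice
// flag, frame/hsync counters used for timing, interlace policy, and a
// pointer to the raw VRAM for frontends that display it directly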
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab