2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
/* Element count of a true array (not valid on decayed pointers). */
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* GCC/Clang helpers: branch-prediction hint and inline suppression. */
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
/* Log prefixed with the current frame count and hsync counter. */
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
/* Compile-time log switches: uncomment to route through gpu_log. */
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
/* Anomaly logging disabled by default (expands to nothing). */
25 #define log_anomaly(...)
/* Forward declarations: GP0 command processor and VRAM-DMA completion. */
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
/* Drain any buffered GP0 words and complete an in-flight VRAM transfer
 * so that a reset does not leave half-processed state behind. */
32 static noinline void do_cmd_reset(void)
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
/* dma.h > 0 means a VRAM image transfer is still open; close it out. */
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
/* Full GPU reset (GP1 0x00 class): clear control registers, reinitialize
 * the 0xEx shadow registers, restore power-on status and a 256x240 screen. */
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
/* Each ex_regs slot shadows GP0 command 0xE0+i; seed with the command byte
 * in the top 8 bits and zeroed parameters. */
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
/* NOTE(review): 0x14802000 looks like the hardware reset value of GPUSTAT
 * (display disabled, ready bits set) — confirm against GPU register docs. */
52 gpu.status.reg = 0x14802000;
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
/* Recompute the visible width from the horizontal display range x1..x2.
 * The range is presumably in GPU clock ticks with 2560 spanning a full
 * scanline (hence the scale by hres/2560) — TODO confirm. */
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
/* Degenerate or out-of-range window: fall back to the mode's full hres. */
62 if (sw <= 0 || sw >= 2560)
64 gpu.screen.w = gpu.screen.hres;
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
/* Recompute the visible height from the vertical display range y1..y2,
 * with special handling when double-height (interlaced) mode is set.
 * NOTE(review): the rest of this function is not visible in this view. */
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
/* Decide whether the upcoming frame should be skipped, based on the
 * frontend's advice pointer and the configured skip pattern, and replay
 * any fill (0x02) command that was deferred while skipping. */
80 static noinline void decide_frameskip(void)
/* A previously skipped frame resets the counter and marks output ready. */
82 if (gpu.frameskip.active)
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
/* Frontend advice forces skipping on; otherwise follow the set/cnt pattern. */
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
/* When skipping just ended, execute the fill command that was stashed by
 * do_cmd_list_skip() so VRAM is not left stale. */
96 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
98 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
99 gpu.frameskip.pending_fill[0] = 0;
/* Decide whether frameskip is allowed for the current draw area.
 * cmd_e3 is the latest GP0 0xE3 (draw area top-left) word; x/y are its
 * packed 10-bit coordinates. Skipping is allowed when drawing happens
 * outside the displayed region, or always under interlace.
 * Returns the new gpu.frameskip.allow value. */
103 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
105 // no frameskip if it decides to draw to display area,
106 // but not for interlace since it'll most likely always do that
107 uint32_t x = cmd_e3 & 0x3ff;
108 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
/* Unsigned subtract-and-compare folds the two-sided range test
 * (coord < screen origin || coord >= origin + size) into one compare. */
109 gpu.frameskip.allow = gpu.status.interlace ||
110 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
111 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
112 return gpu.frameskip.allow;
/* GP1 0x10 "get GPU info": latch the requested internal value into the
 * GP0 read port (gpu.gp0). Only some cases are visible in this view;
 * the 20-bit mask matches the documented info-word width. */
115 static noinline void get_gpu_info(uint32_t data)
117 switch (data & 0x0f) {
122 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
125 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
136 // double, for overdraw guard
137 #define VRAM_SIZE (1024 * 512 * 2 * 2)
/* Map the VRAM backing store through the frontend-supplied mmap hook.
 * On failure a warning is printed; callers presumably continue with
 * gpu.vram == NULL. Return value not fully visible here. */
139 static int map_vram(void)
141 gpu.vram = gpu.mmap(VRAM_SIZE);
142 if (gpu.vram != NULL) {
/* Advance by 2048 halfwords (4096 bytes), presumably leaving a guard
 * area in front of the buffer — GPUshutdown() undoes this offset. */
143 gpu.vram += 4096 / 2;
147 fprintf(stderr, "could not map vram, expect crashes\n");
/* NOTE(review): interior of GPUinit — the signature is outside this view. */
156 ret |= renderer_init();
/* Point the shared counters at a local zero until the frontend supplies
 * real frame/hsync counters via GPUrearmedCallbacks(). */
158 gpu.state.frame_count = &gpu.zero;
159 gpu.state.hcnt = &gpu.zero;
160 gpu.frameskip.active = 0;
/* Map VRAM now only if the mmap hook is already installed. */
164 if (gpu.mmap != NULL) {
/* Plugin shutdown: release the mapped VRAM if it was allocated. */
171 long GPUshutdown(void)
177 if (gpu.vram != NULL) {
/* Undo the guard offset applied in map_vram() before unmapping. */
178 gpu.vram -= 4096 / 2;
179 gpu.munmap(gpu.vram, VRAM_SIZE);
/* GP1 control-port write: dispatch on the command byte (data >> 24).
 * Lookup tables translate the display-mode bits to pixel resolutions. */
186 void GPUwriteStatus(uint32_t data)
188 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
189 static const short vres[4] = { 240, 480, 256, 480 };
190 uint32_t cmd = data >> 24;
/* Latch the raw write, but drop exact-duplicate writes for commands other
 * than 0/1/5 (those always have side effects). */
192 if (cmd < ARRAY_SIZE(gpu.regs)) {
193 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
195 gpu.regs[cmd] = data;
198 gpu.state.fb_dirty = 1;
/* GP1 0x03: display enable/blanking. */
208 gpu.status.blanking = data & 1;
/* GP1 0x04: DMA direction. */
211 gpu.status.dma = data & 3;
/* GP1 0x05: display start address in VRAM (10-bit x, 9-bit y). */
214 gpu.screen.x = data & 0x3ff;
215 gpu.screen.y = (data >> 10) & 0x1ff;
/* A display-start change is treated as a "flip"; re-evaluate frameskip
 * once per frame. */
216 if (gpu.frameskip.set) {
217 decide_frameskip_allow(gpu.ex_regs[3]);
218 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
220 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
/* GP1 0x06: horizontal display range (12-bit endpoints). */
225 gpu.screen.x1 = data & 0xfff;
226 gpu.screen.x2 = (data >> 12) & 0xfff;
/* GP1 0x07: vertical display range (10-bit endpoints). */
230 gpu.screen.y1 = data & 0x3ff;
231 gpu.screen.y2 = (data >> 10) & 0x3ff;
/* GP1 0x08: display mode — repack the mode bits into GPUSTAT[22:16]
 * and derive the pixel resolution from the lookup tables. */
235 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
236 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
237 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
240 renderer_notify_res_change();
/* GP1 0x10..0x1F: GPU info query. */
243 if ((cmd & 0xf0) == 0x10)
/* Optional platform hook observing all control writes. */
248 #ifdef GPUwriteStatus_ext
249 GPUwriteStatus_ext(data);
/* Number of extra parameter words following each GP0 command byte.
 * Indexed by the command (data >> 24); image transfers (0xA0/0xC0 rows)
 * and 0xE0+ commands are handled specially by the command parser. */
253 const unsigned char cmd_lengths[256] =
255 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
258 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
259 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
260 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
261 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
262 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
263 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
264 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
268 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Address of the 16bpp VRAM pixel at (x, y); VRAM is 1024 pixels wide. */
273 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
/* Copy one horizontal run of l 16-bit pixels between VRAM and mem.
 * is_read != 0: VRAM -> mem (read); otherwise mem -> VRAM (write). */
275 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
277 uint16_t *vram = VRAM_MEM_XY(x, y);
279 memcpy(mem, vram, l * 2);
281 memcpy(vram, mem, l * 2);
/* Stream words of an active VRAM transfer (gpu.dma) to or from VRAM.
 * data/count are in 32-bit words; internally the transfer runs in 16bpp
 * pixels. Handles a partial first line (dma.offset), then whole lines,
 * then a trailing partial line. Returns the number of words consumed. */
284 static int do_vram_io(uint32_t *data, int count, int is_read)
286 int count_initial = count;
287 uint16_t *sdata = (uint16_t *)data;
288 int x = gpu.dma.x, y = gpu.dma.y;
289 int w = gpu.dma.w, h = gpu.dma.h;
290 int o = gpu.dma.offset;
292 count *= 2; // operate in 16bpp pixels
/* Finish the line left incomplete by the previous call. */
294 if (gpu.dma.offset) {
295 l = w - gpu.dma.offset;
299 do_vram_line(x + o, y, sdata, l, is_read);
/* Transfer as many complete lines as the remaining data allows. */
312 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
314 do_vram_line(x, y, sdata, w, is_read);
/* Leftover data shorter than a line: transfer it and remember the offset. */
320 do_vram_line(x, y, sdata, count, is_read);
/* All lines done: close out the transfer. */
326 finish_vram_transfer(is_read);
/* Convert consumed pixel count back to 32-bit words. */
331 return count_initial - count / 2;
/* Begin a VRAM image transfer (GP0 0xA0 write / 0xC0 read): decode the
 * position and size words into gpu.dma and snapshot it in gpu.dma_start. */
334 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
337 log_anomaly("start_vram_transfer while old unfinished\n");
339 gpu.dma.x = pos_word & 0x3ff;
340 gpu.dma.y = (pos_word >> 16) & 0x1ff;
/* Size 0 means the maximum (1024x512): the -1/&mask/+1 dance maps 0 -> max. */
341 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
342 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
344 gpu.dma.is_read = is_read;
345 gpu.dma_start = gpu.dma;
/* Make sure queued renderer commands hit VRAM before we read/overwrite it. */
347 renderer_flush_queues();
/* For reads, prefetch the first two pixels into the GP0 read latch. */
350 // XXX: wrong for width 1
351 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
/* Remember that the game reads VRAM this frame (used by auto-interlace). */
352 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
355 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
356 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
/* Close out a VRAM transfer; after a write, tell the renderer which
 * rectangle of VRAM changed so it can invalidate cached data. */
359 static void finish_vram_transfer(int is_read)
364 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
365 gpu.dma_start.w, gpu.dma_start.h);
/* Parse a GP0 command list while frameskip is active: skip drawing
 * commands but still track state that affects later frames (fills,
 * texture window, 0xEx registers), and stop skipping if a command draws
 * into the display area. *last_cmd receives the last command byte.
 * Returns the number of words consumed. */
368 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
370 int cmd = 0, pos = 0, len, dummy, v;
373 gpu.frameskip.pending_fill[0] = 0;
375 while (pos < count && skip) {
376 uint32_t *list = data + pos;
378 len = 1 + cmd_lengths[cmd];
/* Fill (0x02): execute immediately if it covers more than the screen,
 * otherwise defer it until skipping ends (see decide_frameskip). */
382 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
383 // clearing something large, don't skip
384 do_cmd_list(list, 3, &dummy);
386 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
/* Textured polygon: keep the texture-page bits of ex_regs[1] current. */
392 gpu.ex_regs[1] &= ~0x1ff;
393 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
/* Poly-lines are terminated by a 0x50005000 marker word; scan for it
 * (stride 1 for flat, 2 for gouraud variants). */
396 for (v = 3; pos + v < count; v++)
398 if ((list[v] & 0xf000f000) == 0x50005000)
404 for (v = 4; pos + v < count; v += 2)
406 if ((list[v] & 0xf000f000) == 0x50005000)
/* 0xE3 (draw area) can end skipping; other 0xEx update shadow regs. */
413 skip = decide_frameskip_allow(list[0]);
414 if ((cmd & 0xf8) == 0xe0)
415 gpu.ex_regs[cmd & 7] = list[0];
419 if (pos + len > count) {
421 break; // incomplete cmd
/* VRAM transfer commands must not be skipped; hand back to the caller. */
423 if (0xa0 <= cmd && cmd <= 0xdf)
/* Push the accumulated 0xEx state to the renderer. */
429 renderer_sync_ecmds(gpu.ex_regs);
/* Main GP0 word processor: route data to an active VRAM transfer, start
 * new transfers, and feed drawing commands to the renderer (or to the
 * skip parser when frameskip is active). Returns words NOT consumed. */
434 static noinline int do_cmd_buffer(uint32_t *data, int count)
437 uint32_t old_e3 = gpu.ex_regs[3];
441 for (pos = 0; pos < count; )
/* An open VRAM *write* consumes raw data words first. */
443 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
445 pos += do_vram_io(data + pos, count - pos, 0);
450 cmd = data[pos] >> 24;
451 if (0xa0 <= cmd && cmd <= 0xdf) {
452 // consume vram write/read cmd
/* 0xC0-class commands are reads, 0xA0-class are writes. */
453 start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
458 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
459 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
460 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
462 pos += do_cmd_list(data + pos, count - pos, &cmd);
/* Mirror texture page (ex_regs[1]) and mask bits (ex_regs[6]) into
 * the low bits of GPUSTAT. */
471 gpu.status.reg &= ~0x1fff;
472 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
473 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
475 gpu.state.fb_dirty |= vram_dirty;
/* Re-evaluate frameskip if the draw area (0xE3) changed. */
477 if (old_e3 != gpu.ex_regs[3])
478 decide_frameskip_allow(gpu.ex_regs[3]);
/* Process the buffered GP0 words; keep any incomplete trailing command
 * at the start of the buffer for the next write. */
483 static void flush_cmd_buffer(void)
485 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
487 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
/* Bulk GP0 write (DMA block): flush any buffered words first so ordering
 * is preserved, then process the block; leftovers are discarded. */
491 void GPUwriteDataMem(uint32_t *mem, int count)
495 log_io("gpu_dma_write %p %d\n", mem, count);
497 if (unlikely(gpu.cmd_len > 0))
500 left = do_cmd_buffer(mem, count);
502 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
/* Single GP0 word write: buffer it and flush when the buffer fills. */
505 void GPUwriteData(uint32_t data)
507 log_io("gpu_write %08x\n", data);
508 gpu.cmd_buffer[gpu.cmd_len++] = data;
509 if (gpu.cmd_len >= CMD_BUFFER_LEN)
/* Walk a GPU DMA linked list starting at start_addr in PSX RAM.
 * Each node: header word (next-addr in low 24 bits, payload length in
 * the top byte) followed by `len` GP0 words. Bit 23 of visited headers
 * is set as a loop-detection marker and cleared again afterwards.
 * Returns a cycle estimate (return statement not visible in this view). */
513 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
515 uint32_t addr, *list;
516 uint32_t *llist_entry = NULL;
517 int len, left, count;
520 if (unlikely(gpu.cmd_len > 0))
/* Resubmission heuristic: if the same frame already sent a long list
 * within one hsync, pre-mark its head so the walk stops early. */
523 // ff7 sends its main list twice, detect this
524 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
525 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
526 gpu.state.last_list.cycles > 2048)
528 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
529 *llist_entry |= 0x800000;
532 log_io("gpu_dma_chain\n");
/* 0xffffff is the end-of-list terminator address. */
533 addr = start_addr & 0xffffff;
534 for (count = 0; addr != 0xffffff; count++)
/* Mask to the 2 MB RAM mirror and convert byte address to word index. */
536 list = rambase + (addr & 0x1fffff) / 4;
538 addr = list[0] & 0xffffff;
541 cpu_cycles += 5 + len;
543 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
545 // loop detection marker
546 // (bit23 set causes DMA error on real machine, so
547 // unlikely to be ever set by the game)
551 left = do_cmd_buffer(list + 1, len);
553 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
560 // remove loop detection markers
561 addr = start_addr & 0x1fffff;
562 while (count-- > 0) {
563 list = rambase + addr / 4;
564 addr = list[0] & 0x1fffff;
565 list[0] &= ~0x800000;
/* Also clear the pre-marked entry from the resubmission heuristic. */
568 *llist_entry &= ~0x800000;
/* Record this list so a resubmission next time can be detected. */
570 gpu.state.last_list.frame = *gpu.state.frame_count;
571 gpu.state.last_list.hcnt = *gpu.state.hcnt;
572 gpu.state.last_list.cycles = cpu_cycles;
573 gpu.state.last_list.addr = start_addr;
/* Bulk GP0 read (DMA): flush pending writes first, then stream VRAM
 * read-transfer data into mem. */
578 void GPUreadDataMem(uint32_t *mem, int count)
580 log_io("gpu_dma_read %p %d\n", mem, count);
582 if (unlikely(gpu.cmd_len > 0))
586 do_vram_io(mem, count, 1);
/* Single-word GP0 read port: flush pending writes, then pull one word
 * from the active VRAM read transfer (or the latched gp0 value). */
589 uint32_t GPUreadData(void)
593 if (unlikely(gpu.cmd_len > 0))
598 do_vram_io(&ret, 1, 1);
600 log_io("gpu_read %08x\n", ret);
/* GPUSTAT read: flush buffered commands so status bits are current. */
604 uint32_t GPUreadStatus(void)
608 if (unlikely(gpu.cmd_len > 0))
611 ret = gpu.status.reg;
612 log_io("gpu_read_status %08x\n", ret);
/* Savestate layout fields (struct declaration not fully visible here). */
618 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
619 uint32_t ulStatus; // current gpu status
620 uint32_t ulControl[256]; // latest control register values
621 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
/* Savestate entry point: type selects save vs load (branching not fully
 * visible here). Saves/restores VRAM, GP1 regs, 0xEx shadow regs and
 * GPUSTAT, then replays control writes to rebuild derived state. */
624 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
632 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
633 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
/* ex_regs are stashed in the unused 0xE0+ slots of ulControl. */
634 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
635 freeze->ulStatus = gpu.status.reg;
638 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
639 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
640 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
641 gpu.status.reg = freeze->ulStatus;
/* Replay GP1 commands 8..1; the XOR defeats the duplicate-write check
 * in GPUwriteStatus so every replayed write actually takes effect. */
643 for (i = 8; i > 0; i--) {
644 gpu.regs[i] ^= 1; // avoid reg change detection
645 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
647 renderer_sync_ecmds(gpu.ex_regs);
/* Invalidate all renderer caches: whole VRAM may have changed. */
648 renderer_update_caches(0, 0, 1024, 512);
/* Per-vsync update: flush the renderer, handle blanking transitions,
 * run the frameskip state machine, and present the frame if dirty. */
655 void GPUupdateLace(void)
659 renderer_flush_queues();
/* Entering blanking: note it once and mark the framebuffer dirty so
 * the blank screen gets shown. */
661 if (gpu.status.blanking) {
662 if (!gpu.state.blanked) {
664 gpu.state.blanked = 1;
665 gpu.state.fb_dirty = 1;
/* Nothing drawn since last present: skip the flip entirely. */
670 if (!gpu.state.fb_dirty)
673 if (gpu.frameskip.set) {
/* No frame ready: keep waiting unless the game stopped flipping for
 * ~9 frames (presumably a stall guard — confirm), then stop skipping. */
674 if (!gpu.frameskip.frame_ready) {
675 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
677 gpu.frameskip.active = 0;
679 gpu.frameskip.frame_ready = 0;
683 gpu.state.fb_dirty = 0;
684 gpu.state.blanked = 0;
/* VBlank notification: decide the effective interlace mode and pass the
 * current field (lcf) to the renderer. */
687 void GPUvBlank(int is_vblank, int lcf)
689 int interlace = gpu.state.allow_interlace
690 && gpu.status.interlace && gpu.status.dheight;
691 // interlace doesn't look nice on progressive displays,
692 // so we have this "auto" mode here for games that don't read vram
/* allow_interlace == 2 ("auto"): only honor interlace while the game
 * has read VRAM within the last frame. */
693 if (gpu.state.allow_interlace == 2
694 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
/* On any active interlace, or a mode change, resync the renderer. */
698 if (interlace || interlace != gpu.state.old_interlace) {
699 gpu.state.old_interlace = interlace;
703 renderer_flush_queues();
704 renderer_set_interlace(interlace, !lcf);
708 #include "../../frontend/plugin_lib.h"
/* Frontend hookup: copy configuration and callback pointers from the
 * rearmed callback struct, map VRAM if not yet mapped, and propagate
 * the config to the renderer and video-out layers. */
710 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
712 gpu.frameskip.set = cbs->frameskip;
713 gpu.frameskip.advice = &cbs->fskip_advice;
714 gpu.frameskip.active = 0;
715 gpu.frameskip.frame_ready = 1;
/* Real frame/hsync counters replace the &gpu.zero placeholders. */
716 gpu.state.hcnt = cbs->gpu_hcnt;
717 gpu.state.frame_count = cbs->gpu_frame_count;
718 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
719 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
721 gpu.mmap = cbs->mmap;
722 gpu.munmap = cbs->munmap;
/* VRAM may not have been mapped at GPUinit time (hooks arrive late). */
725 if (gpu.vram == NULL)
728 if (cbs->pl_vout_set_raw_vram)
729 cbs->pl_vout_set_raw_vram(gpu.vram);
730 renderer_set_config(cbs);
731 vout_set_config(cbs);
734 // vim:shiftwidth=2:expandtab