/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
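// NOTE: the three macros above rely on GCC/Clang builtins
// (__builtin_expect, __builtin_prefetch, __attribute__); building with
// another compiler would need fallback definitions.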
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
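// reset helpers: complete any buffered commands and any unfinished
// VRAM transfer so no stale state survives a reset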
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}
static noinline void update_height(void)
{
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0 || sh > gpu.screen.vres)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}
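// called on display flip; decides whether upcoming frames should be
// skipped, from the fixed skip setting and/or the frontend's advice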
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
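// decide whether drawing at (x, y) from the e3 register may be skipped.
// The unsigned compares below fold two range checks into one each:
// (uint32_t)(x - start) >= (uint32_t)w is true both when x < start
// (the subtraction wraps to a huge value) and when x >= start + w.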
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // leave a 4k guard area in front of VRAM proper
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
long GPUinit(void)
{
  int ret;

  ret  = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
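// example, derived from the tables above: a display mode write of
// 0x08000001 yields hres index 2 -> 320 columns, vres index 0 -> 240 lines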
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
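// each entry is the number of parameter words following the command word,
// so a command's total size is 1 + cmd_lengths[cmd]; for example 0x02
// (fill rectangle) takes 2 extra words (xy, wh), 3 words total.
// Variable-length commands (polylines, image i/o) get special handling.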
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}
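// move up to 'count' words between the incoming buffer and the VRAM
// rectangle described by gpu.dma.  Note the word/pixel bookkeeping:
// 'count' is in 32-bit words, two 16bpp pixels each, so it is doubled
// for the copy and the return value is words actually consumed.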
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish the line started by a previous call first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  // leftover partial line
  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
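// begin a GP0 image transfer (0xa0 write / 0xc0 read).  The size decode
// ((v - 1) & mask) + 1 maps a raw 0 to the maximum (1024 wide, 512 high),
// matching hardware behavior.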
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h);
}
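// scan the command stream while frameskip is active: draw commands are
// dropped, but state that must stay correct (texpage from textured prims,
// e1..e6 registers, pending fills) is still tracked and synced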
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
        break;
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(list[0]);
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = list[0];
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
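// top-level command processor: continues any in-progress VRAM write,
// consumes image i/o setup commands, and hands the rest to the renderer
// (or to the skip path while frameskipping).  Returns words not consumed.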
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      break; // incomplete cmd
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
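// walk a DMA linked list: each node's first word holds the payload length
// in the top byte and the address of the next node in the low 24 bits;
// 0xffffff terminates the list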
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break; // loop detected
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}
uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
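// savestate interface; in the PSEmu plugin API type 1 saves
// into *freeze and type 0 loads from it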
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}
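// called once per emulated frame; pushes a completed frame to the video
// output unless blanked, unchanged, or skipped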
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
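// per-vblank hook: tracks the interlace field and drops interlace
// rendering in "auto" mode when the game is not reading VRAM back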
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
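// ReARMed-specific extension: the frontend hands over its callbacks and
// settings here (frameskip advice, counters, vram mapping functions)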
#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
// vim:shiftwidth=2:expandtab