 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a true array (do not use on pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch-prediction hint: condition is expected to be false
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))

// debug trace, stamped with the emu-provided frame count / hsync count
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

// compiled out by default; point these at gpu_log to enable tracing
//#define log_io gpu_log
//#define log_anomaly gpu_log
#define log_anomaly(...)

// forward declarations for the mutually dependent command/DMA paths
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
// GP1 command/FIFO reset: drain anything still queued so pending work
// isn't silently dropped before state is cleared.
static noinline void do_cmd_reset(void)
  // flush buffered GP0 words through the normal command path first
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  // an in-flight VRAM transfer must be completed/terminated too
  // (gpu.dma.h > 0 means rows are still outstanding)
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
51 static noinline void do_reset(void)
57 memset(gpu.regs, 0, sizeof(gpu.regs));
58 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
59 gpu.ex_regs[i] = (0xe0 + i) << 24;
60 gpu.status.reg = 0x14802000;
63 gpu.screen.hres = gpu.screen.w = 256;
64 gpu.screen.vres = gpu.screen.h = 240;
67 static noinline void update_width(void)
69 int sw = gpu.screen.x2 - gpu.screen.x1;
70 if (sw <= 0 || sw >= 2560)
72 gpu.screen.w = gpu.screen.hres;
74 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the displayed height from the y1/y2 display-range registers.
static noinline void update_height(void)
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  // NOTE(review): branch body not visible in this chunk -- presumably
  // doubles sh for the double-height (interlaced) mode; confirm upstream
  if (gpu.status.dheight)
  // clamp bogus ranges to the mode's vertical resolution
  if (sh <= 0 || sh > gpu.screen.vres)
89 static noinline void decide_frameskip(void)
91 if (gpu.frameskip.active)
94 gpu.frameskip.cnt = 0;
95 gpu.frameskip.frame_ready = 1;
98 if (!gpu.frameskip.active && *gpu.frameskip.advice)
99 gpu.frameskip.active = 1;
100 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
101 gpu.frameskip.active = 1;
103 gpu.frameskip.active = 0;
105 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
107 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
108 gpu.frameskip.pending_fill[0] = 0;
112 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
114 // no frameskip if it decides to draw to display area,
115 // but not for interlace since it'll most likely always do that
116 uint32_t x = cmd_e3 & 0x3ff;
117 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
118 gpu.frameskip.allow = gpu.status.interlace ||
119 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
120 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
121 return gpu.frameskip.allow;
// GP1(0x10): "get GPU info" -- latches the requested internal value
// into gpu.gp0 for the next GPUREAD.
// NOTE(review): the case labels are not visible in this chunk.
static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
  // shadowed e0..e7 register, 20 significant bits
  gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
  // draw offset (e5) value
  gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

// Map the VRAM backing store through the frontend-supplied mmap
// callback; on failure only warns, since the emu may limp along.
static int map_vram(void)
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // offset by 4096 bytes (2048 uint16 pixels) into the mapping --
    // presumably a guard/alignment area; confirm against GPUshutdown,
    // which undoes the same offset before munmap
    gpu.vram += 4096 / 2;
  fprintf(stderr, "could not map vram, expect crashes\n");
  // NOTE(review): fragment of GPUinit() -- the function header is not
  // visible in this chunk.
  ret |= renderer_init();

  // point the counters at a harmless dummy until the frontend installs
  // real ones via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;

  // map VRAM now only if the frontend already provided an mmap callback
  if (gpu.mmap != NULL) {
// Plugin teardown: release the VRAM mapping (undoing the 4096-byte
// offset applied in map_vram) via the frontend's munmap callback.
long GPUshutdown(void)
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2; // restore the original mapping base
    gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 control-port write. The command number lives in the top byte;
// the handlers update display state, screen geometry and frameskip.
// NOTE(review): the switch/case labels are not visible in this chunk;
// comments below mark which GP1 command each group belongs to.
void GPUwriteStatus(uint32_t data)
  // horizontal resolutions indexed by status bits 16-18,
  // vertical by bits 19-20
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip redundant writes, except cmds 0/1 (reset) and 5 (display
    // address), which have side effects even when the value repeats
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
    gpu.regs[cmd] = data;

  gpu.state.fb_dirty = 1;

  // GP1(0x03): display enable/blanking
  gpu.status.blanking = data & 1;

  // GP1(0x04): DMA direction
  gpu.status.dma = data & 3;

  // GP1(0x05): start of display area in VRAM
  gpu.screen.x = data & 0x3ff;
  gpu.screen.y = (data >> 10) & 0x1ff;
  if (gpu.frameskip.set) {
    decide_frameskip_allow(gpu.ex_regs[3]);
    // treat a display-address change as a "flip" for skip pacing
    if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
      gpu.frameskip.last_flip_frame = *gpu.state.frame_count;

  // GP1(0x06): horizontal display range (dot clocks)
  gpu.screen.x1 = data & 0xfff;
  gpu.screen.x2 = (data >> 12) & 0xfff;

  // GP1(0x07): vertical display range (scanlines)
  gpu.screen.y1 = data & 0x3ff;
  gpu.screen.y2 = (data >> 10) & 0x3ff;

  // GP1(0x08): display mode -- splice bits into the status word and
  // look up the new resolution tables
  gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
  gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
  gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
  renderer_notify_res_change();

  // GP1(0x10..0x1f): info query
  if ((cmd & 0xf0) == 0x10)

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
// Number of additional parameter words each GP0 command consumes
// (total packet size is 1 + cmd_lengths[cmd]). Entries of 0 cover both
// zero-parameter commands and variable-length ones (polylines 0x48+,
// VRAM transfers 0xa0/0xc0) that are special-cased by the parser.
const unsigned char cmd_lengths[256] =
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
282 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
284 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
286 uint16_t *vram = VRAM_MEM_XY(x, y);
288 memcpy(mem, vram, l * 2);
290 memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words into/out of the active VRAM transfer
// rectangle (gpu.dma), resuming from a partial row if needed.
// Returns the number of 32-bit words actually consumed/produced.
static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data; // transfer operates on pixels
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset; // pixels already done in the current row

  count *= 2; // operate in 16bpp pixels

  // finish a previously interrupted row first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    do_vram_line(x + o, y, sdata, l, is_read);

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);

  // trailing partial row; remember the offset for the next call
  do_vram_line(x, y, sdata, count, is_read);

  finish_vram_transfer(is_read);

  return count_initial - count / 2;
// Begin a VRAM image transfer (GP0 0xa0 write / 0xc0 read). Decodes
// the position/size words into gpu.dma and snapshots it in
// gpu.dma_start for later cache updates.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
  log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size 0 means maximum (1024x512): the -1/&mask/+1 dance encodes that
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;

  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  // drawing must be up to date before VRAM is read back
  renderer_flush_queues();

  // pre-latch the first word for GPUREAD
  // XXX: wrong for width 1
  memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
  gpu.state.last_vram_read_frame = *gpu.state.frame_count;

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete the current VRAM transfer; after a write, tell the renderer
// which rectangle changed so any caches/textures can be invalidated.
static void finish_vram_transfer(int is_read)
  renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
      gpu.dma_start.w, gpu.dma_start.h);
// Command-list parser used while frameskipping: walks GP0 packets
// without drawing, but still tracks state that must stay correct
// (e-register shadows, texture page, pending fills) and re-checks
// whether skipping is still allowed. Returns words consumed; *last_cmd
// receives the last command seen.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy, v;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    len = 1 + cmd_lengths[cmd];

    // fill (0x02): large fills are drawn even while skipping, small
    // ones are deferred into pending_fill for decide_frameskip()
    if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
      // clearing something large, don't skip
      do_cmd_list(list, 3, &dummy);
      memcpy(gpu.frameskip.pending_fill, list, 3 * 4);

    // textured prim: keep the texture-page bits of e1 in sync
    gpu.ex_regs[1] &= ~0x1ff;
    gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;

    // polyline: scan for the 0x5000,5000 terminator (1 word/vertex)
    for (v = 3; pos + v < count; v++)
      if ((list[v] & 0xf000f000) == 0x50005000)

    // shaded polyline: 2 words per vertex
    for (v = 4; pos + v < count; v += 2)
      if ((list[v] & 0xf000f000) == 0x50005000)

    // e3 (draw area) may flip the skip decision mid-list
    skip = decide_frameskip_allow(list[0]);
    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      break; // incomplete cmd
    // VRAM transfer commands end skip-mode parsing
    if (0xa0 <= cmd && cmd <= 0xdf)

  // push the (possibly updated) e-registers to the renderer
  renderer_sync_ecmds(gpu.ex_regs);
// Main GP0 word dispatcher: routes incoming words to VRAM I/O, VRAM
// transfer setup, or the draw-list parsers (skipping or real), then
// refreshes the status word from the e-register shadows.
// Returns the number of words left unprocessed (incomplete packet).
static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3]; // to detect a draw-area change

  for (pos = 0; pos < count; )
    // active VRAM write eats the data stream first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);

    cmd = data[pos] >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
      pos += do_cmd_list(data + pos, count - pos, &cmd);

  // rebuild status bits 0-12 from the e1/e6 shadows
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area changed -> re-evaluate the frameskip decision
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);
// Process the buffered GP0 words; any incomplete trailing packet is
// moved to the front of the buffer to await more data.
static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 write entry point (DMA block transfer from the emu core).
void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  // anything buffered by GPUwriteData must be flushed first to keep
  // command ordering correct
  if (unlikely(gpu.cmd_len > 0))

  left = do_cmd_buffer(mem, count);
  log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: buffer it, flushing when the buffer fills.
void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a GPU DMA linked list in emulated RAM and feed each node's
// payload to the command parser. Includes two workarounds: a
// duplicate-list detector (FF7) and a loop detector for lists whose
// link pointers form a cycle. Returns accumulated cpu_cycles.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
  uint32_t addr, *list, ld_addr = 0;
  uint32_t *llist_entry = NULL;
  int len, left, count;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
    // temporarily mark the previous list's head as a terminator so the
    // duplicate walk ends immediately (undone below)
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit 23 set in a link marks the end of the chain
  for (count = 0; (addr & 0x800000) == 0; count++)
    list = rambase + (addr & 0x1fffff) / 4;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    left = do_cmd_buffer(list + 1, len);
    log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

    // protection against infinite linked lists
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)

  // remove loop detection markers
  count -= LD_THRESHOLD + 2;
  addr = ld_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;

  // undo the duplicate-list terminator hack
  *llist_entry &= ~0x800000;

  // remember this list for the duplicate detector next time
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD: DMA a block of words out of the active VRAM read.
void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  // flush buffered writes so the read sees up-to-date VRAM
  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(mem, count, 1);
// Single-word GPUREAD: returns the next word of an active VRAM read
// (or the latched gp0 value).
uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush any buffered commands first so the status bits
// derived from the e-register shadows are current.
uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  // NOTE(review): interior of the GPUFreeze save-state struct; the
  // struct header is not visible in this chunk.
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save/load the full GPU state to/from a GPUFreeze struct (the
// save-vs-load dispatch on `type` is not visible in this chunk).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
    // save path: snapshot VRAM (512 lines x 1024 16bpp pixels),
    // control regs and the e0..e7 shadows (stored at offset 0xe0)
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;

    // load path: restore the same data...
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;

    // ...then replay control writes 8..1 so derived state (screen
    // geometry, mode) is recomputed; the xor defeats the
    // "same value -> no-op" check in GPUwriteStatus
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512); // whole VRAM changed
// Per-vsync hook from the frontend: decides whether the frame buffer
// should actually be presented, honoring blanking and frameskip pacing.
void GPUupdateLace(void)
  renderer_flush_queues();

  // display blanked: show black once, then stop re-presenting
  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;

  // nothing drawn since last present -> nothing to do
  if (!gpu.state.fb_dirty)

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // keep skipping only briefly; force a present if no flip has
      // happened for ~9 frames so the screen doesn't freeze
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
      gpu.frameskip.active = 0;
    gpu.frameskip.frame_ready = 0;

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
// vblank hook: computes the effective interlace setting and informs
// the renderer of the current field (lcf) when it changes.
void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)

  // notify on any change of interlace state, and every field while on
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
730 #include "../../frontend/plugin_lib.h"
// Frontend configuration hook: wires the plugin to the emulator's
// counters, frameskip advice, memory-mapping callbacks and renderer
// options. May be called again at runtime when settings change.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  // reset skip state so a settings change takes effect cleanly
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // map VRAM lazily if GPUinit ran before the callbacks arrived
  if (gpu.vram == NULL)

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
756 // vim:shiftwidth=2:expandtab