2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
/* Generic helpers.
 * ARRAY_SIZE: element count of a true array (not valid on pointers).
 * unlikely:   branch-prediction hint — x is expected to be false.
 * noinline:   keep rarely-taken slow paths out of their callers. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))
// Logging helper: prefixes every message with the current frame count
// and horizontal counter so events can be correlated with emu timing.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

// Verbose tracing hooks; disabled by default. Uncomment to route
// I/O traces / anomaly reports through gpu_log.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Forward declarations: GP0 command-stream processing and VRAM
// transfer completion (both defined later in this file).
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
/* Drain pending state before a command reset: flush any buffered
 * command words through the normal path, and close out a VRAM DMA
 * transfer that still has rows remaining. */
32 static noinline void do_cmd_reset(void)
34 if (unlikely(gpu.cmd_len > 0))
// leftover partially-received command words — process them now
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
38 if (unlikely(gpu.dma.h > 0))
// dma.h > 0 means a VRAM transfer is mid-flight; terminate it cleanly
39 finish_vram_transfer(gpu.dma_start.is_read);
/* Full GPU reset: clear the control registers, re-seed the 0xE0-0xE7
 * shadow registers with their bare opcodes, restore the power-on
 * status word and the default 256x240 display mode. */
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs[i] caches the last 0xEx command word; seed with opcode only
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// power-on status value — TODO confirm bit meanings against GPU docs
52 gpu.status.reg = 0x14802000;
// default mode resolution == visible size
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
/* Recompute visible width from the horizontal display range (x1..x2).
 * 2560 appears to be the full horizontal range in GPU clock units;
 * width is the range scaled by the current mode's hres. */
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
// nonsensical or out-of-range setting: fall back to full mode width
62 if (sw <= 0 || sw >= 2560)
64 gpu.screen.w = gpu.screen.hres;
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
/* Recompute visible height from the vertical display range (y1..y2).
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (the dheight branch body and the clamp assignment). */
69 static noinline void update_height(void)
71 // TODO: emulate this properly..
72 int sh = gpu.screen.y2 - gpu.screen.y1;
// dheight presumably doubles the line count (interlace) — body not visible
73 if (gpu.status.dheight)
// invalid range: clamped to the mode height (assignment not visible here)
75 if (sh <= 0 || sh > gpu.screen.vres)
/* Per-flip frameskip decision: toggles gpu.frameskip.active based on
 * external advice and the configured skip ratio, and replays a fill
 * command that was deferred while skipping. */
81 static noinline void decide_frameskip(void)
83 if (gpu.frameskip.active)
86 gpu.frameskip.cnt = 0;
87 gpu.frameskip.frame_ready = 1;
// frontend-provided advice pointer forces skipping on
90 if (!gpu.frameskip.active && *gpu.frameskip.advice)
91 gpu.frameskip.active = 1;
// fixed-ratio mode (set > 0): keep skipping until cnt reaches set
92 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
93 gpu.frameskip.active = 1;
95 gpu.frameskip.active = 0;
// a screen fill was held back during skipping; execute it now so the
// next rendered frame starts from the correct background
97 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
99 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
100 gpu.frameskip.pending_fill[0] = 0;
/* Decide whether the next frame may be skipped, based on the 0xE3
 * (drawing area top-left) command word: drawing into the visible
 * display area must not be skipped, except under interlace.
 * Returns the new value of gpu.frameskip.allow. */
104 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
106 // no frameskip if it decides to draw to display area,
107 // but not for interlace since it'll most likely always do that
108 uint32_t x = cmd_e3 & 0x3ff;
109 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// the unsigned subtract-and-compare doubles as an in-range test:
// (x - screen.x) >= w is true both for x left of and right of the screen
110 gpu.frameskip.allow = gpu.status.interlace ||
111 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
112 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
113 return gpu.frameskip.allow;
/* GP1 info request: latch the selected internal register into gpu.gp0
 * for the next GPUREAD. NOTE(review): the switch's case labels are not
 * visible in this excerpt; only two case bodies survive. */
116 static noinline void get_gpu_info(uint32_t data)
118 switch (data & 0x0f) {
// one of the 0xE2-0xE5 shadow registers (20 significant bits)
123 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
126 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
137 // double, for overdraw guard
138 #define VRAM_SIZE (1024 * 512 * 2 * 2)

/* Map the VRAM backing store via the frontend-supplied mmap hook.
 * On success the pointer is advanced by 2048 16-bit entries —
 * presumably a guard area before the visible VRAM; TODO confirm.
 * Return value lines are not visible in this excerpt. */
140 static int map_vram(void)
142 gpu.vram = gpu.mmap(VRAM_SIZE);
143 if (gpu.vram != NULL) {
144 gpu.vram += 4096 / 2;
// failure is survivable but will likely crash later — warn loudly
148 fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): these lines are the interior of an init function whose
// signature lies outside this excerpt (presumably GPUinit).
157 ret |= renderer_init();
// until the frontend registers real counters, point at a harmless zero
// so gpu_log and frameskip logic can dereference them safely
159 gpu.state.frame_count = &gpu.zero;
160 gpu.state.hcnt = &gpu.zero;
161 gpu.frameskip.active = 0;
// map VRAM only if the frontend already provided an mmap hook
165 if (gpu.mmap != NULL) {
/* Plugin shutdown: release the VRAM mapping, first undoing the
 * 2048-entry offset applied in map_vram so the original base pointer
 * is passed to munmap. */
172 long GPUshutdown(void)
178 if (gpu.vram != NULL) {
179 gpu.vram -= 4096 / 2;
180 gpu.munmap(gpu.vram, VRAM_SIZE);
/* GP1 control-port write. data>>24 selects the command, the low bits
 * carry its payload. NOTE(review): the switch statement and its case
 * labels are not visible in this excerpt; the surviving bodies are
 * annotated below with the GP1 command they appear to implement. */
187 void GPUwriteStatus(uint32_t data)
// horizontal resolutions indexed by status bits 16-18,
// vertical resolutions indexed by status bits 19-20
189 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
190 static const short vres[4] = { 240, 480, 256, 480 };
191 uint32_t cmd = data >> 24;
// drop redundant writes, except for cmds 0/1/5 which always take effect
193 if (cmd < ARRAY_SIZE(gpu.regs)) {
194 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
196 gpu.regs[cmd] = data;
199 gpu.state.fb_dirty = 1;
// display blanking on/off
209 gpu.status.blanking = data & 1;
// DMA direction
212 gpu.status.dma = data & 3;
// display start address in VRAM
215 gpu.screen.x = data & 0x3ff;
216 gpu.screen.y = (data >> 10) & 0x1ff;
217 if (gpu.frameskip.set) {
218 decide_frameskip_allow(gpu.ex_regs[3]);
// first display flip seen this frame — run the frameskip decision once
219 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
221 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
226 gpu.screen.x1 = data & 0xfff;
227 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
231 gpu.screen.y1 = data & 0x3ff;
232 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: repack the mode bits into status bits 16-22,
// then refresh the derived resolution fields
236 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
237 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
238 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
241 renderer_notify_res_change();
// 0x10..0x1f: internal register info request
244 if ((cmd & 0xf0) == 0x10)
// optional platform hook compiled in by the build
249 #ifdef GPUwriteStatus_ext
250 GPUwriteStatus_ext(data);
/* Number of additional argument words for each GP0 command opcode
 * (index = command byte; the command word itself is not counted).
 * Zero entries are commands with no extra words or unused opcodes. */
254 const unsigned char cmd_lengths[256] =
256 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
258 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
259 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
260 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
261 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
262 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
263 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
264 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
266 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
267 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
271 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y); VRAM is 1024 pixels wide.
274 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

/* Copy one horizontal span of l 16-bit pixels between VRAM and mem;
 * is_read selects direction (read: VRAM -> mem, write: mem -> VRAM).
 * The if/else around the two memcpys is not visible in this excerpt. */
276 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
278 uint16_t *vram = VRAM_MEM_XY(x, y);
280 memcpy(mem, vram, l * 2);
282 memcpy(vram, mem, l * 2);
/* Pump words between the command stream and the active VRAM transfer
 * rectangle (gpu.dma). Handles a partially-transferred row left over
 * from the previous call (gpu.dma.offset), then whole rows, then the
 * start of a trailing partial row. Returns the number of 32-bit words
 * consumed from data. */
285 static int do_vram_io(uint32_t *data, int count, int is_read)
287 int count_initial = count;
288 uint16_t *sdata = (uint16_t *)data;
289 int x = gpu.dma.x, y = gpu.dma.y;
290 int w = gpu.dma.w, h = gpu.dma.h;
291 int o = gpu.dma.offset;
293 count *= 2; // operate in 16bpp pixels
// finish the row that was cut short last time
295 if (gpu.dma.offset) {
296 l = w - gpu.dma.offset;
300 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows
313 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
315 do_vram_line(x, y, sdata, w, is_read);
// a final partial row; its offset is carried to the next call
321 do_vram_line(x, y, sdata, count, is_read);
// rectangle complete — close out the transfer
327 finish_vram_transfer(is_read);
// convert leftover pixel count back to words consumed
332 return count_initial - count / 2;
/* Begin a VRAM read or write transfer (GP0 0xC0/0xA0 family).
 * Decodes position and size from the command words (sizes wrap so 0
 * means maximum), snapshots the state into gpu.dma_start, and for
 * reads pre-latches the first pixels into gpu.gp0. */
335 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
// a previous transfer was never finished — log it, the new one wins
338 log_anomaly("start_vram_transfer while old unfinished\n");
340 gpu.dma.x = pos_word & 0x3ff;
341 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// ((n - 1) & mask) + 1 maps 0 to the full dimension
342 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
343 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
345 gpu.dma.is_read = is_read;
// keep the original rectangle; gpu.dma mutates as the transfer advances
346 gpu.dma_start = gpu.dma;
348 renderer_flush_queues();
351 // XXX: wrong for width 1
// pre-read the first two pixels so GPUREAD returns data immediately
352 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// remember the frame for the auto-interlace heuristic in GPUvBlank
353 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
356 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
357 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
/* Terminate the active VRAM transfer. For writes, the renderer is told
 * which rectangle changed so it can invalidate cached textures (the
 * is_read branch lines are not visible in this excerpt). */
360 static void finish_vram_transfer(int is_read)
365 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
366 gpu.dma_start.w, gpu.dma_start.h);
/* Walk the command list while frameskip is active, consuming commands
 * without drawing them. Large fills are either executed (if they cover
 * the screen-sized area check) or deferred into pending_fill; 0xEx
 * state commands are still tracked since they affect later frames.
 * Stops as soon as skipping is no longer allowed. Returns words
 * consumed; *last_cmd receives the last command byte (assignment not
 * visible in this excerpt). */
369 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
371 int cmd = 0, pos = 0, len, dummy, v;
374 gpu.frameskip.pending_fill[0] = 0;
376 while (pos < count && skip) {
377 uint32_t *list = data + pos;
// total words = command word + argument words
379 len = 1 + cmd_lengths[cmd];
// fill command: if it clears more than the visible screen, run it now
383 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
384 // clearing something large, don't skip
385 do_cmd_list(list, 3, &dummy);
// otherwise defer the fill until skipping ends (see decide_frameskip)
387 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// textured poly: track the texture page bits in the E1 shadow register
393 gpu.ex_regs[1] &= ~0x1ff;
394 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
// poly-line: scan for the 0x50005000 terminator word
397 for (v = 3; pos + v < count; v++)
399 if ((list[v] & 0xf000f000) == 0x50005000)
// shaded poly-line variant: terminator every other word
405 for (v = 4; pos + v < count; v += 2)
407 if ((list[v] & 0xf000f000) == 0x50005000)
// 0xE3 (draw area): may end the skip; other 0xEx update shadow regs
414 skip = decide_frameskip_allow(list[0]);
415 if ((cmd & 0xf8) == 0xe0)
416 gpu.ex_regs[cmd & 7] = list[0];
420 if (pos + len > count) {
422 break; // incomplete cmd
// VRAM transfer commands cannot be skipped — let the caller handle them
424 if (0xa0 <= cmd && cmd <= 0xdf)
// propagate the tracked 0xEx state to the renderer
430 renderer_sync_ecmds(gpu.ex_regs);
/* Main GP0 command-word dispatcher. Routes words to the active VRAM
 * transfer, starts new transfers, and hands drawing commands to either
 * the skip walker or the real renderer. Returns the number of words
 * left unprocessed (the return statement is not visible here). */
435 static noinline int do_cmd_buffer(uint32_t *data, int count)
438 uint32_t old_e3 = gpu.ex_regs[3];
442 for (pos = 0; pos < count; )
// an active VRAM write consumes the stream directly
444 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
446 pos += do_vram_io(data + pos, count - pos, 0);
451 cmd = data[pos] >> 24;
452 if (0xa0 <= cmd && cmd <= 0xdf) {
453 // consume vram write/read cmd
454 start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
459 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
460 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
461 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
463 pos += do_cmd_list(data + pos, count - pos, &cmd);
// rebuild the status low bits from the E1/E6 shadow registers
472 gpu.status.reg &= ~0x1fff;
473 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
474 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
476 gpu.state.fb_dirty |= vram_dirty;
// the draw-area register changed: re-evaluate frameskip permission
478 if (old_e3 != gpu.ex_regs[3])
479 decide_frameskip_allow(gpu.ex_regs[3]);
/* Process the internal command buffer; any words that could not be
 * consumed (incomplete command) are moved to the front for next time. */
484 static void flush_cmd_buffer(void)
486 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
488 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
/* Block write entry point (DMA to GP0): flush buffered words first so
 * ordering is preserved, then process the new block; leftover words
 * from an external block are dropped (logged as an anomaly). */
492 void GPUwriteDataMem(uint32_t *mem, int count)
496 log_io("gpu_dma_write %p %d\n", mem, count);
498 if (unlikely(gpu.cmd_len > 0))
501 left = do_cmd_buffer(mem, count);
503 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
/* Single-word GP0 write: buffer it, flushing when the buffer fills.
 * (The flush call itself is not visible in this excerpt.) */
506 void GPUwriteData(uint32_t data)
508 log_io("gpu_write %08x\n", data);
509 gpu.cmd_buffer[gpu.cmd_len++] = data;
510 if (gpu.cmd_len >= CMD_BUFFER_LEN)
/* Walk a linked-list DMA chain in emulated RAM and feed each packet to
 * the command processor. Loop protection: bit 23 is set on each visited
 * node (a real machine would DMA-error on such addresses, so games
 * never set it) and cleared again afterwards. Also detects FF7-style
 * double submission of the same list within one frame. */
514 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
516 uint32_t addr, *list;
517 uint32_t *llist_entry = NULL;
518 int len, left, count;
// preserve ordering with words buffered via GPUwriteData
521 if (unlikely(gpu.cmd_len > 0))
524 // ff7 sends it's main list twice, detect this
525 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
526 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
527 gpu.state.last_list.cycles > 2048)
// pre-mark the previous list head so re-walking it stops immediately
529 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
530 *llist_entry |= 0x800000;
533 log_io("gpu_dma_chain\n");
534 addr = start_addr & 0xffffff;
// 0xffffff is the end-of-chain terminator
535 for (count = 0; addr != 0xffffff; count++)
537 list = rambase + (addr & 0x1fffff) / 4;
539 addr = list[0] & 0xffffff;
// rough per-packet cycle cost estimate
542 cpu_cycles += 5 + len;
544 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
546 // loop detection marker
547 // (bit23 set causes DMA error on real machine, so
548 // unlikely to be ever set by the game)
// payload starts after the link word
552 left = do_cmd_buffer(list + 1, len);
554 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
561 // remove loop detection markers
562 addr = start_addr & 0x1fffff;
563 while (count-- > 0) {
564 list = rambase + addr / 4;
565 addr = list[0] & 0x1fffff;
566 list[0] &= ~0x800000;
// also clear the marker planted for the double-submission check
569 *llist_entry &= ~0x800000;
// record this submission for the next double-list detection
571 gpu.state.last_list.frame = *gpu.state.frame_count;
572 gpu.state.last_list.hcnt = *gpu.state.hcnt;
573 gpu.state.last_list.cycles = cpu_cycles;
574 gpu.state.last_list.addr = start_addr;
/* Block read entry point (DMA from GP0): flush pending writes first so
 * the VRAM read transfer is set up, then pull pixels out. */
579 void GPUreadDataMem(uint32_t *mem, int count)
581 log_io("gpu_dma_read %p %d\n", mem, count);
583 if (unlikely(gpu.cmd_len > 0))
587 do_vram_io(mem, count, 1);
/* Single-word GPUREAD: flush pending writes, then return the next word
 * of the active VRAM read (or the latched gp0 value — the fallback path
 * is not visible in this excerpt). */
590 uint32_t GPUreadData(void)
594 if (unlikely(gpu.cmd_len > 0))
599 do_vram_io(&ret, 1, 1);
601 log_io("gpu_read %08x\n", ret);
/* Status-port read: flush buffered command words first so the status
 * bits reflect everything written so far, then return status.reg. */
605 uint32_t GPUreadStatus(void)
609 if (unlikely(gpu.cmd_len > 0))
612 ret = gpu.status.reg;
613 log_io("gpu_read_status %08x\n", ret);
// NOTE(review): fields of the savestate structure; the struct header
// itself (presumably "struct GPUFreeze") lies outside this excerpt.
619 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
620 uint32_t ulStatus; // current gpu status
621 uint32_t ulControl[256]; // latest control register values
622 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
/* Savestate save/load. type selects the direction (the switch/case
 * lines are not visible in this excerpt): save copies VRAM, regs and
 * ex_regs (stashed at ulControl[0xe0..]) out; load copies them back
 * and replays the control registers to rebuild derived state. */
625 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// ---- save path ----
633 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
634 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
635 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
636 freeze->ulStatus = gpu.status.reg;
// ---- load path ----
639 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
640 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
641 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
642 gpu.status.reg = freeze->ulStatus;
// replay control regs 8..1; the XOR defeats the redundant-write check
// in GPUwriteStatus so each register really takes effect
644 for (i = 8; i > 0; i--) {
645 gpu.regs[i] ^= 1; // avoid reg change detection
646 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
648 renderer_sync_ecmds(gpu.ex_regs);
// whole VRAM was replaced — invalidate all renderer caches
649 renderer_update_caches(0, 0, 1024, 512);
/* Per-vsync hook: flush pending rendering and decide whether to
 * present a frame, honoring blanking and the frameskip state machine.
 * (The actual vout/present calls are not visible in this excerpt.) */
656 void GPUupdateLace(void)
660 renderer_flush_queues();
// display blanked: blank the output once, then do nothing until unblanked
662 if (gpu.status.blanking) {
663 if (!gpu.state.blanked) {
665 gpu.state.blanked = 1;
666 gpu.state.fb_dirty = 1;
// nothing drawn since the last present — skip the flip entirely
671 if (!gpu.state.fb_dirty)
674 if (gpu.frameskip.set) {
675 if (!gpu.frameskip.frame_ready) {
// safety valve: never skip more than ~8 frames in a row
676 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
678 gpu.frameskip.active = 0;
680 gpu.frameskip.frame_ready = 0;
684 gpu.state.fb_dirty = 0;
685 gpu.state.blanked = 0;
/* vblank notification: work out whether interlaced rendering should be
 * used and tell the renderer when the setting (or field) changes. */
688 void GPUvBlank(int is_vblank, int lcf)
690 int interlace = gpu.state.allow_interlace
691 && gpu.status.interlace && gpu.status.dheight;
692 // interlace doesn't look nice on progressive displays,
693 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 is "auto": only interlace for games that
// recently read VRAM back (they'd notice the difference)
694 if (gpu.state.allow_interlace == 2
695 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// notify on every interlaced frame (field changes) and on any toggle
699 if (interlace || interlace != gpu.state.old_interlace) {
700 gpu.state.old_interlace = interlace;
704 renderer_flush_queues();
// lcf = current field; renderer draws the opposite field next
705 renderer_set_interlace(interlace, !lcf);
709 #include "../../frontend/plugin_lib.h"
/* Frontend configuration hook: copy the rearmed callback/config values
 * into the gpu state, (re)map VRAM if it was not available at init
 * time, and propagate the settings to renderer and video-out layers. */
711 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
713 gpu.frameskip.set = cbs->frameskip;
// advice is kept as a pointer so the frontend can update it live
714 gpu.frameskip.advice = &cbs->fskip_advice;
715 gpu.frameskip.active = 0;
716 gpu.frameskip.frame_ready = 1;
717 gpu.state.hcnt = cbs->gpu_hcnt;
718 gpu.state.frame_count = cbs->gpu_frame_count;
719 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
720 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
722 gpu.mmap = cbs->mmap;
723 gpu.munmap = cbs->munmap;
// mmap hook may not have existed at GPUinit time — map VRAM now
726 if (gpu.vram == NULL)
729 if (cbs->pl_vout_set_raw_vram)
730 cbs->pl_vout_set_raw_vram(gpu.vram);
731 renderer_set_config(cbs);
732 vout_set_config(cbs);
735 // vim:shiftwidth=2:expandtab