2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Generic helpers: element count of a real array, plus GCC branch-prediction
// and prefetch hints (this file relies on GCC/Clang builtins).
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
17 #define unlikely(x) __builtin_expect((x), 0)
18 #define preload __builtin_prefetch
19 #define noinline __attribute__((noinline))
// Debug log line prefixed with the current frame count and horizontal
// counter (both read through pointers shared with the emulator core).
26 #define gpu_log(fmt, ...) \
27 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly logging are compiled out by default; point either macro
// at gpu_log (commented variants above each) when debugging.
29 //#define log_io gpu_log
31 //#define log_anomaly gpu_log
32 #define log_anomaly(...)
// Forward declarations: GP0 command stream processing and VRAM-transfer
// completion, needed by the reset path below before their definitions.
36 static noinline int do_cmd_buffer(uint32_t *data, int count);
37 static void finish_vram_transfer(int is_read);
// Drain any words still sitting in the staging command buffer and close an
// unfinished VRAM image transfer, so no stale state survives a GPU reset.
39 static noinline void do_cmd_reset(void)
41 if (unlikely(gpu.cmd_len > 0))
42 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
45 if (unlikely(gpu.dma.h > 0))
46 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear the GP1 control register shadows, reinitialize the
// 0xe0-0xe7 command shadows with their command byte in bits 24-31, restore
// the status word and fall back to a 256x240 screen.
50 static noinline void do_reset(void)
56 memset(gpu.regs, 0, sizeof(gpu.regs));
57 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
58 gpu.ex_regs[i] = (0xe0 + i) << 24;
// NOTE(review): presumed power-on status value -- confirm against GPU docs.
59 gpu.status = 0x14802000;
62 gpu.screen.hres = gpu.screen.w = 256;
63 gpu.screen.vres = gpu.screen.h = 240;
// Derive the visible width from the horizontal display range (x1..x2),
// scaling by hres/2560 (2560 appears to be the full-line unit here --
// TODO confirm). An out-of-range span falls back to full hres.
66 static noinline void update_width(void)
68 int sw = gpu.screen.x2 - gpu.screen.x1;
69 if (sw <= 0 || sw >= 2560)
71 gpu.screen.w = gpu.screen.hres;
73 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Derive the visible height from the vertical display range (y1..y2);
// the DHEIGHT (double-height / interlace) branch body is not visible in
// this view.
76 static noinline void update_height(void)
78 // TODO: emulate this properly..
79 int sh = gpu.screen.y2 - gpu.screen.y1;
80 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
82 if (sh <= 0 || sh > gpu.screen.vres)
// Advance the frameskip state machine (called around a display flip):
// decide whether the next frame should be skipped, and when skipping
// stops, execute a fill command that was deferred while skipping.
88 static noinline void decide_frameskip(void)
90 if (gpu.frameskip.active)
93 gpu.frameskip.cnt = 0;
94 gpu.frameskip.frame_ready = 1;
// Skip when the frontend advises it, or for up to .set consecutive frames.
97 if (!gpu.frameskip.active && *gpu.frameskip.advice)
98 gpu.frameskip.active = 1;
99 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
100 gpu.frameskip.active = 1;
102 gpu.frameskip.active = 0;
// Run the deferred fill (stored by do_cmd_list_skip) now that we render again.
104 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
106 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
107 gpu.frameskip.pending_fill[0] = 0;
// Decide whether skipping is permitted given the latest drawing-area
// command word (cmd_e3, presumably GP0 0xe3 -- it is fed from ex_regs[3]
// by the callers): allow only if the draw area misses the displayed
// region, or when interlaced (which always redraws the display area).
111 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
113 // no frameskip if it decides to draw to display area,
114 // but not for interlace since it'll most likely always do that
115 uint32_t x = cmd_e3 & 0x3ff;
116 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// The unsigned subtract-compare doubles as a range check: x below
// screen.x wraps to a huge value and also fails the test.
117 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
118 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
119 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
120 return gpu.frameskip.allow;
// GP1 "get GPU info" style query: latch the requested value into gpu.gp0
// for later readback. Only two of the switch cases are visible here; the
// visible ones return masked 0xe0-0xe7 shadow register contents.
123 static noinline void get_gpu_info(uint32_t data)
125 switch (data & 0x0f) {
129 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
132 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
// 1024x512 16bpp framebuffer, allocated twice over so renderers may
// safely overdraw past the end of the real VRAM image.
143 // double, for overdraw guard
144 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Map the guarded VRAM buffer through the frontend-provided allocator.
// On success the pointer is advanced by 4096/2 elements -- presumably to
// leave slack before the buffer for renderer under-draw; TODO confirm.
// On failure only a warning is printed; callers continue with vram NULL.
146 static int map_vram(void)
148 gpu.vram = gpu.mmap(VRAM_SIZE);
149 if (gpu.vram != NULL) {
150 gpu.vram += 4096 / 2;
154 fprintf(stderr, "could not map vram, expect crashes\n");
// (Interior of GPUinit; the function header is outside this view.)
// Initialize the renderer and default state; counters point at gpu.zero
// until the frontend supplies real ones via GPUrearmedCallbacks.
163 ret |= renderer_init();
165 gpu.state.frame_count = &gpu.zero;
166 gpu.state.hcnt = &gpu.zero;
167 gpu.frameskip.active = 0;
// Map VRAM right away only when a mapper is already available.
171 if (gpu.mmap != NULL) {
// Release the VRAM mapping, first undoing the guard offset that
// map_vram() applied so munmap gets the original base pointer.
178 long GPUshutdown(void)
184 if (gpu.vram != NULL) {
185 gpu.vram -= 4096 / 2;
186 gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 control port write. The command number is in bits 24-31; commands
// are latched into gpu.regs[] with change detection so redundant writes
// are cheap. The switch/case labels for the individual commands are not
// visible in this view -- the grouping comments below are inferred from
// the field layouts and should be confirmed against the case labels.
193 void GPUwriteStatus(uint32_t data)
// Lookup tables decoding the display-mode bits into pixel resolutions.
195 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
196 static const short vres[4] = { 240, 480, 256, 480 };
197 uint32_t cmd = data >> 24;
199 if (cmd < ARRAY_SIZE(gpu.regs)) {
// Commands 0, 1 and 5 are always processed; others short-circuit when
// the latched value is unchanged.
200 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
202 gpu.regs[cmd] = data;
205 gpu.state.fb_dirty = 1;
// Display blanking on / off.
216 gpu.status |= PSX_GPU_STATUS_BLANKING;
218 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA direction: replace the 2-bit field in the status word.
221 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
222 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// Display start address in VRAM; treated as a "flip" for frameskip.
225 gpu.screen.x = data & 0x3ff;
226 gpu.screen.y = (data >> 10) & 0x1ff;
227 if (gpu.frameskip.set) {
228 decide_frameskip_allow(gpu.ex_regs[3]);
229 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
231 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range (x1..x2 in GPU clocks).
236 gpu.screen.x1 = data & 0xfff;
237 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range (y1..y2 in scanlines).
241 gpu.screen.y1 = data & 0x3ff;
242 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: fold resolution/mode bits into the status word, then
// recompute the pixel resolution and tell the renderer.
246 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
247 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
248 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
251 renderer_notify_res_change();
// Commands 0x10..0x1f: GPU info queries (handled by get_gpu_info).
254 if ((cmd & 0xf0) == 0x10)
// Optional platform hook compiled in by defining GPUwriteStatus_ext.
259 #ifdef GPUwriteStatus_ext
260 GPUwriteStatus_ext(data);
// Number of extra parameter words following each GP0 command byte
// (table index = command number); the hex row markers are the command
// value at the start of each row.
264 const unsigned char cmd_lengths[256] =
266  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
269  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
270  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
271  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
272  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
273  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
274  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
275  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
277  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
279  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
281  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM.
284 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal span of l pixels (l * 2 bytes) between VRAM and
// mem; direction is VRAM -> mem when is_read is set, mem -> VRAM otherwise.
286 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
288 uint16_t *vram = VRAM_MEM_XY(x, y);
290 memcpy(mem, vram, l * 2);
292 memcpy(vram, mem, l * 2);
// Stream up to `count` 32-bit words into/out of the active VRAM transfer
// rectangle described by gpu.dma. Handles, in order: the remainder of a
// previously split line (dma.offset), whole lines, and a trailing partial
// line. Calls finish_vram_transfer when the rectangle is exhausted.
// Returns the number of 32-bit words actually consumed.
295 static int do_vram_io(uint32_t *data, int count, int is_read)
297 int count_initial = count;
298 uint16_t *sdata = (uint16_t *)data;
299 int x = gpu.dma.x, y = gpu.dma.y;
300 int w = gpu.dma.w, h = gpu.dma.h;
301 int o = gpu.dma.offset;
303 count *= 2; // operate in 16bpp pixels
// Finish the previously started partial line first.
305 if (gpu.dma.offset) {
306 l = w - gpu.dma.offset;
310 do_vram_line(x + o, y, sdata, l, is_read);
// Whole lines while enough data remains.
323 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
325 do_vram_line(x, y, sdata, w, is_read);
// Trailing partial line; dma.offset records how far we got (not shown).
331 do_vram_line(x, y, sdata, count, is_read);
337 finish_vram_transfer(is_read);
// Convert remaining pixels back to 32-bit words for the return value.
342 return count_initial - count / 2;
// Begin a VRAM image transfer rectangle. Position wraps to the 1024x512
// VRAM; the size fields decode as 1..1024 / 1..512 (a raw 0 means the
// maximum). is_read selects VRAM->CPU (GPUREAD) vs CPU->VRAM direction.
345 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
348 log_anomaly("start_vram_transfer while old unfinished\n");
350 gpu.dma.x = pos_word & 0x3ff;
351 gpu.dma.y = (pos_word >> 16) & 0x1ff;
352 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
353 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
355 gpu.dma.is_read = is_read;
// Snapshot the initial rectangle; finish_vram_transfer reports it later.
356 gpu.dma_start = gpu.dma;
// Ensure queued rendering lands in VRAM before the CPU touches it.
358 renderer_flush_queues();
360 gpu.status |= PSX_GPU_STATUS_IMG;
361 // XXX: wrong for width 1
// Pre-latch the first word so an immediate GPUREAD sees valid data.
362 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
363 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
366 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
367 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// End the active VRAM transfer. After a CPU->VRAM write the renderer is
// told which rectangle changed so its caches stay coherent; for reads
// only the IMG status bit needs clearing.
370 static void finish_vram_transfer(int is_read)
373 gpu.status &= ~PSX_GPU_STATUS_IMG;
375 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
376 gpu.dma_start.w, gpu.dma_start.h);
// Consume GP0 commands while frameskip is active: drawing commands are
// dropped, but state that outlives the frame is still tracked -- the
// 0xe0-0xe7 shadow registers, texture-page bits carried by textured
// primitives, and fill commands (deferred or executed). Skipping is
// re-evaluated when the drawing area changes. Returns words consumed;
// *last_cmd receives the last command byte seen. The switch/case labels
// are not visible in this view, so per-branch notes below are inferred.
379 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
381 int cmd = 0, pos = 0, len, dummy, v;
384 gpu.frameskip.pending_fill[0] = 0;
386 while (pos < count && skip) {
387 uint32_t *list = data + pos;
388 cmd = LE32TOH(list[0]) >> 24;
389 len = 1 + cmd_lengths[cmd];
// Fill: run it now if it covers more than the visible screen, otherwise
// defer it until skipping ends (decide_frameskip executes pending_fill).
393 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
394 // clearing something large, don't skip
395 do_cmd_list(list, 3, &dummy);
397 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Keep the low 9 texture-page bits of e1 in sync even while skipping.
403 gpu.ex_regs[1] &= ~0x1ff;
404 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// Variable-length poly-lines: scan for the 0x5xxx5xxx terminator word
// (flat variant steps by 1, shaded variant by 2).
407 for (v = 3; pos + v < count; v++)
409 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
415 for (v = 4; pos + v < count; v += 2)
417 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// Drawing-area change: re-check whether skipping remains allowed.
424 skip = decide_frameskip_allow(LE32TOH(list[0]));
425 if ((cmd & 0xf8) == 0xe0)
426 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
430 if (pos + len > count) {
432 break; // incomplete cmd
// VRAM transfers can never be skipped; bail out to the caller.
434 if (0xa0 <= cmd && cmd <= 0xdf)
// Push the (possibly updated) e-register shadows to the renderer.
440 renderer_sync_ecmds(gpu.ex_regs);
// Top-level GP0 word dispatcher. Routes incoming words to an active
// CPU->VRAM image transfer, starts new transfers (commands 0xa0-0xdf),
// and hands everything else to the renderer -- or to the skip path while
// frameskip is active. Returns the count of words left unconsumed
// (an incomplete trailing command kept for the next call).
445 static noinline int do_cmd_buffer(uint32_t *data, int count)
448 uint32_t old_e3 = gpu.ex_regs[3];
452 for (pos = 0; pos < count; )
// An ongoing CPU->VRAM image transfer consumes the stream first.
454 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
456 pos += do_vram_io(data + pos, count - pos, 0);
461 cmd = LE32TOH(data[pos]) >> 24;
462 if (0xa0 <= cmd && cmd <= 0xdf) {
463 if (unlikely((pos+2) >= count)) {
464 // incomplete vram write/read cmd, can't consume yet
469 // consume vram write/read cmd
470 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
475 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
476 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
477 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
479 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Mirror the e1 (texture page) and e6 (mask) shadows into status bits.
488 gpu.status &= ~0x1fff;
489 gpu.status |= gpu.ex_regs[1] & 0x7ff;
490 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
492 gpu.state.fb_dirty |= vram_dirty;
// Drawing area changed during this batch: refresh the skip decision.
494 if (old_e3 != gpu.ex_regs[3])
495 decide_frameskip_allow(gpu.ex_regs[3]);
// Process the staged GP0 words; any incomplete trailing command is moved
// to the front of the buffer to be completed by future writes.
500 static void flush_cmd_buffer(void)
502 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
504 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (block DMA). The staging buffer is flushed first
// so ordering is preserved; words that still cannot be consumed are
// discarded with an anomaly log rather than buffered.
508 void GPUwriteDataMem(uint32_t *mem, int count)
512 log_io("gpu_dma_write %p %d\n", mem, count);
514 if (unlikely(gpu.cmd_len > 0))
517 left = do_cmd_buffer(mem, count);
519 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: stage the word (stored little-endian) and
// flush once the staging buffer fills up.
522 void GPUwriteData(uint32_t data)
524 log_io("gpu_write %08x\n", data);
525 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
526 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a GP0 linked-list DMA chain in PSX RAM. Each node starts with a
// header word: payload length in bits 24-31 and next-node address in
// bits 0-23; an address with bit 23 set terminates the list. Payload
// words are fed to do_cmd_buffer. Cycle cost is accumulated per node.
// Endless (looped) lists are detected by marking visited headers with
// bit 23 after LD_THRESHOLD nodes, then unmarking them afterwards.
530 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
532 uint32_t addr, *list, ld_addr = 0;
533 int len, left, count;
536 preload(rambase + (start_addr & 0x1fffff) / 4);
// Flush staged words first to keep command ordering.
538 if (unlikely(gpu.cmd_len > 0))
541 log_io("gpu_dma_chain\n");
542 addr = start_addr & 0xffffff;
543 for (count = 0; (addr & 0x800000) == 0; count++)
545 list = rambase + (addr & 0x1fffff) / 4;
546 len = LE32TOH(list[0]) >> 24;
547 addr = LE32TOH(list[0]) & 0xffffff;
// Prefetch the next node while this one is being processed.
548 preload(rambase + (addr & 0x1fffff) / 4);
552 cpu_cycles += 5 + len;
554 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
557 left = do_cmd_buffer(list + 1, len);
559 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
562 #define LD_THRESHOLD (8*1024)
563 if (count >= LD_THRESHOLD) {
564 if (count == LD_THRESHOLD) {
569 // loop detection marker
570 // (bit23 set causes DMA error on real machine, so
571 // unlikely to be ever set by the game)
572 list[0] |= HTOLE32(0x800000);
577 // remove loop detection markers
578 count -= LD_THRESHOLD + 2;
579 addr = ld_addr & 0x1fffff;
580 while (count-- > 0) {
581 list = rambase + addr / 4;
582 addr = LE32TOH(list[0]) & 0x1fffff;
583 list[0] &= HTOLE32(~0x800000);
// Record list statistics shared with the frontend -- NOTE(review):
// presumably used for timing heuristics; confirm against the frontend.
587 gpu.state.last_list.frame = *gpu.state.frame_count;
588 gpu.state.last_list.hcnt = *gpu.state.hcnt;
589 gpu.state.last_list.cycles = cpu_cycles;
590 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD: flush staged commands, then stream words out of the
// active VRAM-read rectangle (is_read = 1).
595 void GPUreadDataMem(uint32_t *mem, int count)
597 log_io("gpu_dma_read %p %d\n", mem, count);
599 if (unlikely(gpu.cmd_len > 0))
603 do_vram_io(mem, count, 1);
// Single-word GPUREAD: after flushing staged commands, return the next
// word of an active VRAM read (fallback paths are not visible here).
606 uint32_t GPUreadData(void)
610 if (unlikely(gpu.cmd_len > 0))
616 do_vram_io(&ret, 1, 1);
620 log_io("gpu_read %08x\n", ret);
// Read the GPU status register; staged commands are flushed first so the
// returned status bits reflect them.
624 uint32_t GPUreadStatus(void)
628 if (unlikely(gpu.cmd_len > 0))
632 log_io("gpu_read_status %08x\n", ret);
// (Interior of the GPUFreeze save-state struct; the struct header is
// outside this view.) Shared with the main emulator -- presumably an
// ABI layout, so field order and sizes must not change.
638 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
639 uint32_t ulStatus; // current gpu status
640 uint32_t ulControl[256]; // latest control register values
641 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save or load GPU state to/from a GPUFreeze snapshot (direction is
// selected by `type`; the branch labels are not visible here). Saved:
// the 1024x512x16bpp VRAM image, the GP1 register shadows, the 0xe0-0xe7
// shadows (stored at ulControl[0xe0..]) and the status word. On load,
// control writes are replayed through GPUwriteStatus to rebuild derived
// state, and the renderer is resynchronized.
644 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
652 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
653 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
654 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
655 freeze->ulStatus = gpu.status;
658 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
659 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
660 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
661 gpu.status = freeze->ulStatus;
663 for (i = 8; i > 0; i--) {
// Flip a bit first so GPUwriteStatus's "value unchanged" short-circuit
// doesn't swallow the replayed write.
664 gpu.regs[i] ^= 1; // avoid reg change detection
665 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
667 renderer_sync_ecmds(gpu.ex_regs);
668 renderer_update_caches(0, 0, 1024, 512);
// Per-frame housekeeping (called by the core once per display update):
// flush pending rendering, track blanking transitions, apply the
// frameskip decision, and clear the dirty/blanked flags after the frame
// has been handed to video-out (the vout call itself is not visible here).
675 void GPUupdateLace(void)
679 renderer_flush_queues();
// Entering blanking: mark it once and force a frame through.
681 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
682 if (!gpu.state.blanked) {
684 gpu.state.blanked = 1;
685 gpu.state.fb_dirty = 1;
690 if (!gpu.state.fb_dirty)
693 if (gpu.frameskip.set) {
694 if (!gpu.frameskip.frame_ready) {
// If no flip happened for a while (threshold 9 frames), stop skipping
// so the display doesn't appear frozen.
695 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
697 gpu.frameskip.active = 0;
699 gpu.frameskip.frame_ready = 0;
703 gpu.state.fb_dirty = 0;
704 gpu.state.blanked = 0;
// Track the effective interlace state at vblank and notify the renderer
// when it changes (or per field while interlacing, via lcf -- the
// line/field flag from the core).
707 void GPUvBlank(int is_vblank, int lcf)
709 int interlace = gpu.state.allow_interlace
710 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
711 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
712 // interlace doesn't look nice on progressive displays,
713 // so we have this "auto" mode here for games that don't read vram
714 if (gpu.state.allow_interlace == 2
715 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
719 if (interlace || interlace != gpu.state.old_interlace) {
720 gpu.state.old_interlace = interlace;
// Flush before switching so queued work renders with the old setting.
724 renderer_flush_queues();
725 renderer_set_interlace(interlace, !lcf);
729 #include "../../frontend/plugin_lib.h"
// PCSX-ReARMed-specific entry point: accept frontend callbacks and
// configuration -- frameskip settings, shared frame/hcnt counters,
// interlace/enhancement options, and the memory mapper used for VRAM.
// Resets the frameskip state and maps VRAM lazily if it isn't yet.
731 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
733 gpu.frameskip.set = cbs->frameskip;
734 gpu.frameskip.advice = &cbs->fskip_advice;
735 gpu.frameskip.active = 0;
736 gpu.frameskip.frame_ready = 1;
737 gpu.state.hcnt = cbs->gpu_hcnt;
738 gpu.state.frame_count = cbs->gpu_frame_count;
739 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
740 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
742 gpu.mmap = cbs->mmap;
743 gpu.munmap = cbs->munmap;
// VRAM may not have been mapped at init time (no mapper yet); do it now.
746 if (gpu.vram == NULL)
// Let the frontend's video-out layer access VRAM directly if supported.
749 if (cbs->pl_vout_set_raw_vram)
750 cbs->pl_vout_set_raw_vram(gpu.vram);
751 renderer_set_config(cbs);
752 vout_set_config(cbs);
755 // vim:shiftwidth=2:expandtab