2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Generic helpers: static-array element count plus GCC builtins wrapped
// for branch prediction, data prefetch and inlining control.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
17 #define unlikely(x) __builtin_expect((x), 0)
18 #define preload __builtin_prefetch
19 #define noinline __attribute__((noinline))
// Debug logging: gpu_log() prefixes the message with the current frame
// count and hcnt; log_io/log_anomaly are compiled out by default and can
// be re-enabled by mapping them to gpu_log.
26 #define gpu_log(fmt, ...) \
27 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
29 //#define log_io gpu_log
31 //#define log_anomaly gpu_log
32 #define log_anomaly(...)
// Forward declarations for the command-buffer / VRAM-transfer machinery
// defined further below.
36 static noinline int do_cmd_buffer(uint32_t *data, int count);
37 static void finish_vram_transfer(int is_read);
// Flush any buffered GP0 command words and close an in-flight VRAM
// transfer so a reset does not leave half-consumed state behind.
// NOTE(review): fragmentary listing -- braces/intermediate lines elided.
39 static noinline void do_cmd_reset(void)
41 if (unlikely(gpu.cmd_len > 0))
42 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
45 if (unlikely(gpu.dma.h > 0))
46 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear the control registers, reinitialize the e0-e7
// shadow registers to their command numbers, and restore the power-on
// status word and default 256x240 screen geometry.
// NOTE(review): fragmentary listing -- locals and braces elided.
50 static noinline void do_reset(void)
56 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs[i] holds the last (0xe0+i)<<24 command word; seed with the
// bare command number so readbacks are sane before any command arrives.
57 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
58 gpu.ex_regs[i] = (0xe0 + i) << 24;
59 gpu.status = 0x14802000;
60 gpu.screen.hres = gpu.screen.w = 256;
63 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the x1..x2 display range; out-of-range
// spans fall back to the full horizontal resolution, otherwise the span
// is scaled by hres/2560 (2560 GPU clocks per scanline).
66 static noinline void update_width(void)
68 int sw = gpu.screen.x2 - gpu.screen.x1;
69 if (sw <= 0 || sw >= 2560)
// invalid/full range, just use the standard mode width
71 gpu.screen.w = gpu.screen.hres;
73 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the y1..y2 display range.
// NOTE(review): fragmentary listing -- the DHEIGHT (interlace doubling)
// branch body and the clamping logic are elided here.
76 static noinline void update_height(void)
78 // TODO: emulate this properly..
79 int sh = gpu.screen.y2 - gpu.screen.y1;
80 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
82 if (sh <= 0 || sh > gpu.screen.vres)
88 static noinline void decide_frameskip(void)
90 if (gpu.frameskip.active)
93 gpu.frameskip.cnt = 0;
94 gpu.frameskip.frame_ready = 1;
97 if (!gpu.frameskip.active && *gpu.frameskip.advice)
98 gpu.frameskip.active = 1;
99 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
100 gpu.frameskip.active = 1;
102 gpu.frameskip.active = 0;
104 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
106 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
107 gpu.frameskip.pending_fill[0] = 0;
// Decide whether frameskip is allowed for the current e3 (draw area
// start) setting: allowed when interlaced, or when the draw origin lies
// outside the currently displayed screen rectangle (unsigned compare
// handles x < screen.x as "outside" too). Returns the stored decision.
111 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
113 // no frameskip if it decides to draw to display area,
114 // but not for interlace since it'll most likely always do that
115 uint32_t x = cmd_e3 & 0x3ff;
116 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
117 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
118 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
119 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
120 return gpu.frameskip.allow;
// GP1(0x10) "get GPU info": place the requested value in gpu.gp0.
// NOTE(review): fragmentary listing -- the case labels for this switch
// are elided; only two of the result assignments are visible.
123 static noinline void get_gpu_info(uint32_t data)
125 switch (data & 0x0f) {
129 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
132 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
143 // double, for overdraw guard
144 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Map the emulated VRAM via the frontend-supplied gpu.mmap callback and
// advance the pointer past a 4KB (2048-pixel) guard area at the start.
// NOTE(review): fragmentary listing -- return statements elided.
146 static int map_vram(void)
148 gpu.vram = gpu.mmap(VRAM_SIZE);
149 if (gpu.vram != NULL) {
150 gpu.vram += 4096 / 2;
154 fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): fragment of an initialization function whose header is
// not visible in this listing (presumably GPUinit -- confirm). It chains
// renderer init, points the frame/hcnt counters at a safe dummy, and
// maps VRAM only when an mmap callback is already installed.
163 ret |= renderer_init();
165 gpu.state.frame_count = &gpu.zero;
166 gpu.state.hcnt = &gpu.zero;
167 gpu.frameskip.active = 0;
171 if (gpu.mmap != NULL) {
// Plugin shutdown: undo the 4KB guard offset applied in map_vram()
// before handing the full VRAM_SIZE mapping back to gpu.munmap.
// NOTE(review): fragmentary listing -- return value handling elided.
178 long GPUshutdown(void)
184 if (gpu.vram != NULL) {
185 gpu.vram -= 4096 / 2;
186 gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 control-port write. Dispatches on the command byte (data >> 24).
// NOTE(review): fragmentary listing -- the switch statement and its case
// labels are elided; the visible lines are the bodies of the individual
// command handlers, annotated below by their apparent function.
193 void GPUwriteStatus(uint32_t data)
// standard-mode horizontal/vertical resolutions indexed from status bits
195 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
196 static const short vres[4] = { 240, 480, 256, 480 };
197 uint32_t cmd = data >> 24;
// skip redundant writes (except cmds 0, 1 and 5 which have side effects)
199 if (cmd < ARRAY_SIZE(gpu.regs)) {
200 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
202 gpu.regs[cmd] = data;
205 gpu.state.fb_dirty = 1;
// display blanking on/off
216 gpu.status |= PSX_GPU_STATUS_BLANKING;
218 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA direction / data request mode (2 bits)
221 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
222 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// start of display area (x 10 bits, y 9 bits); a flip may end a skip run
225 gpu.screen.x = data & 0x3ff;
226 gpu.screen.y = (data >> 10) & 0x1ff;
227 if (gpu.frameskip.set) {
228 decide_frameskip_allow(gpu.ex_regs[3]);
229 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
231 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range (in GPU clocks)
236 gpu.screen.x1 = data & 0xfff;
237 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range (in scanlines)
241 gpu.screen.y1 = data & 0x3ff;
242 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: rebuild status bits 16-23 and derive the new resolution
246 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
247 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
248 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
251 renderer_notify_res_change();
// GP1(0x10..0x1f): GPU info query
254 if ((cmd & 0xf0) == 0x10)
259 #ifdef GPUwriteStatus_ext
260 GPUwriteStatus_ext(data);
// Number of ADDITIONAL 32-bit words (beyond the command word itself)
// each GP0 command consumes. Zero entries are either no-op/unknown
// commands or variable-length ones handled specially (polylines 0x48+,
// VRAM transfers 0xa0/0xc0).
// NOTE(review): fragmentary listing -- the opening/closing braces of the
// initializer are elided.
264 const unsigned char cmd_lengths[256] =
266 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
269 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
270 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
271 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
272 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
273 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
274 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of 16bpp pixel (x, y) in the 1024-pixel-wide VRAM.
284 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l pixels between VRAM and a host buffer;
// direction is chosen by is_read (read = VRAM -> mem).
// NOTE(review): fragmentary listing -- the if/else around the two
// memcpy directions is elided.
286 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
288 uint16_t *vram = VRAM_MEM_XY(x, y);
290 memcpy(mem, vram, l * 2);
292 memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words of an active VRAM transfer (gpu.dma):
// finish a partial line left from a previous call, copy whole lines,
// then start a new partial line with whatever remains. Returns how many
// input words were consumed.
// NOTE(review): fragmentary listing -- state save-back and several
// intermediate lines are elided.
295 static int do_vram_io(uint32_t *data, int count, int is_read)
297 int count_initial = count;
298 uint16_t *sdata = (uint16_t *)data;
299 int x = gpu.dma.x, y = gpu.dma.y;
300 int w = gpu.dma.w, h = gpu.dma.h;
301 int o = gpu.dma.offset;
303 count *= 2; // operate in 16bpp pixels
// first complete the line interrupted mid-way last time (o = offset)
305 if (gpu.dma.offset) {
306 l = w - gpu.dma.offset;
310 do_vram_line(x + o, y, sdata, l, is_read);
// bulk: one full line per iteration
323 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
325 do_vram_line(x, y, sdata, w, is_read);
// leftover pixels start a new partial line
331 do_vram_line(x, y, sdata, count, is_read);
337 finish_vram_transfer(is_read);
// convert consumed 16bpp pixel count back to 32-bit words
342 return count_initial - count / 2;
// Begin a GP0 0xa0/0xc0 VRAM write/read: decode position and size
// ((val-1 & mask)+1 maps 0 to the maximum), snapshot the transfer in
// gpu.dma_start, and for reads prime gp0 with the first VRAM word.
// NOTE(review): fragmentary listing -- braces and the read/write branch
// structure are elided.
345 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
348 log_anomaly("start_vram_transfer while old unfinished\n");
350 gpu.dma.x = pos_word & 0x3ff;
351 gpu.dma.y = (pos_word >> 16) & 0x1ff;
352 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
353 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
355 gpu.dma.is_read = is_read;
356 gpu.dma_start = gpu.dma;
358 renderer_flush_queues();
360 gpu.status |= PSX_GPU_STATUS_IMG;
361 // XXX: wrong for width 1
362 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
363 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
366 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
367 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Close the current VRAM transfer: clear the IMG status bit (reads) or
// tell the renderer which rectangle was written so it can invalidate
// its caches (writes).
// NOTE(review): fragmentary listing -- the is_read branch lines elided.
370 static void finish_vram_transfer(int is_read)
373 gpu.status &= ~PSX_GPU_STATUS_IMG;
375 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
376 gpu.dma_start.w, gpu.dma_start.h);
// Walk a GP0 command list while frameskip is active, consuming commands
// WITHOUT drawing. Fills are deferred (or executed if large), texture
// page state is still tracked, polylines are scanned for their 0x5xxx5xxx
// terminator, and e-commands can cancel the skip via
// decide_frameskip_allow. Returns words consumed; *last_cmd gets the
// last command byte seen.
// NOTE(review): fragmentary listing -- case labels, skip bookkeeping and
// the epilogue are elided.
379 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
381 int cmd = 0, pos = 0, len, dummy, v;
384 gpu.frameskip.pending_fill[0] = 0;
386 while (pos < count && skip) {
387 uint32_t *list = data + pos;
388 cmd = LE32TOH(list[0]) >> 24;
389 len = 1 + cmd_lengths[cmd];
// fill (02): large clears are drawn anyway, small ones deferred
393 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
394 // clearing something large, don't skip
395 do_cmd_list(list, 3, &dummy);
397 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// textured polygons: keep the texpage bits of e1 in sync even while
// skipping, so later commands see the right texture state
403 gpu.ex_regs[1] &= ~0x1ff;
404 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// mono polyline: scan for the 0x5xxx5xxx terminator word
407 for (v = 3; pos + v < count; v++)
409 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// shaded polyline: vertices come in (color, xy) pairs
415 for (v = 4; pos + v < count; v += 2)
417 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// e3 (draw area start) may disallow skipping; e0-e7 update shadows
424 skip = decide_frameskip_allow(LE32TOH(list[0]));
425 if ((cmd & 0xf8) == 0xe0)
426 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
430 if (pos + len > count) {
432 break; // incomplete cmd
// VRAM transfer commands are not handled here; bail out to the caller
434 if (0xa0 <= cmd && cmd <= 0xdf)
440 renderer_sync_ecmds(gpu.ex_regs);
// Main GP0 word consumer: route words to an active VRAM transfer, start
// new transfers for 0xa0-0xdf commands, and hand drawing commands to
// either the skipping walker or the real renderer. Afterwards mirror
// ex_regs back into the status word and re-evaluate frameskip if e3
// changed. Returns the number of words left unconsumed.
// NOTE(review): fragmentary listing -- loop braces, vram_dirty updates
// and the return are elided.
445 static noinline int do_cmd_buffer(uint32_t *data, int count)
448 uint32_t old_e3 = gpu.ex_regs[3];
452 for (pos = 0; pos < count; )
// ongoing VRAM write: feed it first
454 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
456 pos += do_vram_io(data + pos, count - pos, 0);
461 cmd = LE32TOH(data[pos]) >> 24;
462 if (0xa0 <= cmd && cmd <= 0xdf) {
463 if (unlikely((pos+2) >= count)) {
464 // incomplete vram write/read cmd, can't consume yet
469 // consume vram write/read cmd
470 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
475 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
476 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
477 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
479 pos += do_cmd_list(data + pos, count - pos, &cmd);
// reflect texpage (e1) and mask (e6) settings into status bits 0-12
488 gpu.status &= ~0x1fff;
489 gpu.status |= gpu.ex_regs[1] & 0x7ff;
490 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
492 gpu.state.fb_dirty |= vram_dirty;
494 if (old_e3 != gpu.ex_regs[3])
495 decide_frameskip_allow(gpu.ex_regs[3]);
// Drain the accumulated GPUwriteData() buffer; any unconsumed tail
// (e.g. an incomplete command) is moved to the front for next time.
500 static void flush_cmd_buffer(void)
502 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
504 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 write entry point (DMA block writes): flush any buffered
// words first, then consume the block; leftovers are logged as lost.
// NOTE(review): fragmentary listing -- the flush call and braces elided.
508 void GPUwriteDataMem(uint32_t *mem, int count)
512 log_io("gpu_dma_write %p %d\n", mem, count);
514 if (unlikely(gpu.cmd_len > 0))
517 left = do_cmd_buffer(mem, count);
519 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: append (host->LE converted) to the command
// buffer and flush when it fills up.
522 void GPUwriteData(uint32_t data)
524 log_io("gpu_write %08x\n", data);
525 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
526 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Execute a GP0 DMA linked list starting at start_addr in PSX RAM.
// Each node is a header word (len << 24 | next_addr) followed by len
// command words; the list ends when bit 23 of the address is set.
// Includes a loop detector: after LD_THRESHOLD nodes it starts tagging
// visited headers with bit 23 and later strips the tags again.
// NOTE(review): fragmentary listing -- braces, the loop-detect entry
// logic and the return of cpu_cycles are elided.
530 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
532 uint32_t addr, *list, ld_addr = 0;
533 int len, left, count;
536 preload(rambase + (start_addr & 0x1fffff) / 4);
// drain buffered single-word writes before running the chain
538 if (unlikely(gpu.cmd_len > 0))
541 log_io("gpu_dma_chain\n");
542 addr = start_addr & 0xffffff;
543 for (count = 0; (addr & 0x800000) == 0; count++)
545 list = rambase + (addr & 0x1fffff) / 4;
546 len = LE32TOH(list[0]) >> 24;
547 addr = LE32TOH(list[0]) & 0xffffff;
// prefetch the next node while this one is processed
548 preload(rambase + (addr & 0x1fffff) / 4);
552 cpu_cycles += 5 + len;
554 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
557 left = do_cmd_buffer(list + 1, len);
559 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
563 *progress_addr = addr;
566 #define LD_THRESHOLD (8*1024)
567 if (count >= LD_THRESHOLD) {
568 if (count == LD_THRESHOLD) {
573 // loop detection marker
574 // (bit23 set causes DMA error on real machine, so
575 // unlikely to be ever set by the game)
576 list[0] |= HTOLE32(0x800000);
581 // remove loop detection markers
582 count -= LD_THRESHOLD + 2;
583 addr = ld_addr & 0x1fffff;
584 while (count-- > 0) {
585 list = rambase + addr / 4;
586 addr = LE32TOH(list[0]) & 0x1fffff;
587 list[0] &= HTOLE32(~0x800000);
// bookkeeping for frontend latency estimation
591 gpu.state.last_list.frame = *gpu.state.frame_count;
592 gpu.state.last_list.hcnt = *gpu.state.hcnt;
593 gpu.state.last_list.cycles = cpu_cycles;
594 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA VRAM->CPU): flush pending commands, then stream the
// active VRAM read transfer into the caller's buffer.
// NOTE(review): fragmentary listing -- flush call and guard elided.
599 void GPUreadDataMem(uint32_t *mem, int count)
601 log_io("gpu_dma_read %p %d\n", mem, count);
603 if (unlikely(gpu.cmd_len > 0))
607 do_vram_io(mem, count, 1);
// Single-word GPUREAD: flush pending commands, pull one word from the
// active VRAM read transfer, return it (byte order handling elided).
// NOTE(review): fragmentary listing -- ret setup/return lines elided.
610 uint32_t GPUreadData(void)
614 if (unlikely(gpu.cmd_len > 0))
620 do_vram_io(&ret, 1, 1);
624 log_io("gpu_read %08x\n", ret);
// Read the GPU status register; buffered commands are flushed first so
// status bits reflect everything written so far.
// NOTE(review): fragmentary listing -- ret assignment/return elided.
628 uint32_t GPUreadStatus(void)
632 if (unlikely(gpu.cmd_len > 0))
636 log_io("gpu_read_status %08x\n", ret);
// NOTE(review): fragment of the GPUFreeze save-state structure; the
// struct declaration line itself is not visible in this listing.
642 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
643 uint32_t ulStatus; // current gpu status
644 uint32_t ulControl[256]; // latest control register values
645 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save/restore GPU state for emulator save-states. Save copies VRAM,
// regs (ex_regs live at ulControl[0xe0..]) and status out; restore
// copies them back, replays control writes 1..8 (with the XOR trick to
// defeat the redundant-write filter in GPUwriteStatus) and forces the
// renderer to resync its command state and caches.
// NOTE(review): fragmentary listing -- type check/branches elided.
648 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
656 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
657 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
658 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
659 freeze->ulStatus = gpu.status;
662 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
663 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
664 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
665 gpu.status = freeze->ulStatus;
667 for (i = 8; i > 0; i--) {
668 gpu.regs[i] ^= 1; // avoid reg change detection
669 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
671 renderer_sync_ecmds(gpu.ex_regs);
672 renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush the renderer, skip output while blanked or when
// nothing changed, apply the frameskip frame-ready gating (with a
// 9-frame stall escape hatch), and finally present the frame.
// NOTE(review): fragmentary listing -- the blit/vout call between the
// frameskip block and the flag clearing is elided.
679 void GPUupdateLace(void)
683 renderer_flush_queues();
685 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
686 if (!gpu.state.blanked) {
688 gpu.state.blanked = 1;
689 gpu.state.fb_dirty = 1;
694 if (!gpu.state.fb_dirty)
697 if (gpu.frameskip.set) {
698 if (!gpu.frameskip.frame_ready) {
// no finished frame yet; give up waiting only if the game has not
// flipped for ~9 frames (it may never signal a flip)
699 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
701 gpu.frameskip.active = 0;
703 gpu.frameskip.frame_ready = 0;
707 gpu.state.fb_dirty = 0;
708 gpu.state.blanked = 0;
// VBlank notification: decide whether interlaced rendering should be in
// effect. allow_interlace==2 is an "auto" mode that only interlaces when
// the game recently read VRAM; on any change of effective interlace
// state the renderer queues are flushed and the renderer is told the
// new field (lcf) parity.
// NOTE(review): fragmentary listing -- the auto-mode disable line and
// braces are elided.
711 void GPUvBlank(int is_vblank, int lcf)
713 int interlace = gpu.state.allow_interlace
714 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
715 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
716 // interlace doesn't look nice on progressive displays,
717 // so we have this "auto" mode here for games that don't read vram
718 if (gpu.state.allow_interlace == 2
719 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
723 if (interlace || interlace != gpu.state.old_interlace) {
724 gpu.state.old_interlace = interlace;
728 renderer_flush_queues();
729 renderer_set_interlace(interlace, !lcf);
733 #include "../../frontend/plugin_lib.h"
// Receive the frontend callback/config table: latch frameskip settings,
// counter pointers, interlace/enhancement options and the memory-mapping
// callbacks, map VRAM if not yet done, and forward the config to the
// renderer and video-out layers.
// NOTE(review): fragmentary listing -- the map_vram() call guarded by
// the NULL check is elided.
735 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
737 gpu.frameskip.set = cbs->frameskip;
738 gpu.frameskip.advice = &cbs->fskip_advice;
739 gpu.frameskip.active = 0;
740 gpu.frameskip.frame_ready = 1;
741 gpu.state.hcnt = cbs->gpu_hcnt;
742 gpu.state.frame_count = cbs->gpu_frame_count;
743 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
744 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
746 gpu.mmap = cbs->mmap;
747 gpu.munmap = cbs->munmap;
// VRAM mapping is deferred until the frontend provides mmap/munmap
750 if (gpu.vram == NULL)
753 if (cbs->pl_vout_set_raw_vram)
754 cbs->pl_vout_set_raw_vram(gpu.vram);
755 renderer_set_config(cbs);
756 vout_set_config(cbs);
759 // vim:shiftwidth=2:expandtab