 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of a statically-sized array (do not use on pointers).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: condition expected to be false.
#define unlikely(x) __builtin_expect((x), 0)
// Data-cache prefetch hint.
#define preload __builtin_prefetch
// Keep rarely-taken helpers out of hot callers.
#define noinline __attribute__((noinline))
// Log with a "<frame>:<hcnt>" timestamp prefix taken from the shared counters.
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

// Uncomment to route I/O / anomaly traces through gpu_log.
//#define log_io gpu_log
//#define log_anomaly gpu_log
// Anomaly logging is compiled out by default.
#define log_anomaly(...)
// Forward declarations: GP0 command-stream processing and VRAM DMA completion.
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
// Drain buffered commands and close out any in-flight VRAM transfer so a
// reset leaves no half-consumed state behind.
static noinline void do_cmd_reset(void)
  // flush whatever is still queued before state is wiped
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  // a pending transfer (dma.h rows remaining) must be finalized too
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
// Reset GPU register state and screen geometry to power-on defaults.
static noinline void do_reset(void)
  memset(gpu.regs, 0, sizeof(gpu.regs));
  // each ex_reg defaults to just its own command byte (0xe0..) in bits 24-31
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // NOTE(review): presumably the power-on GPUSTAT value - confirm against spec
  // default NTSC-ish mode: 256x240 display window
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
// Recompute visible width from the programmed horizontal display range.
static noinline void update_width(void)
  int sw = gpu.screen.x2 - gpu.screen.x1;
  // nonsense x1/x2 range: fall back to the mode's full horizontal resolution
  if (sw <= 0 || sw >= 2560)
    gpu.screen.w = gpu.screen.hres;
    // scale the dot-clock span (2560 units = full line) to pixels
    gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the programmed vertical display range.
static noinline void update_height(void)
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  // double-height (480i) mode doubles the effective span
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
  // clamp nonsense ranges to the mode's vertical resolution
  if (sh <= 0 || sh > gpu.screen.vres)
// Decide whether the upcoming frame should be skipped, based on the
// configured skip count and external frameskip advice.
static noinline void decide_frameskip(void)
  if (gpu.frameskip.active)
    // finished a skip run: reset counter and mark a frame as ready to show
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;

  // external advice (e.g. from the frontend) can force skipping on
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  // otherwise skip up to 'set' consecutive frames
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
    gpu.frameskip.active = 0;

  // when skipping ends, replay a fill command deferred during the skip
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
// Given an e3 (draw area top-left) command word, decide if frameskip is
// allowed: skipping is unsafe when drawing targets the display area,
// except in interlace mode where that is the normal case.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // unsigned subtraction makes "x < screen.x" also fail the range test
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
// GP1 "get GPU info": place the requested internal register value in gp0.
static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
      // latched e-register payload (20-bit) for the requested index
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      // e5 (drawing offset) payload is 22 bits
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
// 1024x512 16bpp VRAM, allocated twice over so renderer overdraw
// past the end stays inside the mapping.
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)
// Map the VRAM buffer through the host-provided allocator; on success the
// working pointer is advanced past a leading guard region.
static int map_vram(void)
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // skip 4096-byte guard area; pointer arithmetic is in 16-bit pixels
    gpu.vram += 4096 / 2;
  // mapping failure is survivable but will likely crash later
  fprintf(stderr, "could not map vram, expect crashes\n");
  // (init fragment) bring up the renderer and default plugin state
  ret |= renderer_init();

  // point the frame/hcnt counters at a local zero until the emulator
  // registers real counters via GPUrearmedCallbacks
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  // map VRAM now only if the host already provided an mmap callback
  if (gpu.mmap != NULL) {
// Plugin shutdown: release the VRAM mapping (undo the guard-region offset
// applied in map_vram before unmapping).
long GPUshutdown(void)
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
// Handle a GP1 control-port write; the command is the top byte of 'data'.
void GPUwriteStatus(uint32_t data)
  // horizontal resolutions indexed by GPUSTAT hres bits, vertical by vres bits
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore redundant writes (except cmds 0/1/5 which always take effect)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
    gpu.regs[cmd] = data;

  gpu.state.fb_dirty = 1;

      // display enable/blanking bit
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;

      // DMA direction (2-bit field)
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);

      // display start address in VRAM
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        // display start moved: re-evaluate skip safety and note the flip
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;

      // horizontal display range (dot-clock units)
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;

      // vertical display range (scanlines)
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;

      // display mode: fold hres/vres/mode bits into GPUSTAT bits 16-22
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status >> 19) & 3];
      renderer_notify_res_change();

  // 0x10-0x1f: "get GPU info" queries
  if ((cmd & 0xf0) == 0x10)

#ifdef GPUwriteStatus_ext
  // optional per-port extension hook
  GPUwriteStatus_ext(data);
// Number of EXTRA words each GP0 command consumes beyond the command word
// itself (total length = 1 + cmd_lengths[cmd]); indexed by command byte.
const unsigned char cmd_lengths[256] =
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of 16bpp pixel (x, y) in the 1024-pixel-wide VRAM.
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of 'l' pixels between VRAM at (x, y) and 'mem':
// VRAM -> mem when is_read, mem -> VRAM otherwise.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
  uint16_t *vram = VRAM_MEM_XY(x, y);
  // read path: VRAM to caller's buffer (l is in 16-bit pixels)
  memcpy(mem, vram, l * 2);
  // write path: caller's buffer to VRAM
  memcpy(vram, mem, l * 2);
// Transfer up to 'count' words between 'data' and the active VRAM rectangle
// (gpu.dma), resuming from a partial row if one is pending.
// Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;

  count *= 2; // operate in 16bpp pixels

  // finish a previously started partial row first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    do_vram_line(x + o, y, sdata, l, is_read);

  // whole rows while enough data remains
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);

  // leftover pixels start a new partial row
  do_vram_line(x, y, sdata, count, is_read);

  // rectangle fully transferred
  finish_vram_transfer(is_read);

  // convert remaining pixels back to consumed words
  return count_initial - count / 2;
// Begin a VRAM read or write rectangle from a GP0 0xa0/0xc0-style command:
// pos_word packs x/y, size_word packs w/h (both 1-based after wrap).
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
  log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // sizes wrap so that 0 means maximum (1024 / 512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.is_read = is_read;
  // snapshot start parameters for finish_vram_transfer's cache update
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();

    // read: raise the "ready to send image" status bit and pre-latch
    // the first word for GPUREAD
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Finalize the current VRAM transfer: clear the image-ready bit for reads,
// and tell the renderer which rectangle changed for writes.
static void finish_vram_transfer(int is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
    // invalidate renderer caches over the written rectangle
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h);
// Walk the command list while frameskip is active: draw commands are
// discarded, but state-affecting commands (e-regs, large fills, texture
// window) are still tracked or executed. Stops when skipping becomes
// disallowed; returns words consumed, last command via *last_cmd.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy, v;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

        // fill command: if it covers something larger than the screen,
        // execute it now; otherwise defer it until skipping ends
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);

        // textured prim: keep the texture-page bits of e1 up to date
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;

        // poly-line: scan for the 0x5000xxxx terminator word
        for (v = 3; pos + v < count; v++)
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))

        // shaded poly-line: terminator appears every other word
        for (v = 4; pos + v < count; v += 2)
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))

        // e3 (draw area) may change whether skipping stays safe
        skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);

    if (pos + len > count) {
      break; // incomplete cmd
    // VRAM transfer commands end the skip loop
    if (0xa0 <= cmd && cmd <= 0xdf)

  // push any e-reg changes collected above into the renderer
  renderer_sync_ecmds(gpu.ex_regs);
// Core GP0 dispatcher: consume 'count' words from 'data', routing them to
// VRAM I/O, the skip path, or the renderer. Returns words left unconsumed.
static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3];

  for (pos = 0; pos < count; )
    // an active VRAM write eats data words directly
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);

    cmd = LE32TOH(data[pos]) >> 24;
    // 0xa0-0xdf: VRAM write/read setup commands (3 words)
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
      pos += do_cmd_list(data + pos, count - pos, &cmd);

  // refresh the GPUSTAT bits mirrored from e1/e6
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area changed: recheck whether frameskip remains safe
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);
// Process the accumulated command buffer; any unconsumed tail words are
// moved to the front for the next call.
static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// DMA block write entry point: feed 'count' words straight to the command
// processor (after flushing any partially buffered command).
void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))

  left = do_cmd_buffer(mem, count);
  // leftover words from a DMA block cannot be buffered here; report them
  log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: append to the command buffer, flushing when full.
void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain starting at start_addr and feed each packet
// to the command processor. Detects and neutralizes list loops by marking
// visited headers, then restores them. Returns cycle cost (via cpu_cycles).
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit 23 set in a header terminates the chain
  for (count = 0; (addr & 0x800000) == 0; count++)
    // header word: payload length in the top byte, next address below
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    // rough per-packet timing estimate
    cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    // if a partial command is pending, append this payload to the buffer
    if (unlikely(gpu.cmd_len > 0)) {
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);

    left = do_cmd_buffer(list + 1, len);
    // stash unconsumed tail words for the next packet
    memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
    log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);

    *progress_addr = addr;

    // loop detection: after LD_THRESHOLD packets, start tagging headers so a
    // revisit (tag seen again) ends the walk
#define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);

    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);

  // record list stats for frontend diagnostics / pacing
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;
// DMA block read: copy 'count' words out of the active VRAM read rectangle.
void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(mem, count, 1);
// Single-word GPUREAD: return the next word of an active VRAM read
// (or the latched gp0 value otherwise).
uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush pending commands first so status bits are current.
uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))

  log_io("gpu_read_status %08x\n", ret);
  // Savestate layout shared with the PSX plugin API (do not reorder).
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];  // current VRam image (full 2 MB for ZN)
// Save/restore GPU state through the plugin freeze interface.
// 'type' selects save vs. load (branching elided in this view).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
    // save: copy out VRAM (native 1MB), control regs and e-regs, status
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;

    // load: restore the same state...
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;

    // ...then replay control writes 8..1 so derived state (screen geometry,
    // status bits) is recomputed; the xor defeats the redundant-write check
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    renderer_sync_ecmds(gpu.ex_regs);
    // whole VRAM changed: invalidate all renderer caches
    renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush the renderer and decide whether to present a frame,
// honoring blanking and the frameskip state machine.
void GPUupdateLace(void)
  renderer_flush_queues();

  // blanked display: present a black frame once, then do nothing
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;

  // nothing drawn since last present
  if (!gpu.state.fb_dirty)

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no flip for a while: game may be stuck, stop skipping (9 frames
      // appears to be the stall threshold - NOTE(review): confirm)
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
      gpu.frameskip.active = 0;
    gpu.frameskip.frame_ready = 0;

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
// VBlank notification: compute the effective interlace mode and pass the
// current field (lcf) to the renderer.
void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)

  // flush and reconfigure on any interlace state change
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
743 #include "../../frontend/plugin_lib.h"
// Accept the frontend's callback/config structure: wire up frameskip
// settings, shared counters, memory-mapping hooks and renderer config.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  // real frame/hcnt counters replace the dummy set up at init
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // VRAM may not have been mappable at init time; bail if still unmapped
  if (gpu.vram == NULL)

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
769 // vim:shiftwidth=2:expandtab