 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a statically-sized array (do not use on pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch-prediction / prefetch / inlining hints (GCC builtins)
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
// debug log line stamped with the current frame count and hsync counter
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

// uncomment to route I/O / anomaly tracing through gpu_log
//#define log_io gpu_log
//#define log_anomaly gpu_log
#define log_anomaly(...)

// forward declarations for the command/VRAM pipeline below
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
// Drain any partially-buffered command words and complete a pending
// VRAM transfer so command state can be reset cleanly.
static noinline void do_cmd_reset(void)
  // flush whatever is still queued in the command buffer
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  // a VRAM transfer is still in flight (dma.h > 0): let the renderer
  // observe its completion before state is cleared
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
// Reset control registers, ex_regs, status and screen mode to
// power-on defaults.
static noinline void do_reset(void)
  memset(gpu.regs, 0, sizeof(gpu.regs));
  // seed each ex_reg with its own command number (0xe0..) in the top byte
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  // 0x14802000: post-reset status word -- NOTE(review): confirm bit
  // meanings against the status register layout
  gpu.status = 0x14802000;
  // default display mode: 256x240
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
// Derive the visible width in pixels from the x1/x2 horizontal display
// range; falls back to the full hres when the range looks bogus.
static noinline void update_width(void)
  int sw = gpu.screen.x2 - gpu.screen.x1;
  // invalid or out-of-range span: just use the full horizontal resolution
  if (sw <= 0 || sw >= 2560)
    gpu.screen.w = gpu.screen.hres;
  // scale the dot-clock span (2560 per scanline) to pixels
    gpu.screen.w = sw * gpu.screen.hres / 2560;
// Derive the visible height from the y1/y2 vertical display range.
// NOTE(review): branch bodies are not visible in this chunk; presumably
// sh is doubled for double-height (interlace) mode -- confirm upstream.
static noinline void update_height(void)
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  // double-height display bit set
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
  // reject spans that are non-positive or exceed the mode's vres
  if (sh <= 0 || sh > gpu.screen.vres)
// Per-frame frameskip decision: tracks the skip counter and decides
// whether the upcoming frame is skipped or rendered.
static noinline void decide_frameskip(void)
  // previous frame was skipped -- reset counter, mark a frame ready
  // (some surrounding lines are not visible in this chunk)
  if (gpu.frameskip.active)
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;

  // enable skipping on external advice, or per the fixed 1-in-N setting;
  // otherwise render this frame
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
    gpu.frameskip.active = 0;

  // a fill command was deferred while skipping: execute it now that we
  // are rendering again, then clear the pending slot
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
// Given an e3 (draw area top-left) command word, decide whether frame
// skipping is permissible: drawing inside the displayed area forbids it,
// except under interlace where that is the normal case.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // the unsigned subtract-and-compare doubles as a "below screen origin"
  // check: x < screen.x wraps to a huge value and fails the bound
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
// GP1 "get GPU info": latch the requested internal value into gp0.
// NOTE(review): the case labels of this switch are not visible here.
static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
    // one of the e0..e7 attribute registers, 20 significant bits
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    // draw offset register (e5), 20 significant bits
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

// Map the VRAM buffer through the host-supplied mmap callback and skip
// a 4 KB guard zone in front (pointer is in 16-bit pixels, hence /2).
static int map_vram(void)
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // guard offset; undone in GPUshutdown before munmap
    gpu.vram += 4096 / 2;
  // mapping failed: keep going, but warn loudly
  fprintf(stderr, "could not map vram, expect crashes\n");
  // (interior of GPUinit -- the function header is not visible in this chunk)
  ret |= renderer_init();

  // point the counter pointers at a harmless zero until the frontend
  // installs real ones via GPUrearmedCallbacks
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  // map VRAM now only if an mmap callback has already been provided
  if (gpu.mmap != NULL) {
// Tear down the plugin: unmap VRAM (after undoing the guard offset
// applied in map_vram).
long GPUshutdown(void)
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;  // restore the pointer mmap originally returned
    gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 control-port write: dispatch on the command byte (bits 24-31).
// NOTE(review): the switch/case labels are not visible in this chunk;
// the groups below are annotated by what each statement run does.
void GPUwriteStatus(uint32_t data)
  // resolution lookup tables for the display-mode command
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip work when an unchanged value is rewritten (except cmds 0, 1, 5)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
    gpu.regs[cmd] = data;

  gpu.state.fb_dirty = 1;

  // display blanking on / off
  gpu.status |= PSX_GPU_STATUS_BLANKING;
  gpu.status &= ~PSX_GPU_STATUS_BLANKING;

  // DMA direction (2-bit field)
  gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
  gpu.status |= PSX_GPU_STATUS_DMA(data & 3);

  // display start position; a position change counts as a "flip" for
  // the frameskip logic
  gpu.screen.x = data & 0x3ff;
  gpu.screen.y = (data >> 10) & 0x1ff;
  if (gpu.frameskip.set) {
    decide_frameskip_allow(gpu.ex_regs[3]);
    if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
      gpu.frameskip.last_flip_frame = *gpu.state.frame_count;

  // horizontal display range (in dot clocks)
  gpu.screen.x1 = data & 0xfff;
  gpu.screen.x2 = (data >> 12) & 0xfff;

  // vertical display range (in scanlines)
  gpu.screen.y1 = data & 0x3ff;
  gpu.screen.y2 = (data >> 10) & 0x3ff;

  // display mode: fold the mode bits into status, refresh resolution
  gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
  gpu.screen.hres = hres[(gpu.status >> 16) & 7];
  gpu.screen.vres = vres[(gpu.status >> 19) & 3];
  renderer_notify_res_change();

  // GP1(0x10..0x1f): GPU info query
  if ((cmd & 0xf0) == 0x10)

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
// Number of extra data words that follow each GP0 command byte
// (index = command byte; variable-length commands are listed as their
// fixed part and handled specially by the command parser).
const unsigned char cmd_lengths[256] =
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// Copy one horizontal run of l pixels between VRAM and the caller's
// buffer; direction is selected by is_read (VRAM -> mem when reading).
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
  uint16_t *vram = VRAM_MEM_XY(x, y);
  memcpy(mem, vram, l * 2);   // read path: VRAM to buffer (l is in pixels)
  memcpy(vram, mem, l * 2);   // write path: buffer to VRAM
// Stream pixel data between the 32-bit word stream and the VRAM
// rectangle described by gpu.dma. A line may straddle calls, tracked by
// gpu.dma.offset. Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  count *= 2; // operate in 16bpp pixels

  // first, finish the line left incomplete by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
  do_vram_line(x + o, y, sdata, l, is_read);

  // whole lines while enough data remains
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);

  // trailing partial line -- the remainder is kept for the next call
  do_vram_line(x, y, sdata, count, is_read);

  // rectangle fully transferred
  finish_vram_transfer(is_read);

  return count_initial - count / 2;  // back to 32-bit words
// Begin a GP0 VRAM transfer (read or write): latch the target rectangle
// into gpu.dma and, for reads, pre-fill gp0 with the first pixels so a
// subsequent GPUreadData has data immediately.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
  log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // a size of 0 means the maximum (0x400 / 0x200), hence -1, mask, +1
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;  // keep the original rect for finish_vram_transfer

  // drawing must be flushed before the CPU touches VRAM directly
  renderer_flush_queues();
  gpu.status |= PSX_GPU_STATUS_IMG;
  // XXX: wrong for width 1
  gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
  gpu.state.last_vram_read_frame = *gpu.state.frame_count;

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete the active VRAM transfer: clear the transfer-busy status bit
// (read path) or tell the renderer which rectangle was overwritten so it
// can invalidate its caches (write path).
static void finish_vram_transfer(int is_read)
  gpu.status &= ~PSX_GPU_STATUS_IMG;
  renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
    gpu.dma_start.w, gpu.dma_start.h);
// Walk a GP0 command list while frameskip is active: skip drawing
// commands, but still track state-affecting ones (fills, texture page,
// e-regs) so GPU state stays consistent. Returns words consumed;
// *last_cmd receives the last command byte seen.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy, v;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    // fill: if it covers more than the screen, run it now; otherwise
    // defer it (pending_fill) until skipping ends
    if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
      // clearing something large, don't skip
      do_cmd_list(list, 3, &dummy);
    memcpy(gpu.frameskip.pending_fill, list, 3 * 4);

    // textured prim: keep the texture-page bits of e1 up to date
    gpu.ex_regs[1] &= ~0x1ff;
    gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;

    // poly-line: scan for the 0x5000,5000 terminator (one vertex per word)
    for (v = 3; pos + v < count; v++)
      if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))

    // shaded poly-line: terminator appears every second word
    for (v = 4; pos + v < count; v += 2)
      if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))

    // e-reg write: may change the skip decision (e3 = draw area)
    skip = decide_frameskip_allow(LE32TOH(list[0]));
    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);

    // not enough words buffered for this command yet
    if (pos + len > count) {
      break; // incomplete cmd
    // VRAM access commands must not be skipped -- stop here
    if (0xa0 <= cmd && cmd <= 0xdf)

  // mirror the tracked e-regs into the renderer
  renderer_sync_ecmds(gpu.ex_regs);
// Top-level GP0 command word consumer: routes words to an active VRAM
// transfer, starts new transfers, and otherwise hands command runs to
// do_cmd_list (or do_cmd_list_skip under frameskip). Returns the number
// of words NOT consumed (incomplete trailing command).
static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3];  // detect draw-area changes at the end

  for (pos = 0; pos < count; )
    // an active VRAM write eats raw pixel data first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      // transfer command needs its 2 parameter words before it can start
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
      pos += do_cmd_list(data + pos, count - pos, &cmd);

  // refresh the status bits that mirror e1/e6 register state
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area changed: re-evaluate the frameskip decision
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);
// Process the buffered command words; an incomplete trailing command is
// moved to the front of the buffer to await the rest of its data.
static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  // keep the unconsumed tail (left words, 4 bytes each) for next time
  memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk data-port write (DMA block): flush any buffered words first so
// ordering is preserved, then consume the block directly.
void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  // older buffered words must be processed before this block
  if (unlikely(gpu.cmd_len > 0))

  left = do_cmd_buffer(mem, count);
  // leftovers here are lost -- log as an anomaly
  log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word data-port write: buffer it (little-endian), flushing when
// the buffer is full.
void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain starting at start_addr in PSX RAM,
// feeding each packet's payload to do_cmd_buffer. Includes a marker-based
// loop detector for chains that link back on themselves. Returns cycles.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  // flush any buffered port writes before processing the chain
  if (unlikely(gpu.cmd_len > 0))

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit 23 set terminates the chain (and serves as our loop marker below)
  for (count = 0; (addr & 0x800000) == 0; count++)
    // each node: header word = next-addr (24 bits) | payload len (top byte)
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    // rough per-node cost estimate -- TODO confirm against timing docs
    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    left = do_cmd_buffer(list + 1, len);
    log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

#define LD_THRESHOLD (8*1024)
    // suspiciously long chain: start marking visited nodes to catch loops
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);

  // remove loop detection markers
  count -= LD_THRESHOLD + 2;
  addr = ld_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = LE32TOH(list[0]) & 0x1fffff;
    list[0] &= HTOLE32(~0x800000);

  // record when/where the last list was processed (used by frontends)
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;
// Bulk data-port read (DMA): flush buffered writes first, then stream
// pixels out of the active VRAM-read transfer.
void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(mem, count, 1);
// Single-word data-port read: returns the next word of an active
// VRAM-read transfer (flushing buffered writes first).
uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))

  do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
// Status-port read: buffered writes are flushed first so the returned
// status reflects everything written so far.
uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))

  log_io("gpu_read_status %08x\n", ret);
  // (fields of the savestate struct; the struct header is not visible here)
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];  // current VRam image (full 2 MB for ZN)
// Savestate entry point: save or restore the full GPU state (VRAM,
// control registers, e-regs, status). NOTE(review): the type dispatch /
// case labels are not visible in this chunk.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
  // save path
  memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
  memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
  // e-regs are stashed at the 0xe0.. slots of ulControl
  memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
  freeze->ulStatus = gpu.status;

  // load path
  memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
  memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
  memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
  gpu.status = freeze->ulStatus;

  // replay control writes 8..1 to rebuild derived state (screen size
  // etc.); the xor defeats the unchanged-value early-out in GPUwriteStatus
  for (i = 8; i > 0; i--) {
    gpu.regs[i] ^= 1; // avoid reg change detection
    GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_update_caches(0, 0, 1024, 512);  // whole VRAM changed
// Per-vblank display update: flush drawing, honor blanking, and decide
// (with the frameskip logic) whether to present the frame.
void GPUupdateLace(void)
  renderer_flush_queues();

  // display blanked: present nothing, but mark the fb dirty once so the
  // blanked screen is shown
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;

  // nothing was drawn since the last flip
  if (!gpu.state.fb_dirty)

  if (gpu.frameskip.set) {
    // frame not ready: wait a little (up to ~9 frames since last flip)
    // before forcing frameskip off
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
      gpu.frameskip.active = 0;
    gpu.frameskip.frame_ready = 0;

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
// Vblank notification: work out whether interlaced rendering should be
// used and inform the renderer on changes (or every field while active).
void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)

  // while interlacing, update every field; otherwise only on transitions
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
730 #include "../../frontend/plugin_lib.h"
// Frontend callback registration: pull frameskip settings, counter
// pointers and memory-mapping hooks from the rearmed callback struct,
// then propagate configuration to the renderer and video-out layers.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  // memory hooks may arrive only now; VRAM mapping below depends on them
  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // VRAM not mapped yet (GPUinit ran before callbacks were available)
  if (gpu.vram == NULL)

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
756 // vim:shiftwidth=2:expandtab