2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of a true array (gives a wrong answer if x is a pointer).
16 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: tells the compiler the condition is expected to be 0.
18 #define unlikely(x) __builtin_expect((x), 0)
// Short aliases for the compiler's prefetch builtin and the noinline attribute.
19 #define preload __builtin_prefetch
20 #define noinline __attribute__((noinline))
// printf-style logging, prefixed with the values pointed to by
// gpu.state.frame_count and gpu.state.hcnt.
27 #define gpu_log(fmt, ...) \
28 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Enable the two commented-out defines below to route the respective
// messages through gpu_log.
30 //#define log_io gpu_log
32 //#define log_anomaly gpu_log
// As defined here, anomaly messages expand to nothing.
33 #define log_anomaly(...)
// Forward declarations: both helpers are called before their definitions.
37 static noinline int do_cmd_buffer(uint32_t *data, int count);
38 static void finish_vram_transfer(int is_read);
// Drains pending work: runs any buffered command words through
// do_cmd_buffer() and, if gpu.dma.h is still positive (a VRAM transfer has
// not completed), finishes that transfer.
// NOTE(review): lines between the visible statements are missing from this
// excerpt; any state clearing done there is not documented here.
40 static noinline void do_cmd_reset(void)
42 if (unlikely(gpu.cmd_len > 0))
43 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
46 if (unlikely(gpu.dma.h > 0))
// Uses the direction recorded when the transfer was started.
47 finish_vram_transfer(gpu.dma_start.is_read);
// Puts the GPU register state and screen geometry back to fixed defaults.
// NOTE(review): some statements of this function are missing from this excerpt.
51 static noinline void do_reset(void)
57 memset(gpu.regs, 0, sizeof(gpu.regs));
// Each ex_regs entry is set to its own command byte (0xe0 + index) in the
// top 8 bits with all lower bits zero.
58 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
59 gpu.ex_regs[i] = (0xe0 + i) << 24;
60 gpu.status = 0x14802000;
// Default display: 256x240 at offset (0, 0).
63 gpu.screen.hres = gpu.screen.w = 256;
64 gpu.screen.vres = gpu.screen.h = 240;
65 gpu.screen.x = gpu.screen.y = 0;
// Recomputes horizontal display geometry from the status word and the
// x1/x2 display range, stores the resulting hres and marks the dimensions
// as changed.
// NOTE(review): case labels, some conditions and some assignments are
// missing from this excerpt, so only the visible statements are described.
68 static noinline void update_width(void)
// Both tables are indexed by status bits 16..18: the horizontal resolution
// and the divider applied to the x1 offset below.
70 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
71 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
72 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
73 int hres = hres_all[(gpu.status >> 16) & 7];
74 int pal = gpu.status & PSX_GPU_STATUS_PAL;
// Width of the display range as written to the x1/x2 registers.
75 int sw = gpu.screen.x2 - gpu.screen.x1;
78 /* nothing displayed? */;
// s is the x1 value treated as the left edge; it differs for PAL.
80 int s = pal ? 656 : 608; // or 600? pal is just a guess
81 x = (gpu.screen.x1 - s) / hdiv;
// Rounds x up to an even value.
82 x = (x + 1) & ~1; // blitter limitation
// Rounds sw to the nearest multiple of 4.
84 sw = (sw + 2) & ~3; // according to nocash
// The centering mode decides where x comes from.
85 switch (gpu.state.screen_centering_type) {
// Centering offset supplied through configuration.
89 x = gpu.state.screen_centering_x;
92 // correct if slightly miscentered
93 x_auto = (hres - sw) / 2 & ~3;
// The unsigned cast makes a negative x_auto fail the "<= 8" test.
94 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
99 // .x range check is done in vout_update()
101 // reduce the unpleasant right border that a few games have
102 if (gpu.state.screen_centering_type == 0
103 && x <= 4 && hres - (x + sw) >= 4)
107 gpu.screen.hres = hres;
108 gpu.state.dims_changed = 1;
109 //printf("xx %d %d -> %2d, %d / %d\n",
110 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
// Recomputes vertical display geometry from the status word and the y1/y2
// display range, stores the resulting vres and marks the dimensions as
// changed.
// NOTE(review): declarations of vres/center_tol, case labels and some
// assignments are missing from this excerpt.
113 static noinline void update_height(void)
115 int pal = gpu.status & PSX_GPU_STATUS_PAL;
116 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
// Offset of y1 from a fixed first line: 39 when the PAL bit is set, else 16.
117 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
// Height of the display range as written to the y1/y2 registers.
118 int sh = gpu.screen.y2 - gpu.screen.y1;
// PAL with more than 240 lines requested, or vres already at 256.
122 if (pal && (sh > 240 || gpu.screen.vres == 256))
// Doubles every vertical quantity at once.
// NOTE(review): the guarding condition is not visible; presumably the
// dheight flag read above - confirm.
125 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
127 /* nothing displayed? */;
// The centering mode decides where y comes from.
129 switch (gpu.state.screen_centering_type) {
// Centering offset supplied through configuration.
133 y = gpu.state.screen_centering_y;
136 // correct if slightly miscentered
// The unsigned cast rejects negative differences: only vres - sh of 0 or 1
// passes.
137 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
145 gpu.screen.vres = vres;
146 gpu.state.dims_changed = 1;
147 //printf("yy %d %d -> %d, %d / %d\n",
148 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
// Updates gpu.frameskip.active for the next frame and replays a deferred
// fill command once skipping stops.
// NOTE(review): the else branches and some statements are missing from
// this excerpt, so which counter updates sit under which branch is only
// partly visible.
151 static noinline void decide_frameskip(void)
153 if (gpu.frameskip.active)
156 gpu.frameskip.cnt = 0;
157 gpu.frameskip.frame_ready = 1;
// Skip when the external advice flag is set (and we are not skipping
// already), or while frameskip.cnt is still below the configured
// frameskip.set; otherwise stop skipping.
160 if (!gpu.frameskip.active && *gpu.frameskip.advice)
161 gpu.frameskip.active = 1;
162 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
163 gpu.frameskip.active = 1;
165 gpu.frameskip.active = 0;
// A non-zero first word means do_cmd_list_skip() stored a 3-word fill
// command instead of executing it; run it now that drawing is enabled.
167 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
169 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
// Mark the pending slot as empty.
170 gpu.frameskip.pending_fill[0] = 0;
// Decides whether frame skipping is permitted for the drawing-area origin
// encoded in cmd_e3, stores the decision in gpu.frameskip.allow and returns
// it.
// cmd_e3: command word whose bits 0..9 hold x and bits 10..19 hold y.
174 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
176 // no frameskip if it decides to draw to display area,
177 // but not for interlace since it'll most likely always do that
178 uint32_t x = cmd_e3 & 0x3ff;
179 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// Each unsigned "(v - origin) >= size" comparison is true when v lies
// outside [origin, origin + size): values below the origin wrap around to
// large unsigned numbers. Skipping is allowed when the interlace bit is
// set or the point is outside the displayed rectangle.
180 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
181 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
182 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
183 return gpu.frameskip.allow;
// Answers an info request by placing a masked ex_regs value into gpu.gp0.
// The request type is taken from the low 4 bits of data.
// NOTE(review): the case labels are missing from this excerpt, so the
// mapping from request type to branch is not visible here.
186 static noinline void get_gpu_info(uint32_t data)
188 switch (data & 0x0f) {
// Low 20 bits of the ex_regs entry selected by the low 3 bits of data.
192 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// Low 22 bits of ex_regs[5].
195 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
// Size in bytes requested from the mapping callback.
206 // double, for overdraw guard
207 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Maps the VRAM buffer through the gpu.mmap callback and reports failure on
// stderr.
// NOTE(review): the return statements are missing from this excerpt.
209 static int map_vram(void)
211 gpu.vram = gpu.mmap(VRAM_SIZE);
212 if (gpu.vram != NULL) {
// Advance the base pointer by 4096 / 2 elements so there is slack in front
// of VRAM as well; GPUshutdown() subtracts the same amount before unmapping.
// NOTE(review): presumably gpu.vram is a 16-bit pointer, making this a
// 4096-byte offset - confirm against its declaration.
213 gpu.vram += 4096 / 2;
217 fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): fragment of an initialization routine whose signature is
// missing from this excerpt (presumably GPUinit - confirm).
// Accumulates the renderer's init result into ret.
226 ret |= renderer_init();
// Point the frame and hcnt counters at gpu.zero so they can be dereferenced
// before GPUrearmedCallbacks() installs the real counters.
228 gpu.state.frame_count = &gpu.zero;
229 gpu.state.hcnt = &gpu.zero;
230 gpu.frameskip.active = 0;
// Only acts when a mapping callback is already installed (the body of this
// branch is not visible here).
234 if (gpu.mmap != NULL) {
// Releases the VRAM mapping, if one exists.
// NOTE(review): the remaining statements and the return value are missing
// from this excerpt.
241 long GPUshutdown(void)
247 if (gpu.vram != NULL) {
// Undo the offset applied in map_vram() so the pointer handed to munmap is
// the one mmap returned.
248 gpu.vram -= 4096 / 2;
249 gpu.munmap(gpu.vram, VRAM_SIZE);
// Handles a write to the GPU control port. The top 8 bits of data select
// the command; the remaining bits are its argument.
// NOTE(review): the switch statement, its case labels and several calls are
// missing from this excerpt; the comments below describe only the visible
// statements.
256 void GPUwriteStatus(uint32_t data)
258 uint32_t cmd = data >> 24;
260 if (cmd < ARRAY_SIZE(gpu.regs)) {
// Same value as last time for a command other than 0, 1 and 5.
// NOTE(review): presumably returns early here - the statement is not visible.
261 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
// Remember the latest value written for this command.
263 gpu.regs[cmd] = data;
266 gpu.state.fb_dirty = 1;
// Display blanking on.
277 gpu.status |= PSX_GPU_STATUS_BLANKING;
278 gpu.state.dims_changed = 1; // for hud clearing
// Display blanking off.
281 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// Replace the DMA mode field of the status word with the low 2 bits of data.
284 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
285 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// Display source position: x in bits 0..9, y in bits 10..18.
288 gpu.screen.src_x = data & 0x3ff;
289 gpu.screen.src_y = (data >> 10) & 0x1ff;
290 if (gpu.frameskip.set) {
// The displayed area moved, so re-evaluate whether skipping is allowed for
// the current ex_regs[3] value.
291 decide_frameskip_allow(gpu.ex_regs[3]);
// Act at most once per frame on a display-position change.
292 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
294 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range: x1 in bits 0..11, x2 in bits 12..23.
299 gpu.screen.x1 = data & 0xfff;
300 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range: y1 in bits 0..9, y2 in bits 10..19.
304 gpu.screen.y1 = data & 0x3ff;
305 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: data bits 0..5 go to status bits 17..22 and data bit 6 goes
// to status bit 16.
309 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
312 renderer_notify_res_change();
// Commands 0x10..0x1f (the action taken is on a line not visible here).
315 if ((cmd & 0xf0) == 0x10)
// Optional build-time hook that also receives every control write.
320 #ifdef GPUwriteStatus_ext
321 GPUwriteStatus_ext(data);
// Per-command word counts, indexed by the command byte (top 8 bits of the
// first command word). do_cmd_list_skip() computes a command's total length
// as 1 + cmd_lengths[cmd], so each entry is the number of words that follow
// the command word itself.
// Each row covers 16 commands; the trailing "// 20", "// 40", ... comments
// give the hex command byte of the first entry in that row.
325 const unsigned char cmd_lengths[256] =
327 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
330 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
331 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
332 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
333 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
334 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
335 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
339 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the VRAM element at column x of row y; rows are 1024 elements
// apart.
345 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copies l 16-bit pixels (l * 2 bytes) between mem and the VRAM row
// starting at (x, y).
// NOTE(review): the if/else choosing between the two copies is not visible;
// presumably is_read selects the VRAM -> mem direction.
347 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
349 uint16_t *vram = VRAM_MEM_XY(x, y);
// VRAM -> caller buffer.
351 memcpy(mem, vram, l * 2);
// Caller buffer -> VRAM.
353 memcpy(vram, mem, l * 2);
// Moves up to count 32-bit words between data and the rectangle described
// by gpu.dma, continuing from where a previous call stopped.
// Returns the number of 32-bit words consumed.
// NOTE(review): the bookkeeping between the visible statements (clamping,
// offset updates, writing the state back to gpu.dma) is missing from this
// excerpt.
356 static int do_vram_io(uint32_t *data, int count, int is_read)
358 int count_initial = count;
// The word buffer is walked as 16-bit pixels.
359 uint16_t *sdata = (uint16_t *)data;
360 int x = gpu.dma.x, y = gpu.dma.y;
361 int w = gpu.dma.w, h = gpu.dma.h;
// Pixels already transferred on the current row.
362 int o = gpu.dma.offset;
364 count *= 2; // operate in 16bpp pixels
// Finish a row that was left partially transferred.
366 if (gpu.dma.offset) {
// Pixels still missing on that row.
367 l = w - gpu.dma.offset;
371 do_vram_line(x + o, y, sdata, l, is_read);
// Transfer whole rows while enough pixels remain.
384 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
386 do_vram_line(x, y, sdata, w, is_read);
// Transfer a leading part of a row with whatever pixels are left.
392 do_vram_line(x, y, sdata, count, is_read);
398 finish_vram_transfer(is_read);
// count is in pixels at this point, so count / 2 is the number of unused
// words.
403 return count_initial - count / 2;
// Sets up gpu.dma for a VRAM transfer.
// pos_word:  x in bits 0..9, y in bits 16..24.
// size_word: width in the low half, height in the high half.
// is_read:   non-zero for a VRAM -> CPU transfer.
// NOTE(review): the conditions guarding several statements (e.g. around the
// anomaly log and the read-specific setup) are missing from this excerpt.
406 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
409 log_anomaly("start_vram_transfer while old unfinished\n");
411 gpu.dma.x = pos_word & 0x3ff;
412 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// "(v - 1) & mask) + 1" maps a size of 0 to the maximum: width ends up in
// 1..1024 and height in 1..512.
413 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
414 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
416 gpu.dma.is_read = is_read;
// Keep a copy of the initial parameters; gpu.dma itself is consumed as the
// transfer progresses.
417 gpu.dma_start = gpu.dma;
419 renderer_flush_queues();
421 gpu.status |= PSX_GPU_STATUS_IMG;
422 // XXX: wrong for width 1
// Preload the read latch with the first 32 bits (two pixels) at the start
// position.
423 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
// Remembered so GPUvBlank() can tell how recently VRAM was read.
424 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
427 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
428 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Completes a VRAM transfer: clears the IMG status bit and tells the
// renderer which rectangle was touched, using the parameters saved when the
// transfer started.
// NOTE(review): the conditions selecting between these statements (likely
// based on is_read) are missing from this excerpt.
431 static void finish_vram_transfer(int is_read)
434 gpu.status &= ~PSX_GPU_STATUS_IMG;
436 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
437 gpu.dma_start.w, gpu.dma_start.h);
// Walks a command list while frame skipping is active: drawing commands are
// stepped over, while state that later frames depend on is still tracked.
// data/count: command words and their number; last_cmd receives a command
// byte (the assignment is not visible here).
// NOTE(review): the switch, its case labels, the declaration of skip and
// the return statement are missing from this excerpt.
440 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
442 int cmd = 0, pos = 0, len, dummy, v;
// Forget any fill deferred by an earlier call.
445 gpu.frameskip.pending_fill[0] = 0;
447 while (pos < count && skip) {
448 uint32_t *list = data + pos;
// Command byte is the top 8 bits of the first word.
449 cmd = LE32TOH(list[0]) >> 24;
// Total length: the command word plus its table-defined argument words.
450 len = 1 + cmd_lengths[cmd];
// Fill size lives in list[2]: width in bits 0..9, height in bits 16..24.
// NOTE(review): presumably this is the 0x02 fill command (3 words total).
454 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
455 // clearing something large, don't skip
456 do_cmd_list(list, 3, &dummy);
// Otherwise keep the 3-word fill so decide_frameskip() can replay it later.
458 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Refresh the low 9 bits of ex_regs[1] from the command's own data; bit 4
// of cmd selects word 4 or word 5.
464 gpu.ex_regs[1] &= ~0x1ff;
465 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// Variable-length commands: scan for the terminator word, i.e. one whose
// 0xf000f000 bits equal 0x50005000. This variant checks every word from
// index 3.
468 for (v = 3; pos + v < count; v++)
470 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// Same scan, checking every second word from index 4.
476 for (v = 4; pos + v < count; v += 2)
478 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// Re-evaluate whether skipping may continue; a zero result ends the loop.
485 skip = decide_frameskip_allow(LE32TOH(list[0]));
// Commands 0xe0..0xe7 are mirrored into ex_regs even while skipping.
486 if ((cmd & 0xf8) == 0xe0)
487 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
491 if (pos + len > count) {
493 break; // incomplete cmd
// Commands 0xa0..0xdf are the ones do_cmd_buffer() treats as VRAM
// transfers (the action taken is on a line not visible here).
495 if (0xa0 <= cmd && cmd <= 0xdf)
501 renderer_sync_ecmds(gpu.ex_regs);
// Processes up to count command words from data, interleaving VRAM transfer
// payload with regular command lists, then refreshes the status word.
// NOTE(review): the return statement is missing from this excerpt; callers
// in this file treat the result as the number of words left unprocessed.
506 static noinline int do_cmd_buffer(uint32_t *data, int count)
// Saved so a change made by the processed commands can be detected at the
// end.
509 uint32_t old_e3 = gpu.ex_regs[3];
513 for (pos = 0; pos < count; )
// A CPU -> VRAM transfer is in progress: the incoming words are pixel data.
515 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
517 pos += do_vram_io(data + pos, count - pos, 0);
522 cmd = LE32TOH(data[pos]) >> 24;
// Commands 0xa0..0xdf start a VRAM transfer and need three words in total:
// the command, the position word and the size word.
523 if (0xa0 <= cmd && cmd <= 0xdf) {
524 if (unlikely((pos+2) >= count)) {
525 // incomplete vram write/read cmd, can't consume yet
530 // consume vram write/read cmd
// Commands 0xc0..0xdf are passed as reads, 0xa0..0xbf as writes.
531 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
536 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
537 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
538 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
540 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Rebuild the low 13 status bits: bits 0..10 come from ex_regs[1] and bits
// 11..12 from the low 2 bits of ex_regs[6].
549 gpu.status &= ~0x1fff;
550 gpu.status |= gpu.ex_regs[1] & 0x7ff;
551 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
553 gpu.state.fb_dirty |= vram_dirty;
// ex_regs[3] changed, so the frameskip permission must be re-evaluated.
555 if (old_e3 != gpu.ex_regs[3])
556 decide_frameskip_allow(gpu.ex_regs[3]);
// Runs the buffered command words and keeps whatever could not be
// processed.
// NOTE(review): the guard around the memmove and the update of gpu.cmd_len
// are missing from this excerpt.
561 static void flush_cmd_buffer(void)
563 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// Move the unprocessed tail (left words) to the front of the buffer;
// memmove is used because source and destination can overlap.
565 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Feeds a block of count command/data words at mem to the GPU.
// Words that do_cmd_buffer() leaves unprocessed are only reported through
// log_anomaly.
// NOTE(review): the statement run when commands are already buffered and
// the guard around the anomaly log are missing from this excerpt.
569 void GPUwriteDataMem(uint32_t *mem, int count)
573 log_io("gpu_dma_write %p %d\n", mem, count);
// Words queued earlier through GPUwriteData() are pending.
575 if (unlikely(gpu.cmd_len > 0))
578 left = do_cmd_buffer(mem, count);
580 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Queues a single word written to the GPU data port.
583 void GPUwriteData(uint32_t data)
585 log_io("gpu_write %08x\n", data);
// Stored through HTOLE32 because the command processing code reads buffer
// words through LE32TOH.
586 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
// Buffer is full (the action taken is on a line not visible here;
// presumably a flush).
587 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walks a linked list of command packets in emulated RAM, starting at
// start_addr, and feeds each packet's payload to do_cmd_buffer().
// The first word of every node holds the payload length in words in its top
// 8 bits and the address of the next node in its low 24 bits; the walk ends
// when bit 23 of the next address is set.
// rambase:       base of emulated RAM, addressed as 32-bit words.
// progress_addr: receives the current node address (the guarding condition
//                is not visible here).
// NOTE(review): several statements, including the declaration of
// cpu_cycles and the return statement, are missing from this excerpt.
591 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
593 uint32_t addr, *list, ld_addr = 0;
594 int len, left, count;
// Addresses are masked to 0x1fffff and divided by 4 to index rambase.
597 preload(rambase + (start_addr & 0x1fffff) / 4);
// Words queued earlier through GPUwriteData() are pending.
599 if (unlikely(gpu.cmd_len > 0))
602 log_io("gpu_dma_chain\n");
603 addr = start_addr & 0xffffff;
604 for (count = 0; (addr & 0x800000) == 0; count++)
606 list = rambase + (addr & 0x1fffff) / 4;
607 len = LE32TOH(list[0]) >> 24;
608 addr = LE32TOH(list[0]) & 0xffffff;
// Prefetch the next node while this one is processed.
609 preload(rambase + (addr & 0x1fffff) / 4);
// Cycle estimate: a fixed 5 per node plus one per payload word.
613 cpu_cycles += 5 + len;
615 log_io(".chain %08lx #%d+%d\n",
616 (long)(list - rambase) * 4, len, gpu.cmd_len);
// An earlier packet left words behind: append this payload to them instead
// of processing it directly.
617 if (unlikely(gpu.cmd_len > 0)) {
618 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
625 left = do_cmd_buffer(list + 1, len);
// Keep the unprocessed tail of this payload in the command buffer.
627 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
629 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
634 *progress_addr = addr;
// Loop detection: once this many nodes have been walked, every further
// node is marked by setting bit 23 of its header, so reaching a marked node
// again satisfies the loop's exit condition.
637 #define LD_THRESHOLD (8*1024)
638 if (count >= LD_THRESHOLD) {
// First node past the threshold (the statement executed here is not
// visible; presumably it records ld_addr).
639 if (count == LD_THRESHOLD) {
644 // loop detection marker
645 // (bit23 set causes DMA error on real machine, so
646 // unlikely to be ever set by the game)
647 list[0] |= HTOLE32(0x800000);
652 // remove loop detection markers
// Re-walk the marked part of the list from ld_addr and clear bit 23 again.
653 count -= LD_THRESHOLD + 2;
654 addr = ld_addr & 0x1fffff;
655 while (count-- > 0) {
656 list = rambase + addr / 4;
// Read the next address before clearing the marker in this node.
657 addr = LE32TOH(list[0]) & 0x1fffff;
658 list[0] &= HTOLE32(~0x800000);
// Record when and where this list ran, together with its cycle estimate.
662 gpu.state.last_list.frame = *gpu.state.frame_count;
663 gpu.state.last_list.hcnt = *gpu.state.hcnt;
664 gpu.state.last_list.cycles = cpu_cycles;
665 gpu.state.last_list.addr = start_addr;
// Reads count words of VRAM transfer data into mem.
// NOTE(review): the statement run when commands are pending and any guard
// around the transfer are missing from this excerpt.
670 void GPUreadDataMem(uint32_t *mem, int count)
672 log_io("gpu_dma_read %p %d\n", mem, count);
// Buffered commands are pending.
674 if (unlikely(gpu.cmd_len > 0))
// Last argument 1 selects the read direction.
678 do_vram_io(mem, count, 1);
// Reads one word from the GPU data port.
// NOTE(review): the declaration and initial value of ret, the guard around
// the VRAM read and the return statement are missing from this excerpt.
681 uint32_t GPUreadData(void)
// Buffered commands are pending.
685 if (unlikely(gpu.cmd_len > 0))
// Fetch a single word of transfer data in the read direction.
691 do_vram_io(&ret, 1, 1);
695 log_io("gpu_read %08x\n", ret);
// Reads the GPU status.
// NOTE(review): the computation of ret and the return statement are missing
// from this excerpt.
699 uint32_t GPUreadStatus(void)
// Buffered commands are pending.
703 if (unlikely(gpu.cmd_len > 0))
707 log_io("gpu_read_status %08x\n", ret);
// Save-state layout exchanged with the main emulator.
// NOTE(review): the struct header is missing from this excerpt; presumably
// these are the members of struct GPUFreeze, which GPUfreeze() accesses.
713 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
714 uint32_t ulStatus; // current gpu status
715 uint32_t ulControl[256]; // latest control register values
716 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Saves the GPU state into *freeze or restores it from there, depending on
// type.
// NOTE(review): the switch on type, its case labels and the return
// statement are missing from this excerpt.
719 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save: copy 1024 * 512 * 2 bytes of VRAM, the control registers, the
// ex_regs (stored from index 0xe0 of ulControl) and the status word.
727 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
728 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
729 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
730 freeze->ulStatus = gpu.status;
// Load: the same fields, copied in the opposite direction.
733 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
734 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
735 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
736 gpu.status = freeze->ulStatus;
// Replay control registers 8 down to 1 through GPUwriteStatus() so derived
// state is rebuilt.
738 for (i = 8; i > 0; i--) {
// Flip bit 0 of the stored value so GPUwriteStatus() does not see the write
// as a repeat of the same value; the call below passes the original value.
739 gpu.regs[i] ^= 1; // avoid reg change detection
740 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
742 renderer_sync_ecmds(gpu.ex_regs);
// Tell the renderer the whole 1024x512 area changed.
743 renderer_update_caches(0, 0, 1024, 512);
// Per-frame update: flushes renderer queues, tracks the blanked state and
// applies frameskip gating before clearing the dirty and blanked flags.
// NOTE(review): several statements between the visible lines (early
// returns and the calls made in each branch) are missing from this excerpt.
750 void GPUupdateLace(void)
754 renderer_flush_queues();
756 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
// Runs once on the transition into blanking.
757 if (!gpu.state.blanked) {
759 gpu.state.blanked = 1;
760 gpu.state.fb_dirty = 1;
// Nothing changed since the last update.
765 if (!gpu.state.fb_dirty)
768 if (gpu.frameskip.set) {
769 if (!gpu.frameskip.frame_ready) {
// Fewer than 9 frames since the last display-position change (the action
// taken is on a line not visible here).
770 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
772 gpu.frameskip.active = 0;
774 gpu.frameskip.frame_ready = 0;
778 gpu.state.fb_dirty = 0;
779 gpu.state.blanked = 0;
// Vertical-blank notification: decides whether interlaced rendering is in
// effect and passes the state to the renderer.
// NOTE(review): the statement under the "auto" check and any use of
// is_vblank are missing from this excerpt.
782 void GPUvBlank(int is_vblank, int lcf)
// Interlace needs the configuration to allow it and both the INTERLACE and
// DHEIGHT status bits to be set.
784 int interlace = gpu.state.allow_interlace
785 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
786 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
787 // interlace doesn't look nice on progressive displays,
788 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 is the "auto" mode; the check passes when more than
// one frame has gone by since the last recorded VRAM read.
789 if (gpu.state.allow_interlace == 2
790 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Act on every call while interlacing, and once when the state toggles.
794 if (interlace || interlace != gpu.state.old_interlace) {
795 gpu.state.old_interlace = interlace;
799 renderer_flush_queues();
// The second argument is the logical inverse of lcf.
800 renderer_set_interlace(interlace, !lcf);
804 #include "../../frontend/plugin_lib.h"
// Installs frontend-provided settings and callbacks from cbs into the GPU
// state and forwards the configuration to the renderer and video output.
// NOTE(review): the statements executed after a centering change and when
// gpu.vram is NULL are missing from this excerpt.
806 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
808 gpu.frameskip.set = cbs->frameskip;
// Stored as a pointer, so later changes to the advice flag are seen when it
// is dereferenced in decide_frameskip().
809 gpu.frameskip.advice = &cbs->fskip_advice;
810 gpu.frameskip.active = 0;
811 gpu.frameskip.frame_ready = 1;
// Replace the counter pointers with the frontend's.
812 gpu.state.hcnt = cbs->gpu_hcnt;
813 gpu.state.frame_count = cbs->gpu_frame_count;
814 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
815 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
// Copy the centering settings only when at least one of them changed.
816 if (gpu.state.screen_centering_type != cbs->screen_centering_type
817 || gpu.state.screen_centering_x != cbs->screen_centering_x
818 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
819 gpu.state.screen_centering_type = cbs->screen_centering_type;
820 gpu.state.screen_centering_x = cbs->screen_centering_x;
821 gpu.state.screen_centering_y = cbs->screen_centering_y;
// Memory mapping callbacks used by map_vram() and GPUshutdown().
826 gpu.mmap = cbs->mmap;
827 gpu.munmap = cbs->munmap;
// VRAM is not mapped yet (the action taken is on a line not visible here;
// presumably map_vram()).
830 if (gpu.vram == NULL)
// Hand the VRAM pointer to the frontend if it provides this optional
// callback.
833 if (cbs->pl_vout_set_raw_vram)
834 cbs->pl_vout_set_raw_vram(gpu.vram);
835 renderer_set_config(cbs);
836 vout_set_config(cbs);
839 // vim:shiftwidth=2:expandtab