2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a real array (do not use on pointers)
16 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch-prediction hint: condition expected to be false
18 #define unlikely(x) __builtin_expect((x), 0)
19 #define preload __builtin_prefetch
20 #define noinline __attribute__((noinline))
// uncomment to route I/O tracing through gpu_log
27 //#define log_io gpu_log
// forward declarations for the command/VRAM helpers defined below
32 static noinline int do_cmd_buffer(uint32_t *data, int count);
33 static void finish_vram_transfer(int is_read);
// Flush any staged GP0 command words and wind down an in-flight
// VRAM transfer before GPU command state is reset.
35 static noinline void do_cmd_reset(void)
37 if (unlikely(gpu.cmd_len > 0))
38 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
41 if (unlikely(gpu.dma.h > 0))
42 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear control registers, re-seed the 0xE0..0xE7
// "environment" command mirror, restore the power-on status word
// and the default screen mode, then notify the renderer.
46 static noinline void do_reset(void)
52 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs mirror the last 0xEx commands; seed each with its opcode
53 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
54 gpu.ex_regs[i] = (0xe0 + i) << 24;
// power-on status value
55 gpu.status = 0x14802000;
56 // default display mode: 256x240
58 gpu.screen.hres = gpu.screen.w = 256;
59 gpu.screen.vres = gpu.screen.h = 240;
60 gpu.screen.x = gpu.screen.y = 0;
61 renderer_sync_ecmds(gpu.ex_regs);
62 renderer_notify_res_change();
// Recompute horizontal display resolution and x offset from the
// status register's hres bits and the display range (x1/x2),
// applying the configured screen-centering policy.
65 static noinline void update_width(void)
// hres table and matching horizontal clock dividers, both indexed
// by status bits 16-18
67 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
68 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
69 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
70 int hres = hres_all[(gpu.status >> 16) & 7];
71 int pal = gpu.status & PSX_GPU_STATUS_PAL;
72 int sw = gpu.screen.x2 - gpu.screen.x1;
75 /* nothing displayed? */;
// s: nominal leftmost visible GPU clock (PAL value is a guess)
77 int s = pal ? 656 : 608; // or 600? pal is just a guess
78 x = (gpu.screen.x1 - s) / hdiv;
79 x = (x + 1) & ~1; // blitter limitation
81 sw = (sw + 2) & ~3; // according to nocash
82 switch (gpu.state.screen_centering_type) {
// manual centering: take the user-supplied x
86 x = gpu.state.screen_centering_x;
89 // correct if slightly miscentered
90 x_auto = (hres - sw) / 2 & ~3;
91 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
96 // .x range check is done in vout_update()
98 // reduce the unpleasant right border that a few games have
99 if (gpu.state.screen_centering_type == 0
100 && x <= 4 && hres - (x + sw) >= 4)
104 gpu.screen.hres = hres;
105 gpu.state.dims_changed = 1;
106 //printf("xx %d %d -> %2d, %d / %d\n",
107 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
// Recompute vertical display resolution and y offset from the
// display range (y1/y2), PAL/NTSC mode and the double-height
// (interlace) bit, applying the centering policy.
110 static noinline void update_height(void)
112 int pal = gpu.status & PSX_GPU_STATUS_PAL;
113 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
// vertical offset of the first visible line differs for PAL/NTSC
114 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
115 int sh = gpu.screen.y2 - gpu.screen.y1;
// PAL with a tall display range is treated as 256-line mode
119 if (pal && (sh > 240 || gpu.screen.vres == 256))
// double-height (interlaced) mode: scale everything by 2
122 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
124 /* nothing displayed? */;
126 switch (gpu.state.screen_centering_type) {
// manual centering: take the user-supplied y
130 y = gpu.state.screen_centering_y;
133 // correct if slightly miscentered
134 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
142 gpu.screen.vres = vres;
143 gpu.state.dims_changed = 1;
144 //printf("yy %d %d -> %d, %d / %d\n",
145 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
// Per-flip frameskip decision: enable/disable skipping based on the
// frontend's dynamic advice or the fixed skip level, and replay a
// fill command that was deferred while skipping was active.
148 static noinline void decide_frameskip(void)
150 if (gpu.frameskip.active)
153 gpu.frameskip.cnt = 0;
154 gpu.frameskip.frame_ready = 1;
// dynamic mode: follow the frontend's advice flag
157 if (!gpu.frameskip.active && *gpu.frameskip.advice)
158 gpu.frameskip.active = 1;
// fixed mode: keep skipping until .set frames were skipped in a row
159 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
160 gpu.frameskip.active = 1;
162 gpu.frameskip.active = 0;
// apply a fill deferred by do_cmd_list_skip now that we draw again
164 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
166 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
167 gpu.frameskip.pending_fill[0] = 0;
// Decide whether skipping is allowed given the current e3 (draw area
// top-left) command: disallow when drawing targets the displayed
// region, except in interlace mode. Returns the new .allow value.
171 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
173 // no frameskip if it decides to draw to display area,
174 // but not for interlace since it'll most likely always do that
175 uint32_t x = cmd_e3 & 0x3ff;
176 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned compare doubles as a "left of / above scanout" check
177 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
178 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
179 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
180 return gpu.frameskip.allow;
183 static void flush_cmd_buffer(void);
// GP1 0x10 "get GPU info": latch the requested internal register
// value into gpu.gp0 so a following GPUREAD returns it.
185 static noinline void get_gpu_info(uint32_t data)
187 if (unlikely(gpu.cmd_len > 0))
189 switch (data & 0x0f) {
// draw area / offset values mirrored from the 0xEx commands
193 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
196 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
207 // double, for overdraw guard
208 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Map the VRAM buffer via the frontend-provided mmap hook.
// On success the pointer is advanced by 4096 bytes (2048 pixels) —
// presumably a guard area; GPUshutdown undoes this offset.
210 static int map_vram(void)
212 gpu.vram = gpu.mmap(VRAM_SIZE);
213 if (gpu.vram != NULL) {
214 gpu.vram += 4096 / 2;
// mapping failure is non-fatal but leaves gpu.vram NULL
218 fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): fragment of the plugin init path (function header not
// visible here). Resets state and points the counter pointers at a
// local zero until GPUrearmedCallbacks supplies real counters.
227 ret |= renderer_init();
229 memset(&gpu.state, 0, sizeof(gpu.state));
230 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
// default counters; replaced by the frontend later
232 gpu.state.frame_count = &gpu.zero;
233 gpu.state.hcnt = &gpu.zero;
// map VRAM now only if an mmap hook was already provided
237 if (gpu.mmap != NULL) {
// Plugin shutdown: unmap VRAM, first undoing the 4096-byte guard
// offset applied in map_vram.
244 long GPUshutdown(void)
250 if (gpu.vram != NULL) {
251 gpu.vram -= 4096 / 2;
252 gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 (control port) write: dispatch on the command in the top byte.
259 void GPUwriteStatus(uint32_t data)
261 uint32_t cmd = data >> 24;
263 if (cmd < ARRAY_SIZE(gpu.regs)) {
// skip redundant writes (cmds 0, 1 and 5 always take effect)
264 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
266 gpu.regs[cmd] = data;
269 gpu.state.fb_dirty = 1;
// display blanking on
280 gpu.status |= PSX_GPU_STATUS_BLANKING;
281 gpu.state.dims_changed = 1; // for hud clearing
// display blanking off
284 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA direction/mode bits
287 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
288 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// display start address (scanout origin in VRAM)
291 gpu.screen.src_x = data & 0x3ff;
292 gpu.screen.src_y = (data >> 10) & 0x1ff;
293 renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
294 if (gpu.frameskip.set) {
295 decide_frameskip_allow(gpu.ex_regs[3]);
// a scanout change usually means a frame flip - note it for frameskip
296 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
298 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
303 gpu.screen.x1 = data & 0xfff;
304 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
308 gpu.screen.y1 = data & 0x3ff;
309 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: repack the mode bits into the status word
313 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
316 renderer_notify_res_change();
// 0x10-0x1f: "get GPU info" requests
319 if ((cmd & 0xf0) == 0x10)
324 #ifdef GPUwriteStatus_ext
325 GPUwriteStatus_ext(data);
// Number of extra parameter words for each GP0 opcode (the command
// word itself not counted). Variable-length commands (polylines,
// VRAM transfers) are special-cased by the parsers.
329 const unsigned char cmd_lengths[256] =
331 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
334 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
335 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
336 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
337 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
338 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
339 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
341 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
343 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// address of pixel (x, y) in the 1024-pixel-wide VRAM
349 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal span of l 16bpp pixels between VRAM and mem;
// is_read selects the direction (read = VRAM -> mem).
351 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
353 uint16_t *vram = VRAM_MEM_XY(x, y);
355 memcpy(mem, vram, l * 2);
357 memcpy(vram, mem, l * 2);
// Move up to count 32-bit words between the data buffer and the
// active VRAM transfer rectangle (gpu.dma), resuming a partially
// transferred line if needed and remembering progress across calls.
// Returns the number of 32-bit words consumed.
360 static int do_vram_io(uint32_t *data, int count, int is_read)
362 int count_initial = count;
363 uint16_t *sdata = (uint16_t *)data;
364 int x = gpu.dma.x, y = gpu.dma.y;
365 int w = gpu.dma.w, h = gpu.dma.h;
366 int o = gpu.dma.offset;
368 count *= 2; // operate in 16bpp pixels
// first finish a line left incomplete by the previous call
370 if (gpu.dma.offset) {
371 l = w - gpu.dma.offset;
375 do_vram_line(x + o, y, sdata, l, is_read);
// whole lines
388 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
390 do_vram_line(x, y, sdata, w, is_read);
// trailing partial line; its offset is kept for the next call
396 do_vram_line(x, y, sdata, count, is_read);
// rectangle fully transferred
402 finish_vram_transfer(is_read);
407 return count_initial - count / 2;
// Begin a VRAM read or write transfer from the GP0 0xA0/0xC0
// parameters: position word (x, y) and size word (w, h).
410 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
413 log_anomaly("start_vram_transfer while old unfinished\n");
415 gpu.dma.x = pos_word & 0x3ff;
416 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// size 0 encodes the maximum (0x400 / 0x200)
417 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
418 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
420 gpu.dma.is_read = is_read;
421 gpu.dma_start = gpu.dma;
423 renderer_flush_queues();
// reads: raise the "ready to send image" bit and prefetch the first
// word so GPUREAD can return it immediately
425 gpu.status |= PSX_GPU_STATUS_IMG;
426 // XXX: wrong for width 1
427 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
428 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
431 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
432 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// End the current VRAM transfer; for writes, invalidate renderer
// caches covering the rectangle that was written.
435 static void finish_vram_transfer(int is_read)
438 gpu.status &= ~PSX_GPU_STATUS_IMG;
440 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
441 gpu.dma_start.w, gpu.dma_start.h, 0);
// Parse a GP0 command list while the frame is being skipped: draw
// nothing, but keep state the next frame depends on up to date
// (fills, texture page bits, 0xEx commands) and stop skipping once a
// command affects the displayed area. Returns words consumed; the
// last command seen is reported via *last_cmd.
444 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
446 int cmd = 0, pos = 0, len, dummy, v;
449 gpu.frameskip.pending_fill[0] = 0;
451 while (pos < count && skip) {
452 uint32_t *list = data + pos;
453 cmd = LE32TOH(list[0]) >> 24;
454 len = 1 + cmd_lengths[cmd];
// fill rectangle: large fills are executed even while skipping
458 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
459 // clearing something large, don't skip
460 do_cmd_list(list, 3, &dummy);
// small fill: defer until skipping ends (see decide_frameskip)
462 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// textured primitive: keep the texpage bits in ex_regs[1] current
468 gpu.ex_regs[1] &= ~0x1ff;
469 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// polyline: scan forward for the 0x5xxx5xxx terminator word
472 for (v = 3; pos + v < count; v++)
474 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// shaded polyline: terminator can only appear every second word
480 for (v = 4; pos + v < count; v += 2)
482 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// draw-area change: re-evaluate whether skipping is still allowed
489 skip = decide_frameskip_allow(LE32TOH(list[0]));
490 if ((cmd & 0xf8) == 0xe0)
491 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
495 if (pos + len > count) {
497 break; // incomplete cmd
// VRAM transfer commands can't be skipped; hand back to the caller
499 if (0xa0 <= cmd && cmd <= 0xdf)
505 renderer_sync_ecmds(gpu.ex_regs);
// Main GP0 word consumer: routes data into an active VRAM transfer,
// starts new transfers, and passes drawing command lists to the
// renderer (or to the skip parser while frameskip is active).
// Returns the number of trailing words left unconsumed because the
// last command was incomplete.
510 static noinline int do_cmd_buffer(uint32_t *data, int count)
513 uint32_t old_e3 = gpu.ex_regs[3];
517 for (pos = 0; pos < count; )
// active VRAM write in progress: feed it first
519 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
521 pos += do_vram_io(data + pos, count - pos, 0);
526 cmd = LE32TOH(data[pos]) >> 24;
// 0xa0-0xdf: VRAM write/read commands
527 if (0xa0 <= cmd && cmd <= 0xdf) {
528 if (unlikely((pos+2) >= count)) {
529 // incomplete vram write/read cmd, can't consume yet
534 // consume vram write/read cmd
535 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
540 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
541 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
542 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
544 pos += do_cmd_list(data + pos, count - pos, &cmd);
// mirror the renderer-visible ex_regs state into the status word
553 gpu.status &= ~0x1fff;
554 gpu.status |= gpu.ex_regs[1] & 0x7ff;
555 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
557 gpu.state.fb_dirty |= vram_dirty;
// a draw-area change may alter the frameskip decision
559 if (old_e3 != gpu.ex_regs[3])
560 decide_frameskip_allow(gpu.ex_regs[3]);
// Drain the staging command buffer; any incomplete trailing command
// is moved to the front to be completed by future writes.
565 static void flush_cmd_buffer(void)
567 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
569 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block): flush staged words first, then
// process the caller's buffer directly; leftovers are discarded.
573 void GPUwriteDataMem(uint32_t *mem, int count)
577 log_io("gpu_dma_write %p %d\n", mem, count);
579 if (unlikely(gpu.cmd_len > 0))
582 left = do_cmd_buffer(mem, count);
584 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: stage the word (in little-endian form) and
// flush once the staging buffer is full.
587 void GPUwriteData(uint32_t data)
589 log_io("gpu_write %08x\n", data);
590 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
591 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk and execute a GP0 DMA linked list starting at start_addr.
// Very long chains get bit23 "visited" markers written into the list
// headers (and removed afterwards) to detect self-linked loops.
595 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
597 uint32_t addr, *list, ld_addr = 0;
598 int len, left, count;
601 preload(rambase + (start_addr & 0x1fffff) / 4);
603 if (unlikely(gpu.cmd_len > 0))
606 log_io("gpu_dma_chain\n");
// bit23 set terminates the walk (also used as the loop marker below)
607 addr = start_addr & 0xffffff;
608 for (count = 0; (addr & 0x800000) == 0; count++)
610 list = rambase + (addr & 0x1fffff) / 4;
// header word: payload length in the top byte, next address below it
611 len = LE32TOH(list[0]) >> 24;
612 addr = LE32TOH(list[0]) & 0xffffff;
613 preload(rambase + (addr & 0x1fffff) / 4);
// rough cycle cost per node; exact weighting is a heuristic
617 cpu_cycles += 5 + len;
619 log_io(".chain %08lx #%d+%d\n",
620 (long)(list - rambase) * 4, len, gpu.cmd_len);
// leftover words staged earlier: append to the staging buffer so
// command order is preserved
621 if (unlikely(gpu.cmd_len > 0)) {
622 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
623 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
626 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
633 left = do_cmd_buffer(list + 1, len);
// stash the unconsumed tail for later completion
635 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
637 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
642 *progress_addr = addr;
645 #define LD_THRESHOLD (8*1024)
646 if (count >= LD_THRESHOLD) {
647 if (count == LD_THRESHOLD) {
652 // loop detection marker
653 // (bit23 set causes DMA error on real machine, so
654 // unlikely to be ever set by the game)
655 list[0] |= HTOLE32(0x800000);
660 // remove loop detection markers
661 count -= LD_THRESHOLD + 2;
662 addr = ld_addr & 0x1fffff;
663 while (count-- > 0) {
664 list = rambase + addr / 4;
665 addr = LE32TOH(list[0]) & 0x1fffff;
666 list[0] &= HTOLE32(~0x800000);
// record when/where the last list was processed (read elsewhere,
// e.g. for busy-state emulation - confirm against callers)
670 gpu.state.last_list.frame = *gpu.state.frame_count;
671 gpu.state.last_list.hcnt = *gpu.state.hcnt;
672 gpu.state.last_list.cycles = cpu_cycles;
673 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA): flush pending commands, then service an
// active VRAM -> CPU transfer into the caller's buffer.
678 void GPUreadDataMem(uint32_t *mem, int count)
680 log_io("gpu_dma_read %p %d\n", mem, count);
682 if (unlikely(gpu.cmd_len > 0))
686 do_vram_io(mem, count, 1);
// Single-word GPUREAD: flushes pending commands, then pulls one word
// from an active VRAM read transfer (if any) and returns it.
689 uint32_t GPUreadData(void)
693 if (unlikely(gpu.cmd_len > 0))
699 do_vram_io(&ret, 1, 1);
703 log_io("gpu_read %08x\n", ret);
// Read the GPU status register, flushing staged commands first so
// the status bits reflect all writes made so far.
707 uint32_t GPUreadStatus(void)
711 if (unlikely(gpu.cmd_len > 0))
715 log_io("gpu_read_status %08x\n", ret);
// (save-state record layout fragment; enclosing struct declaration
// not visible here)
721 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
722 uint32_t ulStatus; // current gpu status
723 uint32_t ulControl[256]; // latest control register values
724 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save or restore GPU state for emulator save-states; type selects
// the direction (per the PSEmu Pro freeze interface).
727 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// save: dump VRAM, control regs and the ex_regs mirror (stored in
// the ulControl slots matching their 0xE0.. opcodes)
735 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2)
736 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
737 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
738 freeze->ulStatus = gpu.status;
// load: restore the same data, then replay the control writes so
// derived state (screen dims etc.) is rebuilt
741 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
742 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
743 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
744 gpu.status = freeze->ulStatus;
746 for (i = 8; i > 0; i--) {
747 gpu.regs[i] ^= 1; // avoid reg change detection
748 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
750 renderer_sync_ecmds(gpu.ex_regs);
751 renderer_update_caches(0, 0, 1024, 512, 1);
// Per-frame vout update: flush the renderer, handle blanking and
// frameskip bookkeeping, then let the frontend present the frame.
758 void GPUupdateLace(void)
762 renderer_flush_queues();
764 #ifndef RAW_FB_DISPLAY
765 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
766 if (!gpu.state.blanked) {
768 gpu.state.blanked = 1;
769 gpu.state.fb_dirty = 1;
// nothing new was drawn since the last flip - skip presenting
774 if (!gpu.state.fb_dirty)
778 if (gpu.frameskip.set) {
779 if (!gpu.frameskip.frame_ready) {
// keep skipping unless no flip happened for a while (stuck guard)
780 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
782 gpu.frameskip.active = 0;
784 gpu.frameskip.frame_ready = 0;
// enhancement just toggled on: renderer caches are stale, rebuild all
788 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
789 renderer_update_caches(0, 0, 1024, 512, 1);
790 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
791 gpu.state.fb_dirty = 0;
792 gpu.state.blanked = 0;
// vblank notification: decide whether interlaced rendering should be
// used and tell the renderer when the decision changes.
795 void GPUvBlank(int is_vblank, int lcf)
// interlace only when enabled AND the mode is interlaced double-height
797 int interlace = gpu.state.allow_interlace
798 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
799 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
800 // interlace doesn't look nice on progressive displays,
801 // so we have this "auto" mode here for games that don't read vram
802 if (gpu.state.allow_interlace == 2
803 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// renderer must be told on every interlaced frame (field changes)
// and whenever the interlace setting flips
807 if (interlace || interlace != gpu.state.old_interlace) {
808 gpu.state.old_interlace = interlace;
812 renderer_flush_queues();
813 renderer_set_interlace(interlace, !lcf);
817 #include "../../frontend/plugin_lib.h"
// Accept frontend callbacks and settings: frameskip configuration,
// counter pointers, interlace/enhancement options, screen centering
// and memory-mapping hooks; (re)maps VRAM on first call.
819 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
821 gpu.frameskip.set = cbs->frameskip;
822 gpu.frameskip.advice = &cbs->fskip_advice;
823 gpu.frameskip.active = 0;
824 gpu.frameskip.frame_ready = 1;
// replace the default &gpu.zero counters with the real ones
825 gpu.state.hcnt = cbs->gpu_hcnt;
826 gpu.state.frame_count = cbs->gpu_frame_count;
827 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
828 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
// centering changed: recompute derived screen dimensions
829 if (gpu.state.screen_centering_type != cbs->screen_centering_type
830 || gpu.state.screen_centering_x != cbs->screen_centering_x
831 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
832 gpu.state.screen_centering_type = cbs->screen_centering_type;
833 gpu.state.screen_centering_x = cbs->screen_centering_x;
834 gpu.state.screen_centering_y = cbs->screen_centering_y;
839 gpu.mmap = cbs->mmap;
840 gpu.munmap = cbs->munmap;
// map the VRAM buffer if it hasn't been mapped yet
843 if (gpu.vram == NULL)
846 if (cbs->pl_vout_set_raw_vram)
847 cbs->pl_vout_set_raw_vram(gpu.vram);
848 renderer_set_config(cbs);
849 vout_set_config(cbs);
852 // vim:shiftwidth=2:expandtab