2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
16 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
18 #define unlikely(x) __builtin_expect((x), 0)
19 #define preload __builtin_prefetch
20 #define noinline __attribute__((noinline))
27 //#define log_io gpu_log
32 static noinline int do_cmd_buffer(uint32_t *data, int count);
33 static void finish_vram_transfer(int is_read);
35 static noinline void do_cmd_reset(void)
37 if (unlikely(gpu.cmd_len > 0))
38 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
41 if (unlikely(gpu.dma.h > 0))
42 finish_vram_transfer(gpu.dma_start.is_read);
46 static noinline void do_reset(void)
52 memset(gpu.regs, 0, sizeof(gpu.regs));
53 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
54 gpu.ex_regs[i] = (0xe0 + i) << 24;
55 gpu.status = 0x14802000;
58 gpu.screen.hres = gpu.screen.w = 256;
59 gpu.screen.vres = gpu.screen.h = 240;
60 gpu.screen.x = gpu.screen.y = 0;
61 renderer_notify_res_change();
64 static noinline void update_width(void)
66 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
67 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
68 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
69 int hres = hres_all[(gpu.status >> 16) & 7];
70 int pal = gpu.status & PSX_GPU_STATUS_PAL;
71 int sw = gpu.screen.x2 - gpu.screen.x1;
74 /* nothing displayed? */;
76 int s = pal ? 656 : 608; // or 600? pal is just a guess
77 x = (gpu.screen.x1 - s) / hdiv;
78 x = (x + 1) & ~1; // blitter limitation
80 sw = (sw + 2) & ~3; // according to nocash
81 switch (gpu.state.screen_centering_type) {
85 x = gpu.state.screen_centering_x;
88 // correct if slightly miscentered
89 x_auto = (hres - sw) / 2 & ~3;
90 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
95 // .x range check is done in vout_update()
97 // reduce the unpleasant right border that a few games have
98 if (gpu.state.screen_centering_type == 0
99 && x <= 4 && hres - (x + sw) >= 4)
103 gpu.screen.hres = hres;
104 gpu.state.dims_changed = 1;
105 //printf("xx %d %d -> %2d, %d / %d\n",
106 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
109 static noinline void update_height(void)
111 int pal = gpu.status & PSX_GPU_STATUS_PAL;
112 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
113 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
114 int sh = gpu.screen.y2 - gpu.screen.y1;
118 if (pal && (sh > 240 || gpu.screen.vres == 256))
121 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
123 /* nothing displayed? */;
125 switch (gpu.state.screen_centering_type) {
129 y = gpu.state.screen_centering_y;
132 // correct if slightly miscentered
133 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
141 gpu.screen.vres = vres;
142 gpu.state.dims_changed = 1;
143 //printf("yy %d %d -> %d, %d / %d\n",
144 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
147 static noinline void decide_frameskip(void)
149 if (gpu.frameskip.active)
152 gpu.frameskip.cnt = 0;
153 gpu.frameskip.frame_ready = 1;
156 if (!gpu.frameskip.active && *gpu.frameskip.advice)
157 gpu.frameskip.active = 1;
158 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
159 gpu.frameskip.active = 1;
161 gpu.frameskip.active = 0;
163 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
165 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
166 gpu.frameskip.pending_fill[0] = 0;
170 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
172 // no frameskip if it decides to draw to display area,
173 // but not for interlace since it'll most likely always do that
174 uint32_t x = cmd_e3 & 0x3ff;
175 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
176 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
177 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
178 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
179 return gpu.frameskip.allow;
182 static noinline void get_gpu_info(uint32_t data)
184 switch (data & 0x0f) {
188 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
191 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
202 // double, for overdraw guard
203 #define VRAM_SIZE (1024 * 512 * 2 * 2)
205 static int map_vram(void)
207 gpu.vram = gpu.mmap(VRAM_SIZE);
208 if (gpu.vram != NULL) {
209 gpu.vram += 4096 / 2;
213 fprintf(stderr, "could not map vram, expect crashes\n");
222 ret |= renderer_init();
224 memset(&gpu.state, 0, sizeof(gpu.state));
225 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
227 gpu.state.frame_count = &gpu.zero;
228 gpu.state.hcnt = &gpu.zero;
232 if (gpu.mmap != NULL) {
239 long GPUshutdown(void)
245 if (gpu.vram != NULL) {
246 gpu.vram -= 4096 / 2;
247 gpu.munmap(gpu.vram, VRAM_SIZE);
254 void GPUwriteStatus(uint32_t data)
256 uint32_t cmd = data >> 24;
258 if (cmd < ARRAY_SIZE(gpu.regs)) {
259 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
261 gpu.regs[cmd] = data;
264 gpu.state.fb_dirty = 1;
275 gpu.status |= PSX_GPU_STATUS_BLANKING;
276 gpu.state.dims_changed = 1; // for hud clearing
279 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
282 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
283 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
286 gpu.screen.src_x = data & 0x3ff;
287 gpu.screen.src_y = (data >> 10) & 0x1ff;
288 renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
289 if (gpu.frameskip.set) {
290 decide_frameskip_allow(gpu.ex_regs[3]);
291 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
293 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
298 gpu.screen.x1 = data & 0xfff;
299 gpu.screen.x2 = (data >> 12) & 0xfff;
303 gpu.screen.y1 = data & 0x3ff;
304 gpu.screen.y2 = (data >> 10) & 0x3ff;
308 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
311 renderer_notify_res_change();
314 if ((cmd & 0xf0) == 0x10)
319 #ifdef GPUwriteStatus_ext
320 GPUwriteStatus_ext(data);
324 const unsigned char cmd_lengths[256] =
326 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
329 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
330 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
331 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
332 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
333 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
334 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
338 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
344 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
346 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
348 uint16_t *vram = VRAM_MEM_XY(x, y);
350 memcpy(mem, vram, l * 2);
352 memcpy(vram, mem, l * 2);
355 static int do_vram_io(uint32_t *data, int count, int is_read)
357 int count_initial = count;
358 uint16_t *sdata = (uint16_t *)data;
359 int x = gpu.dma.x, y = gpu.dma.y;
360 int w = gpu.dma.w, h = gpu.dma.h;
361 int o = gpu.dma.offset;
363 count *= 2; // operate in 16bpp pixels
365 if (gpu.dma.offset) {
366 l = w - gpu.dma.offset;
370 do_vram_line(x + o, y, sdata, l, is_read);
383 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
385 do_vram_line(x, y, sdata, w, is_read);
391 do_vram_line(x, y, sdata, count, is_read);
397 finish_vram_transfer(is_read);
402 return count_initial - count / 2;
405 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
408 log_anomaly("start_vram_transfer while old unfinished\n");
410 gpu.dma.x = pos_word & 0x3ff;
411 gpu.dma.y = (pos_word >> 16) & 0x1ff;
412 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
413 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
415 gpu.dma.is_read = is_read;
416 gpu.dma_start = gpu.dma;
418 renderer_flush_queues();
420 gpu.status |= PSX_GPU_STATUS_IMG;
421 // XXX: wrong for width 1
422 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
423 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
426 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
427 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
430 static void finish_vram_transfer(int is_read)
433 gpu.status &= ~PSX_GPU_STATUS_IMG;
435 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
436 gpu.dma_start.w, gpu.dma_start.h, 0);
439 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
441 int cmd = 0, pos = 0, len, dummy, v;
444 gpu.frameskip.pending_fill[0] = 0;
446 while (pos < count && skip) {
447 uint32_t *list = data + pos;
448 cmd = LE32TOH(list[0]) >> 24;
449 len = 1 + cmd_lengths[cmd];
453 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
454 // clearing something large, don't skip
455 do_cmd_list(list, 3, &dummy);
457 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
463 gpu.ex_regs[1] &= ~0x1ff;
464 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
467 for (v = 3; pos + v < count; v++)
469 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
475 for (v = 4; pos + v < count; v += 2)
477 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
484 skip = decide_frameskip_allow(LE32TOH(list[0]));
485 if ((cmd & 0xf8) == 0xe0)
486 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
490 if (pos + len > count) {
492 break; // incomplete cmd
494 if (0xa0 <= cmd && cmd <= 0xdf)
500 renderer_sync_ecmds(gpu.ex_regs);
505 static noinline int do_cmd_buffer(uint32_t *data, int count)
508 uint32_t old_e3 = gpu.ex_regs[3];
512 for (pos = 0; pos < count; )
514 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
516 pos += do_vram_io(data + pos, count - pos, 0);
521 cmd = LE32TOH(data[pos]) >> 24;
522 if (0xa0 <= cmd && cmd <= 0xdf) {
523 if (unlikely((pos+2) >= count)) {
524 // incomplete vram write/read cmd, can't consume yet
529 // consume vram write/read cmd
530 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
535 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
536 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
537 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
539 pos += do_cmd_list(data + pos, count - pos, &cmd);
548 gpu.status &= ~0x1fff;
549 gpu.status |= gpu.ex_regs[1] & 0x7ff;
550 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
552 gpu.state.fb_dirty |= vram_dirty;
554 if (old_e3 != gpu.ex_regs[3])
555 decide_frameskip_allow(gpu.ex_regs[3]);
560 static void flush_cmd_buffer(void)
562 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
564 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
568 void GPUwriteDataMem(uint32_t *mem, int count)
572 log_io("gpu_dma_write %p %d\n", mem, count);
574 if (unlikely(gpu.cmd_len > 0))
577 left = do_cmd_buffer(mem, count);
579 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
582 void GPUwriteData(uint32_t data)
584 log_io("gpu_write %08x\n", data);
585 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
586 if (gpu.cmd_len >= CMD_BUFFER_LEN)
590 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
592 uint32_t addr, *list, ld_addr = 0;
593 int len, left, count;
596 preload(rambase + (start_addr & 0x1fffff) / 4);
598 if (unlikely(gpu.cmd_len > 0))
601 log_io("gpu_dma_chain\n");
602 addr = start_addr & 0xffffff;
603 for (count = 0; (addr & 0x800000) == 0; count++)
605 list = rambase + (addr & 0x1fffff) / 4;
606 len = LE32TOH(list[0]) >> 24;
607 addr = LE32TOH(list[0]) & 0xffffff;
608 preload(rambase + (addr & 0x1fffff) / 4);
612 cpu_cycles += 5 + len;
614 log_io(".chain %08lx #%d+%d\n",
615 (long)(list - rambase) * 4, len, gpu.cmd_len);
616 if (unlikely(gpu.cmd_len > 0)) {
617 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
618 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
621 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
628 left = do_cmd_buffer(list + 1, len);
630 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
632 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
637 *progress_addr = addr;
640 #define LD_THRESHOLD (8*1024)
641 if (count >= LD_THRESHOLD) {
642 if (count == LD_THRESHOLD) {
647 // loop detection marker
648 // (bit23 set causes DMA error on real machine, so
649 // unlikely to be ever set by the game)
650 list[0] |= HTOLE32(0x800000);
655 // remove loop detection markers
656 count -= LD_THRESHOLD + 2;
657 addr = ld_addr & 0x1fffff;
658 while (count-- > 0) {
659 list = rambase + addr / 4;
660 addr = LE32TOH(list[0]) & 0x1fffff;
661 list[0] &= HTOLE32(~0x800000);
665 gpu.state.last_list.frame = *gpu.state.frame_count;
666 gpu.state.last_list.hcnt = *gpu.state.hcnt;
667 gpu.state.last_list.cycles = cpu_cycles;
668 gpu.state.last_list.addr = start_addr;
673 void GPUreadDataMem(uint32_t *mem, int count)
675 log_io("gpu_dma_read %p %d\n", mem, count);
677 if (unlikely(gpu.cmd_len > 0))
681 do_vram_io(mem, count, 1);
684 uint32_t GPUreadData(void)
688 if (unlikely(gpu.cmd_len > 0))
694 do_vram_io(&ret, 1, 1);
698 log_io("gpu_read %08x\n", ret);
702 uint32_t GPUreadStatus(void)
706 if (unlikely(gpu.cmd_len > 0))
710 log_io("gpu_read_status %08x\n", ret);
716 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
717 uint32_t ulStatus; // current gpu status
718 uint32_t ulControl[256]; // latest control register values
719 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
722 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
730 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
731 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
732 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
733 freeze->ulStatus = gpu.status;
736 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
737 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
738 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
739 gpu.status = freeze->ulStatus;
741 for (i = 8; i > 0; i--) {
742 gpu.regs[i] ^= 1; // avoid reg change detection
743 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
745 renderer_sync_ecmds(gpu.ex_regs);
746 renderer_update_caches(0, 0, 1024, 512, 1);
753 void GPUupdateLace(void)
757 renderer_flush_queues();
759 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
760 if (!gpu.state.blanked) {
762 gpu.state.blanked = 1;
763 gpu.state.fb_dirty = 1;
768 if (!gpu.state.fb_dirty)
771 if (gpu.frameskip.set) {
772 if (!gpu.frameskip.frame_ready) {
773 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
775 gpu.frameskip.active = 0;
777 gpu.frameskip.frame_ready = 0;
781 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
782 renderer_update_caches(0, 0, 1024, 512, 1);
783 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
784 gpu.state.fb_dirty = 0;
785 gpu.state.blanked = 0;
788 void GPUvBlank(int is_vblank, int lcf)
790 int interlace = gpu.state.allow_interlace
791 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
792 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
793 // interlace doesn't look nice on progressive displays,
794 // so we have this "auto" mode here for games that don't read vram
795 if (gpu.state.allow_interlace == 2
796 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
800 if (interlace || interlace != gpu.state.old_interlace) {
801 gpu.state.old_interlace = interlace;
805 renderer_flush_queues();
806 renderer_set_interlace(interlace, !lcf);
810 #include "../../frontend/plugin_lib.h"
812 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
814 gpu.frameskip.set = cbs->frameskip;
815 gpu.frameskip.advice = &cbs->fskip_advice;
816 gpu.frameskip.active = 0;
817 gpu.frameskip.frame_ready = 1;
818 gpu.state.hcnt = cbs->gpu_hcnt;
819 gpu.state.frame_count = cbs->gpu_frame_count;
820 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
821 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
822 if (gpu.state.screen_centering_type != cbs->screen_centering_type
823 || gpu.state.screen_centering_x != cbs->screen_centering_x
824 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
825 gpu.state.screen_centering_type = cbs->screen_centering_type;
826 gpu.state.screen_centering_x = cbs->screen_centering_x;
827 gpu.state.screen_centering_y = cbs->screen_centering_y;
832 gpu.mmap = cbs->mmap;
833 gpu.munmap = cbs->munmap;
836 if (gpu.vram == NULL)
839 if (cbs->pl_vout_set_raw_vram)
840 cbs->pl_vout_set_raw_vram(gpu.vram);
841 renderer_set_config(cbs);
842 vout_set_config(cbs);
845 // vim:shiftwidth=2:expandtab