2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
16 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
18 #define unlikely(x) __builtin_expect((x), 0)
19 #define preload __builtin_prefetch
20 #define noinline __attribute__((noinline))
27 //#define log_io gpu_log
32 static noinline int do_cmd_buffer(uint32_t *data, int count);
33 static void finish_vram_transfer(int is_read);
35 static noinline void do_cmd_reset(void)
// Drain any partially buffered GP0 command words before resetting.
37 if (unlikely(gpu.cmd_len > 0))
38 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// Close out an in-flight VRAM transfer so DMA state stays consistent.
41 if (unlikely(gpu.dma.h > 0))
42 finish_vram_transfer(gpu.dma_start.is_read);
46 static noinline void do_reset(void)
// Clear all latched GP1 register values.
52 memset(gpu.regs, 0, sizeof(gpu.regs));
// Reset the 0xe0..0xe7 rendering-state registers to their bare command bytes.
53 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
54 gpu.ex_regs[i] = (0xe0 + i) << 24;
// Initial status word after reset (magic constant; presumably the
// hardware power-on value - TODO confirm against nocash docs).
55 gpu.status = 0x14802000;
// Default display mode: 256x240, origin at (0,0).
58 gpu.screen.hres = gpu.screen.w = 256;
59 gpu.screen.vres = gpu.screen.h = 240;
60 gpu.screen.x = gpu.screen.y = 0;
61 renderer_notify_res_change();
// Recompute the displayed horizontal resolution and x offset from the
// status-word mode bits and the x1/x2 display range registers.
64 static noinline void update_width(void)
// Tables indexed by status bits 16-18: resulting hres and dot-clock divider.
66 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
67 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
68 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
69 int hres = hres_all[(gpu.status >> 16) & 7];
70 int pal = gpu.status & PSX_GPU_STATUS_PAL;
// sw = displayed width in dot clocks (x2 - x1).
71 int sw = gpu.screen.x2 - gpu.screen.x1;
74 /* nothing displayed? */;
// s: nominal leftmost visible dot clock; the PAL value is a guess (see note).
76 int s = pal ? 656 : 608; // or 600? pal is just a guess
77 x = (gpu.screen.x1 - s) / hdiv;
78 x = (x + 1) & ~1; // blitter limitation
79 sw /= hdiv;
80 sw = (sw + 2) & ~3; // according to nocash
81 switch (gpu.state.screen_centering_type) {
// manual centering: x supplied by the frontend
85 x = gpu.state.screen_centering_x;
88 // correct if slightly miscentered
89 x_auto = (hres - sw) / 2 & ~3;
90 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
95 // .x range check is done in vout_update()
97 // reduce the unpleasant right border that a few games have
98 if (gpu.state.screen_centering_type == 0
99 && x <= 4 && hres - (x + sw) >= 4)
103 gpu.screen.hres = hres;
104 gpu.state.dims_changed = 1;
105 //printf("xx %d %d -> %2d, %d / %d\n",
106 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
// Recompute the displayed vertical resolution and y offset from the
// status word and the y1/y2 display range registers.
109 static noinline void update_height(void)
111 int pal = gpu.status & PSX_GPU_STATUS_PAL;
112 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
// Top border offset; PAL start line differs (39 chosen for Spyro).
113 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
114 int sh = gpu.screen.y2 - gpu.screen.y1;
// PAL with a tall display range (or already-256 vres) uses 256-line mode.
118 if (pal && (sh > 240 || gpu.screen.vres == 256))
// Interlaced double-height: scale everything by 2.
121 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
123 /* nothing displayed? */;
125 switch (gpu.state.screen_centering_type) {
// manual centering: y supplied by the frontend
129 y = gpu.state.screen_centering_y;
132 // correct if slightly miscentered
133 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
141 gpu.screen.vres = vres;
142 gpu.state.dims_changed = 1;
143 //printf("yy %d %d -> %d, %d / %d\n",
144 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
// Decide whether the next frame should be skipped, based on frontend
// advice and the configured skip count.
147 static noinline void decide_frameskip(void)
149 if (gpu.frameskip.active)
152 gpu.frameskip.cnt = 0;
153 gpu.frameskip.frame_ready = 1;
// Enter skip mode when the frontend advises it...
156 if (!gpu.frameskip.active && *gpu.frameskip.advice)
157 gpu.frameskip.active = 1;
// ...or while we are under the fixed skip quota.
158 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
159 gpu.frameskip.active = 1;
161 gpu.frameskip.active = 0;
// Replay a fill command that was deferred while skipping (see
// do_cmd_list_skip), now that we are rendering again.
163 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
165 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
166 gpu.frameskip.pending_fill[0] = 0;
// Given the current 0xe3 (draw area top-left) command word, decide
// whether skipping is allowed at all; returns the decision.
170 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
172 // no frameskip if it decides to draw to display area,
173 // but not for interlace since it'll most likely always do that
174 uint32_t x = cmd_e3 & 0x3ff;
175 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned compare doubles as a "below src_x/src_y" check (wraps negative)
176 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
177 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
178 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
179 return gpu.frameskip.allow;
// GP1 0x10: "get GPU info" - place the requested value in gpu.gp0
// for readback via GPUreadData.
182 static noinline void get_gpu_info(uint32_t data)
184 switch (data & 0x0f) {
// draw area / offset registers (0xe3..0xe7 range, low 20 bits)
188 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// draw offset register (0xe5), 22 bits
191 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
202 // double, for overdraw guard
203 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Map the (double-sized, overdraw-guarded) VRAM buffer via the
// frontend-provided mmap callback.
205 static int map_vram(void)
207 gpu.vram = gpu.mmap(VRAM_SIZE);
208 if (gpu.vram != NULL) {
// Offset into the mapping by 4096 bytes (2048 pixels) - presumably a
// guard region before the buffer; must be undone in GPUshutdown.
209 gpu.vram += 4096 / 2;
213 fprintf(stderr, "could not map vram, expect crashes\n");
222 ret |= renderer_init();
224 memset(&gpu.state, 0, sizeof(gpu.state));
225 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
227 gpu.state.frame_count = &gpu.zero;
228 gpu.state.hcnt = &gpu.zero;
232 if (gpu.mmap != NULL) {
// Plugin shutdown: release the VRAM mapping created by map_vram.
239 long GPUshutdown(void)
245 if (gpu.vram != NULL) {
// Undo the 4096-byte guard offset applied in map_vram before unmapping.
246 gpu.vram -= 4096 / 2;
247 gpu.munmap(gpu.vram, VRAM_SIZE);
// Handle a GP1 (control/status) register write from the emulated CPU.
254 void GPUwriteStatus(uint32_t data)
// Command number is in the top 8 bits of the written word.
256 uint32_t cmd = data >> 24;
258 if (cmd < ARRAY_SIZE(gpu.regs)) {
// Ignore no-change writes, except for cmds 0, 1 and 5 which have
// side effects even when repeated.
259 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
261 gpu.regs[cmd] = data;
264 gpu.state.fb_dirty = 1;
// display blanking on
275 gpu.status |= PSX_GPU_STATUS_BLANKING;
276 gpu.state.dims_changed = 1; // for hud clearing
// display blanking off
279 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA direction select (2 bits)
282 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
283 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// display start address (scanout origin) in VRAM
286 gpu.screen.src_x = data & 0x3ff;
287 gpu.screen.src_y = (data >> 10) & 0x1ff;
288 renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
289 if (gpu.frameskip.set) {
290 decide_frameskip_allow(gpu.ex_regs[3]);
// treat a scanout change as a frame flip for frameskip bookkeeping
291 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
293 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
298 gpu.screen.x1 = data & 0xfff;
299 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
303 gpu.screen.y1 = data & 0x3ff;
304 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: data bits 0-6 mapped into status bits 16-22
308 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
311 renderer_notify_res_change();
// 0x10..0x1f: GPU info query, answered via gpu.gp0
314 if ((cmd & 0xf0) == 0x10)
319 #ifdef GPUwriteStatus_ext
320 GPUwriteStatus_ext(data);
// Number of extra argument words following each GP0 command byte
// (total packet length is 1 + cmd_lengths[cmd]); 0 for no-arg or
// specially-handled commands.
324 const unsigned char cmd_lengths[256] =
326 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
329 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
330 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
331 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
332 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
333 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
334 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
338 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM.
344 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one scanline's worth of pixels (l pixels = l*2 bytes) between
// VRAM and a CPU-side buffer; direction chosen by is_read.
346 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
348 uint16_t *vram = VRAM_MEM_XY(x, y);
350 memcpy(mem, vram, l * 2);
352 memcpy(vram, mem, l * 2);
// Transfer up to `count` 32-bit words of an ongoing VRAM read/write
// rectangle; resumable across calls via gpu.dma. Returns the number
// of 32-bit words actually consumed.
355 static int do_vram_io(uint32_t *data, int count, int is_read)
357 int count_initial = count;
358 uint16_t *sdata = (uint16_t *)data;
359 int x = gpu.dma.x, y = gpu.dma.y;
360 int w = gpu.dma.w, h = gpu.dma.h;
// o = pixel offset into the current (partial) row from a previous call
361 int o = gpu.dma.offset;
363 count *= 2; // operate in 16bpp pixels
// Finish a row that was left partially transferred last time.
365 if (gpu.dma.offset) {
366 l = w - gpu.dma.offset;
370 do_vram_line(x + o, y, sdata, l, is_read);
// Transfer whole rows while enough data remains.
383 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
385 do_vram_line(x, y, sdata, w, is_read);
// Partial final row; leftover is remembered in gpu.dma.offset.
391 do_vram_line(x, y, sdata, count, is_read);
// Rectangle fully transferred.
397 finish_vram_transfer(is_read);
// Convert leftover pixel count back to consumed 32-bit words.
402 return count_initial - count / 2;
// Begin a VRAM read (0xc0) or write (0xa0) rectangle transfer; decodes
// position/size words and primes gpu.dma for do_vram_io.
405 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
408 log_anomaly("start_vram_transfer while old unfinished\n");
410 gpu.dma.x = pos_word & 0x3ff;
411 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// size 0 means maximum (1024x512), hence the -1/+1 wrap
412 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
413 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
415 gpu.dma.is_read = is_read;
// Snapshot the transfer params; gpu.dma mutates as the transfer progresses.
416 gpu.dma_start = gpu.dma;
418 renderer_flush_queues();
// Reads: raise the "image ready" status bit and preload the first word.
420 gpu.status |= PSX_GPU_STATUS_IMG;
421 // XXX: wrong for width 1
422 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
423 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
426 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
427 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete a VRAM transfer: clear the image-ready bit for reads, or
// tell the renderer which rectangle was dirtied for writes.
430 static void finish_vram_transfer(int is_read)
433 gpu.status &= ~PSX_GPU_STATUS_IMG;
435 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
436 gpu.dma_start.w, gpu.dma_start.h, 0);
// Frameskip variant of command-list processing: walk GP0 packets
// without rendering, while still tracking state that must not be lost
// (ex_regs, texture page, pending fills). Returns words consumed;
// *last_cmd receives the last command byte seen.
439 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
441 int cmd = 0, pos = 0, len, dummy, v;
444 gpu.frameskip.pending_fill[0] = 0;
446 while (pos < count && skip) {
447 uint32_t *list = data + pos;
448 cmd = LE32TOH(list[0]) >> 24;
449 len = 1 + cmd_lengths[cmd];
// Fill command: if it covers more than the visible screen, execute it
// now; otherwise defer it (replayed later by decide_frameskip).
453 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
454 // clearing something large, don't skip
455 do_cmd_list(list, 3, &dummy);
457 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Textured primitive: keep the texture page bits of ex_regs[1] current.
463 gpu.ex_regs[1] &= ~0x1ff;
464 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// Poly-line: scan for the 0x5xxx5xxx terminator word.
467 for (v = 3; pos + v < count; v++)
469 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// Shaded poly-line: terminator appears on every other word.
475 for (v = 4; pos + v < count; v += 2)
477 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// 0xe3 (draw area) may re-enable/disable skipping; latch 0xe0-0xe7.
484 skip = decide_frameskip_allow(LE32TOH(list[0]));
485 if ((cmd & 0xf8) == 0xe0)
486 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
490 if (pos + len > count) {
492 break; // incomplete cmd
// VRAM transfer commands cannot be skipped; stop here.
494 if (0xa0 <= cmd && cmd <= 0xdf)
// Push the tracked ex_regs state to the renderer.
500 renderer_sync_ecmds(gpu.ex_regs);
// Core GP0 command dispatcher: consumes words from `data`, routing them
// to VRAM I/O, the skip path, or the renderer. Returns words NOT consumed.
505 static noinline int do_cmd_buffer(uint32_t *data, int count)
// Remember the draw-area reg so we can re-evaluate frameskip if it changed.
508 uint32_t old_e3 = gpu.ex_regs[3];
512 for (pos = 0; pos < count; )
// Ongoing VRAM write takes priority over command parsing.
514 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
516 pos += do_vram_io(data + pos, count - pos, 0);
521 cmd = LE32TOH(data[pos]) >> 24;
// 0xa0-0xdf: VRAM write/read rectangle setup (3-word header).
522 if (0xa0 <= cmd && cmd <= 0xdf) {
523 if (unlikely((pos+2) >= count)) {
524 // incomplete vram write/read cmd, can't consume yet
529 // consume vram write/read cmd
530 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
535 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
536 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
537 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
539 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Mirror the tracked rendering state back into the status word.
548 gpu.status &= ~0x1fff;
549 gpu.status |= gpu.ex_regs[1] & 0x7ff;
550 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
552 gpu.state.fb_dirty |= vram_dirty;
// Draw area changed: re-decide whether frameskip is allowed.
554 if (old_e3 != gpu.ex_regs[3])
555 decide_frameskip_allow(gpu.ex_regs[3]);
// Process the buffered GP0 words; keep any unconsumed tail (e.g. an
// incomplete packet) at the start of the buffer for the next call.
560 static void flush_cmd_buffer(void)
562 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
564 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block mode) from the emulated CPU.
568 void GPUwriteDataMem(uint32_t *mem, int count)
572 log_io("gpu_dma_write %p %d\n", mem, count);
// Flush previously buffered words first so ordering is preserved.
574 if (unlikely(gpu.cmd_len > 0))
577 left = do_cmd_buffer(mem, count);
// Unconsumed words cannot be kept here (caller's buffer) - log the loss.
579 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: buffer it, flushing when the buffer fills.
582 void GPUwriteData(uint32_t data)
584 log_io("gpu_write %08x\n", data);
585 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
586 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain in emulated RAM, feeding each node's
// payload words to the command processor. Detects and breaks infinite
// loops by temporarily marking visited nodes. Returns cycle estimate
// via cpu_cycles (presumably; tail not visible here - TODO confirm).
590 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
592 uint32_t addr, *list, ld_addr = 0;
593 int len, left, count;
596 preload(rambase + (start_addr & 0x1fffff) / 4);
// Flush buffered GP0 words so chain data is processed in order.
598 if (unlikely(gpu.cmd_len > 0))
601 log_io("gpu_dma_chain\n");
// Bit 23 set marks the end-of-chain terminator (0xffffff).
602 addr = start_addr & 0xffffff;
603 for (count = 0; (addr & 0x800000) == 0; count++)
// Node header: payload word count in the top byte, next addr below.
605 list = rambase + (addr & 0x1fffff) / 4;
606 len = LE32TOH(list[0]) >> 24;
607 addr = LE32TOH(list[0]) & 0xffffff;
608 preload(rambase + (addr & 0x1fffff) / 4);
// Rough DMA cost estimate per node.
612 cpu_cycles += 5 + len;
614 log_io(".chain %08lx #%d+%d\n",
615 (long)(list - rambase) * 4, len, gpu.cmd_len);
// Leftover words from a previous node: prepend this node's payload.
616 if (unlikely(gpu.cmd_len > 0)) {
617 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
624 left = do_cmd_buffer(list + 1, len);
// Keep the unconsumed tail (incomplete packet) for the next node.
626 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
628 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
633 *progress_addr = addr;
// Loop detection: after LD_THRESHOLD nodes, start tagging visited
// nodes with bit 23; revisiting a tagged node ends the walk.
636 #define LD_THRESHOLD (8*1024)
637 if (count >= LD_THRESHOLD) {
638 if (count == LD_THRESHOLD) {
643 // loop detection marker
644 // (bit23 set causes DMA error on real machine, so
645 // unlikely to be ever set by the game)
646 list[0] |= HTOLE32(0x800000);
651 // remove loop detection markers
652 count -= LD_THRESHOLD + 2;
653 addr = ld_addr & 0x1fffff;
654 while (count-- > 0) {
655 list = rambase + addr / 4;
656 addr = LE32TOH(list[0]) & 0x1fffff;
657 list[0] &= HTOLE32(~0x800000);
// Record list stats for the frontend's lag detection heuristics.
661 gpu.state.last_list.frame = *gpu.state.frame_count;
662 gpu.state.last_list.hcnt = *gpu.state.hcnt;
663 gpu.state.last_list.cycles = cpu_cycles;
664 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (VRAM->CPU DMA): flush pending commands, then stream
// the requested words out of the active VRAM read rectangle.
669 void GPUreadDataMem(uint32_t *mem, int count)
671 log_io("gpu_dma_read %p %d\n", mem, count);
673 if (unlikely(gpu.cmd_len > 0))
677 do_vram_io(mem, count, 1);
// Single-word GPUREAD: returns the next VRAM-read word (or the latched
// gpu.gp0 info value when no transfer is active - tail not visible).
680 uint32_t GPUreadData(void)
684 if (unlikely(gpu.cmd_len > 0))
690 do_vram_io(&ret, 1, 1);
694 log_io("gpu_read %08x\n", ret);
// Read the GPU status word; buffered commands are flushed first so
// the status bits reflect all writes made so far.
698 uint32_t GPUreadStatus(void)
702 if (unlikely(gpu.cmd_len > 0))
706 log_io("gpu_read_status %08x\n", ret);
712 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
713 uint32_t ulStatus; // current gpu status
714 uint32_t ulControl[256]; // latest control register values
715 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Savestate support: type selects save vs load of VRAM, GP1 registers,
// ex_regs and the status word into/out of the freeze structure.
718 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save path: snapshot VRAM (1MB of 16bpp pixels) and register state.
726 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
727 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
// ex_regs are stored at offset 0xe0 of ulControl, matching their cmd bytes.
728 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
729 freeze->ulStatus = gpu.status;
// Load path: restore state, then replay GP1 writes to resync derived state.
732 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
733 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
734 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
735 gpu.status = freeze->ulStatus;
737 for (i = 8; i > 0; i--) {
// Flip a bit so the no-change early-out in GPUwriteStatus won't trigger.
738 gpu.regs[i] ^= 1; // avoid reg change detection
739 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
741 renderer_sync_ecmds(gpu.ex_regs);
// Full-VRAM cache invalidation after restoring the image.
742 renderer_update_caches(0, 0, 1024, 512, 1);
// Per-vblank hook: decide whether to present a frame, honoring
// blanking and the frameskip state machine.
749 void GPUupdateLace(void)
753 renderer_flush_queues();
// While blanked, present a blank frame once, then do nothing.
755 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
756 if (!gpu.state.blanked) {
758 gpu.state.blanked = 1;
759 gpu.state.fb_dirty = 1;
// Nothing drawn since last present - skip the flip.
764 if (!gpu.state.fb_dirty)
767 if (gpu.frameskip.set) {
768 if (!gpu.frameskip.frame_ready) {
// Keep skipping for a bounded number of frames after the last flip.
769 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
771 gpu.frameskip.active = 0;
773 gpu.frameskip.frame_ready = 0;
// Enhancement (hi-res) toggle requires a full cache refresh.
777 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
778 renderer_update_caches(0, 0, 1024, 512, 1);
779 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
780 gpu.state.fb_dirty = 0;
781 gpu.state.blanked = 0;
// Vblank notification: compute the effective interlace setting and
// inform the renderer when it (or the field, lcf) changes.
784 void GPUvBlank(int is_vblank, int lcf)
786 int interlace = gpu.state.allow_interlace
787 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
788 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
789 // interlace doesn't look nice on progressive displays,
790 // so we have this "auto" mode here for games that don't read vram
791 if (gpu.state.allow_interlace == 2
792 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Notify on any transition, and every field while interlacing.
796 if (interlace || interlace != gpu.state.old_interlace) {
797 gpu.state.old_interlace = interlace;
801 renderer_flush_queues();
802 renderer_set_interlace(interlace, !lcf);
806 #include "../../frontend/plugin_lib.h"
// Accept the frontend's callback/config structure and propagate the
// settings into gpu state, the renderer and the video-out layer.
808 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
810 gpu.frameskip.set = cbs->frameskip;
811 gpu.frameskip.advice = &cbs->fskip_advice;
// Reset the frameskip state machine on (re)configuration.
812 gpu.frameskip.active = 0;
813 gpu.frameskip.frame_ready = 1;
814 gpu.state.hcnt = cbs->gpu_hcnt;
815 gpu.state.frame_count = cbs->gpu_frame_count;
816 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
817 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
// Centering changed: store new values (display dims are presumably
// recomputed in the lines not visible here - TODO confirm).
818 if (gpu.state.screen_centering_type != cbs->screen_centering_type
819 || gpu.state.screen_centering_x != cbs->screen_centering_x
820 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
821 gpu.state.screen_centering_type = cbs->screen_centering_type;
822 gpu.state.screen_centering_x = cbs->screen_centering_x;
823 gpu.state.screen_centering_y = cbs->screen_centering_y;
// (Re)take the memory-mapping callbacks; map VRAM if not yet mapped.
828 gpu.mmap = cbs->mmap;
829 gpu.munmap = cbs->munmap;
832 if (gpu.vram == NULL)
// Hand the raw VRAM pointer to the frontend's video-out, if supported.
835 if (cbs->pl_vout_set_raw_vram)
836 cbs->pl_vout_set_raw_vram(gpu.vram);
837 renderer_set_config(cbs);
838 vout_set_config(cbs);
841 // vim:shiftwidth=2:expandtab