2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
// Number of elements in a true array (not valid on a pointer).
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Compiler hints: mark a condition as expected-false, prefetch memory,
// and forbid inlining of a function (GCC-style builtins/attributes).
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
// Debug print prefixed with the current frame counter and hcnt value,
// both read through the pointers held in gpu.state.
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Disabled alternatives that would route I/O and anomaly logging to gpu_log.
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
// Anomaly logging expands to nothing.
34 #define log_anomaly(...)
// Forward declarations for functions defined later in this file.
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
// Drains outstanding work: runs any words still queued in gpu.cmd_buffer
// through do_cmd_buffer(), and finishes a VRAM transfer whose remaining
// height (gpu.dma.h) is still positive.
41 static noinline void do_cmd_reset(void)
// Both conditions are hinted as unlikely to be true.
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// The direction passed is the one recorded in gpu.dma_start when the
// transfer was started.
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
// Resets register state: control registers are zeroed, every ex_regs slot
// is seeded with its own command byte, and the status word and display
// size receive fixed initial values.
54 static noinline void do_reset(void)
60 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs[i] gets command 0xe0 + i in its top byte with all lower bits clear.
61 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
62 gpu.ex_regs[i] = (0xe0 + i) << 24;
63 gpu.status = 0x14802000;
// Initial display is 256x240; the mode resolution (hres/vres) and the
// visible size (w/h) start out equal.
66 gpu.screen.hres = gpu.screen.w = 256;
67 gpu.screen.vres = gpu.screen.h = 240;
// Recomputes the visible width gpu.screen.w from the horizontal display
// range (x1..x2) and the current mode width gpu.screen.hres.
70 static noinline void update_width(void)
72 int sw = gpu.screen.x2 - gpu.screen.x1;
// A non-positive span, or one of 2560 units or more, selects the full
// mode width.
73 if (sw <= 0 || sw >= 2560)
75 gpu.screen.w = gpu.screen.hres;
// Scaled width: the span taken as a fraction of 2560 units of hres.
// NOTE(review): presumably the alternative branch of the test above; the
// branch keyword itself is not visible here.
77 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Computes the vertical display span sh = y2 - y1 and checks it against
// the double-height status bit and the mode height gpu.screen.vres.
// NOTE(review): the statements guarded by the two tests below are not
// visible here, so their effect is not described.
80 static noinline void update_height(void)
82 // TODO: emulate this properly..
83 int sh = gpu.screen.y2 - gpu.screen.y1;
84 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
// Range check: span must be positive and no larger than the mode height.
86 if (sh <= 0 || sh > gpu.screen.vres)
// Decides whether frame skipping is active from here on, and replays a
// deferred fill command once skipping is off.
92 static noinline void decide_frameskip(void)
// Raise the dirty flag that gpu.frameskip.dirty points to.
94 *gpu.frameskip.dirty = 1;
96 if (gpu.frameskip.active)
99 gpu.frameskip.cnt = 0;
100 gpu.frameskip.frame_ready = 1;
// Skip sources checked in order: the force flag, then the advice flag
// (only when not already skipping), then the configured skip count
// (set) compared against the running counter (cnt).
103 if (*gpu.frameskip.force)
104 gpu.frameskip.active = 1;
105 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
106 gpu.frameskip.active = 1;
107 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
108 gpu.frameskip.active = 1;
110 gpu.frameskip.active = 0;
// Skipping is off and a 3-word fill was parked in pending_fill while
// skipping: execute it now and mark the slot empty.
112 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
114 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
115 gpu.frameskip.pending_fill[0] = 0;
// Updates gpu.frameskip.allow for the given command word and returns it.
//   cmd_e3: word whose bits 0-9 are an x coordinate and bits 10-19 a y
//           coordinate.
//   returns: non-zero when skipping is allowed, i.e. the interlace status
//            bit is set or (x, y) lies outside the displayed rectangle.
119 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
121 // no frameskip if it decides to draw to display area,
122 // but not for interlace since it'll most likely always do that
123 uint32_t x = cmd_e3 & 0x3ff;
124 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// The unsigned subtract-and-compare covers both "before the origin"
// (the difference wraps to a huge value) and "at or past the end" in a
// single test per axis.
125 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
126 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
127 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
128 return gpu.frameskip.allow;
// Answers an info query by loading a value into gpu.gp0; the low 4 bits
// of data select which value.
// NOTE(review): the case labels are not visible here, so which selector
// maps to which assignment below is not stated.
131 static noinline void get_gpu_info(uint32_t data)
133 switch (data & 0x0f) {
// Low 20 bits of the ex_regs entry picked by the low 3 bits of data.
137 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// Low 22 bits of ex_regs[5].
140 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
// Allocation size in bytes: 1024 x 512 pixels of 2 bytes each, doubled,
// plus 4096 bytes (the allocators below skip 4096 bytes at the front).
151 // double, for overdraw guard
152 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
154 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
155 // renderer/downscaler it uses in high res modes:
157 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
158 // fills. (Will change this value if it ever gets large page support)
// NOTE(review): the preprocessor conditional choosing between the two
// VRAM_ALIGN values is not visible here. Both values are powers of two,
// which the round-up mask used by the allocators requires.
159 #define VRAM_ALIGN 8192
161 #define VRAM_ALIGN 16
164 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
165 static uint16_t *vram_ptr_orig = NULL;
// VRAM setup through the gpu.mmap callback (GPULIB_USE_MMAP builds).
// Requests VRAM_SIZE bytes plus VRAM_ALIGN-1 bytes of alignment slack and
// keeps the raw pointer in vram_ptr_orig for later release.
167 #ifdef GPULIB_USE_MMAP
168 static int map_vram(void)
170 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
171 if (gpu.vram != NULL) {
172 // 4kb guard in front
// gpu.vram points to 16-bit elements, so 4096 / 2 elements is 4096 bytes.
173 gpu.vram += (4096 / 2);
// Round the pointer up to the next multiple of VRAM_ALIGN.
175 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
// Failure is only reported on stderr.
179 fprintf(stderr, "could not map vram, expect crashes\n");
// Heap-backed variant of map_vram: zero-filled allocation of VRAM_SIZE
// bytes plus VRAM_ALIGN-1 bytes of alignment slack. The raw pointer is kept
// in vram_ptr_orig for later release.
184 static int map_vram(void)
186 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
187 if (gpu.vram != NULL) {
188 // 4kb guard in front
// gpu.vram points to 16-bit elements, so 4096 / 2 elements is 4096 bytes.
189 gpu.vram += (4096 / 2);
// Round the pointer up to the next multiple of VRAM_ALIGN.
191 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
// Failure is only reported on stderr.
194 fprintf(stderr, "could not allocate vram, expect crashes\n");
// Allocates zero-filled VRAM on the heap (VRAM_SIZE bytes plus VRAM_ALIGN-1
// bytes of slack), remembers the raw pointer in vram_ptr_orig, then skips
// the front guard and aligns gpu.vram. The init code compares the return
// value against 0 to detect failure.
199 static int allocate_vram(void)
201 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
202 if (gpu.vram != NULL) {
203 // 4kb guard in front
// gpu.vram points to 16-bit elements, so 4096 / 2 elements is 4096 bytes.
204 gpu.vram += (4096 / 2);
// Round the pointer up to the next multiple of VRAM_ALIGN.
206 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
// Failure is reported on stderr.
209 fprintf(stderr, "could not allocate vram, expect crashes\n");
// NOTE(review): the header of the function containing this initialization
// code is not visible here.
// In builds without GPULIB_USE_MMAP, VRAM is allocated at this point if it
// has not been set up yet; a non-zero result from allocate_vram() is
// reported as an error.
217 #ifndef GPULIB_USE_MMAP
218 if (gpu.vram == NULL) {
219 if (allocate_vram() != 0) {
220 printf("ERROR: could not allocate VRAM, exiting..\n");
226 //extern uint32_t hSyncCount; // in psxcounters.cpp
227 //extern uint32_t frame_counter; // in psxcounters.cpp
228 //gpu.state.hcnt = &hSyncCount;
229 //gpu.state.frame_count = &frame_counter;
// The renderer's init result is OR-ed into the accumulated status.
233 ret |= renderer_init();
// Point both counter pointers at gpu.zero so that code reading through
// them (e.g. the gpu_log macro) always has a valid target.
235 gpu.state.frame_count = &gpu.zero;
236 gpu.state.hcnt = &gpu.zero;
237 gpu.frameskip.active = 0;
241 /*if (gpu.mmap != NULL) {
// Releases the VRAM backing store recorded in vram_ptr_orig and clears
// both VRAM pointers.
248 long GPUshutdown(void)
255 if (vram_ptr_orig != NULL) {
256 #ifdef GPULIB_USE_MMAP
// NOTE(review): the mmap variant of map_vram requests
// VRAM_SIZE + (VRAM_ALIGN-1) bytes, but only VRAM_SIZE is passed here.
// Confirm that gpu.munmap accepts the shorter length, or pass the same
// size that was mapped.
257 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
// Reset both pointers so no stale address is left behind.
262 vram_ptr_orig = gpu.vram = NULL;
// Handles a write to the GPU control port. The top byte of data selects
// the command; the remaining bits carry its parameter.
// NOTE(review): the switch and case labels are not visible here; the
// groups of statements below are described by what they modify.
267 void GPUwriteStatus(uint32_t data)
269 //senquack TODO: Would it be wise to add cmd buffer flush here, since
270 // status settings can affect commands already in buffer?
// Lookup tables for the mode width and height, indexed by status bits
// 16-18 and 19-20 respectively (see the lookups further down).
272 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
273 static const short vres[4] = { 240, 480, 256, 480 };
274 uint32_t cmd = data >> 24;
// Commands that fit gpu.regs are cached there. For cmd > 1 and cmd != 5, a
// word identical to the cached one is detected first (the action taken is
// on a line not visible here).
276 if (cmd < ARRAY_SIZE(gpu.regs)) {
277 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
279 gpu.regs[cmd] = data;
282 gpu.state.fb_dirty = 1;
// Display blanking bit set / cleared.
293 gpu.status |= PSX_GPU_STATUS_BLANKING;
295 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA mode field replaced with the low 2 bits of data.
298 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
299 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// Display start: x in bits 0-9, y in bits 10-18.
302 gpu.screen.x = data & 0x3ff;
303 gpu.screen.y = (data >> 10) & 0x1ff;
// With frameskip configured, re-evaluate the allow flag for the new
// display position and track the frame in which the start last changed.
304 if (gpu.frameskip.set) {
305 decide_frameskip_allow(gpu.ex_regs[3]);
306 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
308 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range: two 12-bit values.
313 gpu.screen.x1 = data & 0xfff;
314 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range: two 10-bit values.
318 gpu.screen.y1 = data & 0x3ff;
319 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: data bits 0-5 land in status bits 17-22 and data bit 6 in
// status bit 16; the resolution is then looked up from the new status.
323 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
324 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
325 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
328 renderer_notify_res_change();
// Commands 0x10-0x1f are tested for here (the guarded statement is not
// visible).
331 if ((cmd & 0xf0) == 0x10)
// Optional hook: compiled in only when GPUwriteStatus_ext is defined.
336 #ifdef GPUwriteStatus_ext
337 GPUwriteStatus_ext(data);
// Per-command table indexed by the command byte (top 8 bits of a command
// word). do_cmd_list_skip computes a command's total size as
// 1 + cmd_lengths[cmd], so each entry is the number of words that follow
// the command word itself. Each row covers 16 command values.
341 const unsigned char cmd_lengths[256] =
343 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
346 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
347 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
348 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
349 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
350 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
351 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
353 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
355 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16-bit VRAM element at (x, y); rows are 1024 elements
// apart.
361 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copies l 16-bit pixels between VRAM at (x, y) and the buffer mem.
// The byte count is l * 2 because each pixel is 2 bytes.
// NOTE(review): the test selecting between the two copies is not visible
// here; presumably is_read picks the direction.
363 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
365 uint16_t *vram = VRAM_MEM_XY(x, y);
// VRAM -> mem.
367 memcpy(mem, vram, l * 2);
// mem -> VRAM.
369 memcpy(vram, mem, l * 2);
// Moves up to count 32-bit words between data and the VRAM rectangle
// described by gpu.dma, in the direction given by is_read.
// Returns the number of words consumed.
372 static int do_vram_io(uint32_t *data, int count, int is_read)
374 int count_initial = count;
// Treat the word buffer as a stream of 16-bit pixels.
375 uint16_t *sdata = (uint16_t *)data;
376 int x = gpu.dma.x, y = gpu.dma.y;
377 int w = gpu.dma.w, h = gpu.dma.h;
378 int o = gpu.dma.offset;
380 count *= 2; // operate in 16bpp pixels
// A non-zero offset means an earlier call stopped mid-row: l is what is
// left of that row, continued from column x + o.
384 if (gpu.dma.offset) {
385 l = w - gpu.dma.offset;
389 do_vram_line(x + o, y, sdata, l, is_read);
// Whole rows, for as long as rows remain and a full row of pixels is
// available.
402 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
404 do_vram_line(x, y, sdata, w, is_read);
// Partial row using the leftover pixels (the guarding condition is not
// visible here).
410 do_vram_line(x, y, sdata, count, is_read);
// Transfer completion hook (guarding condition not visible here).
416 finish_vram_transfer(is_read);
// Leftover pixels converted back to words and subtracted from the
// original word count.
421 return count_initial - count / 2;
// Sets up gpu.dma for a VRAM transfer.
//   pos_word:  x in bits 0-9, y in bits 16-24.
//   size_word: width in the low half, height in the high half.
//   is_read:   stored in gpu.dma.is_read.
424 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
427 log_anomaly("start_vram_transfer while old unfinished\n");
429 gpu.dma.x = pos_word & 0x3ff;
430 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// The "- 1, mask, + 1" form maps the encoded sizes onto 1..1024 and
// 1..512, so an encoded 0 yields the maximum.
431 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
432 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
434 gpu.dma.is_read = is_read;
// Snapshot of the starting geometry; finish_vram_transfer passes it to
// the renderer's cache update.
435 gpu.dma_start = gpu.dma;
437 renderer_flush_queues();
// Sets the IMG status bit, preloads gp0 with the first 32 bits at the
// start position and records the frame of this VRAM read.
// NOTE(review): presumably these three statements apply to reads only; the
// guarding test is not visible here.
439 gpu.status |= PSX_GPU_STATUS_IMG;
440 // XXX: wrong for width 1
441 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
442 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
445 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
446 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Completes a VRAM transfer: clears the IMG status bit and tells the
// renderer which rectangle (the geometry saved at transfer start) to
// refresh in its caches.
// NOTE(review): the tests deciding which of the two actions runs for a
// given is_read are not visible here.
449 static void finish_vram_transfer(int is_read)
452 gpu.status &= ~PSX_GPU_STATUS_IMG;
454 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
455 gpu.dma_start.w, gpu.dma_start.h);
// Walks a command list while frame skipping is active, without drawing,
// while still tracking the state later commands depend on.
//   data/count: the command words and how many are available.
//   last_cmd:   out parameter for the last command seen (the store itself
//               is not visible here).
// NOTE(review): case labels and the declaration of skip are not visible
// here; comments describe individual statements only.
458 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
460 int cmd = 0, pos = 0, len, dummy, v;
// Forget any fill parked by an earlier call.
463 gpu.frameskip.pending_fill[0] = 0;
// Continue while words remain and skipping is still permitted.
465 while (pos < count && skip) {
466 uint32_t *list = data + pos;
467 cmd = LE32TOH(list[0]) >> 24;
// Total size: the command word plus its table-defined parameter words.
468 len = 1 + cmd_lengths[cmd];
// Fill command: a rectangle wider or taller than the displayed area is
// executed immediately ...
472 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
473 // clearing something large, don't skip
474 do_cmd_list(list, 3, &dummy);
// ... a smaller one is parked (3 words) for decide_frameskip to replay
// once skipping stops.
476 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Refresh the low 9 bits of ex_regs[1] from a primitive word at index 4 or
// 5, chosen by bit 4 of cmd.
482 gpu.ex_regs[1] &= ~0x1ff;
483 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// Two scans for a word matching the 0x5xxx5xxx pattern: one from index 3
// in steps of 1, one from index 4 in steps of 2. Presumably this locates
// the end marker of variable-length primitives - TODO confirm.
486 for (v = 3; pos + v < count; v++)
488 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
494 for (v = 4; pos + v < count; v += 2)
496 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// Re-evaluate whether skipping may continue for this command word.
503 skip = decide_frameskip_allow(LE32TOH(list[0]));
// Commands 0xe0-0xe7 are recorded in ex_regs even while skipping.
504 if ((cmd & 0xf8) == 0xe0)
505 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
// Not enough words left for the whole command: stop here.
509 if (pos + len > count) {
511 break; // incomplete cmd
// Commands 0xa0-0xdf (the VRAM transfer range handled by do_cmd_buffer) are
// tested for here; the action is not visible.
513 if (0xa0 <= cmd && cmd <= 0xdf)
// Hand the tracked ex_regs state to the renderer.
519 renderer_sync_ecmds(gpu.ex_regs);
// Main command dispatcher: consumes words from data, routing them to an
// ongoing VRAM write, to a new VRAM transfer, or to the drawing-command
// processor (normal or frame-skipping variant).
// Callers in this file treat the return value as the number of words left
// unconsumed.
524 static noinline int do_cmd_buffer(uint32_t *data, int count)
// Saved to detect a change of ex_regs[3] at the end.
527 uint32_t old_e3 = gpu.ex_regs[3];
531 for (pos = 0; pos < count; )
// An unfinished write transfer takes the incoming words as pixel data.
533 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
535 pos += do_vram_io(data + pos, count - pos, 0);
540 cmd = LE32TOH(data[pos]) >> 24;
// VRAM transfer commands need the command word plus two parameter words.
541 if (0xa0 <= cmd && cmd <= 0xdf) {
542 if (unlikely((pos+2) >= count)) {
543 // incomplete vram write/read cmd, can't consume yet
548 // consume vram write/read cmd
// The transfer is a read when the top three bits of cmd equal 0xc0.
549 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
554 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
555 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
556 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
558 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Rebuild the low 13 status bits: bits 0-10 from ex_regs[1], bits 11-12
// from the low 2 bits of ex_regs[6].
567 gpu.status &= ~0x1fff;
568 gpu.status |= gpu.ex_regs[1] & 0x7ff;
569 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
571 gpu.state.fb_dirty |= vram_dirty;
// ex_regs[3] changed during this batch: refresh the frameskip allow flag.
573 if (old_e3 != gpu.ex_regs[3])
574 decide_frameskip_allow(gpu.ex_regs[3]);
// Processes the queued words in gpu.cmd_buffer and moves the unconsumed
// tail (left words) to the front of the buffer. memmove is used because
// source and destination may overlap.
579 static void flush_cmd_buffer(void)
581 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// left * 4: the count is in 32-bit words.
583 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Block write to the data port: feeds count words at mem straight to
// do_cmd_buffer. Words that do_cmd_buffer leaves unconsumed are reported
// through log_anomaly as discarded.
587 void GPUwriteDataMem(uint32_t *mem, int count)
591 log_io("gpu_dma_write %p %d\n", mem, count);
// Previously queued single-word writes are pending (the statement handling
// them is not visible here).
593 if (unlikely(gpu.cmd_len > 0))
596 left = do_cmd_buffer(mem, count);
598 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word write to the data port: appends the word to gpu.cmd_buffer.
// The word is converted with HTOLE32, matching the LE32TOH conversions the
// command parser applies when reading the buffer.
601 void GPUwriteData(uint32_t data)
603 log_io("gpu_write %08x\n", data);
604 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
// Buffer has reached CMD_BUFFER_LEN entries (the statement run in that
// case is not visible here).
605 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walks a linked list of command packets in emulated RAM and executes each
// packet's payload.
//   rambase:       host pointer to the start of emulated RAM.
//   start_addr:    address of the first packet.
//   progress_addr: receives the next address (see below).
// Each packet starts with a header word: top byte is the payload length in
// words, low 24 bits are the address of the next packet. The walk ends
// when bit 23 of the next address is set.
609 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
611 uint32_t addr, *list, ld_addr = 0;
612 int len, left, count;
// Prefetch the first packet.
615 preload(rambase + (start_addr & 0x1fffff) / 4);
// Previously queued single-word writes are pending (the statement handling
// them is not visible here).
617 if (unlikely(gpu.cmd_len > 0))
620 log_io("gpu_dma_chain\n");
621 addr = start_addr & 0xffffff;
622 for (count = 0; (addr & 0x800000) == 0; count++)
// Addresses are masked to 21 bits and converted to a word index.
624 list = rambase + (addr & 0x1fffff) / 4;
625 len = LE32TOH(list[0]) >> 24;
626 addr = LE32TOH(list[0]) & 0xffffff;
// Prefetch the next packet while this one is processed.
627 preload(rambase + (addr & 0x1fffff) / 4);
// Cycle accounting: 5 plus one per payload word for this packet.
631 cpu_cycles += 5 + len;
633 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
// Payload starts right after the header word.
636 left = do_cmd_buffer(list + 1, len);
638 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
// NOTE(review): presumably guarded by a check on progress_addr; the guard
// is not visible here.
642 *progress_addr = addr;
// Loop detection: once LD_THRESHOLD packets have been walked, each further
// header gets bit 23 set, so revisiting a marked packet ends the walk.
645 #define LD_THRESHOLD (8*1024)
646 if (count >= LD_THRESHOLD) {
647 if (count == LD_THRESHOLD) {
652 // loop detection marker
653 // (bit23 set causes DMA error on real machine, so
654 // unlikely to be ever set by the game)
655 list[0] |= HTOLE32(0x800000);
660 // remove loop detection markers
// Re-walk the marked packets starting at ld_addr and clear bit 23 again.
661 count -= LD_THRESHOLD + 2;
662 addr = ld_addr & 0x1fffff;
663 while (count-- > 0) {
664 list = rambase + addr / 4;
665 addr = LE32TOH(list[0]) & 0x1fffff;
666 list[0] &= HTOLE32(~0x800000);
// Record when and where this list ran, and its cycle cost.
670 gpu.state.last_list.frame = *gpu.state.frame_count;
671 gpu.state.last_list.hcnt = *gpu.state.hcnt;
672 gpu.state.last_list.cycles = cpu_cycles;
673 gpu.state.last_list.addr = start_addr;
// Block read from the data port: fills mem with up to count words from the
// current VRAM transfer (do_vram_io with is_read = 1).
678 void GPUreadDataMem(uint32_t *mem, int count)
680 log_io("gpu_dma_read %p %d\n", mem, count);
// Previously queued single-word writes are pending (the statement handling
// them is not visible here).
682 if (unlikely(gpu.cmd_len > 0))
686 do_vram_io(mem, count, 1);
// Single-word read from the data port.
// NOTE(review): the initial value of ret, the condition around the VRAM
// read, and the return statement are not visible here.
689 uint32_t GPUreadData(void)
// Previously queued single-word writes are pending (the statement handling
// them is not visible here).
693 if (unlikely(gpu.cmd_len > 0))
// Pull one word of the current VRAM transfer into ret.
699 do_vram_io(&ret, 1, 1);
703 log_io("gpu_read %08x\n", ret);
// Read of the GPU status port.
// NOTE(review): the lines computing and returning ret are not visible here.
707 uint32_t GPUreadStatus(void)
// Previously queued single-word writes are pending (the statement handling
// them is not visible here).
711 if (unlikely(gpu.cmd_len > 0))
715 log_io("gpu_read_status %08x\n", ret);
// Savestate record fields, accessed through struct GPUFreeze * in
// GPUfreeze. GPUfreeze stores gpu.regs at the start of ulControl and
// gpu.ex_regs at index 0xe0, and copies 1024 * 512 * 2 bytes of VRAM into
// psxVRam.
721 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
722 uint32_t ulStatus; // current gpu status
723 uint32_t ulControl[256]; // latest control register values
724 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Saves GPU state into, or restores it from, a GPUFreeze record.
// NOTE(review): the switch on type and its case labels are not visible
// here; the first group of statements copies GPU -> record and the second
// copies record -> GPU.
727 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save: VRAM image (1024 * 512 * 2 bytes), control registers at the start
// of ulControl, ex_regs at index 0xe0, and the status word.
737 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
738 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
739 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
740 freeze->ulStatus = gpu.status;
// Restore: the same four items in the opposite direction.
744 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
745 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
746 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
747 gpu.status = freeze->ulStatus;
// Replay control registers 8 down to 1 through GPUwriteStatus so derived
// state is rebuilt. The cached value is first flipped in bit 0 so that it
// differs from the word being written (the restored value), which keeps
// GPUwriteStatus's same-value check from matching.
749 for (i = 8; i > 0; i--) {
750 gpu.regs[i] ^= 1; // avoid reg change detection
751 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// Push restored ex_regs to the renderer and refresh its caches for the
// whole 1024x512 area.
753 renderer_sync_ecmds(gpu.ex_regs);
754 renderer_update_caches(0, 0, 1024, 512);
// Per-frame update hook: flushes the renderer and notifies it whether a
// new frame is being presented, taking blanking, the dirty flag and
// frameskip state into account.
// NOTE(review): early-return statements between the tests are not visible
// here.
761 void GPUupdateLace(void)
765 renderer_flush_queues();
// Display blanked: on the first blanked update, record the blanked state
// and mark the framebuffer dirty.
767 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
768 if (!gpu.state.blanked) {
770 gpu.state.blanked = 1;
771 gpu.state.fb_dirty = 1;
// Renderer notification with argument 0.
776 renderer_notify_update_lace(0);
// Framebuffer not marked dirty.
778 if (!gpu.state.fb_dirty)
// With frameskip configured and no frame ready, the number of frames since
// the last display-start change is compared against 9; frameskip is then
// switched off and the ready flag consumed.
781 if (gpu.frameskip.set) {
782 if (!gpu.frameskip.frame_ready) {
783 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
785 gpu.frameskip.active = 0;
787 gpu.frameskip.frame_ready = 0;
// Clear the dirty and blanked flags, then notify the renderer with
// argument 1.
791 gpu.state.fb_dirty = 0;
792 gpu.state.blanked = 0;
793 renderer_notify_update_lace(1);
// Vertical-blank hook: decides whether interlaced rendering applies and
// informs the renderer.
//   is_vblank: not used in the visible lines.
//   lcf:       its logical negation is passed to renderer_set_interlace.
796 void GPUvBlank(int is_vblank, int lcf)
// Interlace needs the option enabled plus both the INTERLACE and DHEIGHT
// status bits.
798 int interlace = gpu.state.allow_interlace
799 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
800 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
801 // interlace doesn't look nice on progressive displays,
802 // so we have this "auto" mode here for games that don't read vram
// Auto mode (option value 2): tests whether the last VRAM read is more
// than one frame old (the statement it guards is not visible here).
803 if (gpu.state.allow_interlace == 2
804 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Update the renderer while interlace is on, or when it has just changed.
808 if (interlace || interlace != gpu.state.old_interlace) {
809 gpu.state.old_interlace = interlace;
813 renderer_flush_queues();
814 renderer_set_interlace(interlace, !lcf);
818 #include "../../frontend/plugin_lib.h"
// Receives the frontend's callback/config structure and copies the
// settings this module uses into gpu, then forwards the structure to the
// renderer and video-output layers.
820 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
// Frameskip: the configured count is copied; advice/force/dirty are kept as
// pointers into cbs so later changes by the frontend are seen.
822 gpu.frameskip.set = cbs->frameskip;
823 gpu.frameskip.advice = &cbs->fskip_advice;
824 gpu.frameskip.force = &cbs->fskip_force;
825 gpu.frameskip.dirty = &cbs->fskip_dirty;
826 gpu.frameskip.active = 0;
827 gpu.frameskip.frame_ready = 1;
// Counter pointers supplied by the frontend.
828 gpu.state.hcnt = cbs->gpu_hcnt;
829 gpu.state.frame_count = cbs->gpu_frame_count;
830 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
831 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
833 gpu.useDithering = cbs->gpu_neon.allow_dithering;
// Memory mapping callbacks used for VRAM in GPULIB_USE_MMAP builds.
834 gpu.mmap = cbs->mmap;
835 gpu.munmap = cbs->munmap;
// VRAM not set up yet (the statement run in that case is not visible
// here).
838 if (gpu.vram == NULL)
// Hand the VRAM pointer to the frontend when it provides the hook.
841 if (cbs->pl_vout_set_raw_vram)
842 cbs->pl_vout_set_raw_vram(gpu.vram);
843 renderer_set_config(cbs);
844 vout_set_config(cbs);
847 // vim:shiftwidth=2:expandtab