2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
// Element count of a real array (x must be an array, not a pointer).
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: marks condition x as expected to be false.
19 #define unlikely(x) __builtin_expect((x), 0)
// Short alias for the compiler's prefetch builtin.
20 #define preload __builtin_prefetch
// Short alias for the attribute that prevents inlining of a function.
21 #define noinline __attribute__((noinline))
// printf-style logger; every message is prefixed with the values found
// behind the gpu.state.frame_count and gpu.state.hcnt pointers.
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Enable the next line to route I/O tracing to gpu_log.
31 //#define log_io gpu_log
// Enable the next line to route anomaly reports to gpu_log.
33 //#define log_anomaly gpu_log
// With the line above disabled, log_anomaly() expands to nothing.
34 #define log_anomaly(...)
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
// Drains in-flight work for a command reset: words still buffered in
// gpu.cmd_buffer are pushed through do_cmd_buffer(), and a VRAM transfer
// that still has rows left (gpu.dma.h > 0) is closed with
// finish_vram_transfer().
41 static noinline void do_cmd_reset(void)
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// the direction flag comes from gpu.dma_start, the copy of the transfer
// parameters taken when the transfer was started
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
// Puts the GPU state into its reset defaults: gpu.regs is zeroed,
// gpu.ex_regs is reloaded, and the status word and screen size are set.
54 static noinline void do_reset(void)
60 memset(gpu.regs, 0, sizeof(gpu.regs));
// each ex_regs slot starts out holding only its own command byte
// (0xe0 + index) in bits 24-31
61 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
62 gpu.ex_regs[i] = (0xe0 + i) << 24;
63 gpu.status = 0x14802000;
// resolution and visible size both default to 256x240
66 gpu.screen.hres = gpu.screen.w = 256;
67 gpu.screen.vres = gpu.screen.h = 240;
// Recomputes gpu.screen.w from the horizontal display range (x2 - x1).
70 static noinline void update_width(void)
72 int sw = gpu.screen.x2 - gpu.screen.x1;
// an empty or oversized range (<= 0 or >= 2560) selects the full hres
73 if (sw <= 0 || sw >= 2560)
75 gpu.screen.w = gpu.screen.hres;
// NOTE(review): presumably the else branch (the `else` line itself is not
// part of this listing); scales hres by sw / 2560
77 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Works on the vertical display range sh = y2 - y1.
// NOTE(review): the statements that adjust and store `sh` are not visible
// in this listing; only the two conditions below can be described.
80 static noinline void update_height(void)
82 // TODO: emulate this properly..
83 int sh = gpu.screen.y2 - gpu.screen.y1;
// tests the double-height bit of the status word
84 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
// catches a non-positive range or one taller than the vertical resolution
86 if (sh <= 0 || sh > gpu.screen.vres)
// Chooses whether frame skipping is active (gpu.frameskip.active) and,
// when it is not, executes a fill command that was stored in
// gpu.frameskip.pending_fill.
92 static noinline void decide_frameskip(void)
// raise the flag behind the gpu.frameskip.dirty pointer
94 *gpu.frameskip.dirty = 1;
96 if (gpu.frameskip.active)
99 gpu.frameskip.cnt = 0;
100 gpu.frameskip.frame_ready = 1;
// activation rules, tested in this order: forced skip, advice while not
// already skipping, configured skip count (set) not yet reached by cnt
103 if (*gpu.frameskip.force)
104 gpu.frameskip.active = 1;
105 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
106 gpu.frameskip.active = 1;
107 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
108 gpu.frameskip.active = 1;
110 gpu.frameskip.active = 0;
// pending_fill[0] != 0 marks a stored 3-word command; run it through
// do_cmd_list() and clear the slot
112 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
114 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
115 gpu.frameskip.pending_fill[0] = 0;
// Updates gpu.frameskip.allow from an e3 command word and returns it.
// cmd_e3: command word; bits 0-9 are read as x and bits 10-19 as y.
// Returns non-zero when the interlace status bit is set or when (x, y)
// lies outside the rectangle described by gpu.screen x/y/w/h.
119 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
121 // no frameskip if it decides to draw to display area,
122 // but not for interlace since it'll most likely always do that
123 uint32_t x = cmd_e3 & 0x3ff;
124 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned subtract-and-compare: a coordinate below the origin wraps to a
// huge value, so one comparison per axis covers both "before" and "past"
125 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
126 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
127 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
128 return gpu.frameskip.allow;
// Loads gpu.gp0 with a value chosen by the low 4 bits of `data`.
// NOTE(review): the case labels of the switch are not visible in this
// listing, so which selector maps to which assignment cannot be stated.
131 static noinline void get_gpu_info(uint32_t data)
133 switch (data & 0x0f) {
// low 20 bits of the ex_reg picked by the low 3 bits of data
137 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// low 22 bits of ex_regs[5]
140 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
151 // double, for overdraw guard
// 1024x512 pixels at 2 bytes each, doubled, plus 4096 extra bytes
152 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
154 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
155 // renderer/downscaler it uses in high res modes:
157 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
158 // fills. (Will change this value if it ever gets large page support)
// NOTE(review): two VRAM_ALIGN values follow; the preprocessor conditional
// that selects between them is not visible in this listing.
159 #define VRAM_ALIGN 8192
161 #define VRAM_ALIGN 16
164 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
165 static uint16_t *vram_ptr_orig = NULL;
// VRAM setup for GPULIB_USE_MMAP builds: the buffer is requested through
// the gpu.mmap callback with VRAM_ALIGN-1 spare bytes for alignment.
// The unmodified pointer is kept in vram_ptr_orig; gpu.vram is the
// adjusted pointer the rest of the code uses.
167 #ifdef GPULIB_USE_MMAP
168 static int map_vram(void)
170 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
171 if (gpu.vram != NULL) {
172 // 4kb guard in front
// the pointer is uint16_t*, so 4096 bytes is 4096 / 2 elements
173 gpu.vram += (4096 / 2);
// round the address up to the next VRAM_ALIGN boundary
175 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
179 fprintf(stderr, "could not map vram, expect crashes\n");
// Variant of map_vram() that takes zero-filled memory from calloc().
// NOTE(review): presumably the non-mmap branch of the conditional opened
// above; the #else line is not visible in this listing.
184 static int map_vram(void)
186 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
187 if (gpu.vram != NULL) {
188 // 4kb guard in front
// the pointer is uint16_t*, so 4096 bytes is 4096 / 2 elements
189 gpu.vram += (4096 / 2);
// round the address up to the next VRAM_ALIGN boundary
191 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
194 fprintf(stderr, "could not allocate vram, expect crashes\n");
// Allocates zero-filled VRAM with calloc(), keeps the raw pointer in
// vram_ptr_orig and stores the guard-offset, aligned pointer in gpu.vram.
// The caller treats a non-zero return value as failure.
199 static int allocate_vram(void)
201 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
202 if (gpu.vram != NULL) {
203 // 4kb guard in front
// the pointer is uint16_t*, so 4096 bytes is 4096 / 2 elements
204 gpu.vram += (4096 / 2);
// round the address up to the next VRAM_ALIGN boundary
206 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
209 fprintf(stderr, "could not allocate vram, expect crashes\n");
// NOTE(review): the header of the enclosing function is not visible in
// this listing (presumably the plugin's init entry point).
217 #ifndef GPULIB_USE_MMAP
// without mmap support, VRAM is allocated here if it does not exist yet
218 if (gpu.vram == NULL) {
219 if (allocate_vram() != 0) {
220 printf("ERROR: could not allocate VRAM, exiting..\n");
226 //extern uint32_t hSyncCount; // in psxcounters.cpp
227 //extern uint32_t frame_counter; // in psxcounters.cpp
228 //gpu.state.hcnt = &hSyncCount;
229 //gpu.state.frame_count = &frame_counter;
// the renderer's init result is OR-ed into ret
233 ret |= renderer_init();
// both counter pointers are aimed at gpu.zero, presumably so they are
// always safe to dereference until real counters are supplied
235 gpu.state.frame_count = &gpu.zero;
236 gpu.state.hcnt = &gpu.zero;
237 gpu.frameskip.active = 0;
241 /*if (gpu.mmap != NULL) {
// Releases the VRAM buffer and clears both VRAM pointers.
// NOTE(review): this listing omits some lines of the function; they are
// left as they are, only the munmap length below is corrected.
248 long GPUshutdown(void)
// release through vram_ptr_orig (the address the allocator returned), not
// through gpu.vram, which was offset and aligned after allocation
255 if (vram_ptr_orig != NULL) {
256 #ifdef GPULIB_USE_MMAP
// the length must match what map_vram() requested from gpu.mmap, which
// includes the VRAM_ALIGN-1 bytes of alignment slack
257 gpu.munmap(vram_ptr_orig, VRAM_SIZE + (VRAM_ALIGN-1));
262 vram_ptr_orig = gpu.vram = NULL;
// Handles a write to the GPU control port. The top byte of `data`
// (cmd = data >> 24) selects the command.
// NOTE(review): the switch/case labels are not visible in this listing,
// so the notes below describe each visible statement group on its own.
267 void GPUwriteStatus(uint32_t data)
269 //senquack TODO: Would it be wise to add cmd buffer flush here, since
270 // status settings can affect commands already in buffer?
// resolution lookup tables, indexed from status bits further down
272 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
273 static const short vres[4] = { 240, 480, 256, 480 };
274 uint32_t cmd = data >> 24;
// commands that fit in gpu.regs are cached there; a rewrite of an
// identical value is detected for every command except 0, 1 and 5
276 if (cmd < ARRAY_SIZE(gpu.regs)) {
277 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
279 gpu.regs[cmd] = data;
282 gpu.state.fb_dirty = 1;
// blanking bit of the status word: set / clear
293 gpu.status |= PSX_GPU_STATUS_BLANKING;
295 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// the low 2 bits of data replace the DMA field of the status word
298 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
299 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// screen origin: x from bits 0-9, y from bits 10-18
302 gpu.screen.x = data & 0x3ff;
303 gpu.screen.y = (data >> 10) & 0x1ff;
304 if (gpu.frameskip.set) {
305 decide_frameskip_allow(gpu.ex_regs[3]);
// acts only when the frame counter differs from the last recorded flip
306 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
308 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal range: two 12-bit fields
313 gpu.screen.x1 = data & 0xfff;
314 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical range: two 10-bit fields
318 gpu.screen.y1 = data & 0x3ff;
319 gpu.screen.y2 = (data >> 10) & 0x3ff;
// data bits 0-5 go to status bits 17-22 and data bit 6 to status bit 16;
// hres is then indexed by status bits 16-18 and vres by bits 19-20
323 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
324 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
325 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
328 renderer_notify_res_change();
// matches commands 0x10-0x1f
331 if ((cmd & 0xf0) == 0x10)
// optional hook, compiled in only when GPUwriteStatus_ext is defined
336 #ifdef GPUwriteStatus_ext
337 GPUwriteStatus_ext(data);
// Number of parameter words that follow each command word, indexed by the
// command byte (top 8 bits of the word). Users of the table compute the
// full command length as 1 + cmd_lengths[cmd].
// Each row covers 16 command codes; the trailing comments give the first
// code (hex) of every other row.
341 const unsigned char cmd_lengths[256] =
343 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
346 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
347 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
348 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
349 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
350 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
351 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
353 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
355 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16-bit VRAM pixel at column x, row y (rows are 1024
// pixels apart). The expansion is fully parenthesized so that postfix
// operators applied to the result, e.g. VRAM_MEM_XY(x, y)[n], bind to the
// pointer rather than to the array element inside the macro.
#define VRAM_MEM_XY(x, y) (&gpu.vram[(y) * 1024 + (x)])
// Copies one run of l 16-bit pixels between the caller's buffer `mem` and
// VRAM at (x, y). Both copy directions appear below.
// NOTE(review): the test that chooses between them is not visible in this
// listing; presumably is_read selects the VRAM -> mem copy.
363 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
365 uint16_t *vram = VRAM_MEM_XY(x, y);
// VRAM -> mem (l pixels = l * 2 bytes)
367 memcpy(mem, vram, l * 2);
// mem -> VRAM
369 memcpy(vram, mem, l * 2);
// Moves pixel data for the VRAM transfer described by gpu.dma.
// data/count: buffer and its length in 32-bit words.
// is_read: forwarded to do_vram_line() and finish_vram_transfer().
// Returns count_initial minus half of the remaining 16-bit count, i.e. a
// number of 32-bit words.
372 static int do_vram_io(uint32_t *data, int count, int is_read)
374 int count_initial = count;
375 uint16_t *sdata = (uint16_t *)data;
376 int x = gpu.dma.x, y = gpu.dma.y;
377 int w = gpu.dma.w, h = gpu.dma.h;
378 int o = gpu.dma.offset;
380 count *= 2; // operate in 16bpp pixels
// non-zero offset: presumably an earlier call stopped part-way through a
// row; l is what is left of that row
384 if (gpu.dma.offset) {
385 l = w - gpu.dma.offset;
389 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows, while rows remain and the input still holds a full row
402 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
404 do_vram_line(x, y, sdata, w, is_read);
// a final run of `count` pixels
410 do_vram_line(x, y, sdata, count, is_read);
416 finish_vram_transfer(is_read);
421 return count_initial - count / 2;
// Sets up a VRAM transfer from the two parameter words of a transfer
// command.
// pos_word: x in bits 0-9, y in bits 16-24.
// size_word: width from the low bits, height from bits 16 and up; the
// "((v - 1) & mask) + 1" form maps an encoded 0 to the maximum (1024 wide,
// 512 high).
// is_read: stored in gpu.dma.is_read.
424 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
427 log_anomaly("start_vram_transfer while old unfinished\n");
429 gpu.dma.x = pos_word & 0x3ff;
430 gpu.dma.y = (pos_word >> 16) & 0x1ff;
431 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
432 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
434 gpu.dma.is_read = is_read;
// keep a copy of the initial parameters; finish_vram_transfer() reports
// this rectangle to the renderer
435 gpu.dma_start = gpu.dma;
437 renderer_flush_queues();
439 gpu.status |= PSX_GPU_STATUS_IMG;
440 // XXX: wrong for width 1
// a 32-bit load at the transfer origin, i.e. two adjacent 16-bit pixels
441 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
442 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
445 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
446 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Ends a VRAM transfer: the IMG status bit is cleared and the renderer is
// given the rectangle recorded in gpu.dma_start.
// NOTE(review): the conditions on is_read guarding these statements are
// not visible in this listing.
449 static void finish_vram_transfer(int is_read)
452 gpu.status &= ~PSX_GPU_STATUS_IMG;
454 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
455 gpu.dma_start.w, gpu.dma_start.h);
// Walks a command list while frames are being skipped, keeping
// gpu.ex_regs and the pending fill up to date.
// data/count: command words and how many are available.
// NOTE(review): the switch/case labels, the `skip` declaration and the
// return statement are not visible in this listing.
458 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
460 int cmd = 0, pos = 0, len, dummy, v;
// any previously stored fill is dropped first
463 gpu.frameskip.pending_fill[0] = 0;
465 while (pos < count && skip) {
466 uint32_t *list = data + pos;
// command byte is the top 8 bits; the table gives the parameter count
467 cmd = LE32TOH(list[0]) >> 24;
468 len = 1 + cmd_lengths[cmd];
// size word exceeds the visible screen in width or height
472 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
473 // clearing something large, don't skip
474 do_cmd_list(list, 3, &dummy);
// presumably the else branch: store the 3-word command in pending_fill
// (decide_frameskip() executes it later)
476 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// refresh the low 9 bits of ex_regs[1] from parameter word 4 or 5
// (picked by bit 4 of cmd)
482 gpu.ex_regs[1] &= ~0x1ff;
483 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// scan forward, one word per step, for a word matching 0x5xxx5xxx
486 for (v = 3; pos + v < count; v++)
488 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// same scan, two words per step
494 for (v = 4; pos + v < count; v += 2)
496 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
503 skip = decide_frameskip_allow(LE32TOH(list[0]));
// commands e0-e7 are mirrored into gpu.ex_regs
504 if ((cmd & 0xf8) == 0xe0)
505 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
509 if (pos + len > count) {
511 break; // incomplete cmd
// a0-df is the VRAM write/read command range (see do_cmd_buffer)
513 if (0xa0 <= cmd && cmd <= 0xdf)
519 renderer_sync_ecmds(gpu.ex_regs);
// Processes up to `count` command words from `data`, dispatching between
// an in-progress VRAM write, VRAM transfer setup and the command-list
// handlers. Callers treat the return value as the number of words that
// were left unprocessed.
524 static noinline int do_cmd_buffer(uint32_t *data, int count)
// remembered so a change to ex_regs[3] can be detected at the end
527 uint32_t old_e3 = gpu.ex_regs[3];
531 for (pos = 0; pos < count; )
// a VRAM write is in progress: incoming words are pixel data
533 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
535 pos += do_vram_io(data + pos, count - pos, 0);
540 cmd = LE32TOH(data[pos]) >> 24;
541 if (0xa0 <= cmd && cmd <= 0xdf) {
542 if (unlikely((pos+2) >= count)) {
543 // incomplete vram write/read cmd, can't consume yet
548 // consume vram write/read cmd
// the last argument (is_read) is 1 for commands 0xc0-0xdf
549 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
554 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
555 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
556 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
558 pos += do_cmd_list(data + pos, count - pos, &cmd);
// rebuild the low 13 status bits: bits 0-10 from ex_regs[1], bits 11-12
// from the low 2 bits of ex_regs[6]
567 gpu.status &= ~0x1fff;
568 gpu.status |= gpu.ex_regs[1] & 0x7ff;
569 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
571 gpu.state.fb_dirty |= vram_dirty;
573 if (old_e3 != gpu.ex_regs[3])
574 decide_frameskip_allow(gpu.ex_regs[3]);
// Runs the buffered command words and moves the unprocessed tail
// (`left` words, 4 bytes each) to the front of gpu.cmd_buffer.
579 static void flush_cmd_buffer(void)
581 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// source and destination may overlap, hence memmove
583 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk data-port write: passes `count` words at `mem` to do_cmd_buffer().
587 void GPUwriteDataMem(uint32_t *mem, int count)
591 log_io("gpu_dma_write %p %d\n", mem, count);
// NOTE(review): the statement guarded by this check is not visible in
// this listing; presumably earlier buffered words are flushed first
593 if (unlikely(gpu.cmd_len > 0))
596 left = do_cmd_buffer(mem, count);
// words do_cmd_buffer() did not take are only reported, not kept
598 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word data-port write: the word is converted with HTOLE32 and
// appended to gpu.cmd_buffer.
601 void GPUwriteData(uint32_t data)
603 log_io("gpu_write %08x\n", data);
604 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
// buffer has reached CMD_BUFFER_LEN entries
605 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walks a linked list of command packets in emulated RAM and feeds each
// packet's payload to do_cmd_buffer().
// rambase: RAM base as 32-bit words; start_addr: address of first packet.
// Each packet header holds the payload length in its top 8 bits and the
// next address in its low 24 bits; the walk stops once bit 23 of the
// address is set.
// NOTE(review): the return statement is not visible in this listing.
609 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
611 uint32_t addr, *list, ld_addr = 0;
612 int len, left, count;
615 preload(rambase + (start_addr & 0x1fffff) / 4);
617 if (unlikely(gpu.cmd_len > 0))
620 log_io("gpu_dma_chain\n");
621 addr = start_addr & 0xffffff;
622 for (count = 0; (addr & 0x800000) == 0; count++)
// addresses are masked to 0x1fffff before indexing rambase
624 list = rambase + (addr & 0x1fffff) / 4;
625 len = LE32TOH(list[0]) >> 24;
626 addr = LE32TOH(list[0]) & 0xffffff;
// prefetch the next packet while this one is processed
627 preload(rambase + (addr & 0x1fffff) / 4);
// running cycle estimate: 5 per packet plus its length
631 cpu_cycles += 5 + len;
633 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
636 left = do_cmd_buffer(list + 1, len);
638 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
641 #define LD_THRESHOLD (8*1024)
// once LD_THRESHOLD packets have been walked, visited headers get bit 23
// set so that a cyclic list hits the loop's exit condition
642 if (count >= LD_THRESHOLD) {
643 if (count == LD_THRESHOLD) {
648 // loop detection marker
649 // (bit23 set causes DMA error on real machine, so
650 // unlikely to be ever set by the game)
651 list[0] |= HTOLE32(0x800000);
656 // remove loop detection markers
657 count -= LD_THRESHOLD + 2;
658 addr = ld_addr & 0x1fffff;
659 while (count-- > 0) {
660 list = rambase + addr / 4;
661 addr = LE32TOH(list[0]) & 0x1fffff;
662 list[0] &= HTOLE32(~0x800000);
// record when, at what cost and from where this list was run
666 gpu.state.last_list.frame = *gpu.state.frame_count;
667 gpu.state.last_list.hcnt = *gpu.state.hcnt;
668 gpu.state.last_list.cycles = cpu_cycles;
669 gpu.state.last_list.addr = start_addr;
// Bulk data-port read: calls do_vram_io() on `mem`/`count` in read mode
// (is_read = 1).
// NOTE(review): any condition guarding the do_vram_io() call, and the
// statement under the cmd_len check, are not visible in this listing.
674 void GPUreadDataMem(uint32_t *mem, int count)
676 log_io("gpu_dma_read %p %d\n", mem, count);
678 if (unlikely(gpu.cmd_len > 0))
682 do_vram_io(mem, count, 1);
// Single-word data-port read.
// NOTE(review): the declaration of `ret`, the return statement and any
// guards around the calls below are not visible in this listing.
685 uint32_t GPUreadData(void)
689 if (unlikely(gpu.cmd_len > 0))
// one 32-bit word is handled through do_vram_io() in read mode, using
// ret as the buffer
695 do_vram_io(&ret, 1, 1);
699 log_io("gpu_read %08x\n", ret);
// Status-port read; `ret` is the value that gets logged.
// NOTE(review): how `ret` is formed and returned is not visible in this
// listing.
703 uint32_t GPUreadStatus(void)
707 if (unlikely(gpu.cmd_len > 0))
711 log_io("gpu_read_status %08x\n", ret);
// Fields of the save-state record that GPUfreeze() reads and writes
// through its `struct GPUFreeze *` argument.
// NOTE(review): the struct header line is not visible in this listing.
717 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
718 uint32_t ulStatus; // current gpu status
719 uint32_t ulControl[256]; // latest control register values
720 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Saves GPU state into *freeze or restores it from there.
// NOTE(review): the switch on `type` and the return statement are not
// visible in this listing; the first group below copies state out to
// *freeze and the second copies it back.
723 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// save: 1024*512*2 bytes of VRAM, regs at ulControl[0...], ex_regs at
// ulControl[0xe0...], and the status word
733 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
734 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
735 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
736 freeze->ulStatus = gpu.status;
// load: the same copies in the opposite direction
740 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
741 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
742 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
743 gpu.status = freeze->ulStatus;
// replay control registers 8 down to 1 through GPUwriteStatus(); the
// stored copy is flipped in bit 0 first so the value written (the
// original) differs from it and is not treated as an unchanged repeat
745 for (i = 8; i > 0; i--) {
746 gpu.regs[i] ^= 1; // avoid reg change detection
747 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
749 renderer_sync_ecmds(gpu.ex_regs);
// the whole 1024x512 area is reported to the renderer
750 renderer_update_caches(0, 0, 1024, 512);
// Display-update hook: flushes the renderer and notifies it of the update
// with 0 or 1 (see the two renderer_notify_update_lace() calls).
// NOTE(review): the early returns and some calls are not visible in this
// listing.
757 void GPUupdateLace(void)
761 renderer_flush_queues();
// display blanked: the first time this is seen, record it and mark the
// framebuffer dirty
763 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
764 if (!gpu.state.blanked) {
766 gpu.state.blanked = 1;
767 gpu.state.fb_dirty = 1;
772 renderer_notify_update_lace(0);
// framebuffer not marked dirty
774 if (!gpu.state.fb_dirty)
777 if (gpu.frameskip.set) {
778 if (!gpu.frameskip.frame_ready) {
// fewer than 9 frames since the last recorded flip
779 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
781 gpu.frameskip.active = 0;
783 gpu.frameskip.frame_ready = 0;
787 gpu.state.fb_dirty = 0;
788 gpu.state.blanked = 0;
789 renderer_notify_update_lace(1);
// Vblank hook: works out whether interlaced rendering applies and passes
// the result, together with !lcf, to renderer_set_interlace().
// interlace requires the allow_interlace setting plus both the INTERLACE
// and DHEIGHT status bits.
792 void GPUvBlank(int is_vblank, int lcf)
794 int interlace = gpu.state.allow_interlace
795 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
796 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
797 // interlace doesn't look nice on progressive displays,
798 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 is that auto mode: the test is "more than one
// frame since the last VRAM read"
799 if (gpu.state.allow_interlace == 2
800 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// act while interlace is on, or when its value has just changed
804 if (interlace || interlace != gpu.state.old_interlace) {
805 gpu.state.old_interlace = interlace;
809 renderer_flush_queues();
810 renderer_set_interlace(interlace, !lcf);
814 #include "../../frontend/plugin_lib.h"
// Copies frontend settings and callback pointers from *cbs into the gpu
// state, then forwards the config to the renderer and video-out layers.
816 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
818 gpu.frameskip.set = cbs->frameskip;
// advice/force/dirty are stored as pointers into *cbs, so they are read
// (and dirty is written) through the frontend's own fields
819 gpu.frameskip.advice = &cbs->fskip_advice;
820 gpu.frameskip.force = &cbs->fskip_force;
821 gpu.frameskip.dirty = &cbs->fskip_dirty;
822 gpu.frameskip.active = 0;
823 gpu.frameskip.frame_ready = 1;
// the counter pointers now refer to the frontend's counters
824 gpu.state.hcnt = cbs->gpu_hcnt;
825 gpu.state.frame_count = cbs->gpu_frame_count;
826 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
827 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
829 gpu.useDithering = cbs->gpu_neon.allow_dithering;
// memory-mapping callbacks used by map_vram() / GPUshutdown()
830 gpu.mmap = cbs->mmap;
831 gpu.munmap = cbs->munmap;
// NOTE(review): the statement guarded by this check is not visible in
// this listing
834 if (gpu.vram == NULL)
// the frontend is given the VRAM pointer when it provides this callback
837 if (cbs->pl_vout_set_raw_vram)
838 cbs->pl_vout_set_raw_vram(gpu.vram);
839 renderer_set_config(cbs);
840 vout_set_config(cbs);
843 // vim:shiftwidth=2:expandtab