2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
34 #define log_anomaly(...)
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
41 static noinline void do_cmd_reset(void)
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
54 static noinline void do_reset(void)
60 memset(gpu.regs, 0, sizeof(gpu.regs));
61 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
62 gpu.ex_regs[i] = (0xe0 + i) << 24;
63 gpu.status = 0x14802000;
66 gpu.screen.hres = gpu.screen.w = 256;
67 gpu.screen.vres = gpu.screen.h = 240;
70 static noinline void update_width(void)
72 int sw = gpu.screen.x2 - gpu.screen.x1;
73 if (sw <= 0 || sw >= 2560)
75 gpu.screen.w = gpu.screen.hres;
77 gpu.screen.w = sw * gpu.screen.hres / 2560;
80 static noinline void update_height(void)
82 // TODO: emulate this properly..
83 int sh = gpu.screen.y2 - gpu.screen.y1;
84 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
86 if (sh <= 0 || sh > gpu.screen.vres)
92 static noinline void decide_frameskip(void)
94 *gpu.frameskip.dirty = 1;
96 if (gpu.frameskip.active)
99 gpu.frameskip.cnt = 0;
100 gpu.frameskip.frame_ready = 1;
103 if (*gpu.frameskip.force)
104 gpu.frameskip.active = 1;
105 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
106 gpu.frameskip.active = 1;
107 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
108 gpu.frameskip.active = 1;
110 gpu.frameskip.active = 0;
112 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
114 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
115 gpu.frameskip.pending_fill[0] = 0;
119 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
121 // no frameskip if it decides to draw to display area,
122 // but not for interlace since it'll most likely always do that
123 uint32_t x = cmd_e3 & 0x3ff;
124 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
125 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
126 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
127 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
128 return gpu.frameskip.allow;
131 static noinline void get_gpu_info(uint32_t data)
133 switch (data & 0x0f) {
137 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
140 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
151 // double, for overdraw guard
152 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
154 // Minimum 16-byte VRAM alignment needed by the pixel-skipping
155 // renderer/downscaler that gpu_unai uses in high-res modes:
157 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
158 // fills. (Will change this value if it ever gets large page support)
159 #define VRAM_ALIGN 8192
161 #define VRAM_ALIGN 16
164 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
165 static uint16_t *vram_ptr_orig = NULL;
167 #ifdef GPULIB_USE_MMAP
168 static int map_vram(void)
170 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
171 if (gpu.vram != NULL) {
172 // 4kb guard in front
173 gpu.vram += (4096 / 2);
175 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
179 fprintf(stderr, "could not map vram, expect crashes\n");
184 static int map_vram(void)
186 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
187 if (gpu.vram != NULL) {
188 // 4kb guard in front
189 gpu.vram += (4096 / 2);
191 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
194 fprintf(stderr, "could not allocate vram, expect crashes\n");
199 static int allocate_vram(void)
201 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
202 if (gpu.vram != NULL) {
203 // 4kb guard in front
204 gpu.vram += (4096 / 2);
206 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
209 fprintf(stderr, "could not allocate vram, expect crashes\n");
217 #ifndef GPULIB_USE_MMAP
218 if (gpu.vram == NULL) {
219 if (allocate_vram() != 0) {
220 printf("ERROR: could not allocate VRAM, exiting..\n");
226 //extern uint32_t hSyncCount; // in psxcounters.cpp
227 //extern uint32_t frame_counter; // in psxcounters.cpp
228 //gpu.state.hcnt = &hSyncCount;
229 //gpu.state.frame_count = &frame_counter;
233 ret |= renderer_init();
235 gpu.state.frame_count = &gpu.zero;
236 gpu.state.hcnt = &gpu.zero;
237 gpu.frameskip.active = 0;
241 /*if (gpu.mmap != NULL) {
248 long GPUshutdown(void)
255 if (vram_ptr_orig != NULL) {
256 #ifdef GPULIB_USE_MMAP
257 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
262 vram_ptr_orig = gpu.vram = NULL;
267 void GPUwriteStatus(uint32_t data)
269 //senquack TODO: Would it be wise to add cmd buffer flush here, since
270 // status settings can affect commands already in buffer?
272 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
273 static const short vres[4] = { 240, 480, 256, 480 };
274 uint32_t cmd = data >> 24;
276 if (cmd < ARRAY_SIZE(gpu.regs)) {
277 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
279 gpu.regs[cmd] = data;
282 gpu.state.fb_dirty = 1;
293 gpu.status |= PSX_GPU_STATUS_BLANKING;
295 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
298 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
299 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
302 gpu.screen.x = data & 0x3ff;
303 gpu.screen.y = (data >> 10) & 0x1ff;
304 if (gpu.frameskip.set) {
305 decide_frameskip_allow(gpu.ex_regs[3]);
306 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
308 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
313 gpu.screen.x1 = data & 0xfff;
314 gpu.screen.x2 = (data >> 12) & 0xfff;
318 gpu.screen.y1 = data & 0x3ff;
319 gpu.screen.y2 = (data >> 10) & 0x3ff;
323 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
324 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
325 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
328 renderer_notify_res_change();
331 if ((cmd & 0xf0) == 0x10)
336 #ifdef GPUwriteStatus_ext
337 GPUwriteStatus_ext(data);
// Number of extra argument words that follow the command word for each
// GP0 command byte (0 = single-word command or unused opcode).
// NOTE(review): array delimiter lines restored from a garbled
// extraction; the values themselves are unchanged.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
361 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
/* Copy one horizontal run of l 16bpp pixels between VRAM at (x, y) and
 * the caller's buffer mem; is_read selects the direction.
 * NOTE(review): the if/else direction split is reconstructed from the
 * two memcpy lines of a garbled extraction — confirm against upstream. */
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);  // VRAM -> mem
  else
    memcpy(vram, mem, l * 2);  // mem -> VRAM
}
372 static int do_vram_io(uint32_t *data, int count, int is_read)
374 int count_initial = count;
375 uint16_t *sdata = (uint16_t *)data;
376 int x = gpu.dma.x, y = gpu.dma.y;
377 int w = gpu.dma.w, h = gpu.dma.h;
378 int o = gpu.dma.offset;
380 count *= 2; // operate in 16bpp pixels
384 if (gpu.dma.offset) {
385 l = w - gpu.dma.offset;
389 do_vram_line(x + o, y, sdata, l, is_read);
402 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
404 do_vram_line(x, y, sdata, w, is_read);
410 do_vram_line(x, y, sdata, count, is_read);
416 finish_vram_transfer(is_read);
421 return count_initial - count / 2;
424 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
427 log_anomaly("start_vram_transfer while old unfinished\n");
429 gpu.dma.x = pos_word & 0x3ff;
430 gpu.dma.y = (pos_word >> 16) & 0x1ff;
431 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
432 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
434 gpu.dma.is_read = is_read;
435 gpu.dma_start = gpu.dma;
437 renderer_flush_queues();
439 gpu.status |= PSX_GPU_STATUS_IMG;
440 // XXX: wrong for width 1
441 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
442 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
445 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
446 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
449 static void finish_vram_transfer(int is_read)
452 gpu.status &= ~PSX_GPU_STATUS_IMG;
454 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
455 gpu.dma_start.w, gpu.dma_start.h);
458 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
460 int cmd = 0, pos = 0, len, dummy, v;
463 gpu.frameskip.pending_fill[0] = 0;
465 while (pos < count && skip) {
466 uint32_t *list = data + pos;
467 cmd = LE32TOH(list[0]) >> 24;
468 len = 1 + cmd_lengths[cmd];
472 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
473 // clearing something large, don't skip
474 do_cmd_list(list, 3, &dummy);
476 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
482 gpu.ex_regs[1] &= ~0x1ff;
483 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
486 for (v = 3; pos + v < count; v++)
488 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
494 for (v = 4; pos + v < count; v += 2)
496 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
503 skip = decide_frameskip_allow(LE32TOH(list[0]));
504 if ((cmd & 0xf8) == 0xe0)
505 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
509 if (pos + len > count) {
511 break; // incomplete cmd
513 if (0xa0 <= cmd && cmd <= 0xdf)
519 renderer_sync_ecmds(gpu.ex_regs);
524 static noinline int do_cmd_buffer(uint32_t *data, int count)
527 uint32_t old_e3 = gpu.ex_regs[3];
531 for (pos = 0; pos < count; )
533 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
535 pos += do_vram_io(data + pos, count - pos, 0);
540 cmd = LE32TOH(data[pos]) >> 24;
541 if (0xa0 <= cmd && cmd <= 0xdf) {
542 if (unlikely((pos+2) >= count)) {
543 // incomplete vram write/read cmd, can't consume yet
548 // consume vram write/read cmd
549 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
554 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
555 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
556 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
558 pos += do_cmd_list(data + pos, count - pos, &cmd);
567 gpu.status &= ~0x1fff;
568 gpu.status |= gpu.ex_regs[1] & 0x7ff;
569 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
571 gpu.state.fb_dirty |= vram_dirty;
573 if (old_e3 != gpu.ex_regs[3])
574 decide_frameskip_allow(gpu.ex_regs[3]);
579 static void flush_cmd_buffer(void)
581 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
583 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
587 void GPUwriteDataMem(uint32_t *mem, int count)
591 log_io("gpu_dma_write %p %d\n", mem, count);
593 if (unlikely(gpu.cmd_len > 0))
596 left = do_cmd_buffer(mem, count);
598 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
601 void GPUwriteData(uint32_t data)
603 log_io("gpu_write %08x\n", data);
604 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
605 if (gpu.cmd_len >= CMD_BUFFER_LEN)
609 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
611 uint32_t addr, *list, ld_addr = 0;
612 int len, left, count;
615 preload(rambase + (start_addr & 0x1fffff) / 4);
617 if (unlikely(gpu.cmd_len > 0))
620 log_io("gpu_dma_chain\n");
621 addr = start_addr & 0xffffff;
622 for (count = 0; (addr & 0x800000) == 0; count++)
624 list = rambase + (addr & 0x1fffff) / 4;
625 len = LE32TOH(list[0]) >> 24;
626 addr = LE32TOH(list[0]) & 0xffffff;
627 preload(rambase + (addr & 0x1fffff) / 4);
631 cpu_cycles += 5 + len;
633 log_io(".chain %08lx #%d+%d\n",
634 (long)(list - rambase) * 4, len, gpu.cmd_len);
635 if (unlikely(gpu.cmd_len > 0)) {
636 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
643 left = do_cmd_buffer(list + 1, len);
645 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
647 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
652 *progress_addr = addr;
655 #define LD_THRESHOLD (8*1024)
656 if (count >= LD_THRESHOLD) {
657 if (count == LD_THRESHOLD) {
662 // loop detection marker
663 // (bit23 set causes DMA error on real machine, so
664 // unlikely to be ever set by the game)
665 list[0] |= HTOLE32(0x800000);
670 // remove loop detection markers
671 count -= LD_THRESHOLD + 2;
672 addr = ld_addr & 0x1fffff;
673 while (count-- > 0) {
674 list = rambase + addr / 4;
675 addr = LE32TOH(list[0]) & 0x1fffff;
676 list[0] &= HTOLE32(~0x800000);
680 gpu.state.last_list.frame = *gpu.state.frame_count;
681 gpu.state.last_list.hcnt = *gpu.state.hcnt;
682 gpu.state.last_list.cycles = cpu_cycles;
683 gpu.state.last_list.addr = start_addr;
688 void GPUreadDataMem(uint32_t *mem, int count)
690 log_io("gpu_dma_read %p %d\n", mem, count);
692 if (unlikely(gpu.cmd_len > 0))
696 do_vram_io(mem, count, 1);
699 uint32_t GPUreadData(void)
703 if (unlikely(gpu.cmd_len > 0))
709 do_vram_io(&ret, 1, 1);
713 log_io("gpu_read %08x\n", ret);
717 uint32_t GPUreadStatus(void)
721 if (unlikely(gpu.cmd_len > 0))
725 log_io("gpu_read_status %08x\n", ret);
731 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
732 uint32_t ulStatus; // current gpu status
733 uint32_t ulControl[256]; // latest control register values
734 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
737 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
747 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
748 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
749 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
750 freeze->ulStatus = gpu.status;
754 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
755 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
756 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
757 gpu.status = freeze->ulStatus;
759 for (i = 8; i > 0; i--) {
760 gpu.regs[i] ^= 1; // avoid reg change detection
761 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
763 renderer_sync_ecmds(gpu.ex_regs);
764 renderer_update_caches(0, 0, 1024, 512);
771 void GPUupdateLace(void)
775 renderer_flush_queues();
777 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
778 if (!gpu.state.blanked) {
780 gpu.state.blanked = 1;
781 gpu.state.fb_dirty = 1;
786 renderer_notify_update_lace(0);
788 if (!gpu.state.fb_dirty)
791 if (gpu.frameskip.set) {
792 if (!gpu.frameskip.frame_ready) {
793 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
795 gpu.frameskip.active = 0;
797 gpu.frameskip.frame_ready = 0;
801 gpu.state.fb_dirty = 0;
802 gpu.state.blanked = 0;
803 renderer_notify_update_lace(1);
806 void GPUvBlank(int is_vblank, int lcf)
808 int interlace = gpu.state.allow_interlace
809 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
810 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
811 // interlace doesn't look nice on progressive displays,
812 // so we have this "auto" mode here for games that don't read vram
813 if (gpu.state.allow_interlace == 2
814 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
818 if (interlace || interlace != gpu.state.old_interlace) {
819 gpu.state.old_interlace = interlace;
823 renderer_flush_queues();
824 renderer_set_interlace(interlace, !lcf);
828 #include "../../frontend/plugin_lib.h"
830 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
832 gpu.frameskip.set = cbs->frameskip;
833 gpu.frameskip.advice = &cbs->fskip_advice;
834 gpu.frameskip.force = &cbs->fskip_force;
835 gpu.frameskip.dirty = &cbs->fskip_dirty;
836 gpu.frameskip.active = 0;
837 gpu.frameskip.frame_ready = 1;
838 gpu.state.hcnt = cbs->gpu_hcnt;
839 gpu.state.frame_count = cbs->gpu_frame_count;
840 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
841 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
843 gpu.useDithering = cbs->gpu_neon.allow_dithering;
844 gpu.mmap = cbs->mmap;
845 gpu.munmap = cbs->munmap;
848 if (gpu.vram == NULL)
851 if (cbs->pl_vout_set_raw_vram)
852 cbs->pl_vout_set_raw_vram(gpu.vram);
853 renderer_set_config(cbs);
854 vout_set_config(cbs);
857 // vim:shiftwidth=2:expandtab