2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
// Number of elements in a true array (not valid for pointers).
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Thin names for compiler builtins: a branch-unlikely hint, a prefetch, and
// a function attribute that forbids inlining.
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
// printf-based logger; every message is prefixed with the values currently
// behind gpu.state.frame_count and gpu.state.hcnt.
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// The gpu_log-backed variants are commented out; log_anomaly expands to
// nothing, so its arguments are never evaluated.
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
34 #define log_anomaly(...)
// Forward declarations for helpers that are used before they are defined.
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
// Resets command-stream state. Words still sitting in gpu.cmd_buffer are run
// through do_cmd_buffer() first, and a VRAM transfer that still has rows
// outstanding (gpu.dma.h > 0) is closed out with finish_vram_transfer(),
// using the direction recorded in gpu.dma_start.
41 static noinline void do_cmd_reset(void)
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
// Puts the register state into its reset configuration: the control
// registers are zeroed, every ex_regs slot is seeded with its own command
// byte (0xe0 + index) in the top 8 bits, the status word is set to
// 0x14802000, and the screen defaults to 256x240.
54 static noinline void do_reset(void)
60 memset(gpu.regs, 0, sizeof(gpu.regs));
61 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
62 gpu.ex_regs[i] = (0xe0 + i) << 24;
63 gpu.status = 0x14802000;
66 gpu.screen.hres = gpu.screen.w = 256;
67 gpu.screen.vres = gpu.screen.h = 240;
// Recomputes gpu.screen.w from the horizontal display range (x2 - x1).
// A range that is non-positive or >= 2560 is treated as unusable and the
// mode width hres is taken as-is; the scaled form is sw * hres / 2560.
70 static noinline void update_width(void)
72 int sw = gpu.screen.x2 - gpu.screen.x1;
73 if (sw <= 0 || sw >= 2560)
75 gpu.screen.w = gpu.screen.hres;
77 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Height counterpart of update_width(): starts from the vertical display
// range (y2 - y1), consults the double-height status flag, and tests for a
// range that is non-positive or larger than vres.
80 static noinline void update_height(void)
82 // TODO: emulate this properly..
83 int sh = gpu.screen.y2 - gpu.screen.y1;
84 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
86 if (sh <= 0 || sh > gpu.screen.vres)
// Frameskip decision for the upcoming frame. Sets the flag that
// gpu.frameskip.dirty points at, updates the cnt / frame_ready bookkeeping,
// then chooses gpu.frameskip.active in priority order:
//   1. the force flag is set                      -> skip
//   2. not currently skipping and advice is set   -> skip
//   3. a fixed skip count is configured and cnt has not reached it -> skip
//   4. otherwise                                  -> draw
// When the frame will be drawn and a fill was deferred during skipping, that
// fill is replayed before anything else is rendered.
92 static noinline void decide_frameskip(void)
94 *gpu.frameskip.dirty = 1;
96 if (gpu.frameskip.active)
99 gpu.frameskip.cnt = 0;
100 gpu.frameskip.frame_ready = 1;
103 if (*gpu.frameskip.force)
104 gpu.frameskip.active = 1;
105 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
106 gpu.frameskip.active = 1;
107 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
108 gpu.frameskip.active = 1;
110 gpu.frameskip.active = 0;
// pending_fill[0] != 0 marks a 3-word fill saved by do_cmd_list_skip();
// it is executed through do_cmd_list() and the marker is cleared.
112 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
114 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
115 gpu.frameskip.pending_fill[0] = 0;
// Updates and returns gpu.frameskip.allow for the position carried in an e3
// command word (x in bits 0-9, y in bits 10-19). Skipping is allowed when
// the interlace status bit is set, or when that position lies outside the
// displayed rectangle [screen.x, screen.x + w) x [screen.y, screen.y + h).
119 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
121 // no frameskip if it decides to draw to display area,
122 // but not for interlace since it'll most likely always do that
123 uint32_t x = cmd_e3 & 0x3ff;
124 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// The unsigned subtraction wraps for positions left of / above the display
// origin, so a single >= compare per axis covers both bounds.
125 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
126 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
127 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
128 return gpu.frameskip.allow;
// Services an info request: the low 4 bits of data select what to report,
// and the answer is placed in gpu.gp0. The visible answers are taken from
// gpu.ex_regs (either the slot indexed by data & 7, or slot 5), masked to
// the low 20 bits.
131 static noinline void get_gpu_info(uint32_t data)
133 switch (data & 0x0f) {
138 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
141 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// VRAM buffer size in bytes: 1024 x 512 pixels of 2 bytes each, doubled,
// plus a further 4096 bytes.
152 // double, for overdraw guard
153 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
155 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
156 // renderer/downscaler it uses in high res modes:
158 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
159 // fills. (Will change this value if it ever gets large page support)
160 #define VRAM_ALIGN 8192
// Alternative definition: the 16-byte minimum described above.
162 #define VRAM_ALIGN 16
165 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
166 static uint16_t *vram_ptr_orig = NULL;
// Two builds of map_vram(). Both obtain VRAM_SIZE + (VRAM_ALIGN-1) bytes,
// remember the raw pointer in vram_ptr_orig for later release, skip 4096
// bytes at the front (gpu.vram is a uint16_t pointer, hence 4096 / 2
// elements), and then round gpu.vram up to a multiple of VRAM_ALIGN.
// Failure to obtain memory is reported on stderr.
168 #ifdef GPULIB_USE_MMAP
// Variant that obtains the memory through the gpu.mmap callback.
169 static int map_vram(void)
171 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
172 if (gpu.vram != NULL) {
173 // 4kb guard in front
174 gpu.vram += (4096 / 2);
176 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
180 fprintf(stderr, "could not map vram, expect crashes\n");
// Variant that uses calloc, so the buffer starts out zero-filled.
185 static int map_vram(void)
187 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
188 if (gpu.vram != NULL) {
189 // 4kb guard in front
190 gpu.vram += (4096 / 2);
192 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
195 fprintf(stderr, "could not allocate vram, expect crashes\n");
// Allocates the VRAM buffer with calloc: VRAM_SIZE + (VRAM_ALIGN-1) zeroed
// bytes, raw pointer kept in vram_ptr_orig, 4096 bytes skipped at the front,
// then gpu.vram rounded up to a multiple of VRAM_ALIGN. Callers compare the
// result against 0 to detect failure; failure is also reported on stderr.
200 static int allocate_vram(void)
202 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
203 if (gpu.vram != NULL) {
204 // 4kb guard in front
205 gpu.vram += (4096 / 2);
207 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
210 fprintf(stderr, "could not allocate vram, expect crashes\n");
// NOTE(review): these statements look like the body of the plugin init
// routine (presumably GPUinit) - confirm against the full file.
// In builds without GPULIB_USE_MMAP, VRAM is allocated here if it does not
// exist yet; a nonzero result from allocate_vram() is reported as an error.
218 #ifndef GPULIB_USE_MMAP
219 if (gpu.vram == NULL) {
220 if (allocate_vram() != 0) {
221 printf("ERROR: could not allocate VRAM, exiting..\n");
227 //extern uint32_t hSyncCount; // in psxcounters.cpp
228 //extern uint32_t frame_counter; // in psxcounters.cpp
229 //gpu.state.hcnt = &hSyncCount;
230 //gpu.state.frame_count = &frame_counter;
// The renderer init result is OR-ed into ret, so any nonzero status sticks.
234 ret |= renderer_init();
// Until other counters are supplied, both counter pointers refer to
// gpu.zero, which keeps dereferences of them valid.
236 gpu.state.frame_count = &gpu.zero;
237 gpu.state.hcnt = &gpu.zero;
238 gpu.frameskip.active = 0;
242 /*if (gpu.mmap != NULL) {
// Releases the VRAM buffer. Release always goes through vram_ptr_orig (the
// raw pointer from the allocator), never the adjusted gpu.vram; afterwards
// both pointers are reset to NULL.
// NOTE(review): map_vram() maps VRAM_SIZE + (VRAM_ALIGN-1) bytes, but the
// unmap call below passes VRAM_SIZE - confirm the callback tolerates the
// shorter length.
249 long GPUshutdown(void)
256 if (vram_ptr_orig != NULL) {
257 #ifdef GPULIB_USE_MMAP
258 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
263 vram_ptr_orig = gpu.vram = NULL;
// Handles a write to the GPU control port. The top byte of data selects the
// command and the remaining bits are its argument.
268 void GPUwriteStatus(uint32_t data)
270 //senquack TODO: Would it be wise to add cmd buffer flush here, since
271 // status settings can affect commands already in buffer?
// Resolution tables, indexed by status bits 16-18 (hres) and 19-20 (vres).
273 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
274 static const short vres[4] = { 240, 480, 256, 480 };
275 uint32_t cmd = data >> 24;
// Commands that have a slot in gpu.regs are latched there. Rewriting the
// value already stored is detected for every command except 0, 1 and 5.
277 if (cmd < ARRAY_SIZE(gpu.regs)) {
278 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
280 gpu.regs[cmd] = data;
283 gpu.state.fb_dirty = 1;
// Display blanking: set or clear the blanking status bit.
294 gpu.status |= PSX_GPU_STATUS_BLANKING;
296 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA mode: the low two bits of data replace the status DMA field.
299 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
300 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// Display start position: 10-bit x, 9-bit y. With frameskip configured the
// allow flag is re-evaluated, and the frame number of the most recent
// position change is tracked in last_flip_frame.
303 gpu.screen.x = data & 0x3ff;
304 gpu.screen.y = (data >> 10) & 0x1ff;
305 if (gpu.frameskip.set) {
306 decide_frameskip_allow(gpu.ex_regs[3]);
307 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
309 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range: two 12-bit values.
314 gpu.screen.x1 = data & 0xfff;
315 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range: two 10-bit values.
319 gpu.screen.y1 = data & 0x3ff;
320 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: data bits 0-5 are placed in status bits 17-22 and data bit 6
// in status bit 16; hres and vres are then looked up from the tables above.
324 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
325 gpu.screen.hres = hres[(gpu.status >> 16) & 7];
326 gpu.screen.vres = vres[(gpu.status >> 19) & 3];
329 renderer_notify_res_change();
// Commands 0x10 through 0x1f.
332 if ((cmd & 0xf0) == 0x10)
// Optional build-time hook that also receives the raw written word.
337 #ifdef GPUwriteStatus_ext
338 GPUwriteStatus_ext(data);
// Lookup table indexed by the command byte (top 8 bits of a command word).
// do_cmd_list_skip() treats a command as 1 + cmd_lengths[cmd] words long, so
// each entry counts the words that follow the command word itself.
// Rows hold 16 entries; the trailing 20 / 40 / 60 ... markers give the hex
// index of the first entry of the row they sit on.
342 const unsigned char cmd_lengths[256] =
344 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
347 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
348 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
349 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
350 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
351 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
352 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
354 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
355 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
356 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in gpu.vram, which is laid out as rows of 1024
// 16-bit pixels.
362 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copies l pixels (l * 2 bytes) between the caller buffer mem and the VRAM
// row segment starting at (x, y): VRAM into mem for the read direction, mem
// into VRAM for the write direction. No clipping or wrapping is applied
// here, so the caller is responsible for keeping the span in range.
364 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
366 uint16_t *vram = VRAM_MEM_XY(x, y);
368 memcpy(mem, vram, l * 2);
370 memcpy(vram, mem, l * 2);
// Moves up to count 32-bit words for the transfer currently described by
// gpu.dma, in the direction given by is_read, and returns how many words
// were consumed. Internally everything is counted in 16-bit pixels.
// Order of work: finish a row that was left partially transferred
// (gpu.dma.offset), then copy whole rows while enough data remains, then
// copy a partial row with whatever is left.
373 static int do_vram_io(uint32_t *data, int count, int is_read)
375 int count_initial = count;
376 uint16_t *sdata = (uint16_t *)data;
377 int x = gpu.dma.x, y = gpu.dma.y;
378 int w = gpu.dma.w, h = gpu.dma.h;
379 int o = gpu.dma.offset;
381 count *= 2; // operate in 16bpp pixels
// Resume in the middle of a row: l is the number of pixels still missing
// from it.
385 if (gpu.dma.offset) {
386 l = w - gpu.dma.offset;
390 do_vram_line(x + o, y, sdata, l, is_read);
// Full rows.
403 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
405 do_vram_line(x, y, sdata, w, is_read);
// Leftover pixels that do not fill a complete row.
411 do_vram_line(x, y, sdata, count, is_read);
417 finish_vram_transfer(is_read);
// count is in pixels at this point; halve it to get back to words.
422 return count_initial - count / 2;
// Sets up gpu.dma for a new VRAM transfer from the two parameter words of a
// transfer command and keeps a copy of the initial state in gpu.dma_start.
//   pos_word:  x in bits 0-9, y in bits 16-24
//   size_word: width in the low half, height in the high half; each is
//              decremented, masked and incremented again, so 0 wraps to the
//              maximum (1024 wide, 512 high)
//   is_read:   nonzero for a VRAM read, zero for a write
425 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
428 log_anomaly("start_vram_transfer while old unfinished\n");
430 gpu.dma.x = pos_word & 0x3ff;
431 gpu.dma.y = (pos_word >> 16) & 0x1ff;
432 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
433 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
435 gpu.dma.is_read = is_read;
436 gpu.dma_start = gpu.dma;
438 renderer_flush_queues();
// Raise the IMG status bit, preload gp0 with the first 32 bits at the start
// position, and note the frame in which VRAM was read (used by GPUvBlank).
// NOTE(review): presumably this part applies to reads only - confirm.
440 gpu.status |= PSX_GPU_STATUS_IMG;
441 // XXX: wrong for width 1
442 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
443 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
446 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
447 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Completion handling for a VRAM transfer: the IMG status bit is cleared and
// the renderer is told to refresh its caches for the rectangle recorded in
// gpu.dma_start when the transfer began.
// NOTE(review): is_read presumably selects between these two actions -
// confirm.
450 static void finish_vram_transfer(int is_read)
453 gpu.status &= ~PSX_GPU_STATUS_IMG;
455 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
456 gpu.dma_start.w, gpu.dma_start.h);
// Command walker used while a frame is being skipped. It steps through the
// list for as long as skip stays set, doing only the state tracking that
// must survive a skipped frame instead of drawing.
//   data / count: command words and how many are available
//   last_cmd:     out parameter supplied by the caller
459 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
461 int cmd = 0, pos = 0, len, dummy, v;
// Forget any previously deferred fill.
464 gpu.frameskip.pending_fill[0] = 0;
466 while (pos < count && skip) {
467 uint32_t *list = data + pos;
468 cmd = LE32TOH(list[0]) >> 24;
469 len = 1 + cmd_lengths[cmd];
// Fill handling: a fill whose size exceeds the displayed width or height is
// executed right away; a smaller one is saved (3 words) in pending_fill so
// decide_frameskip() can replay it later.
473 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
474 // clearing something large, don't skip
475 do_cmd_list(list, 3, &dummy);
477 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Keep the low 9 bits of ex_regs[1] in sync with the value embedded in the
// primitive; bit 4 of cmd picks word 4 or word 5 as the source.
483 gpu.ex_regs[1] &= ~0x1ff;
484 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// Variable-length commands: scan ahead for the terminator word, recognised
// by (word & 0xf000f000) == 0x50005000. One form advances a word at a time
// from index 3, the other two words at a time from index 4.
487 for (v = 3; pos + v < count; v++)
489 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
495 for (v = 4; pos + v < count; v += 2)
497 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// The result of decide_frameskip_allow() becomes the new skip flag, so a
// change of drawing position can end skipping part-way through the list.
504 skip = decide_frameskip_allow(LE32TOH(list[0]));
// Commands 0xe0-0xe7 are always recorded in ex_regs.
505 if ((cmd & 0xf8) == 0xe0)
506 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
510 if (pos + len > count) {
512 break; // incomplete cmd
// Commands 0xa0-0xdf are the VRAM transfer range that do_cmd_buffer()
// handles itself.
514 if (0xa0 <= cmd && cmd <= 0xdf)
// Hand the updated ex_regs to the renderer.
520 renderer_sync_ecmds(gpu.ex_regs);
// Central dispatcher for a run of command words. Callers treat the return
// value as the number of words that could not be consumed yet.
// For each position in data:
//   - while a VRAM write transfer is active, words are fed to do_vram_io()
//   - a command in 0xa0-0xdf starts a VRAM transfer; it needs the command
//     word plus two parameter words to be present
//   - everything else goes to do_cmd_list_skip() or do_cmd_list()
525 static noinline int do_cmd_buffer(uint32_t *data, int count)
// Remembered so a change of ex_regs[3] can be detected at the end.
528 uint32_t old_e3 = gpu.ex_regs[3];
532 for (pos = 0; pos < count; )
534 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
536 pos += do_vram_io(data + pos, count - pos, 0);
541 cmd = LE32TOH(data[pos]) >> 24;
542 if (0xa0 <= cmd && cmd <= 0xdf) {
543 if (unlikely((pos+2) >= count)) {
544 // incomplete vram write/read cmd, can't consume yet
549 // consume vram write/read cmd
// (cmd & 0xe0) == 0xc0 marks the transfer as a read.
550 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
555 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
556 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
557 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
559 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Mirror register state into the status word: bits 0-10 come from
// ex_regs[1], bits 11-12 from the low two bits of ex_regs[6].
568 gpu.status &= ~0x1fff;
569 gpu.status |= gpu.ex_regs[1] & 0x7ff;
570 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
572 gpu.state.fb_dirty |= vram_dirty;
// ex_regs[3] changed while processing, so frameskip.allow may be stale.
574 if (old_e3 != gpu.ex_regs[3])
575 decide_frameskip_allow(gpu.ex_regs[3]);
// Executes the words accumulated in gpu.cmd_buffer. Whatever do_cmd_buffer()
// reports as left over (the tail of the buffer) is moved to the front so
// that later words can complete it; memmove is used because source and
// destination may overlap.
580 static void flush_cmd_buffer(void)
582 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
584 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Block write to the GPU data port: count words starting at mem are executed
// directly through do_cmd_buffer(), after first checking for words pending
// in gpu.cmd_buffer. Words that remain unconsumed are reported through
// log_anomaly as discarded.
588 void GPUwriteDataMem(uint32_t *mem, int count)
592 log_io("gpu_dma_write %p %d\n", mem, count);
594 if (unlikely(gpu.cmd_len > 0))
597 left = do_cmd_buffer(mem, count);
599 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word write to the GPU data port. The word is converted with
// HTOLE32 and appended to gpu.cmd_buffer rather than executed immediately;
// reaching CMD_BUFFER_LEN words is checked for after every append.
602 void GPUwriteData(uint32_t data)
604 log_io("gpu_write %08x\n", data);
605 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
606 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Executes a linked list of command packets stored in emulated RAM.
//   rambase:    host pointer to the start of emulated RAM
//   start_addr: address of the first node
// Each node begins with a header word: the top 8 bits give the number of
// payload words that follow, the low 24 bits give the address of the next
// node. The walk ends at the first address that has bit 23 set. Addresses
// are wrapped with the 0x1fffff mask before being used as RAM offsets.
610 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
612 uint32_t addr, *list, ld_addr = 0;
613 int len, left, count;
// Prefetch the first node.
616 preload(rambase + (start_addr & 0x1fffff) / 4);
// Words buffered by earlier single-word writes are checked for first.
618 if (unlikely(gpu.cmd_len > 0))
621 log_io("gpu_dma_chain\n");
622 addr = start_addr & 0xffffff;
623 for (count = 0; (addr & 0x800000) == 0; count++)
625 list = rambase + (addr & 0x1fffff) / 4;
626 len = LE32TOH(list[0]) >> 24;
627 addr = LE32TOH(list[0]) & 0xffffff;
// Prefetch the next node while this one is being processed.
628 preload(rambase + (addr & 0x1fffff) / 4);
// Cycle estimate: a fixed 5 per node plus one per payload word.
632 cpu_cycles += 5 + len;
634 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
637 left = do_cmd_buffer(list + 1, len);
639 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
// Loop detection: once LD_THRESHOLD nodes have been walked, the header of
// each further node gets bit 23 set, so running into a node that was already
// visited terminates the for loop above.
642 #define LD_THRESHOLD (8*1024)
643 if (count >= LD_THRESHOLD) {
644 if (count == LD_THRESHOLD) {
649 // loop detection marker
650 // (bit23 set causes DMA error on real machine, so
651 // unlikely to be ever set by the game)
652 list[0] |= HTOLE32(0x800000);
// Cleanup: re-walk the marked part of the chain starting at ld_addr and
// clear bit 23 in each header again so guest memory is left as it was.
657 // remove loop detection markers
658 count -= LD_THRESHOLD + 2;
659 addr = ld_addr & 0x1fffff;
660 while (count-- > 0) {
661 list = rambase + addr / 4;
662 addr = LE32TOH(list[0]) & 0x1fffff;
663 list[0] &= HTOLE32(~0x800000);
// Remember when this list ran, its cycle estimate, and where it started.
667 gpu.state.last_list.frame = *gpu.state.frame_count;
668 gpu.state.last_list.hcnt = *gpu.state.hcnt;
669 gpu.state.last_list.cycles = cpu_cycles;
670 gpu.state.last_list.addr = start_addr;
// Block read from the GPU data port: fills mem with count words of the
// active VRAM transfer via do_vram_io() in the read direction, after first
// checking for command words pending in gpu.cmd_buffer.
675 void GPUreadDataMem(uint32_t *mem, int count)
677 log_io("gpu_dma_read %p %d\n", mem, count);
679 if (unlikely(gpu.cmd_len > 0))
683 do_vram_io(mem, count, 1);
// Single-word read from the GPU data port. Pending command words are checked
// for first; one word of the active VRAM transfer can be fetched into ret
// through do_vram_io() in the read direction, and the value is logged.
686 uint32_t GPUreadData(void)
690 if (unlikely(gpu.cmd_len > 0))
696 do_vram_io(&ret, 1, 1);
700 log_io("gpu_read %08x\n", ret);
// Read of the GPU status port. Pending command words are checked for first,
// since do_cmd_buffer() refreshes the low status bits from ex_regs; the
// value being returned is logged.
704 uint32_t GPUreadStatus(void)
708 if (unlikely(gpu.cmd_len > 0))
712 log_io("gpu_read_status %08x\n", ret);
// Fields of the save-state record that GPUfreeze() reads and writes
// (presumably the members of struct GPUFreeze - confirm).
// ulControl is used by GPUfreeze() as follows: the leading entries hold
// gpu.regs and the entries from index 0xe0 hold gpu.ex_regs.
718 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
719 uint32_t ulStatus; // current gpu status
720 uint32_t ulControl[256]; // latest control register values
721 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save / restore of GPU state through a GPUFreeze record; type selects the
// direction.
// Only the first 1024 * 512 * 2 bytes of VRAM are transferred, although the
// psxVRam field is twice that size.
724 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save: VRAM image, control registers, ex_regs (stored from index 0xe0 of
// ulControl) and the status word.
734 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
735 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
736 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
737 freeze->ulStatus = gpu.status;
// Restore: the same four pieces in the opposite direction.
741 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
742 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
743 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
744 gpu.status = freeze->ulStatus;
// Replay control registers 8 down to 1 through GPUwriteStatus() so derived
// state is rebuilt. The stored value has its low bit flipped first so the
// write is not recognised as a repeat; the value written is the original.
746 for (i = 8; i > 0; i--) {
747 gpu.regs[i] ^= 1; // avoid reg change detection
748 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// Bring the renderer in line with the restored registers and full VRAM.
750 renderer_sync_ecmds(gpu.ex_regs);
751 renderer_update_caches(0, 0, 1024, 512);
// Per-frame presentation hook. Renderer queues are flushed, the blanked
// display case is handled, and the renderer is notified with 1 once the
// dirty and frameskip checks have been passed.
758 void GPUupdateLace(void)
762 renderer_flush_queues();
// While the blanking status bit is set, entering the blanked state is
// recorded once (blanked and fb_dirty are set) and the renderer is notified
// with 0.
764 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
765 if (!gpu.state.blanked) {
767 gpu.state.blanked = 1;
768 gpu.state.fb_dirty = 1;
773 renderer_notify_update_lace(0);
775 if (!gpu.state.fb_dirty)
// Frameskip gate: when no frame is ready, the distance to the last display
// position change is compared against 9 frames before active is cleared;
// frame_ready is consumed here.
778 if (gpu.frameskip.set) {
779 if (!gpu.frameskip.frame_ready) {
780 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
782 gpu.frameskip.active = 0;
784 gpu.frameskip.frame_ready = 0;
// Frame is being shown: clear the dirty / blanked flags and notify with 1.
788 gpu.state.fb_dirty = 0;
789 gpu.state.blanked = 0;
790 renderer_notify_update_lace(1);
// Vblank notification. Works out whether interlaced rendering should be
// used and passes the result on to the renderer.
//   is_vblank: vblank state supplied by the caller
//   lcf:       its negation is handed to renderer_set_interlace()
// Interlace requires gpu.state.allow_interlace to be nonzero and both the
// interlace and double-height status bits to be set.
793 void GPUvBlank(int is_vblank, int lcf)
795 int interlace = gpu.state.allow_interlace
796 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
797 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
798 // interlace doesn't look nice on progressive displays,
799 // so we have this "auto" mode here for games that don't read vram
// Setting 2 also considers whether more than one frame has passed since the
// last VRAM read.
800 if (gpu.state.allow_interlace == 2
801 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Act when interlace is on, or when it has just been switched off.
805 if (interlace || interlace != gpu.state.old_interlace) {
806 gpu.state.old_interlace = interlace;
810 renderer_flush_queues();
811 renderer_set_interlace(interlace, !lcf);
815 #include "../../frontend/plugin_lib.h"
// Receives the frontend callback / configuration block and copies the
// settings this library uses into gpu.
// Several members are stored as pointers into cbs (advice, force, dirty) or
// as pointers taken from it (hcnt, frame_count), so the storage behind them
// has to stay valid for as long as the GPU code runs.
// NOTE(review): fskip_dirty is later written through the stored pointer (see
// decide_frameskip) although cbs is const-qualified here - confirm this is
// intended.
817 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
819 gpu.frameskip.set = cbs->frameskip;
820 gpu.frameskip.advice = &cbs->fskip_advice;
821 gpu.frameskip.force = &cbs->fskip_force;
822 gpu.frameskip.dirty = &cbs->fskip_dirty;
// Frameskip state starts over: not skipping, and a frame counts as ready.
823 gpu.frameskip.active = 0;
824 gpu.frameskip.frame_ready = 1;
825 gpu.state.hcnt = cbs->gpu_hcnt;
826 gpu.state.frame_count = cbs->gpu_frame_count;
827 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
828 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
830 gpu.useDithering = cbs->gpu_neon.allow_dithering;
// Memory mapping callbacks used by the GPULIB_USE_MMAP build of map_vram()
// and by GPUshutdown().
831 gpu.mmap = cbs->mmap;
832 gpu.munmap = cbs->munmap;
835 if (gpu.vram == NULL)
// Optional frontend hook that is given the VRAM pointer.
838 if (cbs->pl_vout_set_raw_vram)
839 cbs->pl_vout_set_raw_vram(gpu.vram);
// Pass the same configuration on to the renderer and video output layers.
840 renderer_set_config(cbs);
841 vout_set_config(cbs);
844 // vim:shiftwidth=2:expandtab