2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
34 #define log_anomaly(...)
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
41 static noinline void do_cmd_reset(void)
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
54 static noinline void do_reset(void)
59 memset(gpu.regs, 0, sizeof(gpu.regs));
60 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
61 gpu.ex_regs[i] = (0xe0 + i) << 24;
62 gpu.status.reg = 0x14802000;
65 gpu.screen.hres = gpu.screen.w = 256;
66 gpu.screen.vres = gpu.screen.h = 240;
69 static noinline void update_width(void)
71 int sw = gpu.screen.x2 - gpu.screen.x1;
72 if (sw <= 0 || sw >= 2560)
74 gpu.screen.w = gpu.screen.hres;
76 gpu.screen.w = sw * gpu.screen.hres / 2560;
79 static noinline void update_height(void)
81 // TODO: emulate this properly..
82 int sh = gpu.screen.y2 - gpu.screen.y1;
83 if (gpu.status.dheight)
85 if (sh <= 0 || sh > gpu.screen.vres)
91 static noinline void decide_frameskip(void)
93 *gpu.frameskip.dirty = 1;
95 if (gpu.frameskip.active)
98 gpu.frameskip.cnt = 0;
99 gpu.frameskip.frame_ready = 1;
102 if (*gpu.frameskip.force)
103 gpu.frameskip.active = 1;
104 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
105 gpu.frameskip.active = 1;
106 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
107 gpu.frameskip.active = 1;
109 gpu.frameskip.active = 0;
111 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
113 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
114 gpu.frameskip.pending_fill[0] = 0;
118 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
120 // no frameskip if it decides to draw to display area,
121 // but not for interlace since it'll most likely always do that
122 uint32_t x = cmd_e3 & 0x3ff;
123 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
124 gpu.frameskip.allow = gpu.status.interlace ||
125 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
126 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
127 return gpu.frameskip.allow;
130 static noinline void get_gpu_info(uint32_t data)
132 switch (data & 0x0f) {
136 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
140 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
151 // double, for overdraw guard
152 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
154 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
155 // renderer/downscaler it uses in high res modes:
157 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
158 // fills. (Will change this value if it ever gets large page support)
159 #define VRAM_ALIGN 8192
161 #define VRAM_ALIGN 16
164 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
165 static uint16_t *vram_ptr_orig = NULL;
167 #ifdef GPULIB_USE_MMAP
168 static int map_vram(void)
170 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
171 if (gpu.vram != NULL) {
172 // 4kb guard in front
173 gpu.vram += (4096 / 2);
175 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
179 fprintf(stderr, "could not map vram, expect crashes\n");
184 static int map_vram(void)
186 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
187 if (gpu.vram != NULL) {
188 // 4kb guard in front
189 gpu.vram += (4096 / 2);
191 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
194 fprintf(stderr, "could not allocate vram, expect crashes\n");
199 static int allocate_vram(void)
201 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
202 if (gpu.vram != NULL) {
203 // 4kb guard in front
204 gpu.vram += (4096 / 2);
206 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
209 fprintf(stderr, "could not allocate vram, expect crashes\n");
217 #ifndef GPULIB_USE_MMAP
218 if (gpu.vram == NULL) {
219 if (allocate_vram() != 0) {
220 printf("ERROR: could not allocate VRAM, exiting..\n");
226 //extern uint32_t hSyncCount; // in psxcounters.cpp
227 //extern uint32_t frame_counter; // in psxcounters.cpp
228 //gpu.state.hcnt = &hSyncCount;
229 //gpu.state.frame_count = &frame_counter;
233 ret |= renderer_init();
235 gpu.state.frame_count = &gpu.zero;
236 gpu.state.hcnt = &gpu.zero;
237 gpu.frameskip.active = 0;
241 /*if (gpu.mmap != NULL) {
248 long GPUshutdown(void)
255 if (vram_ptr_orig != NULL) {
256 #ifdef GPULIB_USE_MMAP
257 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
262 vram_ptr_orig = gpu.vram = NULL;
267 void GPUwriteStatus(uint32_t data)
269 //senquack TODO: Would it be wise to add cmd buffer flush here, since
270 // status settings can affect commands already in buffer?
272 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
273 static const short vres[4] = { 240, 480, 256, 480 };
274 uint32_t cmd = data >> 24;
276 if (cmd < ARRAY_SIZE(gpu.regs)) {
277 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
279 gpu.regs[cmd] = data;
282 gpu.state.fb_dirty = 1;
292 gpu.status.blanking = data & 1;
295 gpu.status.dma = data & 3;
298 gpu.screen.x = data & 0x3ff;
299 gpu.screen.y = (data >> 10) & 0x1ff;
300 if (gpu.frameskip.set) {
301 decide_frameskip_allow(gpu.ex_regs[3]);
302 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
304 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
309 gpu.screen.x1 = data & 0xfff;
310 gpu.screen.x2 = (data >> 12) & 0xfff;
314 gpu.screen.y1 = data & 0x3ff;
315 gpu.screen.y2 = (data >> 10) & 0x3ff;
319 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
320 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
321 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
324 renderer_notify_res_change();
327 if ((cmd & 0xf0) == 0x10)
332 #ifdef GPUwriteStatus_ext
333 GPUwriteStatus_ext(data);
337 const unsigned char cmd_lengths[256] =
339 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
341 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
342 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
343 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
344 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
345 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
346 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
347 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
351 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
357 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
359 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
361 uint16_t *vram = VRAM_MEM_XY(x, y);
363 memcpy(mem, vram, l * 2);
365 memcpy(vram, mem, l * 2);
368 static int do_vram_io(uint32_t *data, int count, int is_read)
370 int count_initial = count;
371 uint16_t *sdata = (uint16_t *)data;
372 int x = gpu.dma.x, y = gpu.dma.y;
373 int w = gpu.dma.w, h = gpu.dma.h;
374 int o = gpu.dma.offset;
376 count *= 2; // operate in 16bpp pixels
380 if (gpu.dma.offset) {
381 l = w - gpu.dma.offset;
385 do_vram_line(x + o, y, sdata, l, is_read);
398 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
400 do_vram_line(x, y, sdata, w, is_read);
406 do_vram_line(x, y, sdata, count, is_read);
412 finish_vram_transfer(is_read);
417 return count_initial - count / 2;
420 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
423 log_anomaly("start_vram_transfer while old unfinished\n");
425 gpu.dma.x = pos_word & 0x3ff;
426 gpu.dma.y = (pos_word >> 16) & 0x1ff;
427 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
428 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
430 gpu.dma.is_read = is_read;
431 gpu.dma_start = gpu.dma;
433 renderer_flush_queues();
436 // XXX: wrong for width 1
437 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
438 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
441 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
442 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
445 static void finish_vram_transfer(int is_read)
450 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
451 gpu.dma_start.w, gpu.dma_start.h);
454 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
456 int cmd = 0, pos = 0, len, dummy, v;
459 gpu.frameskip.pending_fill[0] = 0;
461 while (pos < count && skip) {
462 uint32_t *list = data + pos;
464 len = 1 + cmd_lengths[cmd];
468 if ((int)(list[2] & 0x3ff) > gpu.screen.w || (int)((list[2] >> 16) & 0x1ff) > gpu.screen.h)
469 // clearing something large, don't skip
470 do_cmd_list(list, 3, &dummy);
472 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
478 gpu.ex_regs[1] &= ~0x1ff;
479 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
482 for (v = 3; pos + v < count; v++)
484 if ((list[v] & 0xf000f000) == 0x50005000)
490 for (v = 4; pos + v < count; v += 2)
492 if ((list[v] & 0xf000f000) == 0x50005000)
499 skip = decide_frameskip_allow(list[0]);
500 if ((cmd & 0xf8) == 0xe0)
501 gpu.ex_regs[cmd & 7] = list[0];
505 if (pos + len > count) {
507 break; // incomplete cmd
509 if (0xa0 <= cmd && cmd <= 0xdf)
515 renderer_sync_ecmds(gpu.ex_regs);
520 static noinline int do_cmd_buffer(uint32_t *data, int count)
523 uint32_t old_e3 = gpu.ex_regs[3];
527 for (pos = 0; pos < count; )
529 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
531 pos += do_vram_io(data + pos, count - pos, 0);
536 cmd = data[pos] >> 24;
537 if (0xa0 <= cmd && cmd <= 0xdf) {
538 if (unlikely((pos+2) >= count)) {
539 // incomplete vram write/read cmd, can't consume yet
544 // consume vram write/read cmd
545 start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
550 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
551 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
552 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
554 pos += do_cmd_list(data + pos, count - pos, &cmd);
563 gpu.status.reg &= ~0x1fff;
564 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
565 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
567 gpu.state.fb_dirty |= vram_dirty;
569 if (old_e3 != gpu.ex_regs[3])
570 decide_frameskip_allow(gpu.ex_regs[3]);
575 static void flush_cmd_buffer(void)
577 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
579 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
583 void GPUwriteDataMem(uint32_t *mem, int count)
587 log_io("gpu_dma_write %p %d\n", mem, count);
589 if (unlikely(gpu.cmd_len > 0))
592 left = do_cmd_buffer(mem, count);
594 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
597 void GPUwriteData(uint32_t data)
599 log_io("gpu_write %08x\n", data);
600 gpu.cmd_buffer[gpu.cmd_len++] = data;
601 if (gpu.cmd_len >= CMD_BUFFER_LEN)
605 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
607 uint32_t addr, *list, ld_addr = 0;
608 int len, left, count;
611 preload(rambase + (start_addr & 0x1fffff) / 4);
613 if (unlikely(gpu.cmd_len > 0))
616 log_io("gpu_dma_chain\n");
617 addr = start_addr & 0xffffff;
618 for (count = 0; (addr & 0x800000) == 0; count++)
620 list = rambase + (addr & 0x1fffff) / 4;
622 addr = list[0] & 0xffffff;
623 preload(rambase + (addr & 0x1fffff) / 4);
627 cpu_cycles += 5 + len;
629 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
632 left = do_cmd_buffer(list + 1, len);
634 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
637 #define LD_THRESHOLD (8*1024)
638 if (count >= LD_THRESHOLD) {
639 if (count == LD_THRESHOLD) {
644 // loop detection marker
645 // (bit23 set causes DMA error on real machine, so
646 // unlikely to be ever set by the game)
652 // remove loop detection markers
653 count -= LD_THRESHOLD + 2;
654 addr = ld_addr & 0x1fffff;
655 while (count-- > 0) {
656 list = rambase + addr / 4;
657 addr = list[0] & 0x1fffff;
658 list[0] &= ~0x800000;
662 gpu.state.last_list.frame = *gpu.state.frame_count;
663 gpu.state.last_list.hcnt = *gpu.state.hcnt;
664 gpu.state.last_list.cycles = cpu_cycles;
665 gpu.state.last_list.addr = start_addr;
670 void GPUreadDataMem(uint32_t *mem, int count)
672 log_io("gpu_dma_read %p %d\n", mem, count);
674 if (unlikely(gpu.cmd_len > 0))
678 do_vram_io(mem, count, 1);
681 uint32_t GPUreadData(void)
685 if (unlikely(gpu.cmd_len > 0))
690 do_vram_io(&ret, 1, 1);
692 log_io("gpu_read %08x\n", ret);
696 uint32_t GPUreadStatus(void)
700 if (unlikely(gpu.cmd_len > 0))
703 ret = gpu.status.reg;
704 log_io("gpu_read_status %08x\n", ret);
710 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
711 uint32_t ulStatus; // current gpu status
712 uint32_t ulControl[256]; // latest control register values
713 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
716 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
726 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
727 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
728 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
729 freeze->ulStatus = gpu.status.reg;
733 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
734 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
735 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
736 gpu.status.reg = freeze->ulStatus;
738 for (i = 8; i > 0; i--) {
739 gpu.regs[i] ^= 1; // avoid reg change detection
740 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
742 renderer_sync_ecmds(gpu.ex_regs);
743 renderer_update_caches(0, 0, 1024, 512);
750 void GPUupdateLace(void)
754 renderer_flush_queues();
756 if (gpu.status.blanking) {
757 if (!gpu.state.blanked) {
759 gpu.state.blanked = 1;
760 gpu.state.fb_dirty = 1;
765 renderer_notify_update_lace(0);
767 if (!gpu.state.fb_dirty)
770 if (gpu.frameskip.set) {
771 if (!gpu.frameskip.frame_ready) {
772 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
774 gpu.frameskip.active = 0;
776 gpu.frameskip.frame_ready = 0;
780 gpu.state.fb_dirty = 0;
781 gpu.state.blanked = 0;
782 renderer_notify_update_lace(1);
785 void GPUvBlank(int is_vblank, int lcf)
787 int interlace = gpu.state.allow_interlace
788 && gpu.status.interlace && gpu.status.dheight;
789 // interlace doesn't look nice on progressive displays,
790 // so we have this "auto" mode here for games that don't read vram
791 if (gpu.state.allow_interlace == 2
792 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
796 if (interlace || interlace != gpu.state.old_interlace) {
797 gpu.state.old_interlace = interlace;
801 renderer_flush_queues();
802 renderer_set_interlace(interlace, !lcf);
806 #include "../../frontend/plugin_lib.h"
808 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
810 gpu.frameskip.set = cbs->frameskip;
811 gpu.frameskip.advice = &cbs->fskip_advice;
812 gpu.frameskip.force = &cbs->fskip_force;
813 gpu.frameskip.dirty = &cbs->fskip_dirty;
814 gpu.frameskip.active = 0;
815 gpu.frameskip.frame_ready = 1;
816 gpu.state.hcnt = cbs->gpu_hcnt;
817 gpu.state.frame_count = cbs->gpu_frame_count;
818 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
819 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
821 gpu.useDithering = cbs->gpu_neon.allow_dithering;
822 gpu.mmap = cbs->mmap;
823 gpu.munmap = cbs->munmap;
826 if (gpu.vram == NULL)
829 if (cbs->pl_vout_set_raw_vram)
830 cbs->pl_vout_set_raw_vram(gpu.vram);
831 renderer_set_config(cbs);
832 vout_set_config(cbs);
835 // vim:shiftwidth=2:expandtab