2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Standard allocator, used by the non-mmap VRAM allocation path below.
13 #include <stdlib.h> /* for calloc */
// Element count of a true array (do not use on pointer parameters).
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction and prefetch helpers (GCC/Clang builtins).
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
// Log line prefixed with frame count and hsync count for timing context.
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly logging are compiled out by default;
// uncomment the gpu_log aliases to enable them.
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
34 #define log_anomaly(...)
// Forward declarations: GP0 command-stream executor and the hook that
// finalizes a VRAM DMA transfer.
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
// Flush pending state before a command reset: execute any buffered GP0
// words and close an in-flight VRAM transfer so nothing stale survives.
// NOTE(review): some body lines (braces) are missing from this fragment.
41 static noinline void do_cmd_reset(void)
45 if (unlikely(gpu.cmd_len > 0))
46 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
49 if (unlikely(gpu.dma.h > 0))
50 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear the GP1 register mirror, reseed each ex_reg with
// its own command byte (0xe0 + i) in the top byte, and restore the
// power-on status value and default screen mode.
54 static noinline void do_reset(void)
59 memset(gpu.regs, 0, sizeof(gpu.regs));
60 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
61 gpu.ex_regs[i] = (0xe0 + i) << 24;
// NOTE(review): presumably the hardware power-on status word — verify
// against PSX GPU documentation.
62 gpu.status.reg = 0x14802000;
// Default visible resolution after reset: 256x240.
65 gpu.screen.hres = gpu.screen.w = 256;
66 gpu.screen.vres = gpu.screen.h = 240;
// Recompute visible width from the x1/x2 display-range registers.
// An invalid span falls back to the full horizontal resolution;
// 2560 is presumably GPU clock ticks per scanline (TODO confirm).
// NOTE(review): the if/else structure lines are missing from this fragment.
69 static noinline void update_width(void)
71 int sw = gpu.screen.x2 - gpu.screen.x1;
72 if (sw <= 0 || sw >= 2560)
74 gpu.screen.w = gpu.screen.hres;
76 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the y1/y2 display-range registers,
// taking the double-height (interlace) status bit into account.
// NOTE(review): body lines are missing from this fragment; the original
// marks this as approximate ("TODO: emulate this properly").
79 static noinline void update_height(void)
81 // TODO: emulate this properly..
82 int sh = gpu.screen.y2 - gpu.screen.y1;
83 if (gpu.status.dheight)
85 if (sh <= 0 || sh > gpu.screen.vres)
// Per-frame frameskip decision: toggle frameskip.active based on the
// frontend's advice pointer and the configured skip ratio, and replay a
// deferred fill command once skipping stops so VRAM stays consistent.
// NOTE(review): several branch/brace lines are missing from this fragment.
91 static noinline void decide_frameskip(void)
93 if (gpu.frameskip.active)
96 gpu.frameskip.cnt = 0;
97 gpu.frameskip.frame_ready = 1;
// Activate skipping either on external advice or while under the
// configured skip count; otherwise deactivate.
100 if (!gpu.frameskip.active && *gpu.frameskip.advice)
101 gpu.frameskip.active = 1;
102 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
103 gpu.frameskip.active = 1;
105 gpu.frameskip.active = 0;
// A fill (cmd 0x02) deferred while skipping is executed now (3 words).
107 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
109 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
110 gpu.frameskip.pending_fill[0] = 0;
// Decide whether skipping is allowed for the current draw area (from the
// GP0 0xe3 draw-area-top-left command word). Skipping is disallowed when
// drawing targets the displayed region, except in interlace mode.
// The unsigned subtract-and-compare folds both "left of/above display"
// (wraps to a huge value) and "right of/below display" into one test.
114 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
116 // no frameskip if it decides to draw to display area,
117 // but not for interlace since it'll most likely always do that
118 uint32_t x = cmd_e3 & 0x3ff;
119 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
120 gpu.frameskip.allow = gpu.status.interlace ||
121 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
122 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
123 return gpu.frameskip.allow;
// GP1 0x10 "get GPU info": latch the requested internal value into gp0
// for the next GPUREAD. Visible cases return draw-env ex_regs values.
// NOTE(review): other switch cases/braces are missing from this fragment.
126 static noinline void get_gpu_info(uint32_t data)
128 switch (data & 0x0f) {
132 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
136 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
// VRAM buffer size: 1024x512 16bpp, doubled as an overdraw guard band,
// plus one extra 4 KB page.
147 // double, for overdraw guard
148 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
150 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
151 // renderer/downscaler it uses in high res modes:
153 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
154 // fills. (Will change this value if it ever gets large page support)
// NOTE(review): the #ifdef selecting between these two values is missing
// from this fragment.
155 #define VRAM_ALIGN 8192
157 #define VRAM_ALIGN 16
160 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
161 static uint16_t *vram_ptr_orig = NULL;
// mmap-backed VRAM allocation: over-allocate by (VRAM_ALIGN-1), skip a
// 4 KB guard page at the front, then round up to VRAM_ALIGN.
// vram_ptr_orig keeps the raw pointer for later unmapping.
// NOTE(review): return statements/braces are missing from this fragment.
163 #ifdef GPULIB_USE_MMAP
164 static int map_vram(void)
166 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
167 if (gpu.vram != NULL) {
168 // 4kb guard in front
// += 4096/2 because gpu.vram is uint16_t* (pointer arithmetic in halfwords).
169 gpu.vram += (4096 / 2);
171 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
175 fprintf(stderr, "could not map vram, expect crashes\n");
// calloc fallback variant of map_vram (non-mmap builds): same guard-page
// and alignment scheme as the mmap path, zero-initialized by calloc.
// NOTE(review): return statements/braces are missing from this fragment.
180 static int map_vram(void)
182 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
183 if (gpu.vram != NULL) {
184 // 4kb guard in front
185 gpu.vram += (4096 / 2);
187 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
190 fprintf(stderr, "could not allocate vram, expect crashes\n");
// Heap VRAM allocation used at init time when no mapping exists yet;
// identical guard/alignment layout to map_vram above.
// NOTE(review): return statements/braces are missing from this fragment.
195 static int allocate_vram(void)
197 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
198 if (gpu.vram != NULL) {
199 // 4kb guard in front
200 gpu.vram += (4096 / 2);
202 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
205 fprintf(stderr, "could not allocate vram, expect crashes\n");
// Fragment of GPU initialization (enclosing function header not visible):
// allocate VRAM for non-mmap builds, init the renderer, and point the
// frame/hsync counters at a safe zero location until the frontend
// provides real counters via GPUrearmedCallbacks.
213 #ifndef GPULIB_USE_MMAP
214 if (gpu.vram == NULL) {
215 if (allocate_vram() != 0) {
216 printf("ERROR: could not allocate VRAM, exiting..\n");
// Historical wiring to emu-core counters, kept for reference:
222 //extern uint32_t hSyncCount; // in psxcounters.cpp
223 //extern uint32_t frame_counter; // in psxcounters.cpp
224 //gpu.state.hcnt = &hSyncCount;
225 //gpu.state.frame_count = &frame_counter;
229 ret |= renderer_init();
// Default the counter pointers to a dummy zero so gpu_log and frameskip
// logic never dereference NULL before callbacks arrive.
231 gpu.state.frame_count = &gpu.zero;
232 gpu.state.hcnt = &gpu.zero;
233 gpu.frameskip.active = 0;
237 /*if (gpu.mmap != NULL) {
// Teardown: release VRAM through whichever allocator produced it
// (munmap for mmap builds, free otherwise) and NULL both pointers to
// guard against use-after-free. NOTE(review): the free() branch and
// return are missing from this fragment.
244 long GPUshutdown(void)
251 if (vram_ptr_orig != NULL) {
252 #ifdef GPULIB_USE_MMAP
253 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
258 vram_ptr_orig = gpu.vram = NULL;
// GP1 control-register write handler. Decodes the command from the top
// byte and updates display state (blanking, DMA mode, display start,
// display range, video mode). Duplicate writes to most registers are
// filtered. NOTE(review): the switch/case skeleton and several branch
// lines are missing from this fragment.
263 void GPUwriteStatus(uint32_t data)
265 //senquack TODO: Would it be wise to add cmd buffer flush here, since
266 // status settings can affect commands already in buffer?
// Lookup tables for the video-mode bits of GP1(0x08).
268 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
269 static const short vres[4] = { 240, 480, 256, 480 };
270 uint32_t cmd = data >> 24;
// Skip redundant writes, except cmds 0/1 (reset/clear) and 5 (display
// start), which have side effects even when the value is unchanged.
272 if (cmd < ARRAY_SIZE(gpu.regs)) {
273 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
275 gpu.regs[cmd] = data;
278 gpu.state.fb_dirty = 1;
// Display enable/blanking (GP1 0x03, presumably).
288 gpu.status.blanking = data & 1;
// DMA direction (GP1 0x04, presumably).
291 gpu.status.dma = data & 3;
// Display start address in VRAM; a flip is a frameskip decision point.
294 gpu.screen.x = data & 0x3ff;
295 gpu.screen.y = (data >> 10) & 0x1ff;
296 if (gpu.frameskip.set) {
297 decide_frameskip_allow(gpu.ex_regs[3]);
298 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
300 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range.
305 gpu.screen.x1 = data & 0xfff;
306 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range.
310 gpu.screen.y1 = data & 0x3ff;
311 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: splice the mode bits into status and refresh hres/vres.
315 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
316 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
317 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
320 renderer_notify_res_change();
// GP1 0x10..0x1f: info queries.
323 if ((cmd & 0xf0) == 0x10)
// Optional per-port extension hook.
328 #ifdef GPUwriteStatus_ext
329 GPUwriteStatus_ext(data);
// Number of parameter words following each GP0 command word (the
// executor below uses 1 + cmd_lengths[cmd] as total length). Zero means
// either "no parameters" or "variable length handled specially"
// (poly-lines 0x48.., VRAM transfers 0xa0/0xc0, fills 0x02/0x80).
333 const unsigned char cmd_lengths[256] =
335 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
338 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
339 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
340 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
341 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
342 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
343 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in the 1024-halfword-wide VRAM.
353 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one scanline segment of l pixels between VRAM and a host buffer;
// direction is chosen by is_read. NOTE(review): the if/else lines
// selecting between the two memcpys are missing from this fragment.
355 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
357 uint16_t *vram = VRAM_MEM_XY(x, y);
359 memcpy(mem, vram, l * 2);
361 memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words between the data buffer and the active
// VRAM transfer rectangle (gpu.dma). Handles a partial first line (via
// dma.offset), whole middle lines, and a partial trailing line; calls
// finish_vram_transfer when the rectangle is complete. Returns the
// number of 32-bit words consumed. NOTE(review): several bookkeeping
// lines are missing from this fragment.
364 static int do_vram_io(uint32_t *data, int count, int is_read)
366 int count_initial = count;
367 uint16_t *sdata = (uint16_t *)data;
368 int x = gpu.dma.x, y = gpu.dma.y;
369 int w = gpu.dma.w, h = gpu.dma.h;
370 int o = gpu.dma.offset;
// Work in 16bpp pixels from here on (2 pixels per input word).
372 count *= 2; // operate in 16bpp pixels
// Finish a line that a previous call left partially transferred.
376 if (gpu.dma.offset) {
377 l = w - gpu.dma.offset;
381 do_vram_line(x + o, y, sdata, l, is_read);
// Full lines while enough data remains.
394 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
396 do_vram_line(x, y, sdata, w, is_read);
// Partial last line; the remainder is tracked in gpu.dma.offset.
402 do_vram_line(x, y, sdata, count, is_read);
408 finish_vram_transfer(is_read);
// Convert consumed pixel count back to 32-bit words.
413 return count_initial - count / 2;
// Begin a VRAM read (0xc0) or write (0xa0) transfer: decode the origin
// and size from the command parameter words, snapshot the state in
// gpu.dma_start, and for reads prefill gp0 with the first pixels so an
// immediate GPUREAD works. NOTE(review): braces and the is_read branch
// structure are missing from this fragment.
416 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
419 log_anomaly("start_vram_transfer while old unfinished\n");
421 gpu.dma.x = pos_word & 0x3ff;
422 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// Size of 0 means maximum (1024x512) via the -1/& /+1 wrap trick.
423 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
424 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
426 gpu.dma.is_read = is_read;
427 gpu.dma_start = gpu.dma;
429 renderer_flush_queues();
432 // XXX: wrong for width 1
433 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// Remember when vram was last read; GPUvBlank uses this for auto-interlace.
434 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
437 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
438 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete the current VRAM transfer; for writes, tell the renderer
// which rectangle changed so it can invalidate its caches.
// NOTE(review): the is_read branch lines are missing from this fragment.
441 static void finish_vram_transfer(int is_read)
446 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
447 gpu.dma_start.w, gpu.dma_start.h);
// Frameskip variant of the GP0 executor: walk the command list without
// drawing, while still tracking state that must stay correct (texture
// page in ex_regs[1], draw environment 0xe0.. regs, poly-line
// terminators, and fill commands, which are either executed if large or
// deferred in pending_fill). Exits back to real drawing as soon as
// decide_frameskip_allow forbids skipping. NOTE(review): several control
// lines are missing from this fragment.
450 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
452 int cmd = 0, pos = 0, len, dummy, v;
455 gpu.frameskip.pending_fill[0] = 0;
457 while (pos < count && skip) {
458 uint32_t *list = data + pos;
460 len = 1 + cmd_lengths[cmd];
// Fill (0x02): execute immediately if it touches more than the visible
// screen, otherwise defer it in pending_fill for decide_frameskip.
464 if ((int)(list[2] & 0x3ff) > gpu.screen.w || (int)((list[2] >> 16) & 0x1ff) > gpu.screen.h)
465 // clearing something large, don't skip
466 do_cmd_list(list, 3, &dummy);
468 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// Textured primitive: keep the texture-page bits of ex_regs[1] current
// even while skipping, since later commands depend on them.
474 gpu.ex_regs[1] &= ~0x1ff;
475 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
// Variable-length poly-lines: scan for the 0x5xxx5xxx terminator word
// (stride 1 for flat, 2 for gouraud).
478 for (v = 3; pos + v < count; v++)
480 if ((list[v] & 0xf000f000) == 0x50005000)
486 for (v = 4; pos + v < count; v += 2)
488 if ((list[v] & 0xf000f000) == 0x50005000)
// Draw-area change (0xe3) can revoke permission to skip.
495 skip = decide_frameskip_allow(list[0]);
496 if ((cmd & 0xf8) == 0xe0)
497 gpu.ex_regs[cmd & 7] = list[0];
501 if (pos + len > count) {
503 break; // incomplete cmd
// VRAM transfer commands must be handled by the caller.
505 if (0xa0 <= cmd && cmd <= 0xdf)
// Push the tracked e-commands to the renderer before returning.
511 renderer_sync_ecmds(gpu.ex_regs);
// Main GP0 dispatcher: consumes `count` words from `data`, routing them
// to VRAM I/O, VRAM-transfer setup, the skip executor, or the real
// renderer. Returns (via the missing tail) how many words could not be
// consumed. Also mirrors ex_regs bits back into the status register and
// re-evaluates frameskip when the draw area (ex_regs[3]) changed.
// NOTE(review): loop braces and several lines are missing from this
// fragment.
516 static noinline int do_cmd_buffer(uint32_t *data, int count)
519 uint32_t old_e3 = gpu.ex_regs[3];
523 for (pos = 0; pos < count; )
// An active VRAM write consumes data words directly.
525 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
527 pos += do_vram_io(data + pos, count - pos, 0);
532 cmd = data[pos] >> 24;
// 0xa0..0xdf: VRAM write/read setup, needs 2 parameter words.
533 if (0xa0 <= cmd && cmd <= 0xdf) {
534 if (unlikely((pos+2) >= count)) {
535 // incomplete vram write/read cmd, can't consume yet
540 // consume vram write/read cmd
541 start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
546 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
547 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
548 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
550 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Reflect texture page (ex_regs[1]) and mask bits (ex_regs[6]) in status.
559 gpu.status.reg &= ~0x1fff;
560 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
561 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
563 gpu.state.fb_dirty |= vram_dirty;
565 if (old_e3 != gpu.ex_regs[3])
566 decide_frameskip_allow(gpu.ex_regs[3]);
// Drain the staging command buffer; any unconsumed tail words are moved
// to the front for the next call. NOTE(review): the cmd_len update line
// is missing from this fragment.
571 static void flush_cmd_buffer(void)
573 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
575 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 write entry point (DMA block write): flush any staged words
// first, then execute the buffer; leftover words are logged as
// discarded. NOTE(review): braces/flush call line are missing from this
// fragment.
579 void GPUwriteDataMem(uint32_t *mem, int count)
583 log_io("gpu_dma_write %p %d\n", mem, count);
585 if (unlikely(gpu.cmd_len > 0))
588 left = do_cmd_buffer(mem, count);
590 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: stage into cmd_buffer and flush when full.
593 void GPUwriteData(uint32_t data)
595 log_io("gpu_write %08x\n", data);
596 gpu.cmd_buffer[gpu.cmd_len++] = data;
597 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain in emulated RAM: each node is a header
// word (next-address in low 24 bits, payload length presumably in the
// top byte — the extraction line is missing) followed by GP0 words.
// Includes a loop-detection pass that temporarily marks visited nodes
// with bit 23 and strips the marks afterwards. Returns accumulated
// cpu_cycles (via the missing tail). NOTE(review): many body lines are
// missing from this fragment.
601 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
603 uint32_t addr, *list, ld_addr = 0;
604 int len, left, count;
// Prefetch the first node before touching it.
607 preload(rambase + (start_addr & 0x1fffff) / 4);
609 if (unlikely(gpu.cmd_len > 0))
612 log_io("gpu_dma_chain\n");
// Bit 23 set in the address terminates the chain.
613 addr = start_addr & 0xffffff;
614 for (count = 0; (addr & 0x800000) == 0; count++)
616 list = rambase + (addr & 0x1fffff) / 4;
618 addr = list[0] & 0xffffff;
// Prefetch the next node while this one is processed.
619 preload(rambase + (addr & 0x1fffff) / 4);
// Rough cycle cost: fixed header cost plus one per payload word.
623 cpu_cycles += 5 + len;
625 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
628 left = do_cmd_buffer(list + 1, len);
630 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
// After LD_THRESHOLD nodes, assume a possible infinite loop and start
// marking visited nodes with bit 23 (safe: it would DMA-error on HW).
633 #define LD_THRESHOLD (8*1024)
634 if (count >= LD_THRESHOLD) {
635 if (count == LD_THRESHOLD) {
640 // loop detection marker
641 // (bit23 set causes DMA error on real machine, so
642 // unlikely to be ever set by the game)
648 // remove loop detection markers
649 count -= LD_THRESHOLD + 2;
650 addr = ld_addr & 0x1fffff;
651 while (count-- > 0) {
652 list = rambase + addr / 4;
653 addr = list[0] & 0x1fffff;
654 list[0] &= ~0x800000;
// Record list stats for the frontend (frameskip/vsync heuristics).
658 gpu.state.last_list.frame = *gpu.state.frame_count;
659 gpu.state.last_list.hcnt = *gpu.state.hcnt;
660 gpu.state.last_list.cycles = cpu_cycles;
661 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA block read): flush staged commands, then stream the
// active VRAM-read transfer into mem. NOTE(review): the flush call and
// dma.h guard lines are missing from this fragment.
666 void GPUreadDataMem(uint32_t *mem, int count)
668 log_io("gpu_dma_read %p %d\n", mem, count);
670 if (unlikely(gpu.cmd_len > 0))
674 do_vram_io(mem, count, 1);
// Single-word GPUREAD: flush staged commands, pull one word from the
// active VRAM read (or gp0 latch — lines missing), and return it.
677 uint32_t GPUreadData(void)
681 if (unlikely(gpu.cmd_len > 0))
686 do_vram_io(&ret, 1, 1);
688 log_io("gpu_read %08x\n", ret);
// Status register read: flush staged commands first so status bits
// derived from ex_regs are up to date, then return the mirror.
692 uint32_t GPUreadStatus(void)
696 if (unlikely(gpu.cmd_len > 0))
699 ret = gpu.status.reg;
700 log_io("gpu_read_status %08x\n", ret);
// Savestate layout fragment (struct GPUFreeze — enclosing declaration
// not visible in this chunk). Field meanings per the original comments:
706 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
707 uint32_t ulStatus; // current gpu status
708 uint32_t ulControl[256]; // latest control register values
709 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Savestate save/load. Save copies VRAM (512 lines of 1024 16bpp
// pixels), the GP1 register mirror, and ex_regs (stored at ulControl
// offset 0xe0); load restores them and replays GP1 writes 8..1 to
// rebuild derived screen state — the XOR-by-1 defeats the duplicate-
// write filter in GPUwriteStatus. NOTE(review): the type switch and
// returns are missing from this fragment.
712 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
722 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2)
723 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
724 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
725 freeze->ulStatus = gpu.status.reg;
729 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
730 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
731 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
732 gpu.status.reg = freeze->ulStatus;
734 for (i = 8; i > 0; i--) {
735 gpu.regs[i] ^= 1; // avoid reg change detection
736 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// Resync renderer with restored draw environment and full VRAM.
738 renderer_sync_ecmds(gpu.ex_regs);
739 renderer_update_caches(0, 0, 1024, 512);
// Per-vsync update: flush render queues, handle blanking (present one
// black frame then stop), apply the frameskip frame-ready gate, and
// present when the framebuffer is dirty. NOTE(review): the vout/flip
// call lines and several braces are missing from this fragment.
746 void GPUupdateLace(void)
750 renderer_flush_queues();
752 if (gpu.status.blanking) {
753 if (!gpu.state.blanked) {
// First blanked frame: mark dirty so a black frame gets shown once.
755 gpu.state.blanked = 1;
756 gpu.state.fb_dirty = 1;
761 renderer_notify_update_lace(0);
763 if (!gpu.state.fb_dirty)
766 if (gpu.frameskip.set) {
767 if (!gpu.frameskip.frame_ready) {
// Keep waiting unless too long (9 frames) since the last flip,
// then force frameskip off to avoid a stuck display.
768 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
770 gpu.frameskip.active = 0;
772 gpu.frameskip.frame_ready = 0;
776 gpu.state.fb_dirty = 0;
777 gpu.state.blanked = 0;
778 renderer_notify_update_lace(1);
// VBlank notification: compute the effective interlace mode and pass it
// (with the current field, lcf) to the renderer. allow_interlace == 2 is
// "auto": disable interlace for games that haven't read VRAM recently,
// since they likely never display the result on a progressive screen.
// NOTE(review): the auto-disable assignment line is missing from this
// fragment.
781 void GPUvBlank(int is_vblank, int lcf)
783 int interlace = gpu.state.allow_interlace
784 && gpu.status.interlace && gpu.status.dheight;
785 // interlace doesn't look nice on progressive displays,
786 // so we have this "auto" mode here for games that don't read vram
787 if (gpu.state.allow_interlace == 2
788 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Notify the renderer on any field while interlaced, or once on the
// transition back to progressive.
792 if (interlace || interlace != gpu.state.old_interlace) {
793 gpu.state.old_interlace = interlace;
797 renderer_flush_queues();
798 renderer_set_interlace(interlace, !lcf);
802 #include "../../frontend/plugin_lib.h"
// Frontend callback registration: wire frameskip config/advice, the real
// frame/hsync counters, interlace/enhancement/dithering options, and the
// memory-mapping hooks; then map VRAM if needed and propagate the config
// to renderer and video-out. NOTE(review): the map_vram call line is
// missing from this fragment.
804 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
806 gpu.frameskip.set = cbs->frameskip;
807 gpu.frameskip.advice = &cbs->fskip_advice;
808 gpu.frameskip.active = 0;
809 gpu.frameskip.frame_ready = 1;
810 gpu.state.hcnt = cbs->gpu_hcnt;
811 gpu.state.frame_count = cbs->gpu_frame_count;
812 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
813 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
815 gpu.useDithering = cbs->gpu_neon.allow_dithering;
816 gpu.mmap = cbs->mmap;
817 gpu.munmap = cbs->munmap;
// VRAM may not exist yet when callbacks arrive before init completes.
820 if (gpu.vram == NULL)
// Give the frontend direct VRAM access if it wants it.
823 if (cbs->pl_vout_set_raw_vram)
824 cbs->pl_vout_set_raw_vram(gpu.vram);
825 renderer_set_config(cbs);
826 vout_set_config(cbs);
829 // vim:shiftwidth=2:expandtab