2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
13 #include <stdlib.h> /* for calloc */
17 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
19 #define unlikely(x) __builtin_expect((x), 0)
20 #define preload __builtin_prefetch
21 #define noinline __attribute__((noinline))
28 #define gpu_log(fmt, ...) \
29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
31 //#define log_io gpu_log
33 //#define log_anomaly gpu_log
34 #define log_anomaly(...)
38 static noinline int do_cmd_buffer(uint32_t *data, int count);
39 static void finish_vram_transfer(int is_read);
41 static noinline void do_cmd_reset(void)
// Drain any buffered GP0 command words, then close out an in-flight
// VRAM transfer, so a GPU reset starts from a clean command state.
// NOTE(review): braces/intermediate lines are missing from this excerpt.
43 if (unlikely(gpu.cmd_len > 0))
44 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// dma.h > 0 means a VRAM read/write transfer is still pending rows
47 if (unlikely(gpu.dma.h > 0))
48 finish_vram_transfer(gpu.dma_start.is_read);
52 static noinline void do_reset(void)
// Restore GPU registers and display state to power-on defaults (GP1(00h)).
58 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs mirror the latest 0xEx rendering-state commands; seed each slot
// with its own opcode (0xe0+i) in the top byte and zeroed payload.
59 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
60 gpu.ex_regs[i] = (0xe0 + i) << 24;
// 0x14802000: default status word (display disabled, ready bits set)
61 gpu.status.reg = 0x14802000;
// default NTSC-ish mode: 256x240 visible
64 gpu.screen.hres = gpu.screen.w = 256;
65 gpu.screen.vres = gpu.screen.h = 240;
68 static noinline void update_width(void)
// Derive the visible width from the GP1 horizontal display range.
// 2560 is the full horizontal range in GPU clock ticks.
70 int sw = gpu.screen.x2 - gpu.screen.x1;
71 if (sw <= 0 || sw >= 2560)
// nonsense range -> fall back to full horizontal resolution
// (the else keyword on the omitted line 74 is not visible here)
73 gpu.screen.w = gpu.screen.hres;
75 gpu.screen.w = sw * gpu.screen.hres / 2560;
78 static noinline void update_height(void)
// Derive the visible height from the GP1 vertical display range.
80 // TODO: emulate this properly..
81 int sh = gpu.screen.y2 - gpu.screen.y1;
// dheight (480-line / interlace) presumably doubles sh — the doubling
// statement on the omitted line 83 is not visible in this excerpt.
82 if (gpu.status.dheight)
84 if (sh <= 0 || sh > gpu.screen.vres)
90 static noinline void decide_frameskip(void)
// Called once per flip: toggles frameskip.active for the upcoming frame
// and replays a deferred fill command when the frame will be drawn.
92 if (gpu.frameskip.active)
// (increment of frameskip.cnt on the omitted lines is not visible)
95 gpu.frameskip.cnt = 0;
96 gpu.frameskip.frame_ready = 1;
// engage skipping either on external advice or on a fixed 1-of-N pattern
99 if (!gpu.frameskip.active && *gpu.frameskip.advice)
100 gpu.frameskip.active = 1;
101 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
102 gpu.frameskip.active = 1;
104 gpu.frameskip.active = 0;
// a fill (0x02) deferred while skipping must still be executed once we
// start rendering again, or the frame would keep stale contents
106 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
108 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
109 gpu.frameskip.pending_fill[0] = 0;
113 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
// Decide if skipping is permitted given the current draw area (GP0 E3h):
// returns nonzero (allow) when drawing lands outside the displayed area.
115 // no frameskip if it decides to draw to display area,
116 // but not for interlace since it'll most likely always do that
117 uint32_t x = cmd_e3 & 0x3ff;
118 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned subtract-and-compare doubles as a "below screen.x" test too
// (negative differences wrap to large unsigned values)
119 gpu.frameskip.allow = gpu.status.interlace ||
120 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
121 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
122 return gpu.frameskip.allow;
125 static noinline void get_gpu_info(uint32_t data)
// GP1(10h) "get GPU info": latch the requested value into gpu.gp0 for
// a subsequent GPUreadData. Only two of the switch cases are visible
// here; the case labels themselves are on omitted lines.
127 switch (data & 0x0f) {
// draw area / offset registers, 20 significant bits
131 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// draw offset (E5h), 22 significant bits
135 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
146 // double, for overdraw guard
147 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
149 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
150 // renderer/downscaler it uses in high res modes:
152 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
153 // fills. (Will change this value if it ever gets large page support)
154 #define VRAM_ALIGN 8192
156 #define VRAM_ALIGN 16
159 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
160 static uint16_t *vram_ptr_orig = NULL;
162 #ifdef GPULIB_USE_MMAP
163 static int map_vram(void)
// Map VRAM via the host-provided mmap callback; keeps the raw pointer in
// vram_ptr_orig for later munmap, then aligns the working pointer.
165 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
166 if (gpu.vram != NULL) {
167 // 4kb guard in front
168 gpu.vram += (4096 / 2);
// round up to VRAM_ALIGN (power of two) — required by some renderers
170 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
// failure path (return statements are on omitted lines)
174 fprintf(stderr, "could not map vram, expect crashes\n");
179 static int map_vram(void)
// Non-mmap fallback: zeroed heap allocation with the same guard/alignment
// scheme as the mmap variant above.
181 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
182 if (gpu.vram != NULL) {
183 // 4kb guard in front
184 gpu.vram += (4096 / 2);
// align up to VRAM_ALIGN (power of two)
186 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
189 fprintf(stderr, "could not allocate vram, expect crashes\n");
194 static int allocate_vram(void)
// Late (re)allocation path used by GPUinit when VRAM was not mapped yet;
// identical allocation scheme to map_vram above.
196 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
197 if (gpu.vram != NULL) {
198 // 4kb guard in front
199 gpu.vram += (4096 / 2);
// align up to VRAM_ALIGN (power of two)
201 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
204 fprintf(stderr, "could not allocate vram, expect crashes\n");
// NOTE(review): this is the interior of GPUinit(); its signature is on an
// omitted line above this excerpt.
212 #ifndef GPULIB_USE_MMAP
213 if (gpu.vram == NULL) {
214 if (allocate_vram() != 0) {
215 printf("ERROR: could not allocate VRAM, exiting..\n");
// counter hookup is done later via GPUrearmedCallbacks; these stay as
// reference for the standalone (non-frontend) build
221 //extern uint32_t hSyncCount; // in psxcounters.cpp
222 //extern uint32_t frame_counter; // in psxcounters.cpp
223 //gpu.state.hcnt = &hSyncCount;
224 //gpu.state.frame_count = &frame_counter;
228 ret |= renderer_init();
// point the counters at a harmless dummy until the frontend registers real ones
230 gpu.state.frame_count = &gpu.zero;
231 gpu.state.hcnt = &gpu.zero;
232 gpu.frameskip.active = 0;
236 /*if (gpu.mmap != NULL) {
243 long GPUshutdown(void)
// Release VRAM through whichever mechanism allocated it (munmap vs free —
// the free() call is on an omitted line in the #else branch).
250 if (vram_ptr_orig != NULL) {
251 #ifdef GPULIB_USE_MMAP
252 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
// clear both pointers so a double shutdown is harmless
257 vram_ptr_orig = gpu.vram = NULL;
262 void GPUwriteStatus(uint32_t data)
// GP1 control-port write: top byte selects the command, low 24 bits are
// the payload. The switch/case scaffolding sits on omitted lines; the
// visible statements below are the bodies of the individual cases.
264 //senquack TODO: Would it be wise to add cmd buffer flush here, since
265 // status settings can affect commands already in buffer?
// lookup tables for GP1(08h) display mode -> visible resolution
267 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
268 static const short vres[4] = { 240, 480, 256, 480 };
269 uint32_t cmd = data >> 24;
271 if (cmd < ARRAY_SIZE(gpu.regs)) {
// skip redundant writes, except cmds 0,1,5 which always take effect
272 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
274 gpu.regs[cmd] = data;
277 gpu.state.fb_dirty = 1;
// GP1(03h): display enable/blank
287 gpu.status.blanking = data & 1;
// GP1(04h): DMA direction
290 gpu.status.dma = data & 3;
// GP1(05h): display start address in VRAM
293 gpu.screen.x = data & 0x3ff;
294 gpu.screen.y = (data >> 10) & 0x1ff;
295 if (gpu.frameskip.set) {
296 decide_frameskip_allow(gpu.ex_regs[3]);
// treat the first flip of a frame as the frameskip decision point
297 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
299 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// GP1(06h): horizontal display range
304 gpu.screen.x1 = data & 0xfff;
305 gpu.screen.x2 = (data >> 12) & 0xfff;
// GP1(07h): vertical display range
309 gpu.screen.y1 = data & 0x3ff;
310 gpu.screen.y2 = (data >> 10) & 0x3ff;
// GP1(08h): display mode -> status bits 16-22, then recompute resolution
314 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
315 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
316 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
319 renderer_notify_res_change();
// GP1(10h..1Fh): info query
322 if ((cmd & 0xf0) == 0x10)
// optional per-port extension hook
327 #ifdef GPUwriteStatus_ext
328 GPUwriteStatus_ext(data);
// Number of EXTRA parameter words each GP0 command opcode consumes
// (total packet length is 1 + cmd_lengths[cmd]); indexed by the top
// byte of the command word. The opening/closing braces of the
// initializer are on omitted lines in this excerpt.
332 const unsigned char cmd_lengths[256] =
334 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
337 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
338 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
339 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
340 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
341 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
342 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
344 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
352 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
354 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
// Copy one horizontal span of l 16bpp pixels between VRAM at (x, y)
// and the host buffer mem; direction chosen by is_read (the if/else
// keywords are on omitted lines 357/359 of the original).
356 uint16_t *vram = VRAM_MEM_XY(x, y);
358 memcpy(mem, vram, l * 2);
360 memcpy(vram, mem, l * 2);
363 static int do_vram_io(uint32_t *data, int count, int is_read)
// Service (part of) an active VRAM transfer with count 32-bit words of
// data; returns how many 32-bit words were actually consumed/produced.
// Transfer geometry persists in gpu.dma across calls (offset tracks a
// partially-completed row).
365 int count_initial = count;
366 uint16_t *sdata = (uint16_t *)data;
367 int x = gpu.dma.x, y = gpu.dma.y;
368 int w = gpu.dma.w, h = gpu.dma.h;
369 int o = gpu.dma.offset;
371 count *= 2; // operate in 16bpp pixels
// finish the row left incomplete by the previous call first
373 if (gpu.dma.offset) {
374 l = w - gpu.dma.offset;
378 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows
391 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
393 do_vram_line(x, y, sdata, w, is_read);
// trailing partial row — remember the offset for the next call
399 do_vram_line(x, y, sdata, count, is_read);
405 finish_vram_transfer(is_read);
// convert leftover pixels back to 32-bit words consumed
410 return count_initial - count / 2;
413 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
// Begin a GP0 A0h/C0h VRAM write/read: decode destination rectangle and
// latch it into gpu.dma (and dma_start, kept for cache invalidation).
416 log_anomaly("start_vram_transfer while old unfinished\n");
418 gpu.dma.x = pos_word & 0x3ff;
419 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// size 0 means maximum (1024/512), hence the -1 / +1 dance
420 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
421 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
423 gpu.dma.is_read = is_read;
424 gpu.dma_start = gpu.dma;
// make sure queued draws hit VRAM before the CPU reads it back
426 renderer_flush_queues();
429 // XXX: wrong for width 1
// pre-latch the first word so an immediate GPUREAD returns valid data
430 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
431 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
434 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
435 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
438 static void finish_vram_transfer(int is_read)
// Close out the active VRAM transfer; for writes, tell the renderer the
// affected rectangle so cached textures get invalidated. (The branch on
// is_read sits on omitted lines.)
443 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
444 gpu.dma_start.w, gpu.dma_start.h);
447 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
// Walk a GP0 command list while frameskip is active, executing only the
// commands that must not be dropped (fills, state cmds) and discarding
// drawing primitives. Returns words consumed; *last_cmd gets the last
// opcode seen. Stops early if a display-list flip disallows skipping.
449 int cmd = 0, pos = 0, len, dummy, v;
452 gpu.frameskip.pending_fill[0] = 0;
454 while (pos < count && skip) {
455 uint32_t *list = data + pos;
457 len = 1 + cmd_lengths[cmd];
// fill rectangle (0x02): skipping a big fill would leave stale pixels
461 if ((int)(list[2] & 0x3ff) > gpu.screen.w || (int)((list[2] >> 16) & 0x1ff) > gpu.screen.h)
462 // clearing something large, don't skip
463 do_cmd_list(list, 3, &dummy);
// small fill: defer it; decide_frameskip replays it on the next drawn frame
465 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
// textured prim: keep texture-page bits of ex_regs[1] in sync even
// though the primitive itself is skipped
471 gpu.ex_regs[1] &= ~0x1ff;
472 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
// polyline: scan for the 0x5xxx5xxx terminator to find the real length
475 for (v = 3; pos + v < count; v++)
477 if ((list[v] & 0xf000f000) == 0x50005000)
// shaded polyline variant: terminator every other word
483 for (v = 4; pos + v < count; v += 2)
485 if ((list[v] & 0xf000f000) == 0x50005000)
// E3h (draw area top-left) can re-enable drawing to display -> re-decide
492 skip = decide_frameskip_allow(list[0]);
493 if ((cmd & 0xf8) == 0xe0)
494 gpu.ex_regs[cmd & 7] = list[0];
498 if (pos + len > count) {
500 break; // incomplete cmd
// vram transfer commands must be handled by the caller, not skipped here
502 if (0xa0 <= cmd && cmd <= 0xdf)
// push the (possibly updated) 0xEx state to the renderer
508 renderer_sync_ecmds(gpu.ex_regs);
513 static noinline int do_cmd_buffer(uint32_t *data, int count)
// Main GP0 dispatcher: routes incoming words to the active VRAM transfer,
// starts new transfers, or hands drawing commands to the renderer (or the
// skip path). Returns the number of unprocessed words left over.
516 uint32_t old_e3 = gpu.ex_regs[3];
520 for (pos = 0; pos < count; )
// an active VRAM write eats raw data words first
522 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
524 pos += do_vram_io(data + pos, count - pos, 0);
529 cmd = data[pos] >> 24;
530 if (0xa0 <= cmd && cmd <= 0xdf) {
531 // consume vram write/read cmd
532 start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
537 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
538 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
539 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
541 pos += do_cmd_list(data + pos, count - pos, &cmd);
// refresh the status-register bits mirrored from ex_regs (texpage etc.)
550 gpu.status.reg &= ~0x1fff;
551 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
552 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
554 gpu.state.fb_dirty |= vram_dirty;
// draw-area change may alter whether frameskip is permissible
556 if (old_e3 != gpu.ex_regs[3])
557 decide_frameskip_allow(gpu.ex_regs[3]);
562 static void flush_cmd_buffer(void)
// Process the buffered command words; keep any incomplete trailing
// command at the start of the buffer for the next write to complete.
564 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
566 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
570 void GPUwriteDataMem(uint32_t *mem, int count)
// Bulk GP0 data write (DMA block mode). Flushes any partially-buffered
// command first so ordering with GPUwriteData stays correct.
574 log_io("gpu_dma_write %p %d\n", mem, count);
576 if (unlikely(gpu.cmd_len > 0))
579 left = do_cmd_buffer(mem, count);
// leftover words here can't be completed later — report the anomaly
581 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
584 void GPUwriteData(uint32_t data)
// Single-word GP0 write: accumulate into cmd_buffer and flush when full,
// so multi-word commands arriving one word at a time are reassembled.
586 log_io("gpu_write %08x\n", data);
587 gpu.cmd_buffer[gpu.cmd_len++] = data;
588 if (gpu.cmd_len >= CMD_BUFFER_LEN)
592 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
// Walk a linked-list DMA chain in PSX RAM: each node is one header word
// (top byte = payload length, low 24 bits = next address, bit23 set =
// end marker) followed by GP0 command words. Returns emulated cpu cycles.
594 uint32_t addr, *list, ld_addr = 0;
595 int len, left, count;
598 preload(rambase + (start_addr & 0x1fffff) / 4);
600 if (unlikely(gpu.cmd_len > 0))
603 log_io("gpu_dma_chain\n");
604 addr = start_addr & 0xffffff;
// bit23 in addr terminates the chain (0xffffff end marker)
605 for (count = 0; (addr & 0x800000) == 0; count++)
607 list = rambase + (addr & 0x1fffff) / 4;
609 addr = list[0] & 0xffffff;
610 preload(rambase + (addr & 0x1fffff) / 4);
// rough cycle cost per node: header + payload words
614 cpu_cycles += 5 + len;
616 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
619 left = do_cmd_buffer(list + 1, len);
621 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
// guard against games that create looped chains: after LD_THRESHOLD
// nodes, start tagging visited nodes so a revisit can be detected
624 #define LD_THRESHOLD (8*1024)
625 if (count >= LD_THRESHOLD) {
626 if (count == LD_THRESHOLD) {
631 // loop detection marker
632 // (bit23 set causes DMA error on real machine, so
633 // unlikely to be ever set by the game)
639 // remove loop detection markers
640 count -= LD_THRESHOLD + 2;
641 addr = ld_addr & 0x1fffff;
642 while (count-- > 0) {
643 list = rambase + addr / 4;
644 addr = list[0] & 0x1fffff;
645 list[0] &= ~0x800000;
// bookkeeping for save-state / debugging
649 gpu.state.last_list.frame = *gpu.state.frame_count;
650 gpu.state.last_list.hcnt = *gpu.state.hcnt;
651 gpu.state.last_list.cycles = cpu_cycles;
652 gpu.state.last_list.addr = start_addr;
657 void GPUreadDataMem(uint32_t *mem, int count)
// Bulk GPUREAD (DMA): service the active VRAM->CPU transfer into mem.
659 log_io("gpu_dma_read %p %d\n", mem, count);
// flush buffered commands so a pending C0h transfer gets started first
661 if (unlikely(gpu.cmd_len > 0))
665 do_vram_io(mem, count, 1);
668 uint32_t GPUreadData(void)
// Single-word GPUREAD: returns the next VRAM transfer word, or the value
// latched in gpu.gp0 (the latching/return lines are omitted here).
672 if (unlikely(gpu.cmd_len > 0))
677 do_vram_io(&ret, 1, 1);
679 log_io("gpu_read %08x\n", ret);
683 uint32_t GPUreadStatus(void)
// GPUSTAT read: flush pending commands first so mirrored status bits
// (texpage etc.) are up to date, then return the status word.
687 if (unlikely(gpu.cmd_len > 0))
690 ret = gpu.status.reg;
691 log_io("gpu_read_status %08x\n", ret);
// NOTE(review): these are the members of struct GPUFreeze (used by
// GPUfreeze below); the struct header is on an omitted line.
697 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
698 uint32_t ulStatus; // current gpu status
699 uint32_t ulControl[256]; // latest control register values
700 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
703 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save-state interface: type selects save vs load (the switch/case lines
// are omitted). ex_regs are stashed in ulControl[0xe0..] by convention.
// --- save path ---
711 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
712 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
713 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
714 freeze->ulStatus = gpu.status.reg;
// --- load path ---
717 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
718 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
719 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
720 gpu.status.reg = freeze->ulStatus;
// replay control regs 8..1 so derived state (screen size, blanking)
// gets recomputed; the XOR defeats the redundant-write check in
// GPUwriteStatus while leaving the stored value unchanged
722 for (i = 8; i > 0; i--) {
723 gpu.regs[i] ^= 1; // avoid reg change detection
724 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
726 renderer_sync_ecmds(gpu.ex_regs);
// whole VRAM changed: invalidate all renderer caches
727 renderer_update_caches(0, 0, 1024, 512);
734 void GPUupdateLace(void)
// Per-vsync hook: presents the frame (vout flip is on omitted lines),
// handles blanking and the frameskip bookkeeping.
738 renderer_flush_queues();
// display blanked: show black once, then do nothing until unblanked
740 if (gpu.status.blanking) {
741 if (!gpu.state.blanked) {
743 gpu.state.blanked = 1;
744 gpu.state.fb_dirty = 1;
// nothing drawn since last flip -> skip presenting
749 if (!gpu.state.fb_dirty)
752 if (gpu.frameskip.set) {
753 if (!gpu.frameskip.frame_ready) {
// safety valve: never skip more than ~9 consecutive frames
754 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
756 gpu.frameskip.active = 0;
758 gpu.frameskip.frame_ready = 0;
762 gpu.state.fb_dirty = 0;
763 gpu.state.blanked = 0;
766 void GPUvBlank(int is_vblank, int lcf)
// vblank notification: computes the effective interlace mode and pushes
// it (with the current field, lcf) to the renderer when it changes.
768 int interlace = gpu.state.allow_interlace
769 && gpu.status.interlace && gpu.status.dheight;
770 // interlace doesn't look nice on progressive displays,
771 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 means "auto": only interlace if the game read
// VRAM recently (it likely depends on the interlaced field contents)
772 if (gpu.state.allow_interlace == 2
773 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// also notify once on the transition back to progressive
777 if (interlace || interlace != gpu.state.old_interlace) {
778 gpu.state.old_interlace = interlace;
782 renderer_flush_queues();
783 renderer_set_interlace(interlace, !lcf);
787 #include "../../frontend/plugin_lib.h"
789 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
// Frontend registration hook: wires frontend-provided counters, memory
// callbacks and renderer configuration into gpulib state.
791 gpu.frameskip.set = cbs->frameskip;
792 gpu.frameskip.advice = &cbs->fskip_advice;
793 gpu.frameskip.active = 0;
794 gpu.frameskip.frame_ready = 1;
// replace the dummy counters set in GPUinit with the real ones
795 gpu.state.hcnt = cbs->gpu_hcnt;
796 gpu.state.frame_count = cbs->gpu_frame_count;
797 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
798 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
800 gpu.useDithering = cbs->gpu_neon.allow_dithering;
801 gpu.mmap = cbs->mmap;
802 gpu.munmap = cbs->munmap;
// VRAM may not have been mappable until mmap callbacks arrived
// (the map_vram call is on an omitted line)
805 if (gpu.vram == NULL)
808 if (cbs->pl_vout_set_raw_vram)
809 cbs->pl_vout_set_raw_vram(gpu.vram);
810 renderer_set_config(cbs);
811 vout_set_config(cbs);
814 // vim:shiftwidth=2:expandtab