/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h> /* for calloc */

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0 || sh > gpu.screen.vres)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
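
// (when a frame is skipped, do_cmd_list_skip() stashes any screen-clearing
//  fill command in pending_fill; decide_frameskip() replays it as soon as
//  skipping ends, so the next rendered frame still gets its clear)
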
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
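  // the unsigned compares below fold each two-sided range check into one
  // test: (uint32_t)(x - x0) >= w holds both when x < x0 (the subtraction
  // wraps around to a huge value) and when x >= x0 + w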
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
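
// GP1(0x10) info query, returned to the emu through GPUreadData(); roughly:
// indices 2-5 return the latched e2-e5 draw environment words (index 6
// returns e5 with a wider mask), 7 answers the GPU version query
// (summary only - see GP1 docs for exact behavior)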
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
// On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
// the number of TLB fills. (Will change this value if it ever gets large
// page support.)
#define VRAM_ALIGN 8192
#else
#define VRAM_ALIGN 16
#endif
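
// The allocators below over-allocate by VRAM_ALIGN-1 bytes and round the
// pointer up with (p + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1); e.g. with
// VRAM_ALIGN 16, a pointer of 0x1004 becomes (0x1004 + 0xf) & ~0xf = 0x1010.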
// vram ptr received from mmap/malloc/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}

static int allocate_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}
#endif

long GPUinit(void)
{
  int ret;

#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;         // in psxcounters.cpp
  //extern uint32_t frame_counter;      // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  ret  = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  //senquack TODO: Would it be wise to add a cmd buffer flush here, since
  // status settings can affect commands already in the buffer?

  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
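    // GP1(0x08) display mode: bits 0-5 of the data move to status bits
    // 17-22 and bit 6 (the 368-pixel mode flag) to status bit 16, so
    // hres[]/vres[] can be indexed straight off the status register;
    // e.g. data = 0x01 sets status bit 17 -> hres index 2 -> 320 columns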
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
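
// Entries are extra words per command; do_cmd_list_skip() computes a packet
// as 1 + cmd_lengths[cmd] words. E.g. 0x20 (flat triangle) has entry 3:
// one command/color word plus three vertex words.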

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
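// (VRAM is addressed as a 1024x512 grid of 16-bit units, hence the row
//  stride of 1024 halfwords; do_vram_io() masks y with 511 to stay inside)
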
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;

  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    // finish a line a previous call left partially transferred
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      // partial line at the end
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
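  // a size of 0 means the maximum (1024x512): subtract 1, mask, add 1 back,
  // e.g. ((0 - 1) & 0x3ff) + 1 = 1024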
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();

  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h);
  gpu.dma.h = 0;
}

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((int)(list[2] & 0x3ff) > gpu.screen.w ||
            (int)((list[2] >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
        break;
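      // variable-length line strips: scan ahead for the terminator word
      // (top nibble of each halfword is 5, e.g. the usual 0x55555555)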
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(list[0]);
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = list[0];
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
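
// do_cmd_buffer() returns the number of trailing words it could not consume
// (an incomplete command); flush_cmd_buffer() keeps those words buffered,
// while the write/DMA entry points below just log them as discarded.
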
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= 0x800000;
    }
  }
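
  // how the guard works: after LD_THRESHOLD nodes the current address is
  // remembered and every further node's link gets bit 23 set; if the chain
  // loops, a tagged link is eventually re-read and the (addr & 0x800000)
  // loop condition above ends the walk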
  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = list[0] & 0x1fffff;
      list[0] &= ~0x800000;
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
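      // (GPUwriteStatus() returns early when gpu.regs[cmd] already holds
      //  the written value; flipping bit 0 above forces a mismatch so the
      //  replayed write with the original value is always processed)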
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.useDithering = cbs->gpu_neon.allow_dithering;
  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab