 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 * This work is licensed under the terms of any of these licenses
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
//#define log_anomaly gpu_log
#define log_anomaly(...)

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);

static noinline void do_reset(void)
  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;

static noinline void update_width(void)
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
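
/* A worked example of the scaling above (values assumed, not from this
 * source): with the usual GP1(06h) horizontal display range of
 * 0x260..0xc60 the span is 2560 dotclocks, so the full hres is used;
 * a game programming a span of 1280 would get w = hres / 2. */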
static noinline void update_height(void)
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0 || sh > gpu.screen.vres)

static noinline void decide_frameskip(void)
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
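
  // a fill that was captured while skipping is replayed below, so the
  // screen clear from the skipped frame is not lost once drawing resumes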
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
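  // GP0(E3h) holds the drawing area top-left; frameskip stays allowed as
  // long as that corner lies outside the displayed rectangle.  The
  // unsigned subtract-and-compare covers both sides in one test: values
  // left of/above the screen wrap around to huge numbers.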
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;

static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;

      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)
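/* (1024x512 at 16bpp is 1MB of native VRAM; doubling it presumably gives
 *  rasterizers room to overrun past the last line without bounds checks) */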

static int map_vram(void)
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
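    // gpu.vram is a uint16_t pointer, so += 4096/2 skips the first 4096
    // bytes of the mapping (presumably kept as a guard area in front)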
    gpu.vram += 4096 / 2;
  }
  else
    fprintf(stderr, "could not map vram, expect crashes\n");

long GPUinit(void)
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;

  if (gpu.mmap != NULL) {

long GPUshutdown(void)
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }

void GPUwriteStatus(uint32_t data)
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  // display blanking (GP1(03h))
  gpu.status.blanking = data & 1;

  // DMA direction (GP1(04h))
  gpu.status.dma = data & 3;

  // display start (GP1(05h))
  gpu.screen.x = data & 0x3ff;
  gpu.screen.y = (data >> 10) & 0x1ff;
  if (gpu.frameskip.set) {
    decide_frameskip_allow(gpu.ex_regs[3]);
    if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
      gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
    }
  }

  // horizontal display range (GP1(06h))
  gpu.screen.x1 = data & 0xfff;
  gpu.screen.x2 = (data >> 12) & 0xfff;

  // vertical display range (GP1(07h))
  gpu.screen.y1 = data & 0x3ff;
  gpu.screen.y2 = (data >> 10) & 0x3ff;
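
  // GP1(08h) display mode: bits 0-5 map to GPUSTAT bits 17-22 and
  // bit 6 (the 368-pixel-wide mode) to GPUSTAT bit 16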
  gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
  gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
  gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
  renderer_notify_res_change();

  if ((cmd & 0xf0) == 0x10)
    get_gpu_info(data);

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
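// VRAM is addressed as a 1024x512 grid of 16-bit pixels, so a row is
// always 1024 halfwords regardless of the current display mode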

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);

static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;

  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    do_vram_line(x + o, y, sdata, l, is_read);
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);
  }

  do_vram_line(x, y, sdata, count, is_read);

  finish_vram_transfer(is_read);

  // count was doubled above to work in halfwords, so /2 converts the
  // remainder back to 32-bit words when reporting what was consumed
  return count_initial - count / 2;

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
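  // ((n - 1) & mask) + 1 makes a written size of 0 select the maximum
  // (1024 columns / 512 rows), matching how the hardware masks the size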
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();

  if (is_read) {
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

static void finish_vram_transfer(int is_read)
  renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
    gpu.dma_start.w, gpu.dma_start.h);

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy, v;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
      // clearing something large, don't skip
      do_cmd_list(list, 3, &dummy);
    else
      memcpy(gpu.frameskip.pending_fill, list, 3 * 4);

    gpu.ex_regs[1] &= ~0x1ff;
    gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
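
    // polyline commands are open-ended: scan ahead for the 0x5xxx5xxx
    // terminator word to find where the vertex list really ends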
    for (v = 3; pos + v < count; v++) {
      if ((list[v] & 0xf000f000) == 0x50005000)
        break;
    }

    for (v = 4; pos + v < count; v += 2) {
      if ((list[v] & 0xf000f000) == 0x50005000)
        break;
    }

    skip = decide_frameskip_allow(list[0]);
    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break;
  }

  renderer_sync_ecmds(gpu.ex_regs);

static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3];

  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);
    }

    cmd = data[pos] >> 24;
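    // GP0(A0h-BFh) is a CPU->VRAM write, GP0(C0h-DFh) a VRAM->CPU read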
    if (0xa0 <= cmd && cmd <= 0xdf) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else
      pos += do_cmd_list(data + pos, count - pos, &cmd);
  }
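
  // mirror the latched e1 (draw mode) and e6 (mask) settings into the
  // corresponding GPUSTAT bits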
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);

void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);

void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    left = do_cmd_buffer(list + 1, len);
    log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

#define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        // loop detection marker
        // (bit23 set causes DMA error on real machine, so
        // unlikely to be ever set by the game)
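        // once the threshold is hit, each visited header gets bit23 set
        // so a revisit can be detected and the walk stopped; the cleanup
        // below then strips those markers again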

  // remove loop detection markers
  count -= LD_THRESHOLD + 2;
  addr = ld_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  do_vram_io(mem, count, 1);

uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);

uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);

struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
  // saving: gpu -> freeze
  memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
  memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
  memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
  freeze->ulStatus = gpu.status.reg;

  // loading: freeze -> gpu
  memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
  memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
  memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
  gpu.status.reg = freeze->ulStatus;
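
  // reapply control regs 1..8; the XOR trick defeats the "value
  // unchanged" early-out in GPUwriteStatus so each write takes effect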
  for (i = 8; i > 0; i--) {
    gpu.regs[i] ^= 1; // avoid reg change detection
    GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
  }

  renderer_sync_ecmds(gpu.ex_regs);
  renderer_update_caches(0, 0, 1024, 512);

void GPUupdateLace(void)
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
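    // a skipped frame produced nothing to show; keep waiting unless it
    // has been ~9 frames since the last flip, then force skipping off so
    // the display is not starved indefinitely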
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;

void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
      && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);

// vim:shiftwidth=2:expandtab