/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses:
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // gpu.ex_regs shadows the GP0 0xe0-0xe7 state commands; reset each
  // slot to its own command byte with an empty payload
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
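  // assumed decoding of the reset status value: bits 28/26 set (ready
  // for DMA block / command word), bit 23 set (display disabled) and
  // bit 13 (interlace field)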
  gpu.status.reg = 0x14802000;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
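  // 2560 GPU clocks is assumed here to be the full horizontal display
  // range; the visible range is scaled to pixels at the current hres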
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2; // double-height (interlaced) mode
  if (sh <= 0 || sh > gpu.screen.vres)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
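  // the unsigned compares below fold "v < start || v >= start + size"
  // into one test: when v < start, the subtraction wraps to a huge
  // value that also fails the < size check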
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02 ... 0x05: // e2-e5: tex window, draw area, draw offset
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
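    // leave a 4 KB guard area in front of VRAM (gpu.vram is uint16_t *,
    // hence the / 2); presumably covers renderers that index slightly
    // below the buffer start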
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return 1;
  }
}

long GPUinit(void)
{
  int ret;

  ret  = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2; // undo the guard offset from map_vram
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return 0;
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
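    // the resets (GP1(0)/GP1(1)) and the display address (GP1(5), used
    // for flip detection) always run; other commands are dropped when
    // rewritten with an unchanged value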
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03: // display blanking
      gpu.status.blanking = data & 1;
      break;
    case 0x04: // DMA direction
      gpu.status.dma = data & 3;
      break;
    case 0x05: // display start address
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06: // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07: // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
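    // GP1(8) display mode: bits 0-5 land in status bits 17-22 and bit 6
    // (368-pixel mode) in status bit 16, so hres can be looked up with
    // status bits 16-18 directly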
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

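// extra parameter words carried by each GP0 command (a packet is
// 1 + cmd_lengths[cmd] words in total); variable-length packets, the
// poly-lines at 0x48-0x5f and the vram transfers at 0xa0/0xc0, get
// their true length at parse time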
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

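// vram transfers arrive in arbitrarily sized DMA chunks, so the cursor
// into the current rectangle is kept in gpu.dma (x, y, w, h, offset)
// and advanced across calls until the whole rectangle is done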
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish a line left partial by the previous chunk
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // as many whole lines as the chunk covers
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // vram wraps vertically
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      // partial line at the end of the chunk
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2; // number of words consumed
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
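  // ((v - 1) & mask) + 1 below makes a size of 0 wrap to the maximum
  // (1024x512), which appears to match real GPU behavior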
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
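  // for reads, prime the GPUREAD latch with the first pixels so a read
  // issued right after the setup command already returns data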
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

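// frameskip parser: walks the command words without drawing, while
// still tracking state that must stay correct (e0-e7 writes, texpage
// bits from textured polys) and deferring fills that may belong to
// the next visible frame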
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02: // fill rectangle
        if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27: case 0x2c ... 0x2f: // textured polys:
      case 0x34 ... 0x37: case 0x3c ... 0x3f: // keep texpage bits current
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
        break;
      // poly-lines: scan for the 0x5xxx5xxx terminator word to get the
      // real length; shaded ones step by 2, each vertex carries a color
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(list[0]);
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = list[0];
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o, handled by the caller
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd (0xa0 is write, 0xc0 is read)
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      break; // incomplete cmd
  }

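  // mirror the current e1 (texpage) and e6 (mask) settings into
  // status bits 0-10 and 11-12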
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

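// GPUwriteData() batches single words into gpu.cmd_buffer; the flush
// below runs the parser over the batch and keeps an incomplete
// trailing command for the next round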
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

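// DMA chains are linked lists in main RAM: the top byte of a node's
// header word is its payload length in words, the low 24 bits point at
// the next node, and a set bit 23 terminates the chain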
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    left = do_cmd_buffer(list + 1, len);
    if (left)
      log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

#define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= 0x800000;
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = list[0] & 0x1fffff;
      list[0] &= ~0x800000;
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
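      // replay GP1 registers 8..1; each stored value is flipped first
      // so the unchanged-value check in GPUwriteStatus can't filter
      // the replay out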
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
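      // the frame was skipped; keep waiting for the next rendered one
      // unless the game seems to have stopped flipping (no flip for 9+
      // frames), presumably to avoid showing a stuck screen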
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab