2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Small helper macros: array element count, GCC branch-prediction hint,
// and an attribute to keep cold helpers out of hot callers.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug logging: prefixes each message with the live frame counter and
// hblank counter (read through pointers stored in gpu.state).
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Optional verbose channels; left disabled (compiled out) by default.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Forward declarations for the GP0 command-buffer processor and the
// VRAM DMA completion hook, both defined later in this file.
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
// Drain state before a GPU command reset: flush any buffered GP0 words and
// complete an in-flight VRAM transfer so nothing stale survives the reset.
// NOTE(review): this capture is missing lines (braces and, presumably, the
// cmd_len/dma.h clears) — confirm against the full source.
32 static noinline void do_cmd_reset(void)
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset (GP1(00h)-style): zero the control registers, seed the
// e1..e7 shadow registers with their own command numbers in the top byte,
// and restore the power-on status and default 256x240 screen mode.
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// Each ex_regs[i] keeps its GP0 command id (0xe0+i) in bits 24-31.
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// Power-on status word (display disabled, ready bits set).
52 gpu.status.reg = 0x14802000;
53 gpu.screen.hres = gpu.screen.w = 256;
54 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the x1/x2 display range registers.
// 2560 is the number of GPU clocks per scanline; the range width is scaled
// by hres/2560 to get pixels. Out-of-range values fall back to full hres.
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
62 if (sw <= 0 || sw >= 2560)
// invalid display range — treat as full-width
64 gpu.screen.w = gpu.screen.hres;
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the y1/y2 display range registers;
// gpu.status.dheight indicates the double-height (480-line interlace) mode.
// NOTE(review): the body of this function is mostly missing from this
// capture — only the range computation and the dheight test survived.
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
// Decide whether the next frame should be skipped. Called once per flip;
// toggles gpu.frameskip.active based on either external advice
// (*gpu.frameskip.advice) or a fixed skip-N-of-M setting (frameskip.set).
// NOTE(review): some lines (cnt increment, braces) are missing here.
80 static noinline void decide_frameskip(void)
82 if (gpu.frameskip.active)
// a skipped frame completed — reset counter, mark a real frame pending
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
// dynamic mode: follow the frontend's advice flag
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
// fixed mode: skip until cnt reaches the configured count
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
// If we stop skipping and a fill command was deferred while skipping,
// replay it now so VRAM contents stay consistent.
96 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
98 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
99 gpu.frameskip.pending_fill[0] = 0;
// Decide if skipping is permissible given the current drawing area (e3
// register value): drawing into the displayed region must not be skipped,
// except under interlace where that is the normal case.
// Returns the new gpu.frameskip.allow value.
103 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
105 // no frameskip if it decides to draw to display area,
106 // but not for interlace since it'll most likely always do that
107 uint32_t x = cmd_e3 & 0x3ff;
108 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned-wrap trick: (x - screen.x) >= w covers both x < screen.x
// and x >= screen.x + w in one comparison
109 gpu.frameskip.allow = gpu.status.interlace ||
110 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
111 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
112 return gpu.frameskip.allow;
// GP1(10h) "get GPU info": places the requested internal register value
// into gpu.gp0 for the CPU to read back via GPUREAD.
// NOTE(review): the switch's case labels are missing from this capture;
// visible code shows ex_regs[data&7] and ex_regs[5] paths (20-bit masked).
115 static noinline void get_gpu_info(uint32_t data)
117 switch (data & 0x0f) {
122 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
125 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// 1024x512 16bpp VRAM; allocated at double size as an overdraw guard
// (renderers may write slightly past the logical end).
136 // double, for overdraw guard
137 #define VRAM_SIZE (1024 * 512 * 2 * 2)
// Allocate/map the VRAM buffer through the frontend-provided gpu.mmap
// callback. On success the pointer is advanced by 4096 bytes (2048 pixels)
// — presumably a leading guard area; confirm against the mmap wrapper.
// On failure only warns: the emulator continues with a NULL vram pointer.
// NOTE(review): return statements are missing from this capture.
139 static int map_vram(void)
141 gpu.vram = gpu.mmap(VRAM_SIZE);
142 if (gpu.vram != NULL) {
143 gpu.vram += 4096 / 2;
147 fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): fragment of GPUinit — the function header and surrounding
// lines were lost in this capture. Visible code initializes the renderer,
// points the frame/hcnt counters at a safe dummy (gpu.zero) until the
// frontend supplies real counters, and maps VRAM if a mapper is installed.
156 ret |= renderer_init();
158 gpu.state.frame_count = &gpu.zero;
159 gpu.state.hcnt = &gpu.zero;
160 gpu.frameskip.active = 0;
164 if (gpu.mmap != NULL) {
// Plugin shutdown: undo the 4096-byte pointer offset applied in map_vram
// before unmapping, so munmap gets the original base address.
171 long GPUshutdown(void)
177 if (gpu.vram != NULL) {
178 gpu.vram -= 4096 / 2;
179 gpu.munmap(gpu.vram, VRAM_SIZE);
// GP1 control-port write: top byte selects the command, low 24 bits are
// the payload. Handles reset, display enable, DMA mode, display start,
// display range (x/y) and display mode; 0x10-0x1f are info queries.
// NOTE(review): the switch/case labels and several lines are missing from
// this capture — the visible lines are the case bodies only.
186 void GPUwriteStatus(uint32_t data)
// hres index is 3 bits (bit 6 of data is the 368-wide override)
188 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
189 static const short vres[4] = { 240, 480, 256, 480 };
190 uint32_t cmd = data >> 24;
// duplicate-write elision: most registers can skip identical rewrites,
// but cmds 0/1 (reset, display enable) and 5 (display start) always act
192 if (cmd < ARRAY_SIZE(gpu.regs)) {
193 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
195 gpu.regs[cmd] = data;
198 gpu.state.fb_dirty = 1;
// display blanking on/off
208 gpu.status.blanking = data & 1;
// DMA direction/mode (2 bits)
211 gpu.status.dma = data & 3;
// display start address in VRAM
214 gpu.screen.x = data & 0x3ff;
215 gpu.screen.y = (data >> 10) & 0x1ff;
216 if (gpu.frameskip.set) {
217 decide_frameskip_allow(gpu.ex_regs[3]);
// treat a display-start change as a "flip" for frameskip pacing
218 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
220 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
225 gpu.screen.x1 = data & 0xfff;
226 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
230 gpu.screen.y1 = data & 0x3ff;
231 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: repack mode bits into status bits 16-22, then derive
// hres/vres from the packed status fields
235 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
236 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
237 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
240 renderer_notify_res_change();
// GP1(10h..1fh): GPU info query
243 if ((cmd & 0xf0) == 0x10)
// optional platform hook for extra status-write processing
248 #ifdef GPUwriteStatus_ext
249 GPUwriteStatus_ext(data);
// Number of EXTRA 32-bit words each GP0 command consumes after the command
// word itself, indexed by command byte. 0x20-0x7f are drawing primitives
// (polygons, lines, rectangles); 0x80/0xa0/0xc0 are VRAM copy/transfer
// headers whose data length is computed separately.
253 const unsigned char cmd_lengths[256] =
255 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
258 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
259 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
260 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
261 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
262 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
263 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
264 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
268 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in the 1024-pixel-wide 16bpp VRAM array.
273 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one row of l 16bpp pixels between VRAM and a host buffer:
// VRAM -> mem when is_read is nonzero, mem -> VRAM otherwise.
275 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
277 uint16_t *vram = VRAM_MEM_XY(x, y);
279 memcpy(mem, vram, l * 2);
281 memcpy(vram, mem, l * 2);
// Stream (count) 32-bit words of an active VRAM transfer rectangle, in
// either direction. Handles a partial row left over from a previous call
// (gpu.dma.offset), then whole rows, then a trailing partial row.
// Returns how many of the input words were consumed.
// NOTE(review): several lines (offset/state updates, braces) are missing
// from this capture.
284 static int do_vram_io(uint32_t *data, int count, int is_read)
286 int count_initial = count;
287 uint16_t *sdata = (uint16_t *)data;
288 int x = gpu.dma.x, y = gpu.dma.y;
289 int w = gpu.dma.w, h = gpu.dma.h;
290 int o = gpu.dma.offset;
// each 32-bit word carries two 16bpp pixels
292 count *= 2; // operate in 16bpp pixels
// finish a partially transferred row first
294 if (gpu.dma.offset) {
295 l = w - gpu.dma.offset;
299 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows
312 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
314 do_vram_line(x, y, sdata, w, is_read);
// trailing partial row — remaining offset is tracked in gpu.dma.offset
320 do_vram_line(x, y, sdata, count, is_read);
// rectangle complete
326 finish_vram_transfer(is_read);
// convert leftover pixels back to 32-bit words consumed
331 return count_initial - count / 2;
// Begin a VRAM read (0xc0) or write (0xa0) transfer. Decodes the position
// and size words ((size-1 & mask)+1 handles the 0-means-max encoding),
// snapshots the parameters in gpu.dma_start, and for reads pre-loads the
// first pixels into gpu.gp0 for immediate GPUREAD access.
334 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
337 log_anomaly("start_vram_transfer while old unfinished\n");
339 gpu.dma.x = pos_word & 0x3ff;
340 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// 0 in the size field means full extent (1024 / 512)
341 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
342 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
344 gpu.dma.is_read = is_read;
345 gpu.dma_start = gpu.dma;
// ensure queued draws hit VRAM before we read/overwrite it
347 renderer_flush_queues();
350 // XXX: wrong for width 1
351 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// remember that the game reads VRAM this frame (used by auto-interlace)
352 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
355 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
356 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete a VRAM transfer. For writes, tells the renderer which VRAM
// rectangle changed so its caches can be invalidated.
// NOTE(review): the is_read branch and dma.h clear are missing from this
// capture.
359 static void finish_vram_transfer(int is_read)
364 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
365 gpu.dma_start.w, gpu.dma_start.h);
// Frameskip variant of the command-list walker: scans GP0 commands WITHOUT
// drawing, but still tracks state that must stay correct while skipping —
// texture page (ex_regs[1]) from textured prims, e0-e7 registers, and
// large fill commands (deferred via pending_fill or executed if big).
// Bails out (skip=0) as soon as skipping becomes disallowed.
// NOTE(review): some lines (cmd extraction, pos advance, return) are
// missing from this capture.
368 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
370 int cmd = 0, pos = 0, len, dummy;
373 gpu.frameskip.pending_fill[0] = 0;
375 // XXX: polylines are not properly handled
376 while (pos < count && skip) {
377 uint32_t *list = data + pos;
379 len = 1 + cmd_lengths[cmd];
// fill (0x02): big fills are executed anyway, small ones deferred
382 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
383 // clearing something large, don't skip
384 do_cmd_list(list, 3, &dummy);
386 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
388 else if ((cmd & 0xf4) == 0x24) {
389 // flat textured prim
// keep texpage bits of e1 in sync even while skipping
390 gpu.ex_regs[1] &= ~0x1ff;
391 gpu.ex_regs[1] |= list[4] & 0x1ff;
393 else if ((cmd & 0xf4) == 0x34) {
394 // shaded textured prim
395 gpu.ex_regs[1] &= ~0x1ff;
396 gpu.ex_regs[1] |= list[5] & 0x1ff;
// drawing-area change may make skipping unsafe
398 else if (cmd == 0xe3)
399 skip = decide_frameskip_allow(list[0]);
// mirror all e0-e7 register writes
401 if ((cmd & 0xf8) == 0xe0)
402 gpu.ex_regs[cmd & 7] = list[0];
404 if (pos + len > count) {
406 break; // incomplete cmd
// VRAM transfers must never be skipped — hand back to the caller
408 if (cmd == 0xa0 || cmd == 0xc0)
// push the tracked e-register state to the renderer
413 renderer_sync_ecmds(gpu.ex_regs);
// Top-level GP0 word processor: dispatches buffered words to VRAM I/O,
// VRAM transfer setup, the skipping walker, or the real renderer list
// walker. Afterwards mirrors ex_regs into the status register and
// re-evaluates frameskip if the drawing area (e3) changed.
// NOTE(review): several lines (returns, vram_dirty setting, braces) are
// missing from this capture.
418 static noinline int do_cmd_buffer(uint32_t *data, int count)
421 uint32_t old_e3 = gpu.ex_regs[3];
425 for (pos = 0; pos < count; )
// active VRAM write: feed data straight into the transfer
427 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
429 pos += do_vram_io(data + pos, count - pos, 0);
434 cmd = data[pos] >> 24;
435 if (cmd == 0xa0 || cmd == 0xc0) {
436 // consume vram write/read cmd
437 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
442 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
443 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
444 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
446 pos += do_cmd_list(data + pos, count - pos, &cmd);
// status bits 0-10 mirror e1, bits 11-12 mirror e6 (mask settings)
455 gpu.status.reg &= ~0x1fff;
456 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
457 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
459 gpu.state.fb_dirty |= vram_dirty;
461 if (old_e3 != gpu.ex_regs[3])
462 decide_frameskip_allow(gpu.ex_regs[3]);
// Process the internal staging buffer; any incomplete trailing command
// (left words) is moved to the buffer start to await the rest of its data.
// NOTE(review): the cmd_len update is missing from this capture.
467 static void flush_cmd_buffer(void)
469 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
471 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data-port write (DMA block mode). Flushes any partially
// buffered command first, then processes the block directly; leftover
// words here mean a truncated command and are logged as an anomaly.
475 void GPUwriteDataMem(uint32_t *mem, int count)
479 log_io("gpu_dma_write %p %d\n", mem, count);
481 if (unlikely(gpu.cmd_len > 0))
484 left = do_cmd_buffer(mem, count);
486 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 data-port write: words accumulate in cmd_buffer and are
// flushed when the buffer fills (and lazily by other entry points).
489 void GPUwriteData(uint32_t data)
491 log_io("gpu_write %08x\n", data);
492 gpu.cmd_buffer[gpu.cmd_len++] = data;
493 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain: each node is a header word (next address
// in low 24 bits, payload length in the top byte) followed by GP0 words.
// Uses bit 23 of node headers as a loop-detection marker (cleared again
// after the walk), plus a heuristic that marks the previous frame's list
// when a game (FF7) resubmits the same list within the same frame.
// Returns an approximate cycle cost.
// NOTE(review): loop-exit, len extraction, and return lines are missing
// from this capture.
497 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
499 uint32_t addr, *list;
500 uint32_t *llist_entry = NULL;
501 int len, left, count;
504 if (unlikely(gpu.cmd_len > 0))
507 // ff7 sends its main list twice, detect this
508 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
509 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
510 gpu.state.last_list.cycles > 2048)
// pre-mark last frame's list head so re-walking it stops immediately
512 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
513 *llist_entry |= 0x800000;
516 log_io("gpu_dma_chain\n");
517 addr = start_addr & 0xffffff;
518 for (count = 0; addr != 0xffffff; count++)
// 0x1fffff masks the address into the 2MB RAM mirror
520 list = rambase + (addr & 0x1fffff) / 4;
522 addr = list[0] & 0xffffff;
// rough per-node cost: header fetch + payload words
525 cpu_cycles += 5 + len;
527 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
529 // loop detection marker
530 // (bit23 set causes DMA error on real machine, so
531 // unlikely to be ever set by the game)
535 left = do_cmd_buffer(list + 1, len);
537 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
544 // remove loop detection markers
545 addr = start_addr & 0x1fffff;
546 while (count-- > 0) {
547 list = rambase + addr / 4;
548 addr = list[0] & 0x1fffff;
549 list[0] &= ~0x800000;
552 *llist_entry &= ~0x800000;
// remember this list for the double-submission heuristic above
554 gpu.state.last_list.frame = *gpu.state.frame_count;
555 gpu.state.last_list.hcnt = *gpu.state.hcnt;
556 gpu.state.last_list.cycles = cpu_cycles;
557 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA to RAM): flush pending commands first, then stream
// the active VRAM-read transfer into the destination buffer.
562 void GPUreadDataMem(uint32_t *mem, int count)
564 log_io("gpu_dma_read %p %d\n", mem, count);
566 if (unlikely(gpu.cmd_len > 0))
570 do_vram_io(mem, count, 1);
// Single-word GPUREAD: pulls one word from an active VRAM-read transfer
// (or returns the last gp0 value when no transfer is active — that path
// is not visible in this capture).
573 uint32_t GPUreadData(void)
577 if (unlikely(gpu.cmd_len > 0))
582 do_vram_io(&ret, 1, 1);
584 log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush buffered commands so status bits mirrored from
// ex_regs are current, then return the status register.
588 uint32_t GPUreadStatus(void)
592 if (unlikely(gpu.cmd_len > 0))
595 ret = gpu.status.reg;
596 log_io("gpu_read_status %08x\n", ret);
// Savestate layout fragment (struct GPUFreeze — the struct header is not
// visible in this capture). Field sizes are fixed by the PSEmu Pro freeze
// ABI and must not change.
602 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
603 uint32_t ulStatus; // current gpu status
604 uint32_t ulControl[256]; // latest control register values
605 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Savestate save/load. Save copies VRAM (logical 1MB half of the guard
// buffer), regs, ex_regs (stored at ulControl[0xe0..]) and status. Load
// restores them, then replays control registers 1..8 through
// GPUwriteStatus (XOR trick defeats the duplicate-write elision) and
// resyncs the renderer's e-register state and texture caches.
// NOTE(review): the type dispatch and returns are missing from this
// capture.
608 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
616 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2)
617 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
618 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
619 freeze->ulStatus = gpu.status.reg;
622 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
623 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
624 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
625 gpu.status.reg = freeze->ulStatus;
627 for (i = 8; i > 0; i--) {
628 gpu.regs[i] ^= 1; // avoid reg change detection
629 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
631 renderer_sync_ecmds(gpu.ex_regs);
632 renderer_update_caches(0, 0, 1024, 512);
// Per-vblank housekeeping: flush draw queues, handle blanking (present a
// black frame once when blanking starts), run the frameskip pacing logic,
// and clear the dirty/blanked flags after a frame is presented.
// NOTE(review): the flip/present calls and several early returns are
// missing from this capture.
639 void GPUupdateLace(void)
643 renderer_flush_queues();
645 if (gpu.status.blanking) {
646 if (!gpu.state.blanked) {
// entering blanking: show one blanked frame, then stop updating
648 gpu.state.blanked = 1;
649 gpu.state.fb_dirty = 1;
654 if (!gpu.state.fb_dirty)
657 if (gpu.frameskip.set) {
658 if (!gpu.frameskip.frame_ready) {
// nothing rendered yet; keep skipping unless we've starved ~9 frames
659 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
661 gpu.frameskip.active = 0;
663 gpu.frameskip.frame_ready = 0;
667 gpu.state.fb_dirty = 0;
668 gpu.state.blanked = 0;
// Vblank notification from the emulator core. Computes the effective
// interlace state; in "auto" mode (allow_interlace == 2) interlace is
// only honored if the game recently read VRAM (within the last frame),
// since fake-progressive output looks better otherwise. Re-flushes and
// reconfigures the renderer when the field (lcf) or state changes.
671 void GPUvBlank(int is_vblank, int lcf)
673 int interlace = gpu.state.allow_interlace
674 && gpu.status.interlace && gpu.status.dheight;
675 // interlace doesn't look nice on progressive displays,
676 // so we have this "auto" mode here for games that don't read vram
677 if (gpu.state.allow_interlace == 2
678 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
682 if (interlace || interlace != gpu.state.old_interlace) {
683 gpu.state.old_interlace = interlace;
687 renderer_flush_queues();
// lcf selects which field (odd/even lines) to render next
688 renderer_set_interlace(interlace, !lcf);
692 #include "../../frontend/plugin_lib.h"
// Frontend configuration hook: copies frameskip settings, live counter
// pointers, interlace/enhancement options and the mmap/munmap callbacks
// from the rearmed callback struct, (re)maps VRAM if needed, and forwards
// the config to the renderer and video-out layers.
694 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
696 gpu.frameskip.set = cbs->frameskip;
697 gpu.frameskip.advice = &cbs->fskip_advice;
// reset skip state so new settings take effect from a clean slate
698 gpu.frameskip.active = 0;
699 gpu.frameskip.frame_ready = 1;
700 gpu.state.hcnt = cbs->gpu_hcnt;
701 gpu.state.frame_count = cbs->gpu_frame_count;
702 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
703 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
705 gpu.mmap = cbs->mmap;
706 gpu.munmap = cbs->munmap;
// VRAM may not have been mappable at GPUinit time (no mmap callback yet)
709 if (gpu.vram == NULL)
712 if (cbs->pl_vout_set_raw_vram)
713 cbs->pl_vout_set_raw_vram(gpu.vram);
714 renderer_set_config(cbs);
715 vout_set_config(cbs);
718 // vim:shiftwidth=2:expandtab