2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of a statically-sized array (do not use on pointers).
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: the condition is expected to be false.
16 #define unlikely(x) __builtin_expect((x), 0)
// Keep cold helpers out-of-line so hot callers stay small.
17 #define noinline __attribute__((noinline))
// Log prefixed with frame counter and hsync counter for timing context.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly tracing are compiled out by default; point these at
// gpu_log to enable them during debugging.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Single global GPU state blob. The 2048-byte alignment is presumably for
// DMA/cache friendliness on target platforms — TODO confirm against users
// of &gpu in the renderer backends.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Forward declarations for the command/VRAM paths used before definition.
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
// Flush any buffered command words and complete a pending VRAM transfer
// so a GPU reset does not leave half-processed state behind.
32 static noinline void do_cmd_reset(void)
// Drain words still sitting in the command buffer.
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// A VRAM transfer with remaining rows (dma.h > 0) must be finalized too.
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear control registers, reinitialize the 0xE0-0xE7
// shadow registers, and restore the power-on status and screen mode.
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// Each ex_regs[i] keeps its own command byte (0xe0+i) in the top byte.
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// Power-on status word (display disabled, idle bits set).
52 gpu.status.reg = 0x14802000;
// Default display mode: 256x240.
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the programmed horizontal display
// range (x1..x2), scaled to the current horizontal resolution.
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
// Invalid or out-of-range span: fall back to the full resolution width.
62 if (sw <= 0 || sw >= 2560)
64 gpu.screen.w = gpu.screen.hres;
// 2560 is the full GPU clock span of one scanline's display range.
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the vertical display range (y1..y2).
// NOTE(review): most of this function's body is not visible in this view;
// the dheight (interlace double-height) handling is elided.
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
// Decide whether the next frame should be skipped, based on the
// configured skip setting and the external frameskip advice flag.
// (Some lines of the original body are not visible in this view.)
80 static noinline void decide_frameskip(void)
// A frame was just skipped; reset and mark a renderable frame pending.
82 if (gpu.frameskip.active)
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
// External advice (e.g. from the frontend) can force skipping on.
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
// Otherwise honour the fixed setting until the skip counter catches up.
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
// Given a 0xE3 (drawing area top-left) command word, decide if skipping
// is allowed: skip only when drawing happens outside the display area.
97 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// Unsigned subtraction doubles as a "within [screen.x, screen.x+w)" test:
// values left of the display wrap to large unsigned numbers.
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
106 return gpu.frameskip.allow;
// GP1(0x10) "get GPU info": latch the requested internal value into gp0.
// NOTE(review): the case labels of this switch are not visible here; the
// two visible bodies return draw-environment shadow registers.
109 static noinline void get_gpu_info(uint32_t data)
111 switch (data & 0x0f) {
// Return one of the 0xE0.. shadow registers (20-bit payload).
116 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// Return ex_regs[5] (drawing offset) payload.
119 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): fragment of an init function whose header is outside this
// view. Initializes the renderer and parks the external counter pointers
// on a harmless local zero until the frontend provides real ones.
134 ret |= renderer_init();
136 gpu.state.frame_count = &gpu.zero;
137 gpu.state.hcnt = &gpu.zero;
138 gpu.frameskip.active = 0;
// Plugin shutdown entry point; tears down the video output backend.
145 long GPUshutdown(void)
147 return vout_finish();
// GP1 control-port write: top byte selects the command, low 24 bits are
// the payload. NOTE(review): the switch's case labels are not visible in
// this view; comments below identify each visible case body.
150 void GPUwriteStatus(uint32_t data)
// Horizontal/vertical resolution lookup tables indexed from status bits.
152 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
153 static const short vres[4] = { 240, 480, 256, 480 };
154 uint32_t cmd = data >> 24;
156 if (cmd < ARRAY_SIZE(gpu.regs)) {
// Skip redundant writes, except for reset (0/1) and display start (5).
157 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
159 gpu.regs[cmd] = data;
162 gpu.state.fb_dirty = 1;
// Display enable/blanking.
172 gpu.status.blanking = data & 1;
// DMA direction.
175 gpu.status.dma = data & 3;
// Display start address in VRAM; may trigger frameskip bookkeeping.
178 gpu.screen.x = data & 0x3ff;
179 gpu.screen.y = (data >> 10) & 0x3ff;
180 if (gpu.frameskip.set) {
181 decide_frameskip_allow(gpu.ex_regs[3]);
// A display-start change on a new frame counts as a flip.
182 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
184 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range.
189 gpu.screen.x1 = data & 0xfff;
190 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range.
194 gpu.screen.y1 = data & 0x3ff;
195 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: repack the mode bits into status and refetch resolution.
199 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
200 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
201 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// 0x10..0x1f: GPU info query.
206 if ((cmd & 0xf0) == 0x10)
// Optional platform hook for additional status-write handling.
211 #ifdef GPUwriteStatus_ext
212 GPUwriteStatus_ext(data);
// Number of *additional* parameter words for each GP0 command byte
// (the command word itself is counted separately by the callers).
// Rows are indexed by the command's top byte; zero means "no extra words"
// or a command handled specially (e.g. 0xa0/0xc0 VRAM transfers).
216 const unsigned char cmd_lengths[256] =
218 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
221 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
222 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
223 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
224 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
225 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
226 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
227 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
228 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
233 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in VRAM; VRAM is 1024 16-bit pixels wide.
236 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one row of l pixels between VRAM and a host buffer.
// is_read selects direction: nonzero = VRAM -> mem, zero = mem -> VRAM.
238 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
240 uint16_t *vram = VRAM_MEM_XY(x, y);
// l is in 16bpp pixels, hence * 2 for bytes.
242 memcpy(mem, vram, l * 2);
244 memcpy(vram, mem, l * 2);
// Advance the active VRAM transfer (set up by start_vram_transfer) by up
// to `count` 32-bit words of `data`. Returns the number of words consumed.
// NOTE(review): some lines of the original body are not visible here.
247 static int do_vram_io(uint32_t *data, int count, int is_read)
249 int count_initial = count;
250 uint16_t *sdata = (uint16_t *)data;
// Local copies of the transfer cursor state.
251 int x = gpu.dma.x, y = gpu.dma.y;
252 int w = gpu.dma.w, h = gpu.dma.h;
253 int o = gpu.dma.offset;
255 count *= 2; // operate in 16bpp pixels
// Finish a partially-transferred row left over from the previous call.
257 if (gpu.dma.offset) {
258 l = w - gpu.dma.offset;
262 do_vram_line(x + o, y, sdata, l, is_read);
// Whole rows.
275 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
277 do_vram_line(x, y, sdata, w, is_read);
// Trailing partial row: transfer what we have, remember the offset.
283 do_vram_line(x, y, sdata, count, is_read);
289 finish_vram_transfer(is_read);
// Words consumed = initial words minus leftover pixels / 2.
294 return count_initial - count / 2;
// Begin a VRAM read (0xC0) or write (0xA0) rectangle transfer.
// pos_word packs x/y, size_word packs w/h, each in 16-bit halves.
297 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
300 log_anomaly("start_vram_transfer while old unfinished\n");
302 gpu.dma.x = pos_word & 0x3ff;
303 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// Size of 0 means the hardware maximum (1024x512); the -1/+1 dance
// maps 0 -> max while leaving other values unchanged.
304 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
305 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
307 gpu.dma.is_read = is_read;
// Snapshot the full rectangle so finish_vram_transfer can report it.
308 gpu.dma_start = gpu.dma;
310 renderer_flush_queues();
// For reads, pre-latch the first two pixels into gp0 for GPUREAD.
313 // XXX: wrong for width 1
314 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
315 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
318 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
319 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete a VRAM transfer; for writes, tell the renderer which VRAM
// rectangle changed so it can invalidate cached textures.
322 static void finish_vram_transfer(int is_read)
327 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
328 gpu.dma_start.w, gpu.dma_start.h);
// Walk a command list while frameskip is active: skip drawing commands
// but still track state that must stay correct (texture page bits in
// ex_regs[1], the 0xE0.. shadow registers, large fills). Returns words
// consumed; *last_cmd receives the last command byte seen.
331 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
333 int cmd = 0, pos = 0, len, dummy;
336 // XXX: polylines are not properly handled
337 while (pos < count && skip) {
338 uint32_t *list = data + pos;
// Total command size in words = command word + parameter words.
340 len = 1 + cmd_lengths[cmd];
// Fill commands covering more than the screen shouldn't be skipped,
// or the next rendered frame would show stale background.
343 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
344 // clearing something large, don't skip
345 do_cmd_list(data + pos, 3, &dummy);
347 else if ((cmd & 0xf4) == 0x24) {
348 // flat textured prim
// Keep the texture page bits of the status shadow up to date.
349 gpu.ex_regs[1] &= ~0x1ff;
350 gpu.ex_regs[1] |= list[4] & 0x1ff;
352 else if ((cmd & 0xf4) == 0x34) {
353 // shaded textured prim
354 gpu.ex_regs[1] &= ~0x1ff;
355 gpu.ex_regs[1] |= list[5] & 0x1ff;
// 0xE3 (drawing area) can change whether skipping stays allowed.
357 else if (cmd == 0xe3)
358 skip = decide_frameskip_allow(list[0]);
// Mirror all 0xE0..0xE7 environment commands into ex_regs.
360 if ((cmd & 0xf8) == 0xe0)
361 gpu.ex_regs[cmd & 7] = list[0];
363 if (pos + len > count) {
365 break; // incomplete cmd
// VRAM transfer commands end the skip loop; caller handles them.
367 if (cmd == 0xa0 || cmd == 0xc0)
// Resync the renderer with the environment registers we shadowed.
372 renderer_sync_ecmds(gpu.ex_regs);
// Core GP0 command dispatcher: routes buffered words to VRAM I/O,
// transfer setup, the skip walker, or the real renderer list processor.
// Returns the number of unconsumed words (callers treat >0 as leftover).
// NOTE(review): some lines of the original body are not visible here.
377 static noinline int do_cmd_buffer(uint32_t *data, int count)
380 uint32_t old_e3 = gpu.ex_regs[3];
384 for (pos = 0; pos < count; )
// An active VRAM write consumes data words directly.
386 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
388 pos += do_vram_io(data + pos, count - pos, 0);
393 cmd = data[pos] >> 24;
394 if (cmd == 0xa0 || cmd == 0xc0) {
395 // consume vram write/read cmd
396 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
401 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
402 if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
403 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
405 pos += do_cmd_list(data + pos, count - pos, &cmd);
// Rebuild the low status bits from the shadowed environment registers.
414 gpu.status.reg &= ~0x1fff;
415 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
416 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
418 gpu.state.fb_dirty |= vram_dirty;
// Re-evaluate frameskip if the drawing area (0xE3) changed.
420 if (old_e3 != gpu.ex_regs[3])
421 decide_frameskip_allow(gpu.ex_regs[3]);
// Process the buffered command words; keep any incomplete trailing
// command at the front of the buffer for the next write.
426 static void flush_cmd_buffer(void)
428 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
430 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block mode): feed `count` words straight to
// the command processor, flushing buffered words first to keep order.
434 void GPUwriteDataMem(uint32_t *mem, int count)
438 log_io("gpu_dma_write %p %d\n", mem, count);
440 if (unlikely(gpu.cmd_len > 0))
443 left = do_cmd_buffer(mem, count);
// Leftover words here can't be buffered; they are dropped.
445 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: accumulate into the command buffer and flush
// when it fills up.
448 void GPUwriteData(uint32_t data)
450 log_io("gpu_write %08x\n", data);
451 gpu.cmd_buffer[gpu.cmd_len++] = data;
452 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Process a linked-list DMA chain starting at start_addr in PSX RAM.
// Each node is a header word (next-address in low 24 bits, payload length
// in the top byte) followed by `len` command words. Loop protection is
// done by setting bit 23 in visited headers and clearing it afterwards.
// NOTE(review): some lines of the original body are not visible here.
456 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
458 uint32_t addr, *list;
459 uint32_t *llist_entry = NULL;
460 int len, left, count;
463 if (unlikely(gpu.cmd_len > 0))
466 // ff7 sends its main list twice, detect this
467 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
468 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
469 gpu.state.last_list.cycles > 2048)
// Pre-mark last frame's list head so re-sending it terminates early.
471 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
472 *llist_entry |= 0x800000;
475 log_io("gpu_dma_chain\n");
476 addr = start_addr & 0xffffff;
477 for (count = 0; addr != 0xffffff; count++)
// RAM is mirrored; mask to the 2 MB physical range.
479 list = rambase + (addr & 0x1fffff) / 4;
481 addr = list[0] & 0xffffff;
// Rough DMA cost estimate per node.
484 cpu_cycles += 5 + len;
486 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
488 // loop detection marker
489 // (bit23 set causes DMA error on real machine, so
490 // unlikely to be ever set by the game)
494 left = do_cmd_buffer(list + 1, len);
496 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
503 // remove loop detection markers
504 addr = start_addr & 0x1fffff;
505 while (count-- > 0) {
506 list = rambase + addr / 4;
507 addr = list[0] & 0x1fffff;
508 list[0] &= ~0x800000;
// Also clear the marker planted for the repeated-list heuristic.
511 *llist_entry &= ~0x800000;
// Remember this list for the next call's repeat detection.
513 gpu.state.last_list.frame = *gpu.state.frame_count;
514 gpu.state.last_list.hcnt = *gpu.state.hcnt;
515 gpu.state.last_list.cycles = cpu_cycles;
516 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA): drain buffered commands first, then service the
// pending VRAM->CPU transfer into `mem`.
521 void GPUreadDataMem(uint32_t *mem, int count)
523 log_io("gpu_dma_read %p %d\n", mem, count);
525 if (unlikely(gpu.cmd_len > 0))
529 do_vram_io(mem, count, 1);
// Single-word GPUREAD: return the next word of an active VRAM read
// (or the latched gp0 value — latching lines not visible in this view).
532 uint32_t GPUreadData(void)
536 if (unlikely(gpu.cmd_len > 0))
541 do_vram_io(&ret, 1, 1);
543 log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush pending commands so the status bits are current,
// then return the status register.
547 uint32_t GPUreadStatus(void)
551 if (unlikely(gpu.cmd_len > 0))
554 ret = gpu.status.reg;
555 log_io("gpu_read_status %08x\n", ret);
// Savestate snapshot layout shared with the main emulator
// (fragment; the enclosing struct declaration is outside this view).
561 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
562 uint32_t ulStatus; // current gpu status
563 uint32_t ulControl[256]; // latest control register values
564 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save (type 1) / restore (type 0) GPU state for savestates.
// NOTE(review): the type dispatch lines are not visible in this view;
// the first memcpy group is the save path, the second is restore.
567 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// Save: copy VRAM, control regs, and the 0xE0.. shadows (stored at
// ulControl[0xe0..]) plus the status word into the freeze struct.
575 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
576 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
577 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
578 freeze->ulStatus = gpu.status.reg;
// Restore: the reverse copies, then replay control writes 8..1.
581 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
582 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
583 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
584 gpu.status.reg = freeze->ulStatus;
586 for (i = 8; i > 0; i--) {
// Toggle a bit first so GPUwriteStatus's same-value filter doesn't
// drop the replayed write.
587 gpu.regs[i] ^= 1; // avoid reg change detection
588 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
590 renderer_sync_ecmds(gpu.ex_regs);
// Whole VRAM changed: invalidate all renderer caches.
591 renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush rendering, handle blanking, apply frameskip
// pacing, and present the frame when the framebuffer is dirty.
// NOTE(review): some lines (including the actual vout call) are not
// visible in this view.
598 void GPUupdateLace(void)
602 renderer_flush_queues();
// While blanked, present a blank frame once and mark state.
604 if (gpu.status.blanking) {
605 if (!gpu.state.blanked) {
607 gpu.state.blanked = 1;
608 gpu.state.fb_dirty = 1;
// Nothing drawn since last present: skip the flip entirely.
613 if (!gpu.state.fb_dirty)
616 if (gpu.frameskip.set) {
617 if (!gpu.frameskip.frame_ready) {
// Allow at most ~9 frames without a flip before forcing one through.
618 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
620 gpu.frameskip.active = 0;
622 gpu.frameskip.frame_ready = 0;
626 gpu.state.fb_dirty = 0;
627 gpu.state.blanked = 0;
// Vblank hook: decide the effective interlace mode and inform the
// renderer of the current field (lcf) when it changes.
630 void GPUvBlank(int is_vblank, int lcf)
632 int interlace = gpu.state.allow_interlace
633 && gpu.status.interlace && gpu.status.dheight;
634 // interlace doesn't look nice on progressive displays,
635 // so we have this "auto" mode here for games that don't read vram
// allow_interlace==2 ("auto"): only interlace if the game recently
// read VRAM (otherwise nothing depends on true field rendering).
636 if (gpu.state.allow_interlace == 2
637 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Update the renderer on any change of interlace state or field.
641 if (interlace || interlace != gpu.state.old_interlace) {
642 gpu.state.old_interlace = interlace;
646 renderer_flush_queues();
647 renderer_set_interlace(interlace, !lcf);
651 #include "../../frontend/plugin_lib.h"
// Frontend configuration hook: pull frameskip settings, timing counter
// pointers, and backend options from the rearmed callback struct.
653 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
655 gpu.frameskip.set = cbs->frameskip;
// Pointers into frontend-owned state; read live, never copied.
656 gpu.frameskip.advice = &cbs->fskip_advice;
657 gpu.frameskip.active = 0;
658 gpu.frameskip.frame_ready = 1;
659 gpu.state.hcnt = cbs->gpu_hcnt;
660 gpu.state.frame_count = cbs->gpu_frame_count;
661 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
// Give the frontend direct VRAM access if it wants it.
663 if (cbs->pl_vout_set_raw_vram)
664 cbs->pl_vout_set_raw_vram(gpu.vram);
665 renderer_set_config(cbs);
666 vout_set_config(cbs);
669 // vim:shiftwidth=2:expandtab