/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
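
// flush whatever is still sitting in the command buffer and close out an
// unfinished VRAM transfer before the command state is thrown away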
static noinline void do_cmd_reset(void)
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
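
// full reset of the emulated GPU: clear the control registers, set the
// e1-e7 shadow registers to their defaults and fall back to a 256x240 display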
static noinline void do_reset(void)
  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;

  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
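
// derive the visible width from the horizontal display range (x1..x2 in
// GPU clocks, a full scanline being 2560 clocks); implausible ranges fall
// back to the nominal horizontal resolution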
static noinline void update_width(void)
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;

static noinline void update_height(void)
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
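
// per-frame frameskip bookkeeping: count skipped frames, decide whether the
// next frame should be skipped, and flush a fill command that was deferred
// while skipping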
static noinline void decide_frameskip(void)
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
  // no frameskip if it decides to draw to the display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
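
// GP1(0x10) "get GPU info": latch the requested internal register value
// into gpu.gp0, to be returned by the next GPUreadData()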
static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;

      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
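
// plugin init: bring up the renderer and point the frame/hsync counters at
// a local zero until the frontend provides real ones via GPUrearmedCallbacks()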
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;

long GPUshutdown(void)
  return vout_finish();
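
// GP1 "display control" port: latch the written value in gpu.regs[] and
// handle reset, display blanking, DMA direction, display start/range and
// video mode changes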
void GPUwriteStatus(uint32_t data)
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

    gpu.status.blanking = data & 1;

    gpu.status.dma = data & 3;

    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;

    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;

    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;

    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];

  if ((cmd & 0xf0) == 0x10)
    get_gpu_info(data);

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
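
// number of additional 32-bit words each GP0 command carries
// (the command word itself is not counted)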
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
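
// VRAM image transfers: do_vram_line() moves one scanline between VRAM and
// the DMA buffer, do_vram_io() walks the GP0(0xa0/0xc0) transfer rectangle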
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);

static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;

  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    do_vram_line(x + o, y, sdata, l, is_read);

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);

    do_vram_line(x, y, sdata, count, is_read);

    finish_vram_transfer(is_read);

  return count_initial - count / 2;
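
// set up a VRAM write (GP0 0xa0) or read (GP0 0xc0) rectangle; for reads the
// first word is prefetched into gpu.gp0 so GPUreadData() can return it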
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();

    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

static void finish_vram_transfer(int is_read)
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
      gpu.dma_start.w, gpu.dma_start.h);
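
// used while frameskipping: scan the command stream without drawing, but
// still track texture page state (e1) and the other e-register writes so
// later frames render correctly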
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy;

  gpu.frameskip.pending_fill[0] = 0;

  // XXX: polylines are not properly handled
  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      break; // incomplete cmd

    if (cmd == 0xa0 || cmd == 0xc0)

  renderer_sync_ecmds(gpu.ex_regs);
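
// main GP0 dispatcher: route incoming words to an active VRAM transfer, to
// image transfer setup, or to the renderer's command list parser, honoring
// frameskip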
static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3];

  for (pos = 0; pos < count; )
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else
      pos += do_cmd_list(data + pos, count - pos, &cmd);

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);

void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);

void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
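
// walk a linked display list in PSX RAM; each visited packet gets bit 23 of
// its header set as a loop-detection marker and cleared again afterwards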
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
    list = rambase + (addr & 0x1fffff) / 4;
    addr = list[0] & 0xffffff;
    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)

    left = do_cmd_buffer(list + 1, len);
    if (left)
      log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }

  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

    do_vram_io(mem, count, 1);

uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);

uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;

    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;

    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }

    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
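
// called by the emulator once per frame: flush pending rendering, handle
// display blanking and frameskip, and present the frame if it changed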
void GPUupdateLace(void)
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;

    gpu.frameskip.frame_ready = 0;

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;

void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;

  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);

#include "../../frontend/plugin_lib.h"
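
// frontend glue: pick up frameskip settings, frame/hsync counters and video
// out configuration provided by the PCSX-ReARMed frontend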
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);

// vim:shiftwidth=2:expandtab