2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Convenience macros: array element count, branch-prediction hint,
// and a forced-noinline attribute for cold helpers.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Trace log prefixed with the current frame count and hblank counter so
// events can be correlated with emulator timing.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly tracing compile out by default; redirect them to
// gpu_log to enable.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Global GPU state singleton. 2048-byte alignment is presumably for
// cache/DMA friendliness of the embedded VRAM/buffers -- TODO confirm.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Forward declarations for helpers referenced before their definitions.
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
// Flush any partially-buffered GP0 command words and complete a pending
// VRAM transfer so that a reset does not drop in-flight work.
// NOTE(review): the extraction elides lines between the two ifs (likely
// the cmd_len reset) -- confirm against the full source.
32 static noinline void do_cmd_reset(void)
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset (GP1 0x00): clear control registers, reinitialize the
// e1..e7 shadow registers to their command bytes, and restore the
// power-on status and default 256x240 screen mode.
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// Each ex_regs[i] holds the last GP0 0xe0+i command word; seed with the
// command byte in bits 24-31 and zero payload.
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// Power-on GPUSTAT value.
52 gpu.status.reg = 0x14802000;
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
// Recompute visible width from the horizontal display range (x1..x2).
// Out-of-range spans fall back to the full mode width; otherwise the
// span is scaled from GPU clock ticks (2560/line) to pixels.
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
62 if (sw <= 0 || sw >= 2560)
// sanity check: garbage range -> use the nominal mode width
64 gpu.screen.w = gpu.screen.hres;
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the vertical display range (y1..y2),
// doubling when dheight (480-line/interlace) is set.
// NOTE(review): the remainder of this function is elided in this
// extraction -- the clamping/assignment logic is not visible here.
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
// Per-frame frameskip state machine: count skipped frames, then decide
// whether the next frame is skipped based on external advice
// (*gpu.frameskip.advice) or the configured skip ratio (frameskip.set).
80 static noinline void decide_frameskip(void)
82 if (gpu.frameskip.active)
// a skipped frame just ended -> next one must be rendered
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
// external advice (e.g. from the frontend) forces a skip
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
// otherwise honor the fixed N-of-M skip setting
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
// Given the latest e3 (draw area top-left) command word, decide whether
// skipping is safe: only allow it when the draw area lies outside the
// currently displayed region (or interlace is on, which always draws
// to the display area).
97 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned subtraction doubles as a 0 <= x-screen.x < w range check
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
106 return gpu.frameskip.allow;
// GP1 0x10 "get GPU info": latch the requested internal value into
// gpu.gp0 for readback. Only two of the switch cases are visible in
// this extraction (ex_regs readback, 20 bits wide).
109 static noinline void get_gpu_info(uint32_t data)
111 switch (data & 0x0f) {
116 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
119 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): fragment of the plugin init function (header elided in
// this extraction). Initializes the renderer and points the frame/hblank
// counters at a local zero until the frontend provides real ones via
// GPUrearmedCallbacks.
134 ret |= renderer_init();
136 gpu.state.frame_count = &gpu.zero;
137 gpu.state.hcnt = &gpu.zero;
138 gpu.frameskip.active = 0;
// Plugin shutdown entry point; tears down video output and returns its
// status code.
145 long GPUshutdown(void)
147 return vout_finish();
// GP1 (control port) write handler. Dispatches on the command byte in
// bits 24-31. NOTE(review): the switch/case scaffolding is elided in
// this extraction; only the per-command effects are visible.
150 void GPUwriteStatus(uint32_t data)
// horizontal/vertical mode resolution tables, indexed from GPUSTAT bits
152 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
153 static const short vres[4] = { 240, 480, 256, 480 };
154 uint32_t cmd = data >> 24;
// drop redundant writes: repeating the same value has no effect for
// commands other than 0x00/0x01 and 0x05
156 if (cmd < ARRAY_SIZE(gpu.regs)) {
157 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
159 gpu.regs[cmd] = data;
162 gpu.state.fb_dirty = 1;
// display enable/blanking
172 gpu.status.blanking = data & 1;
// DMA direction
175 gpu.status.dma = data & 3;
// display start address; a change here is a "flip", which is when
// the frameskip decision for the next frame is made
178 gpu.screen.x = data & 0x3ff;
179 gpu.screen.y = (data >> 10) & 0x3ff;
180 if (gpu.frameskip.set) {
181 decide_frameskip_allow(gpu.ex_regs[3]);
// only re-decide once per emulated frame
182 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
184 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
189 gpu.screen.x1 = data & 0xfff;
190 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
194 gpu.screen.y1 = data & 0x3ff;
195 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: fold the mode bits into GPUSTAT and refresh resolution
199 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
200 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
201 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// 0x10-0x1f: GPU info readback
206 if ((cmd & 0xf0) == 0x10)
// optional hook for builds that need to observe GP1 writes
211 #ifdef GPUwriteStatus_ext
212 GPUwriteStatus_ext(data);
// Number of additional parameter words following each GP0 command byte
// (the command word itself is not counted). Indexed by the command in
// bits 24-31; 0 means no parameters (or variable length handled
// elsewhere, e.g. 0xa0/0xc0 VRAM transfers).
216 const unsigned char cmd_lengths[256] =
218 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
221 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
222 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
223 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
224 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
225 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
226 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
227 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
228 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
233 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in the 1024-pixel-wide 16bpp VRAM.
236 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l 16bpp pixels between VRAM and the host
// buffer; direction is chosen by is_read (read = VRAM -> mem).
238 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
240 uint16_t *vram = VRAM_MEM_XY(x, y);
242 memcpy(mem, vram, l * 2);
244 memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words of a VRAM transfer (set up by
// start_vram_transfer) to/from the rectangle tracked in gpu.dma.
// Handles a partial first line (gpu.dma.offset), then whole lines,
// then a trailing partial line. Returns the number of 32-bit words
// actually consumed/produced.
247 static int do_vram_io(uint32_t *data, int count, int is_read)
249 int count_initial = count;
250 uint16_t *sdata = (uint16_t *)data;
251 int x = gpu.dma.x, y = gpu.dma.y;
252 int w = gpu.dma.w, h = gpu.dma.h;
253 int o = gpu.dma.offset;
255 count *= 2; // operate in 16bpp pixels
// finish the line a previous call left half-done
257 if (gpu.dma.offset) {
258 l = w - gpu.dma.offset;
262 do_vram_line(x + o, y, sdata, l, is_read);
// full lines
275 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
277 do_vram_line(x, y, sdata, w, is_read);
// trailing partial line; remainder is tracked via gpu.dma.offset
283 do_vram_line(x, y, sdata, count, is_read);
// rectangle exhausted -> transfer complete
289 finish_vram_transfer(is_read);
294 return count_initial - count / 2;
// Begin a GP0 0xa0 (write) / 0xc0 (read) VRAM transfer: decode the
// position and size words into gpu.dma, snapshot it in gpu.dma_start,
// and for reads pre-latch the first pixels into gp0 for readback.
297 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
300 log_anomaly("start_vram_transfer while old unfinished\n");
302 gpu.dma.x = pos_word & 0x3ff;
303 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// sizes are 1-based: 0 encodes the maximum (1024/512)
304 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
305 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
307 gpu.dma.is_read = is_read;
308 gpu.dma_start = gpu.dma;
// make sure queued rendering lands in VRAM before we touch it
310 renderer_flush_queues();
313 // XXX: wrong for width 1
314 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// remember when VRAM was last read; used by the auto-interlace logic
315 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
318 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
319 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Close out a VRAM transfer; for writes, tell the renderer which
// rectangle of VRAM changed so it can invalidate cached data.
// NOTE(review): the is_read branch is elided in this extraction.
322 static void finish_vram_transfer(int is_read)
327 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
328 gpu.dma_start.w, gpu.dma_start.h);
// Walk a GP0 command list while a frame is being skipped: draw nothing,
// but keep side-effect state consistent (texture page bits in ex_regs,
// large fills still executed, e0-e7 registers latched) and stop
// skipping when an e3 command targets the display area. Returns the
// number of words consumed; *last_cmd receives the last command byte.
331 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
333 int cmd = 0, pos = 0, len, dummy;
336 while (pos < count && skip) {
337 uint32_t *list = data + pos;
339 len = 1 + cmd_lengths[cmd];
// fill (02): if it covers more than the screen, it likely matters
// beyond this frame -- execute it instead of skipping
342 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
343 // clearing something large, don't skip
344 do_cmd_list(data + pos, 3, &dummy);
346 else if ((cmd & 0xf4) == 0x24) {
347 // flat textured prim
// keep the texpage bits of e1 up to date even though we skip the draw
348 gpu.ex_regs[1] &= ~0x1ff;
349 gpu.ex_regs[1] |= list[4] & 0x1ff;
351 else if ((cmd & 0xf4) == 0x34) {
352 // shaded textured prim
353 gpu.ex_regs[1] &= ~0x1ff;
354 gpu.ex_regs[1] |= list[5] & 0x1ff;
356 else if (cmd == 0xe3)
// new draw area -- re-check whether skipping is still safe
357 skip = decide_frameskip_allow(list[0]);
// latch all e0-e7 state registers
359 if ((cmd & 0xf8) == 0xe0)
360 gpu.ex_regs[cmd & 7] = list[0];
362 if (pos + len > count) {
364 break; // incomplete cmd
// VRAM transfers must not be skipped; hand back to the caller
366 if (cmd == 0xa0 || cmd == 0xc0)
// let the renderer see the latched e-register state we accumulated
371 renderer_sync_ecmds(gpu.ex_regs);
// Main GP0 word consumer: routes incoming words to an active VRAM
// transfer, starts new 0xa0/0xc0 transfers, and feeds everything else
// to the renderer (or the skip walker while frameskipping). Returns
// the number of words left unconsumed (incomplete trailing command).
376 static noinline int do_cmd_buffer(uint32_t *data, int count)
379 uint32_t old_e3 = gpu.ex_regs[3];
383 for (pos = 0; pos < count; )
// ongoing VRAM write: stream data into it first
385 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
387 pos += do_vram_io(data + pos, count - pos, 0);
392 cmd = data[pos] >> 24;
393 if (cmd == 0xa0 || cmd == 0xc0) {
394 // consume vram write/read cmd
395 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// skip rendering while frameskip is active and allowed
400 if (gpu.frameskip.active && gpu.frameskip.allow)
401 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
403 pos += do_cmd_list(data + pos, count - pos, &cmd);
// mirror the latched e1/e6 bits into GPUSTAT
412 gpu.status.reg &= ~0x1fff;
413 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
414 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
416 gpu.state.fb_dirty |= vram_dirty;
// draw area may have moved onto the display -> revisit skip decision
418 if (old_e3 != gpu.ex_regs[3])
419 decide_frameskip_allow(gpu.ex_regs[3]);
// Drain the staging command buffer; any incomplete trailing command is
// moved to the front of the buffer to await more data.
424 static void flush_cmd_buffer(void)
426 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
428 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Block DMA write to GP0: flush any staged words first, then process
// the incoming buffer directly; leftover words are logged and dropped
// (DMA data cannot be held back).
432 void GPUwriteDataMem(uint32_t *mem, int count)
436 log_io("gpu_dma_write %p %d\n", mem, count);
438 if (unlikely(gpu.cmd_len > 0))
441 left = do_cmd_buffer(mem, count);
443 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: stage it, and flush once the staging buffer
// fills up (processing word-by-word would be too slow).
446 void GPUwriteData(uint32_t data)
448 log_io("gpu_write %08x\n", data);
449 gpu.cmd_buffer[gpu.cmd_len++] = data;
450 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Execute a GP0 linked-list DMA chain starting at start_addr in PSX
// RAM. Marks visited entries with bit 23 (invalid on real hardware) to
// detect loops, then clears the marks afterwards. Returns consumed
// cycles (accumulated into cpu_cycles; exact return path elided here).
454 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
456 uint32_t addr, *list;
457 uint32_t *llist_entry = NULL;
458 int len, left, count;
// flush staged GP0 words so chain data is processed in order
461 if (unlikely(gpu.cmd_len > 0))
464 // ff7 sends it's main list twice, detect this
// heuristic: same frame, nearly same hblank, previous list was big ->
// pre-mark the previous start so the duplicate walk stops immediately
465 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
466 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
467 gpu.state.last_list.cycles > 2048)
469 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
470 *llist_entry |= 0x800000;
473 log_io("gpu_dma_chain\n");
474 addr = start_addr & 0xffffff;
// walk entries until the 0xffffff terminator
475 for (count = 0; addr != 0xffffff; count++)
477 list = rambase + (addr & 0x1fffff) / 4;
// header word: next-entry address in low 24 bits, payload length above
479 addr = list[0] & 0xffffff;
// rough timing: 5 cycles per entry + 1 per data word
482 cpu_cycles += 5 + len;
484 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
486 // loop detection marker
487 // (bit23 set causes DMA error on real machine, so
488 // unlikely to be ever set by the game)
492 left = do_cmd_buffer(list + 1, len);
494 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
501 // remove loop detection markers
502 addr = start_addr & 0x1fffff;
503 while (count-- > 0) {
504 list = rambase + addr / 4;
505 addr = list[0] & 0x1fffff;
506 list[0] &= ~0x800000;
// undo the ff7 duplicate-list mark too, if we set one
509 *llist_entry &= ~0x800000;
// remember this list so the next call can detect a duplicate send
511 gpu.state.last_list.frame = *gpu.state.frame_count;
512 gpu.state.last_list.hcnt = *gpu.state.hcnt;
513 gpu.state.last_list.cycles = cpu_cycles;
514 gpu.state.last_list.addr = start_addr;
// Block DMA read from GP0: flush staged writes, then stream VRAM read
// data (from an active 0xc0 transfer) into the caller's buffer.
519 void GPUreadDataMem(uint32_t *mem, int count)
521 log_io("gpu_dma_read %p %d\n", mem, count);
523 if (unlikely(gpu.cmd_len > 0))
527 do_vram_io(mem, count, 1);
// Single-word GP0 read: advance an active VRAM read transfer by one
// word (falls back to the latched gp0 value otherwise -- that path is
// elided in this extraction).
530 uint32_t GPUreadData(void)
534 if (unlikely(gpu.cmd_len > 0))
539 do_vram_io(&ret, 1, 1);
541 log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush staged commands first so the status bits
// (texpage, ready flags, etc.) reflect everything written so far.
545 uint32_t GPUreadStatus(void)
549 if (unlikely(gpu.cmd_len > 0))
552 ret = gpu.status.reg;
553 log_io("gpu_read_status %08x\n", ret);
// Savestate layout fields (struct header elided in this extraction);
// this is the classic PSEmu-Pro GPUFreeze_t layout.
559 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
560 uint32_t ulStatus; // current gpu status
561 uint32_t ulControl[256]; // latest control register values
562 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save (type 1) / load (type 0) plugin state to/from the freeze
// structure: VRAM image, GP1 register values (with ex_regs stashed at
// ulControl[0xe0..]), and GPUSTAT.
565 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// --- save path ---
573 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
574 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
575 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
576 freeze->ulStatus = gpu.status.reg;
// --- load path ---
579 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
580 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
581 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
582 gpu.status.reg = freeze->ulStatus;
// replay control writes 8..1 so derived state (screen mode, ranges)
// is recomputed; the xor defeats the same-value write filter in
// GPUwriteStatus
583 for (i = 8; i > 0; i--) {
584 gpu.regs[i] ^= 1; // avoid reg change detection
585 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
587 renderer_sync_ecmds(gpu.ex_regs);
// whole VRAM was replaced -> invalidate all renderer caches
588 renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush pending rendering and present the frame unless
// the display is blanked, nothing changed, or this frame is skipped.
595 void GPUupdateLace(void)
599 renderer_flush_queues();
601 if (gpu.status.blanking || !gpu.state.fb_dirty)
604 if (gpu.frameskip.set) {
605 if (!gpu.frameskip.frame_ready) {
// give up on skipping if no frame was presented for ~9 frames
// (e.g. the game stopped flipping)
606 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
608 gpu.frameskip.active = 0;
610 gpu.frameskip.frame_ready = 0;
614 gpu.state.fb_dirty = 0;
// VBlank notification: decide whether interlaced rendering should be
// used and pass the current field (lcf) to the renderer.
617 void GPUvBlank(int is_vblank, int lcf)
619 int interlace = gpu.state.allow_interlace
620 && gpu.status.interlace && gpu.status.dheight;
621 // interlace doesn't look nice on progressive displays,
622 // so we have this "auto" mode here for games that don't read vram
// allow_interlace==2 is "auto": only interlace if the game read VRAM
// recently (it may depend on the field contents)
623 if (gpu.state.allow_interlace == 2
624 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// re-program the renderer when interlacing is on or just toggled
628 if (interlace || interlace != gpu.state.old_interlace) {
629 gpu.state.old_interlace = interlace;
633 renderer_flush_queues();
634 renderer_set_interlace(interlace, !lcf);
638 #include "../../frontend/plugin_lib.h"
// Frontend configuration hook: wire up frameskip settings/advice, the
// shared frame/hblank counters, interlace policy, and the raw VRAM
// pointer, then forward the config to renderer and video-out layers.
640 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
642 gpu.frameskip.set = cbs->frameskip;
643 gpu.frameskip.advice = &cbs->fskip_advice;
644 gpu.frameskip.active = 0;
645 gpu.frameskip.frame_ready = 1;
646 gpu.state.hcnt = cbs->gpu_hcnt;
647 gpu.state.frame_count = cbs->gpu_frame_count;
648 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
// give the frontend direct VRAM access if it wants it
650 if (cbs->pl_vout_set_raw_vram)
651 cbs->pl_vout_set_raw_vram(gpu.vram);
652 renderer_set_config(cbs);
653 vout_set_config(cbs);
656 // vim:shiftwidth=2:expandtab