/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
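
// Command-state reset: flush any words still sitting in the command buffer
// and wind down an unfinished VRAM transfer, so command processing restarts
// from a clean state (used by the GP1 reset commands).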
static noinline void do_cmd_reset(void)
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
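
// Full GP1 reset: clears the control registers, reseeds the ex_regs mirrors
// with their command numbers (0xe0 + index) in the top byte, restores the
// default status word and falls back to a 256x240 display.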
static noinline void do_reset(void)
  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;

  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
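
// The horizontal display range registers are in GPU clock ticks; a full
// scanline is treated as 2560 ticks here, so the visible width is the
// configured range scaled against the current horizontal resolution.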
static noinline void update_width(void)
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;

static noinline void update_height(void)
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
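
// Frameskip bookkeeping, run once per display flip: count consecutively
// skipped frames while skipping is active, stay active while under the
// configured limit or while the frontend advises skipping, and once
// rendering resumes, replay a fill that was deferred during the skip.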
static noinline void decide_frameskip(void)
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
  // no frameskip if the game draws into the currently displayed area,
  // but don't apply this check for interlace since it'll most likely
  // always draw there in that mode
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
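
// GP1 "get GPU info" (commands 0x10-0x1f): latches the requested value into
// gpu.gp0 so the next GPUREAD returns it; the texture window and draw
// area/offset values come from the gpu.ex_regs mirrors.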
static noinline void get_gpu_info(uint32_t data)
  switch (data & 0x0f) {
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;

  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;

long GPUshutdown(void)
  return vout_finish();
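
// GP1 (status/control) writes: commands 0x00-0x08 cover reset, blanking,
// DMA direction, display start/range and video mode; 0x10+ are info
// requests. Repeated writes of the same value are filtered out for most
// commands to avoid redundant work.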
void GPUwriteStatus(uint32_t data)
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

      gpu.status.blanking = data & 1;

      gpu.status.dma = data & 3;

      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }

      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;

      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;

      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      renderer_notify_res_change();

      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
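
// Number of parameter words that follow each GP0 command word; the parsers
// below consume 1 + cmd_lengths[cmd] words per packet. Image transfers
// (0xa0/0xc0) and polylines carry additional variable-length data that is
// handled separately.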
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
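
// Moves pixel data for an ongoing VRAM read/write between the incoming word
// stream and VRAM, row by row. A transfer can be split across calls, so
// gpu.dma keeps the rectangle and gpu.dma.offset the position within the
// current row; the return value is the number of words consumed.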
static int do_vram_io(uint32_t *data, int count, int is_read)
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;

  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    do_vram_line(x + o, y, sdata, l, is_read);

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);

    do_vram_line(x, y, sdata, count, is_read);

  finish_vram_transfer(is_read);

  return count_initial - count / 2;
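
// Latches a new CPU<->VRAM image transfer from a GP0 0xa0/0xc0 packet.
// Position and size are masked to the 1024x512 16bpp VRAM, and a zero size
// means the maximum (hence the "-1 ... +1" dance). For reads, the first
// pixels are prefetched into gpu.gp0 so GPUREAD can return them.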
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();

    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

static void finish_vram_transfer(int is_read)
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
      gpu.dma_start.w, gpu.dma_start.h);
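
// Command parser used while a frame is being skipped: it still keeps
// state-affecting commands honest (the e0-e7 register mirrors, texture page
// bits picked up from textured prims), draws large fills immediately and
// defers smaller ones via pending_fill, and stops skipping as soon as an e3
// draw-area write lands back in the displayed region.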
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
  int cmd = 0, pos = 0, len, dummy;

  gpu.frameskip.pending_fill[0] = 0;

  // XXX: polylines are not properly handled
  while (pos < count && skip) {
    uint32_t *list = data + pos;
    len = 1 + cmd_lengths[cmd];

      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);

    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      break; // incomplete cmd

    if (cmd == 0xa0 || cmd == 0xc0)

  renderer_sync_ecmds(gpu.ex_regs);
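
// Main GP0 command processor: data is either routed into an active VRAM
// transfer or parsed as command packets, with 0xa0/0xc0 switching into
// transfer mode. Returns the number of words it could not consume yet
// (an incomplete packet at the end of the buffer).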
static noinline int do_cmd_buffer(uint32_t *data, int count)
  uint32_t old_e3 = gpu.ex_regs[3];

  for (pos = 0; pos < count; )
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      pos += do_vram_io(data + pos, count - pos, 0);

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else
      pos += do_cmd_list(data + pos, count - pos, &cmd);

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);
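
// Processes whatever is sitting in the small write buffer and keeps any
// unconsumed tail (an incomplete packet) at the start for the next call.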
static void flush_cmd_buffer(void)
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);

void GPUwriteDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);

void GPUwriteData(uint32_t data)
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
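
// Walks a GP0 linked-list DMA chain: each node starts with a header word
// holding the payload size in the top byte and the address of the next node
// in the low 24 bits, with 0xffffff as the terminator. Visited nodes are
// tagged with bit 23 so a malformed, looping chain cannot hang us; the
// markers are removed again after the walk.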
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");

  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
    list = rambase + (addr & 0x1fffff) / 4;
    addr = list[0] & 0xffffff;

    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to ever be set by the game)

    left = do_cmd_buffer(list + 1, len);
      log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }

    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;
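
// GPUREAD side: both the DMA (GPUreadDataMem) and single-word (GPUreadData)
// paths pull pixels out of an active VRAM->CPU transfer via do_vram_io, or
// return the value latched in gpu.gp0; GPUreadStatus returns the status
// word, flushing buffered commands first so its bits are up to date.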
void GPUreadDataMem(uint32_t *mem, int count)
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

    do_vram_io(mem, count, 1);

uint32_t GPUreadData(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);

uint32_t GPUreadStatus(void)
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
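
// Savestate interface used by the PCSX frontends: the freeze image holds
// the full 1024x512x16bpp VRAM, the GP1 control registers and the GPU
// status word; on restore the control writes are replayed through
// GPUwriteStatus and the renderer caches are invalidated.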
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;

      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;

      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
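
// Per-frame vsync hook: flush the renderer, handle display blanking, and
// only present a frame when VRAM actually changed (fb_dirty), honoring the
// frameskip state machine above.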
void GPUupdateLace(void)
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;

  if (!gpu.state.fb_dirty)

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)

      gpu.frameskip.active = 0;
    gpu.frameskip.frame_ready = 0;

  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;

void GPUvBlank(int is_vblank, int lcf)
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);

#include "../../frontend/plugin_lib.h"
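
// Configuration hook called by the PCSX-ReARMed frontend: it hands the
// plugin its frameskip settings, frame/hblank counters and interlace /
// enhancement options, and lets the video layer know where the raw VRAM
// lives.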
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);

// vim:shiftwidth=2:expandtab