2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Generic helpers: element count of a real array (not valid on pointers),
// a branch-prediction hint for cold paths, and an attribute that keeps
// rarely-taken code out of hot callers.
16 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
17 #define unlikely(x) __builtin_expect((x), 0)
18 #define noinline __attribute__((noinline))
// Debug log line tagged with the current frame count and hblank counter.
20 #define gpu_log(fmt, ...) \
21   printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Verbose I/O / anomaly tracing is compiled out by default; point these at
// gpu_log to enable it. log_anomaly(...) expands to nothing when disabled.
23 //#define log_io gpu_log
25 //#define log_anomaly gpu_log
26 #define log_anomaly(...)
// Single global GPU state instance. 2048-byte alignment — presumably so
// assembly/renderer code can reach members via fixed offsets; confirm
// against the renderer before changing.
28 struct psx_gpu gpu __attribute__((aligned(2048)));
// Forward declarations for the command-buffer consumer and the VRAM DMA
// completion hook used by the reset path below.
30 static noinline int do_cmd_buffer(uint32_t *data, int count);
31 static void finish_vram_transfer(int is_read);
// Drain state left over from interrupted work before a GP1 reset:
// flush any partially buffered GP0 command words, then close out an
// in-flight VRAM transfer so caches/dirty state stay consistent.
33 static noinline void do_cmd_reset(void)
35   if (unlikely(gpu.cmd_len > 0))
36     do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// gpu.dma.h > 0 means a VRAM read/write transfer is still active.
39   if (unlikely(gpu.dma.h > 0))
40     finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear control registers, reseed the e1..e7 shadow
// registers, and restore power-on display defaults (256x240).
44 static noinline void do_reset(void)
50   memset(gpu.regs, 0, sizeof(gpu.regs));
// Each ex_regs slot is preloaded with its own command byte (0xe0+i) in the
// top byte so later change-detection compares whole words.
// NOTE(review): could use the ARRAY_SIZE macro defined above.
51   for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
52     gpu.ex_regs[i] = (0xe0 + i) << 24;
// Power-on status word; bit meanings per the GPUSTAT register layout.
53   gpu.status.reg = 0x14802000;
56   gpu.screen.hres = gpu.screen.w = 256;
57   gpu.screen.vres = gpu.screen.h = 240;
// Recompute visible width from the horizontal display range (x1..x2).
// 2560 is the full GPU clock span of one scanline; the visible width is
// that fraction of the current horizontal resolution.
60 static noinline void update_width(void)
62   int sw = gpu.screen.x2 - gpu.screen.x1;
// Nonsensical range (empty or wider than a line): fall back to full hres.
63   if (sw <= 0 || sw >= 2560)
65     gpu.screen.w = gpu.screen.hres;
67     gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the vertical display range (y1..y2).
// When double-height (interlace, status.dheight) is on the span is scaled —
// the scaling/clamping statements are not visible here; verify in full file.
70 static noinline void update_height(void)
72   int sh = gpu.screen.y2 - gpu.screen.y1;
73   if (gpu.status.dheight)
// Per-frame frameskip state machine: decide whether the upcoming frame is
// skipped, and replay a fill command that was deferred during a skip.
81 static noinline void decide_frameskip(void)
// A frame was just skipped: reset the counter and mark a frame ready.
83   if (gpu.frameskip.active)
86     gpu.frameskip.cnt = 0;
87     gpu.frameskip.frame_ready = 1;
// Enable skipping on external advice, or while under the configured
// consecutive-skip budget (frameskip.set); otherwise render this frame.
90   if (!gpu.frameskip.active && *gpu.frameskip.advice)
91     gpu.frameskip.active = 1;
92   else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
93     gpu.frameskip.active = 1;
95     gpu.frameskip.active = 0;
// If we are going to render and a screen-clear (fill) was held back while
// skipping, execute it now so the frame starts from correct contents.
97   if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
99     do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
100     gpu.frameskip.pending_fill[0] = 0;
// Decide whether frameskip is allowed given the current e3 (draw area
// top-left) command word. Returns the new gpu.frameskip.allow value.
104 static noinline int decide_frameskip_allow(uint32_t cmd_e3)
106   // no frameskip if it decides to draw to display area,
107   // but not for interlace since it'll most likely always do that
108   uint32_t x = cmd_e3 & 0x3ff;
109   uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// Unsigned-wrap trick: (x - screen.x) >= w is a single-compare bounds test
// that also rejects x < screen.x (difference wraps to a huge value).
110   gpu.frameskip.allow = gpu.status.interlace ||
111     (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
112     (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
113   return gpu.frameskip.allow;
// GP1(0x10) "get GPU info": latch the requested value into gpu.gp0 for a
// subsequent GPUREAD. Case labels are elided in this view — the visible
// arms return a shadowed e-register (masked to 20 bits) and e5 (draw offset).
116 static noinline void get_gpu_info(uint32_t data)
118   switch (data & 0x0f) {
123       gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
126       gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): interior of the plugin init function (its signature is not
// visible in this chunk — presumably GPUinit). The renderer may flip
// enhancement_available during renderer_init().
142   gpu.state.enhancement_available = 0;
143   ret |= renderer_init();
// Allocate the enhanced-resolution buffer lazily; size covers a doubled
// 1024x512x16bpp VRAM plus extra working space.
145   if (gpu.state.enhancement_available) {
146     if (gpu.enhancement_bufer == NULL)
147       gpu.enhancement_bufer = malloc(2048 * 1024 * 2 + 1024 * 512 * 2);
// Allocation failure is logged but not fatal — the core keeps running
// without the enhancement buffer.
148     if (gpu.enhancement_bufer == NULL)
149       gpu_log("OOM for enhancement buffer\n");
// Enhancement not available: release any previously held buffer.
151   else if (gpu.enhancement_bufer != NULL) {
152     free(gpu.enhancement_bufer);
153     gpu.enhancement_bufer = NULL;
// Point the counter pointers at a harmless zero until the frontend
// installs real ones via GPUrearmedCallbacks().
156   gpu.state.frame_count = &gpu.zero;
157   gpu.state.hcnt = &gpu.zero;
158   gpu.frameskip.active = 0;
// Plugin teardown: delegates to the video-out backend and returns its
// status code to the emulator core.
165 long GPUshutdown(void)
167   return vout_finish();
// GP1 control-port write. Dispatches on the command byte (data >> 24);
// the switch/case labels themselves are elided in this view.
170 void GPUwriteStatus(uint32_t data)
// Horizontal/vertical resolution lookup tables indexed from GPUSTAT bits.
172   static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
173   static const short vres[4] = { 240, 480, 256, 480 };
174   uint32_t cmd = data >> 24;
// Shadow the register; skip redundant writes except for cmds 0, 1 and 5
// which have side effects even when the value is unchanged.
176   if (cmd < ARRAY_SIZE(gpu.regs)) {
177     if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
179     gpu.regs[cmd] = data;
182   gpu.state.fb_dirty = 1;
// Display enable/blanking (GP1 0x03), DMA direction (GP1 0x04).
192       gpu.status.blanking = data & 1;
195       gpu.status.dma = data & 3;
// Display start address (GP1 0x05): also the "flip" point used to track
// frameskip timing per displayed frame.
198       gpu.screen.x = data & 0x3ff;
199       gpu.screen.y = (data >> 10) & 0x3ff;
200       if (gpu.frameskip.set) {
201         decide_frameskip_allow(gpu.ex_regs[3]);
202         if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
204           gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range (GP1 0x06) and vertical range (GP1 0x07).
209       gpu.screen.x1 = data & 0xfff;
210       gpu.screen.x2 = (data >> 12) & 0xfff;
214       gpu.screen.y1 = data & 0x3ff;
215       gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode (GP1 0x08): splice the mode bits into GPUSTAT (16..22) and
// refresh the resolution from the lookup tables.
219       gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
220       gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
221       gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1 0x10..0x1f: GPU info query.
226       if ((cmd & 0xf0) == 0x10)
// Optional platform hook for extra status-write handling.
231 #ifdef GPUwriteStatus_ext
232   GPUwriteStatus_ext(data);
// Number of EXTRA parameter words for each GP0 command byte (total length
// is 1 + cmd_lengths[cmd]). 0x20..0x7f are draw primitives; 0x80/0xa0/0xc0
// are VRAM copy/write/read whose data phase is handled separately.
236 const unsigned char cmd_lengths[256] =
238   0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
239   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240   3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
241   5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
242   2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
243   3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
244   2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
245   1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
246   3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
247   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
248   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
249   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
250   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
251   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
252   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
253   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of 16bpp pixel (x, y) in the 1024-pixel-wide VRAM array.
256 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one scanline of l pixels between VRAM and a host buffer;
// direction is chosen by is_read (read = VRAM -> mem).
258 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
260   uint16_t *vram = VRAM_MEM_XY(x, y);
262     memcpy(mem, vram, l * 2); // l pixels = l*2 bytes
264     memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words of an active VRAM transfer (read or write).
// Handles a partial first line (dma.offset), whole middle lines, and a
// partial tail. Returns how many input words were consumed.
267 static int do_vram_io(uint32_t *data, int count, int is_read)
269   int count_initial = count;
270   uint16_t *sdata = (uint16_t *)data;
271   int x = gpu.dma.x, y = gpu.dma.y;
272   int w = gpu.dma.w, h = gpu.dma.h;
273   int o = gpu.dma.offset;
275   count *= 2; // operate in 16bpp pixels
// Finish the line that was left half-done by the previous call.
277   if (gpu.dma.offset) {
278     l = w - gpu.dma.offset;
282     do_vram_line(x + o, y, sdata, l, is_read);
// Transfer as many complete lines as the remaining data allows.
295   for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
297     do_vram_line(x, y, sdata, w, is_read);
// Leftover pixels start a partial line; its progress is kept in dma.offset.
303     do_vram_line(x, y, sdata, count, is_read);
// All lines done: close out the transfer (cache update / status bits).
309     finish_vram_transfer(is_read);
// Convert consumed pixels back to 32-bit words for the caller.
314   return count_initial - count / 2;
// Begin a GP0 0xa0 (write) / 0xc0 (read) VRAM transfer: decode position
// and size words, snapshot the transfer in dma_start, and prime state.
317 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
320     log_anomaly("start_vram_transfer while old unfinished\n");
322   gpu.dma.x = pos_word & 0x3ff;
323   gpu.dma.y = (pos_word >> 16) & 0x1ff;
// Hardware semantics: size 0 means maximum (1024/512), hence the -1/&/+1.
324   gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
325   gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
327   gpu.dma.is_read = is_read;
328   gpu.dma_start = gpu.dma;
// Make sure queued draws hit VRAM before we read it back.
330   renderer_flush_queues();
// Preload gp0 with the first pixels so an immediate GPUREAD works.
333     // XXX: wrong for width 1
334     memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
335     gpu.state.last_vram_read_frame = *gpu.state.frame_count;
338   log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
339     gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Close out a VRAM transfer. For writes, tell the renderer which VRAM
// rectangle changed so its texture caches can be invalidated.
342 static void finish_vram_transfer(int is_read)
347     renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
348                            gpu.dma_start.w, gpu.dma_start.h);
// Walk a GP0 command list WITHOUT drawing (frameskip active), while still
// tracking the side effects that matter: texture page (ex_regs[1]), the
// e0..e7 shadow registers, fills, and the skip/allow decision itself.
// Returns words consumed; *last_cmd gets the last command byte seen.
351 static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
353   int cmd = 0, pos = 0, len, dummy;
356   gpu.frameskip.pending_fill[0] = 0;
358   // XXX: polylines are not properly handled
359   while (pos < count && skip) {
360     uint32_t *list = data + pos;
362     len = 1 + cmd_lengths[cmd];
// Fill command: large fills are executed even while skipping, small ones
// are deferred in pending_fill and replayed when skipping ends.
365       if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
366         // clearing something large, don't skip
367         do_cmd_list(list, 3, &dummy);
369         memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
371     else if ((cmd & 0xf4) == 0x24) {
372       // flat textured prim
// Texture page lives in a different word for flat vs shaded prims.
373       gpu.ex_regs[1] &= ~0x1ff;
374       gpu.ex_regs[1] |= list[4] & 0x1ff;
376     else if ((cmd & 0xf4) == 0x34) {
377       // shaded textured prim
378       gpu.ex_regs[1] &= ~0x1ff;
379       gpu.ex_regs[1] |= list[5] & 0x1ff;
// e3 (draw area) can flip the skip decision mid-list.
381     else if (cmd == 0xe3)
382       skip = decide_frameskip_allow(list[0]);
384     if ((cmd & 0xf8) == 0xe0)
385       gpu.ex_regs[cmd & 7] = list[0];
387     if (pos + len > count) {
389       break; // incomplete cmd
// VRAM transfer commands end skip-mode processing.
391     if (cmd == 0xa0 || cmd == 0xc0)
// Push the updated e-registers to the renderer before returning.
396   renderer_sync_ecmds(gpu.ex_regs);
// Top-level GP0 word consumer: routes data to an active VRAM transfer,
// starts new transfers, and hands draw lists to the renderer (or to the
// skip walker while frameskip is active). Returns unconsumed word count.
401 static noinline int do_cmd_buffer(uint32_t *data, int count)
404   uint32_t old_e3 = gpu.ex_regs[3];
408   for (pos = 0; pos < count; )
// An active VRAM write eats the data stream first.
410     if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
412       pos += do_vram_io(data + pos, count - pos, 0);
417     cmd = data[pos] >> 24;
418     if (cmd == 0xa0 || cmd == 0xc0) {
419       // consume vram write/read cmd
420       start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
425     // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
426     if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
427       pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
429       pos += do_cmd_list(data + pos, count - pos, &cmd);
// Mirror texture page (e1) and mask settings (e6) into GPUSTAT bits 0..12.
438   gpu.status.reg &= ~0x1fff;
439   gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
440   gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
442   gpu.state.fb_dirty |= vram_dirty;
// Draw-area change means the frameskip decision must be re-evaluated.
444   if (old_e3 != gpu.ex_regs[3])
445     decide_frameskip_allow(gpu.ex_regs[3]);
// Drain the pending command buffer; any unconsumed tail (an incomplete
// command) is moved to the front to be completed by the next write.
450 static void flush_cmd_buffer(void)
452   int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
454     memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block). Flushes any partially buffered command
// first so ordering is preserved; leftover words are logged as discarded.
458 void GPUwriteDataMem(uint32_t *mem, int count)
462   log_io("gpu_dma_write %p %d\n", mem, count);
464   if (unlikely(gpu.cmd_len > 0))
467   left = do_cmd_buffer(mem, count);
469     log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: words are accumulated in cmd_buffer and flushed
// once the buffer fills (flush also happens from the read/status paths).
472 void GPUwriteData(uint32_t data)
474   log_io("gpu_write %08x\n", data);
475   gpu.cmd_buffer[gpu.cmd_len++] = data;
476   if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain in PSX RAM: each node is a header word
// (next-address in low 24 bits, payload length in the top byte) followed
// by GP0 words. Returns a cycle estimate. Loop-protected via a marker bit.
480 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
482   uint32_t addr, *list;
483   uint32_t *llist_entry = NULL;
484   int len, left, count;
487   if (unlikely(gpu.cmd_len > 0))
490   // ff7 sends it's main list twice, detect this
// Same frame, nearly the same hblank, and the last list was expensive:
// pre-mark the previous list's entry so the duplicate walk ends early.
491   if (*gpu.state.frame_count == gpu.state.last_list.frame &&
492       *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
493       gpu.state.last_list.cycles > 2048)
495     llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
496     *llist_entry |= 0x800000;
499   log_io("gpu_dma_chain\n");
500   addr = start_addr & 0xffffff;
// 0xffffff is the chain terminator.
501   for (count = 0; addr != 0xffffff; count++)
503     list = rambase + (addr & 0x1fffff) / 4;
505     addr = list[0] & 0xffffff;
// Rough DMA cost model: fixed per-node overhead plus per-word cost.
508     cpu_cycles += 5 + len;
510     log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
512     // loop detection marker
513     // (bit23 set causes DMA error on real machine, so
514     //  unlikely to be ever set by the game)
518       left = do_cmd_buffer(list + 1, len);
520         log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
527   // remove loop detection markers
528   addr = start_addr & 0x1fffff;
529   while (count-- > 0) {
530     list = rambase + addr / 4;
531     addr = list[0] & 0x1fffff;
532     list[0] &= ~0x800000;
// Also clear the duplicate-list marker set above, if any.
535     *llist_entry &= ~0x800000;
// Remember this list so the next call can detect a resend.
537   gpu.state.last_list.frame = *gpu.state.frame_count;
538   gpu.state.last_list.hcnt = *gpu.state.hcnt;
539   gpu.state.last_list.cycles = cpu_cycles;
540   gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA to RAM): flush pending commands first, then stream
// words out of the active VRAM-read transfer.
545 void GPUreadDataMem(uint32_t *mem, int count)
547   log_io("gpu_dma_read %p %d\n", mem, count);
549   if (unlikely(gpu.cmd_len > 0))
553     do_vram_io(mem, count, 1);
// Single-word GPUREAD: returns the next VRAM-read word, or (per the
// elided fallback path) the latched gpu.gp0 info value.
556 uint32_t GPUreadData(void)
560   if (unlikely(gpu.cmd_len > 0))
565     do_vram_io(&ret, 1, 1);
567   log_io("gpu_read %08x\n", ret);
// Read GPUSTAT; pending commands are flushed first so status bits
// (texpage, mask, busy flags) reflect everything written so far.
571 uint32_t GPUreadStatus(void)
575   if (unlikely(gpu.cmd_len > 0))
578   ret = gpu.status.reg;
579   log_io("gpu_read_status %08x\n", ret);
// NOTE(review): interior of the save-state freeze structure (struct
// declaration line not visible in this chunk). Layout must match the
// PCSX plugin ABI — do not reorder.
585   uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
586   uint32_t ulStatus;             // current gpu status
587   uint32_t ulControl[256];       // latest control register values
588   unsigned char psxVRam[1024*1024*2];    // current VRam image (full 2 MB for ZN)
// Save-state handler: type selects save vs load (branching elided here).
// Save copies VRAM, GP1 registers (slots 0..), e-registers (slots 0xe0..)
// and GPUSTAT into the freeze struct; load restores and replays them.
591 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
599       memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
600       memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
601       memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
602       freeze->ulStatus = gpu.status.reg;
605       memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
606       memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
607       memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
608       gpu.status.reg = freeze->ulStatus;
// Replay GP1 writes 8..1 to rebuild derived state; the XOR defeats the
// "value unchanged, skip" shortcut in GPUwriteStatus.
610       for (i = 8; i > 0; i--) {
611         gpu.regs[i] ^= 1; // avoid reg change detection
612         GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
614       renderer_sync_ecmds(gpu.ex_regs);
// Whole VRAM changed: invalidate all renderer caches.
615       renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush the renderer, handle blanking, apply the
// frameskip decision, and present the frame if anything changed.
622 void GPUupdateLace(void)
626   renderer_flush_queues();
// While blanked, emit at most one (black) update, then stay quiet.
628   if (gpu.status.blanking) {
629     if (!gpu.state.blanked) {
631       gpu.state.blanked = 1;
632       gpu.state.fb_dirty = 1;
// Nothing drawn since last present: skip the flip entirely.
637   if (!gpu.state.fb_dirty)
640   if (gpu.frameskip.set) {
641     if (!gpu.frameskip.frame_ready) {
// Watchdog: if no frame completed for ~9 vsyncs, stop skipping so the
// display cannot stall indefinitely.
642       if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
644       gpu.frameskip.active = 0;
646     gpu.frameskip.frame_ready = 0;
650   gpu.state.fb_dirty = 0;
651   gpu.state.blanked = 0;
// vblank notification: decide whether interlaced rendering is in effect
// and inform the renderer of the current field (lcf).
654 void GPUvBlank(int is_vblank, int lcf)
656   int interlace = gpu.state.allow_interlace
657     && gpu.status.interlace && gpu.status.dheight;
658   // interlace doesn't look nice on progressive displays,
659   // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 is "auto": only interlace when the game recently
// read VRAM back (otherwise field rendering is invisible to it).
660   if (gpu.state.allow_interlace == 2
661       && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Flush and reconfigure when interlacing is active or its state changed.
665   if (interlace || interlace != gpu.state.old_interlace) {
666     gpu.state.old_interlace = interlace;
670     renderer_flush_queues();
671     renderer_set_interlace(interlace, !lcf);
675 #include "../../frontend/plugin_lib.h"
// Frontend configuration hook: wire up frameskip settings, the shared
// frame/hblank counters, and renderer/vout options from the callbacks
// structure, then reset frameskip state.
677 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
679   gpu.frameskip.set = cbs->frameskip;
680   gpu.frameskip.advice = &cbs->fskip_advice;
681   gpu.frameskip.active = 0;
682   gpu.frameskip.frame_ready = 1;
683   gpu.state.hcnt = cbs->gpu_hcnt;
684   gpu.state.frame_count = cbs->gpu_frame_count;
685   gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
686   gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
// Give the frontend direct access to VRAM for raw display, if supported.
688   if (cbs->pl_vout_set_raw_vram)
689     cbs->pl_vout_set_raw_vram(gpu.vram);
690   renderer_set_config(cbs);
691   vout_set_config(cbs);
694 // vim:shiftwidth=2:expandtab