/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)
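// the one global GPU state instance; the large alignment is presumably
// there so the assembly renderer can reach its fields at fixed offsets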
struct psx_gpu gpu __attribute__((aligned(2048)));
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
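// GP1(0x01) command buffer reset: drain anything still buffered and
// complete an unfinished VRAM transfer before dropping the queue state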
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
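// GP1(0x00) full reset: clear the control registers, reinitialize the
// 0xe0-0xe7 shadow registers to their command numbers and restore the
// power-on status and default 256x240 screen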
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}
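// screen size is derived from the display range registers; x1/x2 count
// GPU video clocks (a scanline is nominally 2560 of them), so the width
// is scaled by hres/2560 and out-of-range values fall back to full size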
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}
static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2; // interlace doubles the line count
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}
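// frameskip policy, evaluated once per display flip: skip up to
// frameskip.set consecutive frames, or follow the frontend's advice
// pointer when one is provided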
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}
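// takes the current GP0(0xe3) drawing area start word; the unsigned
// subtract-and-compare below is a branchless range check, since
// (uint32_t)(x - screen.x) < w holds iff x is in [screen.x, screen.x + w)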
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if the game draws to the display area,
  // but ignore that for interlace since it will nearly always do so
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
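// GP1(0x10) "get GPU info": latch the requested internal register into
// gpu.gp0, where the next GPUREAD (GPUreadData) will pick it up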
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02: case 0x03: case 0x04: case 0x05:
      // texture window, draw area, draw offset (0xe2-0xe5 shadows)
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    default:
      break;
  }
}
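// PSEmu-style plugin entry points; until the frontend installs real
// counters via GPUrearmedCallbacks(), frame_count and hcnt point at a
// zero placeholder so the logging/frameskip code can always dereference them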
long GPUinit(void)
{
  int ret;

  ret  = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}
long GPUshutdown(void)
{
  return vout_finish();
}
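// GP1 (control register) writes; writes repeating the current value are
// dropped early, except for the resets (0x00/0x01) and the display
// address (0x05), which have effects even when the value is unchanged.
// Example: GPUwriteStatus(0x08000001) is GP1(0x08) selecting the
// 320-pixel hres entry of the table below with the 240-line vres.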
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
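// number of extra parameter words following each GP0 command word
// (do_cmd_list_skip computes len = 1 + cmd_lengths[cmd]); the 0xa0/0xc0
// VRAM transfers list only their setup words here, the pixel payload is
// consumed separately by do_vram_io()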
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
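// VRAM is a 1024x512 grid of 16-bit units (2 MB total) that both the
// CPU-side transfers below and the renderer operate on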
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}
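// stream pixel data between the caller's buffer and the VRAM rectangle
// described by gpu.dma; a transfer may span many calls, so the current
// line (y) and position within it (offset) persist in gpu.dma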
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish the line started by a previous call first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // report how many 32-bit words were consumed
  return count_initial - count / 2;
}
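// set up GP0(0xa0) image load / GP0(0xc0) image store; in the size
// encoding 0 means maximum, hence ((size - 1) & mask) + 1 giving widths
// of 1-1024 and heights of 1-512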
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}
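// command list parser used while frameskipping: primitives are stepped
// over without rendering, but state-carrying words (texpage bits of
// textured prims, the 0xe1-0xe7 environment commands) are still tracked
// so the renderer stays consistent once skipping ends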
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy;
  int skip = 1;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    if (cmd == 0x02) {
      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(data + pos, 3, &dummy);
    }
    else if ((cmd & 0xf4) == 0x24) {
      // flat textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[4] & 0x1ff;
    }
    else if ((cmd & 0xf4) == 0x34) {
      // shaded textured prim
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= list[5] & 0x1ff;
    }
    else if (cmd == 0xe3)
      skip = decide_frameskip_allow(list[0]);

    if ((cmd & 0xf8) == 0xe0)
      gpu.ex_regs[cmd & 7] = list[0];

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (cmd == 0xa0 || cmd == 0xc0)
      break; // image i/o, handled by the caller
    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
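// central GP0 word consumer: feeds an active VRAM write, the skip parser
// or the renderer's do_cmd_list(), and returns how many words were left
// unprocessed (an incomplete trailing command stays buffered until the
// rest arrives)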
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      break; // incomplete cmd
  }

  // mirror the 0xe1/0xe6 state into the status register
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
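// GP0 linked-list DMA: each node starts with a header word holding the
// payload word count in bits 31-24 and the next node's RAM address in
// bits 23-0, with 0xffffff terminating the chain; bit 23 doubles as a
// visited marker for loop detection since real hardware would raise a
// DMA error for such addresses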
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000; // pre-mark so a resend of the same list stops at once
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break; // loop detected
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}
uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
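// savestate support; on load every control register is replayed through
// GPUwriteStatus(), with gpu.regs[i] toggled first so the "unchanged
// value" early-out there cannot swallow the write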
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}
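// called by the frontend once per emulated vsync: flush pending work and
// flip the output frame if anything changed and frameskip permits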
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no new frame yet; keep skipping, but not for too long
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
    interlace = 0;

  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
#include "../../frontend/plugin_lib.h"
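// ReARMed-specific configuration hook: the frontend hands over its
// counters and settings here, replacing the placeholders installed by
// GPUinit()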
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
// vim:shiftwidth=2:expandtab