2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Generic helpers: element count of a true array, a branch-prediction
// hint, and an attribute keeping cold helpers out of hot callers.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug logging: prefixes each message with the current frame count and
// horizontal counter, both read through pointers held in gpu.state.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly logging compile to nothing by default; uncomment the
// gpu_log aliases below to enable them.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Global GPU state blob. The 2048-byte alignment presumably serves the
// renderer's bulk/NEON accesses to embedded arrays -- TODO confirm.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Forward declaration: processes buffered GP0 words; returns how many
// trailing words were left unprocessed (see callers' "discarded" logs).
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
// Drain any buffered (possibly incomplete) GP0 data before a reset so
// it is not carried across the reset boundary.
// NOTE(review): remainder of the body (buffer-length clearing) is not
// visible here -- confirm against the full file.
31 static noinline void do_cmd_reset(void)
33 if (unlikely(gpu.cmd_len > 0))
34 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// Full GPU reset: clear the latched control registers, reinitialize the
// e1..e7 shadow registers, restore the power-on status word and default
// the screen to 256x240.
40 static noinline void do_reset(void)
46 memset(gpu.regs, 0, sizeof(gpu.regs));
// each ex_regs entry keeps the raw command word; tag it with its
// command number (0xe0 + i) in the top byte
47 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
48 gpu.ex_regs[i] = (0xe0 + i) << 24;
// 0x14802000: power-on GPUSTAT value -- TODO confirm individual bits
49 gpu.status.reg = 0x14802000;
52 gpu.screen.hres = gpu.screen.w = 256;
53 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the x1/x2 display-range registers;
// an out-of-range span falls back to the full horizontal resolution.
56 static noinline void update_width(void)
58 int sw = gpu.screen.x2 - gpu.screen.x1;
59 if (sw <= 0 || sw >= 2560)
// nonsense range -- use the whole resolution
61 gpu.screen.w = gpu.screen.hres;
// scale range to pixels; 2560 is presumably GPU clocks per scanline
63 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the y1/y2 display-range registers.
// NOTE(review): the dheight (interlace/double-height) branch and the
// final assignment are not visible here -- confirm against the full file.
66 static noinline void update_height(void)
68 int sh = gpu.screen.y2 - gpu.screen.y1;
69 if (gpu.status.dheight)
// Per-flip frameskip decision: toggles frameskip.active from the
// frontend's advice flag and the configured skip pattern.
77 static noinline void decide_frameskip(void)
79 if (gpu.frameskip.active)
// start of a new skip cycle: reset counter, mark a frame as ready
// for presentation
82 gpu.frameskip.cnt = 0;
83 gpu.frameskip.frame_ready = 1;
// skip when the frontend advises it, or while still inside the
// configured run of frames to skip; otherwise draw
86 if (!gpu.frameskip.active && *gpu.frameskip.advice)
87 gpu.frameskip.active = 1;
88 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
89 gpu.frameskip.active = 1;
91 gpu.frameskip.active = 0;
// Decide whether upcoming draw commands may be skipped, based on the
// e3 (draw-area top-left) command word versus the displayed area.
94 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
96 // no frameskip if it decides to draw to display area,
97 // but not for interlace since it'll most likely always do that
98 uint32_t x = cmd_e3 & 0x3ff;
99 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// the unsigned subtract-compare also rejects x < screen.x / y < screen.y
// (difference wraps to a huge value)
100 gpu.frameskip.allow = gpu.status.interlace ||
101 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
102 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// GP1 "get GPU info": latch the requested value into gpu.gp0 so a
// following GPUREAD returns it. Case labels are elided here; the
// visible bodies return e-register contents masked to 20 bits.
105 static noinline void get_gpu_info(uint32_t data)
107 switch (data & 0x0f) {
112 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
115 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): these lines belong to an init function whose header is
// not visible here (presumably GPUinit). They bring up the renderer,
// point the frame/hcnt counters at a safe local zero until the frontend
// supplies real counters (see GPUrearmedCallbacks), and start with
// frameskip disabled.
130 ret |= renderer_init();
132 gpu.state.frame_count = &gpu.zero;
133 gpu.state.hcnt = &gpu.zero;
134 gpu.frameskip.active = 0;
// Plugin API: shutdown entry point; only the video output needs an
// explicit teardown, and its status is returned to the caller.
141 long GPUshutdown(void)
143 return vout_finish();
// Plugin API: GP1 control-port write, dispatched on the command byte
// (data >> 24). Case labels are elided here; the bodies below appear
// in command order -- hedged command numbers should be confirmed.
146 void GPUwriteStatus(uint32_t data)
// resolution tables indexed by GPUSTAT mode bits (see the 0x08 case)
148 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
149 static const short vres[4] = { 240, 480, 256, 480 };
150 uint32_t cmd = data >> 24;
// skip writes that do not change the latched value; commands 0, 1 and
// 5 are exempt as repeats of them still have effects
152 if (cmd < ARRAY_SIZE(gpu.regs)) {
153 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
155 gpu.regs[cmd] = data;
158 gpu.state.fb_dirty = 1;
// presumably GP1(0x03): display blanking enable (bit 0)
168 gpu.status.blanking = data & 1;
// presumably GP1(0x04): DMA direction (2 bits)
171 gpu.status.dma = data & 3;
// display start address: also drives frameskip bookkeeping, since a
// buffer flip typically happens via this command
174 gpu.screen.x = data & 0x3ff;
175 gpu.screen.y = (data >> 10) & 0x3ff;
176 if (gpu.frameskip.set) {
// re-evaluate against the latched e3 (draw-area top-left) word
177 decide_frameskip_allow(gpu.ex_regs[3]);
178 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
180 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range (12-bit x1/x2 fields)
185 gpu.screen.x1 = data & 0xfff;
186 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range (10-bit y1/y2 fields)
190 gpu.screen.y1 = data & 0x3ff;
191 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: repack mode bits into GPUSTAT bits 16-22 (data bit 6
// lands in bit 16) and refresh the cached resolution from the tables
195 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
196 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
197 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1(0x10..0x1f): GPU info query, answered via gpu.gp0
202 if ((cmd & 0xf0) == 0x10)
// Number of extra argument words (beyond the command word itself) for
// each GP0 command byte; 16 entries per row, used by the look-ahead
// parser in do_cmd_buffer(). Zero rows are commands with no fixed
// length or no arguments.
208 const unsigned char cmd_lengths[256] =
210 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
213 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
214 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
215 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
216 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
217 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
218 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y); VRAM is 1024 pixels wide.
228 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l pixels between VRAM and mem; the
// direction is selected by is_read (nonzero = VRAM -> mem).
230 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
232 uint16_t *vram = VRAM_MEM_XY(x, y);
// read path: VRAM -> mem (l is in pixels, hence l * 2 bytes)
234 memcpy(mem, vram, l * 2);
// write path: mem -> VRAM
236 memcpy(vram, mem, l * 2);
// Transfer up to `count` 32-bit words between `data` and the VRAM
// rectangle described by gpu.dma, resuming mid-row via gpu.dma.offset
// if a previous call stopped partway. Returns the number of words
// actually consumed.
239 static int do_vram_io(uint32_t *data, int count, int is_read)
241 int count_initial = count;
242 uint16_t *sdata = (uint16_t *)data;
243 int x = gpu.dma.x, y = gpu.dma.y;
244 int w = gpu.dma.w, h = gpu.dma.h;
245 int o = gpu.dma.offset;
247 count *= 2; // operate in 16bpp pixels
// finish a previously interrupted row first
249 if (gpu.dma.offset) {
250 l = w - gpu.dma.offset;
254 do_vram_line(x + o, y, sdata, l, is_read);
// then as many whole rows as the remaining data covers
267 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
269 do_vram_line(x, y, sdata, w, is_read);
// and a trailing partial row (continued on the next call)
272 if (h > 0 && count > 0) {
274 do_vram_line(x, y, sdata, count, is_read);
// count is back in pixel units here; convert to consumed words
282 return count_initial - count / 2;
// Begin a VRAM transfer (GP0 0xa0 write / 0xc0 read): decode the
// position and size command words into gpu.dma and prepare the
// renderer / GPUREAD state accordingly.
285 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
288 log_anomaly("start_vram_transfer while old unfinished\n");
// x is 10 bits, y is 9; "-1 & mask, +1" makes a size of 0 wrap to the
// maximum (1024 wide / 512 high)
290 gpu.dma.x = pos_word & 0x3ff;
291 gpu.dma.y = (pos_word >> 16) & 0x1ff;
292 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
293 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
// read path: queued draws must reach VRAM before the CPU reads it
296 renderer_flush_queues();
// pre-latch the first two pixels for GPUREAD, and remember when VRAM
// was last read (used by the interlace "auto" mode in GPUvBlank)
299 // XXX: wrong for width 1
300 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
301 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
// write path: CPU is about to overwrite VRAM -- drop cached copies
304 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
307 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
308 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Core GP0 command processor. Scans `data` in a look-ahead pass to
// find runs of plain draw commands (handed to do_cmd_list() in one
// batch), while handling VRAM I/O commands and e-register updates
// itself. Returns the number of trailing words left unprocessed
// (an incomplete command kept for the next call).
311 static noinline int do_cmd_buffer(uint32_t *data, int count)
313 int len, cmd, start, pos;
317 for (start = pos = 0; pos < count; )
// continue an in-progress VRAM transfer before parsing commands
// (the guarding condition is elided here)
323 pos += do_vram_io(data + pos, count - pos, 0);
329 // do look-ahead pass to detect SR changes and VRAM i/o
330 while (pos < count) {
331 uint32_t *list = data + pos;
// total command size: command word plus its argument words
333 len = 1 + cmd_lengths[cmd];
335 //printf(" %3d: %02x %d\n", pos, cmd, len);
// track the texture page bits of ex_regs[1] (e1) from the texpage
// word embedded in textured primitives, so status stays accurate
336 if ((cmd & 0xf4) == 0x24) {
337 // flat textured prim
338 gpu.ex_regs[1] &= ~0x1ff;
339 gpu.ex_regs[1] |= list[4] & 0x1ff;
341 else if ((cmd & 0xf4) == 0x34) {
342 // shaded textured prim
343 gpu.ex_regs[1] &= ~0x1ff;
344 gpu.ex_regs[1] |= list[5] & 0x1ff;
// e3 changes the draw area -- re-evaluate frameskip eligibility
346 else if (cmd == 0xe3)
347 decide_frameskip_allow(list[0]);
// 0x02..0xbf are draw commands (dirty VRAM); 0xe0..0xe7 are latched
// into the shadow registers
349 if (2 <= cmd && cmd < 0xc0)
351 else if ((cmd & 0xf8) == 0xe0)
352 gpu.ex_regs[cmd & 7] = list[0];
354 if (pos + len > count) {
356 break; // incomplete cmd
// VRAM write/read: stop the run here, handled outside the loop
358 if (cmd == 0xa0 || cmd == 0xc0)
// submit the accumulated run of draw commands unless this frame is
// being skipped
363 if (pos - start > 0) {
364 if (!gpu.frameskip.active || !gpu.frameskip.allow)
365 do_cmd_list(data + start, pos - start);
369 if (cmd == 0xa0 || cmd == 0xc0) {
370 // consume vram write/read cmd
371 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// mirror e1/e6 state into the low GPUSTAT bits (texture page and
// mask-bit settings)
378 gpu.status.reg &= ~0x1fff;
379 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
380 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// while skipping, the renderer never saw the e-commands above --
// push the shadow registers to it explicitly
382 if (gpu.frameskip.active)
383 renderer_sync_ecmds(gpu.ex_regs);
384 gpu.state.fb_dirty |= vram_dirty;
// Drain gpu.cmd_buffer through do_cmd_buffer() and move any leftover
// (incomplete) words to the front of the buffer for the next write.
389 static void flush_cmd_buffer(void)
391 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// left is in 32-bit words, memmove takes bytes, hence * 4
393 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Plugin API: bulk GP0 write (DMA block). Flushes any buffered
// single-word writes first so ordering is preserved, then processes
// the block directly; leftover words cannot be kept and are reported
// as discarded.
397 void GPUwriteDataMem(uint32_t *mem, int count)
401 log_io("gpu_dma_write %p %d\n", mem, count);
403 if (unlikely(gpu.cmd_len > 0))
406 left = do_cmd_buffer(mem, count);
408 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Plugin API: single-word GP0 write. Words are buffered and only
// processed when the buffer fills (flush path elided) or when another
// entry point forces a flush.
411 void GPUwriteData(uint32_t data)
413 log_io("gpu_write %08x\n", data);
414 gpu.cmd_buffer[gpu.cmd_len++] = data;
415 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Plugin API: walk a GPU DMA linked list in emulated RAM. Each node's
// word 0 holds the next-node address in its low 24 bits (presumably the
// payload length in the top byte -- the extraction is elided here);
// 0xffffff terminates the list. Bit 23 is borrowed as a loop-detection
// marker and cleared again afterwards. Returns cycles (return elided).
419 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
421 uint32_t addr, *list;
422 uint32_t *llist_entry = NULL;
423 int len, left, count;
// keep ordering with buffered single-word GP0 writes
426 if (unlikely(gpu.cmd_len > 0))
429 // ff7 sends it's main list twice, detect this
430 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
431 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
432 gpu.state.last_list.cycles > 2048)
// pre-mark the previous list's head so walking it stops immediately
434 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
435 *llist_entry |= 0x800000;
438 log_io("gpu_dma_chain\n");
439 addr = start_addr & 0xffffff;
440 for (count = 0; addr != 0xffffff; count++)
// 0x1fffff masks the address into the 2 MB RAM mirror
442 list = rambase + (addr & 0x1fffff) / 4;
444 addr = list[0] & 0xffffff;
// rough timing: fixed per-node cost plus per-word cost
447 cpu_cycles += 5 + len;
449 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
451 // loop detection marker
452 // (bit23 set causes DMA error on real machine, so
453 // unlikely to be ever set by the game)
// payload starts right after the link word
457 left = do_cmd_buffer(list + 1, len);
459 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
466 // remove loop detection markers
467 addr = start_addr & 0x1fffff;
468 while (count-- > 0) {
469 list = rambase + addr / 4;
470 addr = list[0] & 0x1fffff;
471 list[0] &= ~0x800000;
// also clear the ff7 double-list marker if one was planted
474 *llist_entry &= ~0x800000;
// remember this submission for the double-list detection above
476 gpu.state.last_list.frame = *gpu.state.frame_count;
477 gpu.state.last_list.hcnt = *gpu.state.hcnt;
478 gpu.state.last_list.cycles = cpu_cycles;
479 gpu.state.last_list.addr = start_addr;
484 void GPUreadDataMem(uint32_t *mem, int count)
486 log_io("gpu_dma_read %p %d\n", mem, count);
488 if (unlikely(gpu.cmd_len > 0))
492 do_vram_io(mem, count, 1);
// Plugin API: single-word GPUREAD. Pulls one word from an active
// VRAM->CPU transfer into ret (declaration and fallback path elided;
// gpu.gp0 presumably supplies the value otherwise -- TODO confirm).
495 uint32_t GPUreadData(void)
499 if (unlikely(gpu.cmd_len > 0))
504 do_vram_io(&ret, 1, 1);
506 log_io("gpu_read %08x\n", ret);
// Plugin API: GPUSTAT read. Buffered GP0 data is flushed first so the
// status bits (texture page, mask bits, etc.) are up to date.
510 uint32_t GPUreadStatus(void)
514 if (unlikely(gpu.cmd_len > 0))
517 ret = gpu.status.reg;
518 log_io("gpu_read_status %08x\n", ret);
// Savestate payload fields (the enclosing GPUFreeze struct declaration
// is elided above); the layout is fixed by the PCSX GPU plugin API.
524 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
525 uint32_t ulStatus; // current gpu status
526 uint32_t ulControl[256]; // latest control register values
527 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Plugin API: save/load state. The type dispatch is elided here; the
// first group copies state out into *freeze (save), the second restores
// it (load). ex_regs are stashed in the unused 0xe0.. slots of
// ulControl[256].
530 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// --- save path ---
538 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
539 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
540 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
541 freeze->ulStatus = gpu.status.reg;
// --- load path: renderer caches refer to old VRAM, drop them all ---
544 renderer_invalidate_caches(0, 0, 1024, 512);
545 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
546 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
547 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
548 gpu.status.reg = freeze->ulStatus;
// replay control registers 8..1 through GPUwriteStatus; the ^1 pair
// defeats its same-value skip while still writing the original value
549 for (i = 8; i > 0; i--) {
550 gpu.regs[i] ^= 1; // avoid reg change detection
551 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// restored e-commands must also reach the renderer
553 renderer_sync_ecmds(gpu.ex_regs);
// Plugin API: per-frame (vblank) presentation hook. Flushes the
// renderer, decides whether to actually show the frame, and applies
// the frameskip pacing.
560 void GPUupdateLace(void)
564 renderer_flush_queues();
// nothing to present: display blanked or framebuffer unchanged
566 if (gpu.status.blanking || !gpu.state.fb_dirty)
569 if (gpu.frameskip.set) {
570 if (!gpu.frameskip.frame_ready) {
// cap consecutive skips -- presumably force a frame out after 9
// frames without a flip; TODO confirm the elided control flow
571 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
573 gpu.frameskip.active = 0;
575 gpu.frameskip.frame_ready = 0;
// frame handed to the output (call elided); mark it clean
579 gpu.state.fb_dirty = 0;
// Plugin API: vblank notification. Computes the effective interlace
// mode and pushes changes (and the current field, via lcf) to the
// renderer.
582 void GPUvBlank(int is_vblank, int lcf)
584 int interlace = gpu.state.allow_interlace
585 && gpu.status.interlace && gpu.status.dheight;
586 // interlace doesn't look nice on progressive displays,
587 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 means "auto": keep interlace only if the game
// read VRAM within the last frame (effect on `interlace` is elided)
588 if (gpu.state.allow_interlace == 2
589 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// reconfigure the renderer whenever interlace is on or just changed
593 if (interlace || interlace != gpu.state.old_interlace) {
594 gpu.state.old_interlace = interlace;
// flush before switching so queued draws use the old settings
598 renderer_flush_queues();
599 renderer_set_interlace(interlace, !lcf);
603 #include "../../frontend/plugin_lib.h"
// PCSX-ReARMed extension: the frontend hands over its configuration and
// live counters. Frameskip state is reset, counter pointers replace the
// safe defaults installed at init, and config is propagated to the
// renderer and video output.
605 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
607 gpu.frameskip.set = cbs->frameskip;
608 gpu.frameskip.advice = &cbs->fskip_advice;
609 gpu.frameskip.active = 0;
610 gpu.frameskip.frame_ready = 1;
611 gpu.state.hcnt = cbs->gpu_hcnt;
612 gpu.state.frame_count = cbs->gpu_frame_count;
613 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
// optional raw-VRAM display path offered by the frontend
615 if (cbs->pl_vout_set_raw_vram)
616 cbs->pl_vout_set_raw_vram(gpu.vram);
617 renderer_set_config(cbs);
618 vout_set_config(cbs);
621 // vim:shiftwidth=2:expandtab