2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// --- Helper macros and the global GPU state object ---
// Element count of a true array (not valid on pointers/parameters).
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: condition is expected to be false.
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug log prefixed with the current frame count and horizontal counter.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Enable by redefining to gpu_log; compiled out by default.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Single global GPU state. Aligned to 2048 bytes, presumably so
// assembly/NEON code can reach fields with fixed offsets — TODO confirm.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Reset GPU to power-on defaults: clear all GP1 and extra (GP0 0xE?)
// registers, restore the initial status word and a 256x240 screen mode.
// NOTE(review): interior lines are missing from this view; the visible
// statements may be guarded by code not shown here.
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 memset(gpu.ex_regs, 0, sizeof(gpu.ex_regs));
// Initial status register value after reset (display disabled, etc.) —
// exact bit meanings follow the PSX GPUSTAT layout; verify against spec.
33 gpu.status.reg = 0x14802000;
// Default visible resolution and output size.
36 gpu.screen.hres = gpu.screen.w = 256;
37 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width in pixels from the horizontal display range
// (x1..x2, in GPU dot clocks) scaled by the current horizontal resolution.
// 2560 appears to be the full scanline width in the same clock units.
40 static noinline void update_width(void)
42 int sw = gpu.screen.x2 - gpu.screen.x1;
// Nonsensical range: fall back to full hres.
43 if (sw <= 0 || sw >= 2560)
45 gpu.screen.w = gpu.screen.hres;
47 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the vertical display range (y1..y2).
// NOTE(review): most of this body is missing from this view; only the
// range computation and the double-height (interlace) test are visible.
50 static noinline void update_height(void)
52 int sh = gpu.screen.y2 - gpu.screen.y1;
// dheight set => 480-line (interlaced) mode; handling not visible here.
53 if (gpu.status.dheight)
// Decide whether the next frame should be skipped, based on the configured
// frameskip mode (gpu.frameskip.set) and external advice from the frontend.
// NOTE(review): some interleaving lines are missing from this view.
61 static noinline void decide_frameskip(void)
// Coming out of an active skip: reset counter, mark a frame as ready.
63 if (gpu.frameskip.active)
66 gpu.frameskip.cnt = 0;
67 gpu.frameskip.frame_ready = 1;
// Frontend advice (auto mode) can force skipping on.
70 if (!gpu.frameskip.active && *gpu.frameskip.advice)
71 gpu.frameskip.active = 1;
// Fixed mode: skip until 'set' consecutive frames have been skipped.
72 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
73 gpu.frameskip.active = 1;
75 gpu.frameskip.active = 0;
// Given a GP0 0xE3 (draw area top-left) command word, decide whether
// frameskip is allowed: skip only when the draw area lies outside the
// currently displayed region (drawing to the display area must be shown),
// except in interlace mode where that is the normal rendering pattern.
78 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
80 // no frameskip if it decides to draw to display area,
81 // but not for interlace since it'll most likely always do that
82 uint32_t x = cmd_e3 & 0x3ff;
83 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// The unsigned subtraction folds both "left of/above the screen" (wraps
// to a huge value) and "beyond the right/bottom edge" into one compare.
84 gpu.frameskip.allow = gpu.status.interlace ||
85 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
86 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// GP1(0x10) "get GPU info": place the requested internal value in gpu.gp0
// for readback. NOTE(review): the case labels of this switch are missing
// from this view; only two of the result expressions are visible.
89 static noinline void get_gpu_info(uint32_t data)
91 switch (data & 0x0f) {
// Low 20 bits of one of the GP0 0xE? shadow registers, selected by data.
96 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
99 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): fragment of GPUinit — the function header and surrounding
// lines are missing from this view. Initializes the renderer and points
// the frame/hcnt counters at a local zero until the frontend registers
// real counters via GPUrearmedCallbacks().
114 ret |= renderer_init();
116 gpu.state.frame_count = &gpu.zero;
117 gpu.state.hcnt = &gpu.zero;
// Plugin API: tear down video output. Returns vout_finish()'s status.
122 long GPUshutdown(void)
124 return vout_finish();
// Plugin API: handle a GP1 control-port write. The top byte selects the
// command; the rest is the payload. NOTE(review): the switch/case labels
// and several lines are missing from this view, so the visible statements
// below belong to different (unlabeled) command handlers.
127 void GPUwriteStatus(uint32_t data)
// Horizontal/vertical resolution lookup tables indexed by status bits.
129 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
130 static const short vres[4] = { 240, 480, 256, 480 };
131 uint32_t cmd = data >> 24;
// Shadow the register; skip redundant writes (except cmds 0 and 5,
// which have side effects even when the value is unchanged).
133 if (cmd < ARRAY_SIZE(gpu.regs)) {
134 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
136 gpu.regs[cmd] = data;
139 gpu.state.fb_dirty = 1;
// Display enable/blanking bit.
146 gpu.status.blanking = data & 1;
// DMA direction / data request mode.
149 gpu.status.dma = data & 3;
// Display start address (x, y) in VRAM; a change may complete a flip,
// which feeds the frameskip decision.
152 gpu.screen.x = data & 0x3ff;
153 gpu.screen.y = (data >> 10) & 0x3ff;
154 if (gpu.frameskip.set) {
155 decide_frameskip_allow(gpu.ex_regs[3]);
156 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
158 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// Horizontal display range (in dot clocks).
163 gpu.screen.x1 = data & 0xfff;
164 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range (in scanlines).
168 gpu.screen.y1 = data & 0x3ff;
169 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: pack the mode bits into status and derive resolution.
173 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
174 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
175 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1 0x10..0x1f: info readback.
180 if ((cmd & 0xf0) == 0x10)
// Number of additional parameter words for each GP0 command byte
// (total packet size is 1 + cmd_lengths[cmd]). Zero entries are either
// parameterless commands or commands with variable/special length
// (e.g. 0xa0/0xc0 VRAM transfers, handled separately in check_cmd()).
186 const unsigned char cmd_lengths[256] =
188 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
191 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
192 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
193 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
194 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
195 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
196 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
199 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM.
206 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l 16-bit pixels between VRAM and a host
// buffer; direction chosen by is_read (read = VRAM -> mem).
// NOTE(review): the if/else around the two memcpy calls is not visible
// in this view but is implied by is_read.
208 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
210 uint16_t *vram = VRAM_MEM_XY(x, y);
212 memcpy(mem, vram, l * 2);
214 memcpy(vram, mem, l * 2);
// Perform (part of) an ongoing VRAM transfer set up by
// start_vram_transfer(). Consumes up to 'count' 32-bit words from/into
// 'data' and returns how many words were actually used, so the caller can
// resume later. Transfer position persists in gpu.dma across calls.
// NOTE(review): several lines (loop/partial-line bookkeeping) are missing
// from this view.
217 static int do_vram_io(uint32_t *data, int count, int is_read)
219 int count_initial = count;
220 uint16_t *sdata = (uint16_t *)data;
221 int x = gpu.dma.x, y = gpu.dma.y;
222 int w = gpu.dma.w, h = gpu.dma.h;
// o = resume offset within a partially transferred line.
223 int o = gpu.dma.offset;
225 count *= 2; // operate in 16bpp pixels
// Finish the partially-done line first.
227 if (gpu.dma.offset) {
228 l = w - gpu.dma.offset;
232 do_vram_line(x + o, y, sdata, l, is_read);
// Whole lines.
245 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
247 do_vram_line(x, y, sdata, w, is_read);
// Trailing partial line; remainder is picked up on the next call.
250 if (h > 0 && count > 0) {
252 do_vram_line(x, y, sdata, count, is_read);
// Words consumed (count is back in 16-bit units here, hence /2).
260 return count_initial - count / 2;
// Begin a GP0 0xa0 (write) / 0xc0 (read) VRAM rectangle transfer:
// decode position and size, flush pending renderer work, and prime state
// consumed incrementally by do_vram_io().
263 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
266 log_anomaly("start_vram_transfer while old unfinished\n");
268 gpu.dma.x = pos_word & 0x3ff;
269 gpu.dma.y = (pos_word >> 16) & 0x1ff;
270 gpu.dma.w = size_word & 0x3ff;
271 gpu.dma.h = (size_word >> 16) & 0x1ff;
// Renderer must finish queued draws before VRAM is read or overwritten.
274 renderer_flush_queues();
// For reads, pre-fill gp0 with the first two pixels so an immediate
// GPUreadData() returns valid data.
277 // XXX: wrong for width 1
278 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
279 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
// For writes, the renderer's texture caches over this rect become stale.
282 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
285 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
286 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Parse a buffer of GP0 command words: a look-ahead pass detects commands
// that change status/ex_regs or start VRAM i/o, then contiguous drawing
// runs are handed to do_cmd_list() (unless skipped by frameskip).
// Returns the number of words left unprocessed (incomplete trailing cmd).
// NOTE(review): multiple lines are missing from this view; control-flow
// comments below describe only what the visible statements establish.
289 static int check_cmd(uint32_t *data, int count)
291 int len, cmd, start, pos;
295 for (start = pos = 0; pos < count; )
// If a VRAM transfer is in progress, feed it first.
301 pos += do_vram_io(data + pos, count - pos, 0);
307 // do look-ahead pass to detect SR changes and VRAM i/o
308 while (pos < count) {
309 uint32_t *list = data + pos;
311 len = 1 + cmd_lengths[cmd];
313 //printf(" %3d: %02x %d\n", pos, cmd, len);
// Textured prims carry a texpage/CLUT word; mirror its low bits into
// the 0xe1 shadow register so GPUSTAT stays accurate.
314 if ((cmd & 0xf4) == 0x24) {
315 // flat textured prim
316 gpu.ex_regs[1] &= ~0x1ff;
317 gpu.ex_regs[1] |= list[4] & 0x1ff;
319 else if ((cmd & 0xf4) == 0x34) {
320 // shaded textured prim
321 gpu.ex_regs[1] &= ~0x1ff;
322 gpu.ex_regs[1] |= list[5] & 0x1ff;
// Draw-area change re-evaluates whether skipping is allowed.
324 else if (cmd == 0xe3)
325 decide_frameskip_allow(list[0]);
// 0x02..0xbf are drawing commands (VRAM gets dirty).
327 if (2 <= cmd && cmd < 0xc0)
// 0xe0..0xe7: shadow the draw-environment registers.
329 else if ((cmd & 0xf8) == 0xe0)
330 gpu.ex_regs[cmd & 7] = list[0];
// Don't step past the buffer on a truncated command.
332 if (pos + len > count) {
334 break; // incomplete cmd
// VRAM write/read breaks out so the run before it can be flushed.
336 if (cmd == 0xa0 || cmd == 0xc0)
// Flush the accumulated drawing run to the renderer (unless skipping).
341 if (pos - start > 0) {
342 if (!gpu.frameskip.active || !gpu.frameskip.allow)
343 do_cmd_list(data + start, pos - start);
347 if (cmd == 0xa0 || cmd == 0xc0) {
348 // consume vram write/read cmd
349 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// Rebuild the live GPUSTAT bits from the shadowed e1/e6 registers.
356 gpu.status.reg &= ~0x1fff;
357 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
358 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// Skipped frames still need the renderer's env registers kept in sync.
360 if (gpu.frameskip.active)
361 renderer_sync_ecmds(gpu.ex_regs);
362 gpu.state.fb_dirty |= vram_dirty;
// Process the buffered single-word writes (from GPUwriteData); any
// incomplete trailing command is moved to the buffer's start for later.
367 static void flush_cmd_buffer(void)
369 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
371 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Plugin API: bulk GP0 data write (DMA block). Flushes any buffered
// single-word writes first so ordering is preserved, then parses 'mem'.
375 void GPUwriteDataMem(uint32_t *mem, int count)
379 log_io("gpu_dma_write %p %d\n", mem, count);
381 if (unlikely(gpu.cmd_len > 0))
384 left = check_cmd(mem, count);
// Leftover words can't be carried over for a DMA source — report them.
386 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Plugin API: single GP0 word write. Buffered until the buffer fills
// (then flushed), since commands span multiple words.
389 void GPUwriteData(uint32_t data)
391 log_io("gpu_write %08x\n", data);
392 gpu.cmd_buffer[gpu.cmd_len++] = data;
393 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Plugin API: walk a GP0 linked-list DMA chain starting at start_addr.
// Each node is a header word (next-address in low 24 bits, payload length
// in the top byte) followed by payload command words. Uses bit 23 of the
// header as a loop-detection marker while walking, removed afterwards.
// Returns an approximate cycle cost. NOTE(review): some lines of the walk
// loop (len extraction, marker set/test) are missing from this view.
397 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
399 uint32_t addr, *list;
400 uint32_t *llist_entry = NULL;
401 int len, left, count;
// Preserve ordering with any buffered single-word writes.
404 if (unlikely(gpu.cmd_len > 0))
407 // ff7 sends its main list twice, detect this
// Same frame, nearly same hcnt, previous list was large: pre-mark the
// previous list head so the duplicate walk terminates immediately.
408 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
409 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
410 gpu.state.last_list.cycles > 2048)
412 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
413 *llist_entry |= 0x800000;
416 log_io("gpu_dma_chain\n");
417 addr = start_addr & 0xffffff;
// 0xffffff is the end-of-list terminator.
418 for (count = 0; addr != 0xffffff; count++)
420 list = rambase + (addr & 0x1fffff) / 4;
422 addr = list[0] & 0xffffff;
425 cpu_cycles += 5 + len;
427 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
429 // loop detection marker
430 // (bit23 set causes DMA error on real machine, so
431 // unlikely to be ever set by the game)
// Payload starts right after the header word.
435 left = check_cmd(list + 1, len);
437 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
444 // remove loop detection markers
445 addr = start_addr & 0x1fffff;
446 while (count-- > 0) {
447 list = rambase + addr / 4;
448 addr = list[0] & 0x1fffff;
449 list[0] &= ~0x800000;
// Also clear the ff7-duplicate marker if we set one above.
452 *llist_entry &= ~0x800000;
// Remember this list for duplicate detection next time.
454 gpu.state.last_list.frame = *gpu.state.frame_count;
455 gpu.state.last_list.hcnt = *gpu.state.hcnt;
456 gpu.state.last_list.cycles = cpu_cycles;
457 gpu.state.last_list.addr = start_addr;
// Plugin API: bulk GP0 data read (DMA from VRAM into 'mem'), serviced by
// the transfer state set up via GP0 0xc0.
462 void GPUreadDataMem(uint32_t *mem, int count)
464 log_io("gpu_dma_read %p %d\n", mem, count);
// Flush buffered writes first to keep command ordering.
466 if (unlikely(gpu.cmd_len > 0))
470 do_vram_io(mem, count, 1);
// Plugin API: read a single GP0 response word (VRAM read data or
// info-request result latched in gpu.gp0).
473 uint32_t GPUreadData(void)
477 if (unlikely(gpu.cmd_len > 0))
482 do_vram_io(&ret, 1, 1);
484 log_io("gpu_read %08x\n", ret);
// Plugin API: read the GPU status register (GPUSTAT). Buffered writes are
// flushed first so status reflects all commands issued so far.
488 uint32_t GPUreadStatus(void)
492 if (unlikely(gpu.cmd_len > 0))
495 ret = gpu.status.reg;
496 log_io("gpu_read_status %08x\n", ret);
// Savestate payload fields (struct header not visible in this view);
// layout follows the standard PSX GPU plugin freeze interface.
502 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
503 uint32_t ulStatus; // current gpu status
504 uint32_t ulControl[256]; // latest control register values
505 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Plugin API: save or restore GPU state ('type' selects the direction —
// its decoding is not visible in this view). Saves copy VRAM, GP1 regs
// and the GP0 0xE? shadow regs (stored at ulControl+0xe0) plus status;
// restores do the reverse and then replay state into the renderer.
508 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// --- save path ---
516 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
517 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
518 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
519 freeze->ulStatus = gpu.status.reg;
// --- restore path ---
522 renderer_invalidate_caches(0, 0, 1024, 512);
523 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
524 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
525 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
526 gpu.status.reg = freeze->ulStatus;
// Replay GP1 regs 8..1 through GPUwriteStatus to rebuild derived state;
// the xor toggles the shadowed value so the "unchanged value" early-out
// in GPUwriteStatus doesn't skip the write.
527 for (i = 8; i > 0; i--) {
528 gpu.regs[i] ^= 1; // avoid reg change detection
529 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
531 renderer_sync_ecmds(gpu.ex_regs);
// Plugin API: called once per displayed frame (vsync). Flushes the
// renderer, applies the frameskip policy, and presents the frame when the
// framebuffer changed. NOTE(review): the present/vout call itself is not
// visible in this view.
538 void GPUupdateLace(void)
542 renderer_flush_queues();
// Nothing to show: display blanked or no drawing since last present.
544 if (gpu.status.blanking || !gpu.state.fb_dirty)
547 if (gpu.frameskip.set) {
548 if (!gpu.frameskip.frame_ready) {
// Safety valve: don't skip more than ~8 frames after the last flip.
549 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
551 gpu.frameskip.active = 0;
553 gpu.frameskip.frame_ready = 0;
557 gpu.state.fb_dirty = 0;
// Plugin API: vblank notification. Decides whether interlaced rendering
// should be used and tells the renderer which field (lcf) to draw.
560 void GPUvBlank(int is_vblank, int lcf)
562 int interlace = gpu.state.allow_interlace
563 && gpu.status.interlace && gpu.status.dheight;
564 // interlace doesn't look nice on progressive displays,
565 // so we have this "auto" mode here for games that don't read vram
// Mode 2 ("auto"): only interlace if the game read VRAM recently
// (i.e. it may depend on the real field contents).
566 if (gpu.state.allow_interlace == 2
567 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// Reconfigure the renderer on any change (or every field while active).
571 if (interlace || interlace != gpu.state.old_interlace) {
572 gpu.state.old_interlace = interlace;
576 renderer_flush_queues();
577 renderer_set_interlace(interlace, !lcf);
581 #include "../../frontend/plugin_lib.h"
// Frontend hook: receive configuration and shared counters from the
// rearmed frontend (frameskip setting/advice, frame and hcnt counters,
// interlace policy) and forward renderer/vout configuration.
583 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
585 gpu.frameskip.set = cbs->frameskip;
586 gpu.frameskip.advice = &cbs->fskip_advice;
587 gpu.frameskip.active = 0;
588 gpu.frameskip.frame_ready = 1;
// Replace the dummy &gpu.zero counters set up at init time.
589 gpu.state.hcnt = cbs->gpu_hcnt;
590 gpu.state.frame_count = cbs->gpu_frame_count;
591 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
// Give the frontend direct access to VRAM if it wants it.
593 if (cbs->pl_vout_set_raw_vram)
594 cbs->pl_vout_set_raw_vram(gpu.vram);
595 renderer_set_config(cbs);
596 vout_set_config(cbs);
599 // vim:shiftwidth=2:expandtab