2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Helper macros: element count of a true array (not valid on pointers),
// branch-prediction hint, and shorthand for GCC's noinline attribute.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug log line prefixed with the current frame count and hcnt value.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Compile-time log switches: uncomment a line to route that channel to gpu_log.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Global GPU state singleton. 2048-byte alignment is presumably for fast
// address masking / cache behavior -- TODO confirm against gpu.h layout.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Forward declarations for helpers referenced before their definitions.
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
// Flush any buffered-but-unparsed GP0 words and close out an in-flight
// VRAM transfer so a GPU reset starts from a clean state.
// NOTE(review): this extract elides some lines (braces and, presumably,
// the cmd_len / dma.h resets) -- verify against the full source.
32 static noinline void do_cmd_reset(void)
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
// Reset the emulated GPU to power-on defaults: clear the GP1 register
// shadow, re-seed the E1..E7 shadow regs with their command number in the
// top byte, force the status word and a 256x240 screen.
// NOTE(review): some lines (locals, braces) are elided in this extract.
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// ex_regs[i] holds the last E0+i command word; seed with an empty payload.
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// Power-on GPUSTAT value.
52 gpu.status.reg = 0x14802000;
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the horizontal display range x1..x2.
// 2560 presumably represents a full-width scanline span in GPU ticks --
// out-of-range spans fall back to the nominal hres.
// NOTE(review): braces and the 'else' appear elided in this extract.
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
62 if (sw <= 0 || sw >= 2560)
// full width
64 gpu.screen.w = gpu.screen.hres;
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the vertical display range y1..y2;
// the dheight (double-height / interlace) bit affects the result.
// NOTE(review): the body is truncated in this extract -- the branch taken
// when dheight is set, and the assignment to gpu.screen.h, are not visible.
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
// Per-flip frameskip state machine: frontend "advice" forces skipping on,
// otherwise skip at most frameskip.set consecutive frames, then render one.
// NOTE(review): braces / 'else' lines are elided in this extract.
80 static noinline void decide_frameskip(void)
82 if (gpu.frameskip.active)
// not skipping this frame: reset the streak and mark a frame ready to show
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
// Decide whether skipping is allowed based on the E3 (draw area top-left)
// command word: disallow when drawing lands inside the displayed screen
// rectangle, unless interlace is on.
97 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
// no frameskip if it decides to draw to display area,
// but not for interlace since it'll most likely always do that
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned subtract-and-compare is the idiomatic (x0 <= x < x0+w) range test
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// GP1 info query (GP1(0x10..0x1F)): publish the requested value via gpu.gp0
// so a following GPUREAD returns it.
// NOTE(review): the switch's case labels, default and braces are elided in
// this extract -- which selector maps to which line cannot be confirmed here.
108 static noinline void get_gpu_info(uint32_t data)
110 switch (data & 0x0f) {
115 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
118 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// GPUinit tail (the function header and earlier lines are not visible in
// this extract): initialize the renderer, and point the frame/hcnt counter
// pointers at a local zero until the frontend supplies real counters via
// GPUrearmedCallbacks.
133 ret |= renderer_init();
135 gpu.state.frame_count = &gpu.zero;
136 gpu.state.hcnt = &gpu.zero;
137 gpu.frameskip.active = 0;
// Plugin shutdown entry point: tear down the video output layer and
// propagate its result code.
144 long GPUshutdown(void)
146 return vout_finish();
// GP1 control-port write. The command number is in the top byte; the
// register shadow (gpu.regs) lets identical re-writes be skipped.
// NOTE(review): the dispatch's case labels and several lines are elided in
// this extract; command meanings below are inferred from the visible field
// updates and the standard GP1 command map -- verify against the full file.
149 void GPUwriteStatus(uint32_t data)
// horizontal / vertical resolution tables indexed by display mode bits
151 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
152 static const short vres[4] = { 240, 480, 256, 480 };
153 uint32_t cmd = data >> 24;
155 if (cmd < ARRAY_SIZE(gpu.regs)) {
// skip duplicate writes, except for reset-type and display-start commands
156 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
158 gpu.regs[cmd] = data;
161 gpu.state.fb_dirty = 1;
// display enable (blanking) -- presumably GP1(0x03)
171 gpu.status.blanking = data & 1;
// DMA direction -- presumably GP1(0x04)
174 gpu.status.dma = data & 3;
// display start address -- presumably GP1(0x05)
177 gpu.screen.x = data & 0x3ff;
178 gpu.screen.y = (data >> 10) & 0x3ff;
179 if (gpu.frameskip.set) {
// re-evaluate skip permission against the current draw area (E3 shadow)
180 decide_frameskip_allow(gpu.ex_regs[3]);
181 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
183 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range -- presumably GP1(0x06)
188 gpu.screen.x1 = data & 0xfff;
189 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range -- presumably GP1(0x07)
193 gpu.screen.y1 = data & 0x3ff;
194 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode -- presumably GP1(0x08): repack mode bits into status 16..22
198 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
199 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
200 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1(0x10..0x1F): info query
205 if ((cmd & 0xf0) == 0x10)
// optional platform hook for observing control writes
210 #ifdef GPUwriteStatus_ext
211 GPUwriteStatus_ext(data);
// Number of extra parameter words each GP0 command carries (the parser
// computes total length as 1 + cmd_lengths[cmd]).
// NOTE(review): the opening '{' and closing '};' lines are elided in this
// extract.
215 const unsigned char cmd_lengths[256] =
217 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
220 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
221 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
222 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
223 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
224 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
225 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM.
235 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one row of l pixels between VRAM at (x, y) and mem; direction is
// selected by is_read (VRAM->mem for reads, mem->VRAM for writes).
// NOTE(review): the if/else around the two memcpy calls is elided in this
// extract.
237 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
239 uint16_t *vram = VRAM_MEM_XY(x, y);
// l * 2: pixel count to byte count (16bpp)
241 memcpy(mem, vram, l * 2);
243 memcpy(vram, mem, l * 2);
// Stream up to count 32-bit words into/out of the VRAM rectangle tracked
// in gpu.dma, resuming a partially-transferred row via dma.offset.
// Returns the number of words actually consumed.
// NOTE(review): many lines (braces, offset/row bookkeeping, odd-count
// handling) are elided in this extract.
246 static int do_vram_io(uint32_t *data, int count, int is_read)
248 int count_initial = count;
249 uint16_t *sdata = (uint16_t *)data;
250 int x = gpu.dma.x, y = gpu.dma.y;
251 int w = gpu.dma.w, h = gpu.dma.h;
252 int o = gpu.dma.offset;
254 count *= 2; // operate in 16bpp pixels
// finish a row that a previous call left incomplete
256 if (gpu.dma.offset) {
257 l = w - gpu.dma.offset;
261 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows
274 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
276 do_vram_line(x, y, sdata, w, is_read);
// trailing partial row
282 do_vram_line(x, y, sdata, count, is_read);
288 finish_vram_transfer(is_read);
// back to 32-bit words for the caller
293 return count_initial - count / 2;
// Latch the target rectangle for a GP0 VRAM write (A0) or read (C0).
// Size fields wrap so that 0 means the maximum (1024 / 512).
296 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
299 log_anomaly("start_vram_transfer while old unfinished\n");
301 gpu.dma.x = pos_word & 0x3ff;
302 gpu.dma.y = (pos_word >> 16) & 0x1ff;
303 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
304 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
306 gpu.dma.is_read = is_read;
// keep a copy of the initial rectangle for finish_vram_transfer()
307 gpu.dma_start = gpu.dma;
309 renderer_flush_queues();
// for reads, pre-fetch the first two pixels into gp0 for GPUREAD
// XXX: wrong for width 1
312 // XXX: wrong for width 1
313 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
314 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
317 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
318 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Close out the current VRAM transfer; for writes, tell the renderer which
// rectangle changed so it can invalidate its caches.
// NOTE(review): the is_read branch and the dma.h reset are elided in this
// extract.
321 static void finish_vram_transfer(int is_read)
326 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
327 gpu.dma_start.w, gpu.dma_start.h);
// Parse a buffer of GP0 words: scan ahead to find a run of plain drawing
// commands that can be handed to the renderer in one do_cmd_list() call,
// while tracking status-affecting commands (E1..E7 shadow regs) and
// stopping at VRAM transfer commands (A0 write / C0 read) or at a command
// whose parameters have not fully arrived yet.
// Returns the number of unconsumed words (presumably -- header/return
// lines are elided in this extract, as are several braces and locals).
330 static noinline int do_cmd_buffer(uint32_t *data, int count)
332 int len, cmd, start, pos;
336 for (start = pos = 0; pos < count; )
// an in-progress VRAM transfer consumes raw data words first
342 pos += do_vram_io(data + pos, count - pos, 0);
348 // do look-ahead pass to detect SR changes and VRAM i/o
349 while (pos < count) {
350 uint32_t *list = data + pos;
352 len = 1 + cmd_lengths[cmd];
354 //printf(" %3d: %02x %d\n", pos, cmd, len);
355 if ((cmd & 0xf4) == 0x24) {
356 // flat textured prim
// texpage bits live in word 4 of the command; mirror them into E1 shadow
357 gpu.ex_regs[1] &= ~0x1ff;
358 gpu.ex_regs[1] |= list[4] & 0x1ff;
360 else if ((cmd & 0xf4) == 0x34) {
361 // shaded textured prim
362 gpu.ex_regs[1] &= ~0x1ff;
363 gpu.ex_regs[1] |= list[5] & 0x1ff;
365 else if (cmd == 0xe3)
// draw-area change affects whether frames may be skipped
366 decide_frameskip_allow(list[0]);
368 if (2 <= cmd && cmd < 0xc0)
370 else if ((cmd & 0xf8) == 0xe0)
// E0..E7: remember the latest command word
371 gpu.ex_regs[cmd & 7] = list[0];
373 if (pos + len > count) {
375 break; // incomplete cmd
377 if (cmd == 0xa0 || cmd == 0xc0)
// flush the run of draw commands collected so far (unless skipping)
382 if (pos - start > 0) {
383 if (!gpu.frameskip.active || !gpu.frameskip.allow)
384 do_cmd_list(data + start, pos - start);
388 if (cmd == 0xa0 || cmd == 0xc0) {
389 // consume vram write/read cmd
390 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// refresh the status word's low bits from the E1/E6 shadows
397 gpu.status.reg &= ~0x1fff;
398 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
399 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// when skipping, the renderer still needs the E-command state
401 if (gpu.frameskip.active)
402 renderer_sync_ecmds(gpu.ex_regs);
403 gpu.state.fb_dirty |= vram_dirty;
// Run whatever is buffered in gpu.cmd_buffer, then move any unconsumed
// tail (an incomplete command) back to the start of the buffer.
// NOTE(review): the update of gpu.cmd_len is elided in this extract.
408 static void flush_cmd_buffer(void)
410 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
412 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block): flush any partially-buffered command
// first, then feed the words straight to the parser; leftovers here mean
// data was lost, which is logged as an anomaly.
416 void GPUwriteDataMem(uint32_t *mem, int count)
420 log_io("gpu_dma_write %p %d\n", mem, count);
422 if (unlikely(gpu.cmd_len > 0))
425 left = do_cmd_buffer(mem, count);
427 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: append to the command buffer and flush when full.
430 void GPUwriteData(uint32_t data)
432 log_io("gpu_write %08x\n", data);
433 gpu.cmd_buffer[gpu.cmd_len++] = data;
434 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Execute a GP0 linked-list DMA chain starting at start_addr in PSX RAM.
// Each node's header word holds the 24-bit address of the next node (with
// 0xffffff terminating the list); visited nodes are tagged with bit 23 so
// a looping list is detected and the markers removed afterwards.
// Presumably returns a cycle estimate -- the return line is elided in this
// extract, as are several braces and the len extraction.
438 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
440 uint32_t addr, *list;
441 uint32_t *llist_entry = NULL;
442 int len, left, count;
445 if (unlikely(gpu.cmd_len > 0))
// FF7 sends its main list twice per frame; if the previous (expensive)
// list came in on the same frame/line, mark its head so the walk below
// terminates immediately on the duplicate.
448 // ff7 sends it's main list twice, detect this
449 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
450 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
451 gpu.state.last_list.cycles > 2048)
453 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
454 *llist_entry |= 0x800000;
457 log_io("gpu_dma_chain\n");
458 addr = start_addr & 0xffffff;
459 for (count = 0; addr != 0xffffff; count++)
461 list = rambase + (addr & 0x1fffff) / 4;
463 addr = list[0] & 0xffffff;
// rough cycle cost: per-node overhead plus payload words
466 cpu_cycles += 5 + len;
468 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
470 // loop detection marker
471 // (bit23 set causes DMA error on real machine, so
472 // unlikely to be ever set by the game)
476 left = do_cmd_buffer(list + 1, len);
478 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
485 // remove loop detection markers
486 addr = start_addr & 0x1fffff;
487 while (count-- > 0) {
488 list = rambase + addr / 4;
489 addr = list[0] & 0x1fffff;
490 list[0] &= ~0x800000;
// also clear the FF7 duplicate-list marker if one was placed
493 *llist_entry &= ~0x800000;
// remember this list so the duplicate-detection above can compare
495 gpu.state.last_list.frame = *gpu.state.frame_count;
496 gpu.state.last_list.hcnt = *gpu.state.hcnt;
497 gpu.state.last_list.cycles = cpu_cycles;
498 gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA): flush pending commands, then stream VRAM out through
// the active read transfer.
503 void GPUreadDataMem(uint32_t *mem, int count)
505 log_io("gpu_dma_read %p %d\n", mem, count);
507 if (unlikely(gpu.cmd_len > 0))
511 do_vram_io(mem, count, 1);
// Single-word GPUREAD: flush pending commands, pull one word from the
// active VRAM read transfer (the return path is elided in this extract).
514 uint32_t GPUreadData(void)
518 if (unlikely(gpu.cmd_len > 0))
523 do_vram_io(&ret, 1, 1);
525 log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush pending commands first so status bits derived from
// E-commands are up to date, then return the cached status word.
529 uint32_t GPUreadStatus(void)
533 if (unlikely(gpu.cmd_len > 0))
536 ret = gpu.status.reg;
537 log_io("gpu_read_status %08x\n", ret);
// Savestate layout fragment (the enclosing struct declaration is not
// visible in this extract): version, status word, GP1 register snapshot
// and the full 2 MB VRAM image.
543 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
544 uint32_t ulStatus; // current gpu status
545 uint32_t ulControl[256]; // latest control register values
546 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save/restore GPU state. The type selector's branch labels are elided in
// this extract (presumably 1 = save, 2 = load per the PSX plugin API).
// On load, control regs are replayed through GPUwriteStatus; the XOR-by-1
// trick makes each replayed value differ from the stored shadow so the
// duplicate-write skip in GPUwriteStatus cannot suppress it.
549 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// save: snapshot VRAM, GP1 regs (ex_regs stored at offset 0xe0) and status
557 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
558 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
559 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
560 freeze->ulStatus = gpu.status.reg;
// load: restore the same data, then re-apply control writes 8..1
563 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
564 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
565 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
566 gpu.status.reg = freeze->ulStatus;
567 for (i = 8; i > 0; i--) {
568 gpu.regs[i] ^= 1; // avoid reg change detection
569 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
571 renderer_sync_ecmds(gpu.ex_regs);
// invalidate the renderer's view of the whole 1024x512 VRAM
572 renderer_update_caches(0, 0, 1024, 512);
// Per-vsync hook: flush the renderer, skip presentation while blanking or
// when nothing was drawn, run the frameskip bookkeeping, then (presumably
// via an elided vout call) present and clear the dirty flag.
579 void GPUupdateLace(void)
583 renderer_flush_queues();
585 if (gpu.status.blanking || !gpu.state.fb_dirty)
588 if (gpu.frameskip.set) {
589 if (!gpu.frameskip.frame_ready) {
// give up waiting for a ready frame after ~9 frames without a flip
590 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
592 gpu.frameskip.active = 0;
594 gpu.frameskip.frame_ready = 0;
598 gpu.state.fb_dirty = 0;
// Vblank hook: decide whether interlaced rendering should be used and
// notify the renderer when the decision (or field, via lcf) changes.
601 void GPUvBlank(int is_vblank, int lcf)
603 int interlace = gpu.state.allow_interlace
604 && gpu.status.interlace && gpu.status.dheight;
605 // interlace doesn't look nice on progressive displays,
606 // so we have this "auto" mode here for games that don't read vram
// allow_interlace == 2 is "auto": only interlace when the game recently
// read VRAM (the branch body is elided in this extract)
607 if (gpu.state.allow_interlace == 2
608 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
612 if (interlace || interlace != gpu.state.old_interlace) {
613 gpu.state.old_interlace = interlace;
617 renderer_flush_queues();
618 renderer_set_interlace(interlace, !lcf);
622 #include "../../frontend/plugin_lib.h"
// Adopt frontend-provided callbacks and settings: frameskip policy,
// shared frame/hcnt counters, interlace preference, and raw-VRAM output;
// finally push the config down to the renderer and video-out layers.
624 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
626 gpu.frameskip.set = cbs->frameskip;
627 gpu.frameskip.advice = &cbs->fskip_advice;
628 gpu.frameskip.active = 0;
629 gpu.frameskip.frame_ready = 1;
630 gpu.state.hcnt = cbs->gpu_hcnt;
631 gpu.state.frame_count = cbs->gpu_frame_count;
632 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
634 if (cbs->pl_vout_set_raw_vram)
635 cbs->pl_vout_set_raw_vram(gpu.vram);
636 renderer_set_config(cbs);
637 vout_set_config(cbs);
640 // vim:shiftwidth=2:expandtab