2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a real array (invalid on pointers/decayed parameters)
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch-prediction hint: condition is expected to be false
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// debug log line prefixed with frame counter and hblank counter for timing context
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// io/anomaly logging compiled out by default; uncomment to route to gpu_log
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// the single global GPU state blob; 2048-byte alignment presumably so the
// asm/renderer side can mask offsets cheaply — TODO confirm against users
27 struct psx_gpu gpu __attribute__((aligned(2048)));

// forward declarations: command-buffer parser and VRAM DMA completion hook
29 static noinline int do_cmd_buffer(uint32_t *data, int count);
30 static void finish_vram_transfer(int is_read);
// Flush state that must not survive a GPU reset: drain any buffered
// command words, then complete a still-open VRAM transfer, if any.
32 static noinline void do_cmd_reset(void)
// drain partially-accumulated command words first
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
// if a VRAM read/write DMA is mid-flight, finalize it now
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
// Full GPU reset: clear control registers, re-seed the e0..e7 shadow
// registers, and restore power-on status and screen geometry.
43 static noinline void do_reset(void)
49 memset(gpu.regs, 0, sizeof(gpu.regs));
// each ex_regs slot holds its own command byte (0xe0+i) in the top byte
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
// power-on status word — TODO confirm bit meaning against status layout
52 gpu.status.reg = 0x14802000;
// default display mode: 256x240
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
// Derive the visible width from the programmed x1..x2 display range,
// scaled against the current horizontal resolution.
59 static noinline void update_width(void)
61 int sw = gpu.screen.x2 - gpu.screen.x1;
// nonsensical range: fall back to full horizontal resolution
62 if (sw <= 0 || sw >= 2560)
64 gpu.screen.w = gpu.screen.hres;
// 2560 appears to be the full GPU clock span of a scanline — TODO confirm
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Derive the visible height from the programmed y1..y2 display range
// (remainder of the body not visible in this chunk).
69 static noinline void update_height(void)
71 int sh = gpu.screen.y2 - gpu.screen.y1;
// dheight: double-height (interlaced) mode flag — presumably doubles sh
72 if (gpu.status.dheight)
// Decide whether the next frame should be skipped, based on the
// frontend's advice pointer and the configured skip pattern.
80 static noinline void decide_frameskip(void)
// a frame that was being skipped has now completed: mark it ready
82 if (gpu.frameskip.active)
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
// start skipping when the frontend advises it...
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
// ...or while within the configured consecutive-skip budget
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
94 gpu.frameskip.active = 0;
// Given a draw-area command word (GP0 0xe3), decide if frameskip is safe:
// skipping is disallowed when drawing targets the visible display area.
97 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned subtraction doubles as a "within [screen.x, screen.x+w)" test
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// GPU info query (GP1 0x10): returns internal register state through gp0.
108 static noinline void get_gpu_info(uint32_t data)
110 switch (data & 0x0f) {
// low selectors return the matching e0..e7 shadow register (20 bits)
115 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// this selector returns the draw-offset register (e5) — TODO confirm case label
118 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): enclosing function signature not visible in this chunk —
// presumably plugin init (GPUinit); confirm against the full file.
// accumulate renderer init status into the overall result
133 ret |= renderer_init();
// point the counters at a local zero until the frontend wires real ones
// (see GPUrearmedCallbacks, which replaces these pointers)
135 gpu.state.frame_count = &gpu.zero;
136 gpu.state.hcnt = &gpu.zero;
137 gpu.frameskip.active = 0;
// Plugin shutdown entry point; tears down video output.
144 long GPUshutdown(void)
146 return vout_finish();
// GP1 control-port write. The command is the top byte of `data`; the
// visible lines below are the bodies of a switch whose case labels are
// not shown in this chunk.
149 void GPUwriteStatus(uint32_t data)
// horizontal/vertical resolution lookup tables indexed from status bits
151 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
152 static const short vres[4] = { 240, 480, 256, 480 };
153 uint32_t cmd = data >> 24;
// latch the raw register value, skipping redundant re-writes
// (cmds 0/1 and 5 are exempt from the dedup — TODO confirm why)
155 if (cmd < ARRAY_SIZE(gpu.regs)) {
156 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
158 gpu.regs[cmd] = data;
161 gpu.state.fb_dirty = 1;
// display blanking on/off
171 gpu.status.blanking = data & 1;
// DMA direction
174 gpu.status.dma = data & 3;
// display start position; may trigger a frameskip decision on flip
177 gpu.screen.x = data & 0x3ff;
178 gpu.screen.y = (data >> 10) & 0x3ff;
179 if (gpu.frameskip.set) {
180 decide_frameskip_allow(gpu.ex_regs[3]);
// only re-decide once per emulated frame
181 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
183 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
188 gpu.screen.x1 = data & 0xfff;
189 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
193 gpu.screen.y1 = data & 0x3ff;
194 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: repack mode bits into status and refresh resolution
198 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
199 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
200 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1 0x10..0x1f: info query
205 if ((cmd & 0xf0) == 0x10)
// Number of EXTRA parameter words that follow each GP0 command byte
// (total packet length is 1 + cmd_lengths[cmd]). Variable-length
// commands (lines, VRAM transfers) hold 0 here and are special-cased
// by the parser.
211 const unsigned char cmd_lengths[256] =
213 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
215 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
216 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
217 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
218 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
219 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
220 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
221 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// address of pixel (x, y) in VRAM, laid out as 1024 16bpp pixels per row
231 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// Copy one row of `l` 16bpp pixels between VRAM and a host buffer;
// direction is chosen by is_read (VRAM -> mem when reading).
233 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
235 uint16_t *vram = VRAM_MEM_XY(x, y);
237 memcpy(mem, vram, l * 2);
239 memcpy(vram, mem, l * 2);
// Stream `count` 32-bit words of an in-progress VRAM transfer rectangle,
// resuming from gpu.dma state. Returns the number of words consumed.
242 static int do_vram_io(uint32_t *data, int count, int is_read)
244 int count_initial = count;
// transfers operate on 16bpp pixels, so reinterpret the word buffer
245 uint16_t *sdata = (uint16_t *)data;
246 int x = gpu.dma.x, y = gpu.dma.y;
247 int w = gpu.dma.w, h = gpu.dma.h;
248 int o = gpu.dma.offset;
250 count *= 2; // operate in 16bpp pixels
// finish a row that was left partially transferred last time
252 if (gpu.dma.offset) {
253 l = w - gpu.dma.offset;
257 do_vram_line(x + o, y, sdata, l, is_read);
// bulk-copy whole rows while enough data remains
270 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
272 do_vram_line(x, y, sdata, w, is_read);
// leftover partial row: copy it and remember the offset for next call
278 do_vram_line(x, y, sdata, count, is_read);
// rectangle fully transferred — finalize
284 finish_vram_transfer(is_read);
// convert remaining pixels back to consumed 32-bit words
289 return count_initial - count / 2;
// Begin a VRAM read (0xc0) or write (0xa0) transfer: decode the target
// rectangle from the command parameters and snapshot it in gpu.dma.
292 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
// a previous transfer was never finished — log it
295 log_anomaly("start_vram_transfer while old unfinished\n");
297 gpu.dma.x = pos_word & 0x3ff;
298 gpu.dma.y = (pos_word >> 16) & 0x1ff;
// size 0 encodes the maximum (1024x512) via the -1/+1 wrap
299 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
300 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
302 gpu.dma.is_read = is_read;
// keep the original rect for finish_vram_transfer/do_cmd_reset
303 gpu.dma_start = gpu.dma;
// make sure queued draws land in VRAM before we read/overwrite it
305 renderer_flush_queues();
308 // XXX: wrong for width 1
// pre-load gp0 with the first two pixels so an immediate read works
309 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// remember when vram was last read (used by auto-interlace heuristic)
310 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
313 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
314 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Complete a VRAM transfer; for writes the renderer's caches covering
// the transferred rectangle must be invalidated.
317 static void finish_vram_transfer(int is_read)
322 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
323 gpu.dma_start.w, gpu.dma_start.h);
// Parse and dispatch a buffer of GP0 command words. Scans ahead to find
// spans of plain draw commands (handed to do_cmd_list in one batch) and
// breaks out for VRAM i/o and state-register changes. Returns the number
// of words left unconsumed (incomplete trailing command).
326 static noinline int do_cmd_buffer(uint32_t *data, int count)
328 int len, cmd, start, pos;
332 for (start = pos = 0; pos < count; )
// an active VRAM transfer consumes words directly
338 pos += do_vram_io(data + pos, count - pos, 0);
344 // do look-ahead pass to detect SR changes and VRAM i/o
345 while (pos < count) {
346 uint32_t *list = data + pos;
348 len = 1 + cmd_lengths[cmd];
350 //printf(" %3d: %02x %d\n", pos, cmd, len);
// mirror texpage bits from primitives into ex_regs[1] so status stays
// consistent even when draws are batched
351 if ((cmd & 0xf4) == 0x24) {
352 // flat textured prim
353 gpu.ex_regs[1] &= ~0x1ff;
354 gpu.ex_regs[1] |= list[4] & 0x1ff;
356 else if ((cmd & 0xf4) == 0x34) {
357 // shaded textured prim
358 gpu.ex_regs[1] &= ~0x1ff;
359 gpu.ex_regs[1] |= list[5] & 0x1ff;
// draw-area change affects whether frameskip stays allowed
361 else if (cmd == 0xe3)
362 decide_frameskip_allow(list[0]);
// 0x02..0xbf are drawing commands (dirty the framebuffer)
364 if (2 <= cmd && cmd < 0xc0)
// e0..e7: latch into the shadow registers
366 else if ((cmd & 0xf8) == 0xe0)
367 gpu.ex_regs[cmd & 7] = list[0];
// not all parameter words arrived yet — stop here, keep the remainder
369 if (pos + len > count) {
371 break; // incomplete cmd
// VRAM write/read breaks the batch: handle it outside the scan loop
373 if (cmd == 0xa0 || cmd == 0xc0)
// hand the accumulated span of draw commands to the renderer,
// unless this frame is being skipped
378 if (pos - start > 0) {
379 if (!gpu.frameskip.active || !gpu.frameskip.allow)
380 do_cmd_list(data + start, pos - start);
384 if (cmd == 0xa0 || cmd == 0xc0) {
385 // consume vram write/read cmd
386 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// rebuild the status word's texpage/mask bits from the shadow regs
393 gpu.status.reg &= ~0x1fff;
394 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
395 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// when skipping, the renderer still needs the e-command state
397 if (gpu.frameskip.active)
398 renderer_sync_ecmds(gpu.ex_regs);
399 gpu.state.fb_dirty |= vram_dirty;
// Run the parser over the accumulated command buffer and move any
// incomplete trailing command to the front for the next call.
404 static void flush_cmd_buffer(void)
406 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
408 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Block-DMA write entry point: feed `count` words straight to the parser,
// flushing any previously buffered partial command first.
412 void GPUwriteDataMem(uint32_t *mem, int count)
416 log_io("gpu_dma_write %p %d\n", mem, count);
418 if (unlikely(gpu.cmd_len > 0))
421 left = do_cmd_buffer(mem, count);
// leftover words from a DMA block can't be kept — report the loss
423 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: accumulate into the command buffer and flush
// when it fills up.
426 void GPUwriteData(uint32_t data)
428 log_io("gpu_write %08x\n", data);
429 gpu.cmd_buffer[gpu.cmd_len++] = data;
430 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked-list DMA chain starting at start_addr. Loop protection:
// bit 23 is set on visited headers and stripped again afterwards.
// Returns cpu_cycles consumed (not visible in this chunk — TODO confirm).
434 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
436 uint32_t addr, *list;
437 uint32_t *llist_entry = NULL;
438 int len, left, count;
441 if (unlikely(gpu.cmd_len > 0))
444 // ff7 sends it's main list twice, detect this
// same frame, nearly same hcnt, and a heavy previous list: pre-mark the
// old list head so the walk below terminates immediately on the repeat
445 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
446 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
447 gpu.state.last_list.cycles > 2048)
449 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
450 *llist_entry |= 0x800000;
453 log_io("gpu_dma_chain\n");
// 0xffffff is the chain terminator
454 addr = start_addr & 0xffffff;
455 for (count = 0; addr != 0xffffff; count++)
457 list = rambase + (addr & 0x1fffff) / 4;
// header word: next-link in low 24 bits, payload length in the rest
459 addr = list[0] & 0xffffff;
// rough cycle cost per node — TODO confirm the model used
462 cpu_cycles += 5 + len;
464 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
466 // loop detection marker
467 // (bit23 set causes DMA error on real machine, so
468 // unlikely to be ever set by the game)
// payload follows the header word
472 left = do_cmd_buffer(list + 1, len);
474 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
481 // remove loop detection markers
482 addr = start_addr & 0x1fffff;
483 while (count-- > 0) {
484 list = rambase + addr / 4;
485 addr = list[0] & 0x1fffff;
486 list[0] &= ~0x800000;
// also clear the marker planted by the ff7 double-list workaround
489 *llist_entry &= ~0x800000;
// remember this list so a resend can be detected next time
491 gpu.state.last_list.frame = *gpu.state.frame_count;
492 gpu.state.last_list.hcnt = *gpu.state.hcnt;
493 gpu.state.last_list.cycles = cpu_cycles;
494 gpu.state.last_list.addr = start_addr;
// Block-DMA read entry point: stream `count` words out of the active
// VRAM read transfer.
499 void GPUreadDataMem(uint32_t *mem, int count)
501 log_io("gpu_dma_read %p %d\n", mem, count);
// flush pending commands so a VRAM->CPU transfer can have been set up
503 if (unlikely(gpu.cmd_len > 0))
507 do_vram_io(mem, count, 1);
// Single-word GP0 read: pulls one word from the active VRAM read
// transfer (or the latched gp0 value when no transfer is active —
// not visible in this chunk, TODO confirm).
510 uint32_t GPUreadData(void)
514 if (unlikely(gpu.cmd_len > 0))
519 do_vram_io(&ret, 1, 1);
521 log_io("gpu_read %08x\n", ret);
// GPUSTAT read: flush buffered commands first so status bits reflect
// everything written so far.
525 uint32_t GPUreadStatus(void)
529 if (unlikely(gpu.cmd_len > 0))
532 ret = gpu.status.reg;
533 log_io("gpu_read_status %08x\n", ret);
// Savestate layout fields (enclosing struct declaration not visible in
// this chunk — presumably struct GPUFreeze; confirm against header).
539 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
540 uint32_t ulStatus; // current gpu status
541 uint32_t ulControl[256]; // latest control register values
542 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Savestate entry point: `type` selects save vs. load (dispatch not
// visible in this chunk). Control registers 0xe0.. alias ex_regs.
545 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// save path: snapshot VRAM, control regs, shadow regs and status
553 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
554 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
555 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
556 freeze->ulStatus = gpu.status.reg;
// load path: restore the same data...
559 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
560 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
561 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
562 gpu.status.reg = freeze->ulStatus;
// ...then replay control regs 8..1; the XOR trick makes the stored value
// differ from the written one so GPUwriteStatus's dedup doesn't skip it
563 for (i = 8; i > 0; i--) {
564 gpu.regs[i] ^= 1; // avoid reg change detection
565 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// resync renderer state and invalidate all cached VRAM
567 renderer_sync_ecmds(gpu.ex_regs);
568 renderer_update_caches(0, 0, 1024, 512);
// Per-vblank hook: flush the renderer, apply frameskip bookkeeping, and
// present the frame when the framebuffer is dirty and not blanked.
575 void GPUupdateLace(void)
579 renderer_flush_queues();
// nothing to show: screen blanked or no drawing happened
581 if (gpu.status.blanking || !gpu.state.fb_dirty)
584 if (gpu.frameskip.set) {
585 if (!gpu.frameskip.frame_ready) {
// give up skipping if no complete frame appeared for ~9 frames
// (prevents a stuck display)
586 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
588 gpu.frameskip.active = 0;
590 gpu.frameskip.frame_ready = 0;
594 gpu.state.fb_dirty = 0;
// VBlank notification: decide the effective interlace mode and push it
// to the renderer when it changes (or every field while interlacing).
597 void GPUvBlank(int is_vblank, int lcf)
598 int interlace = gpu.state.allow_interlace
600 && gpu.status.interlace && gpu.status.dheight;
601 // interlace doesn't look nice on progressive displays,
602 // so we have this "auto" mode here for games that don't read vram
// mode 2 = auto: only interlace if VRAM was read within the last frame
603 if (gpu.state.allow_interlace == 2
604 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
// re-program the renderer on any change, and every field when active
// (lcf selects the current field)
608 if (interlace || interlace != gpu.state.old_interlace) {
609 gpu.state.old_interlace = interlace;
613 renderer_flush_queues();
614 renderer_set_interlace(interlace, !lcf);
618 #include "../../frontend/plugin_lib.h"
// Frontend hookup: copy configuration and wire the shared counters /
// advice pointers provided by the ReARMed frontend.
620 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
622 gpu.frameskip.set = cbs->frameskip;
623 gpu.frameskip.advice = &cbs->fskip_advice;
// reset skip state so a config change starts from a clean slate
624 gpu.frameskip.active = 0;
625 gpu.frameskip.frame_ready = 1;
// replace the &gpu.zero placeholders installed at init time
626 gpu.state.hcnt = cbs->gpu_hcnt;
627 gpu.state.frame_count = cbs->gpu_frame_count;
628 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
// let the frontend display VRAM directly if it supports that
630 if (cbs->pl_vout_set_raw_vram)
631 cbs->pl_vout_set_raw_vram(gpu.vram);
// propagate config to renderer and video-out layers
632 renderer_set_config(cbs);
633 vout_set_config(cbs);
636 // vim:shiftwidth=2:expandtab