2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Small helpers: static-array element count, branch-prediction hint and
// a "never inline" attribute (GCC/Clang extensions).
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug logging, prefixed with the current frame counter and hblank counter.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly logging are compiled out by default; uncomment the
// gpu_log aliases to enable them.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Global GPU state shared by the whole plugin. 2048-byte alignment is
// presumably required by the renderer / asm side for cheap addressing of
// its fields -- TODO confirm against the renderer implementation.
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// Reset the GPU to power-on defaults: clear the latched control registers,
// set the reset value of the status word, and fall back to a 256x240
// display. (Some lines of this function are elided in this view.)
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000; // status word reset value (ready flags set)
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width in pixels from the horizontal display range
// (x1..x2, in GPU clock ticks) scaled by the current horizontal resolution.
// 2560 is the full-scanline tick count used as the scale reference.
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
// nonsense range - fall back to the full resolution width
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the vertical display range (y1..y2).
// NOTE(review): the dheight (interlace/double-height) branch body is elided
// here -- presumably it doubles sh; confirm against the full source.
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
// Decide whether the upcoming frame should be skipped. A skipped frame
// resets the skip counter and marks the previous frame as ready; skipping
// is (re)enabled either by external advice (*gpu.frameskip.advice) or by
// the configured "skip N frames" setting, otherwise disabled.
60 static noinline void decide_frameskip(void)
62 if (gpu.frameskip.active)
65 gpu.frameskip.cnt = 0;
66 gpu.frameskip.frame_ready = 1;
69 if (!gpu.frameskip.active && *gpu.frameskip.advice)
70 gpu.frameskip.active = 1;
71 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
72 gpu.frameskip.active = 1;
74 gpu.frameskip.active = 0;
// Given the latest e3 (draw area top-left) command word, allow frameskip
// only when the draw origin lies outside the displayed screen region.
// The unsigned subtraction folds the "x < screen.x" and
// "x >= screen.x + w" checks into a single compare.
77 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// GP1(0x10) "get GPU info": latch the requested internal value into
// gpu.gp0, to be returned by the next data-port read. The values come
// from the shadowed e1-e7 command registers (case labels elided here).
88 static noinline void get_gpu_info(uint32_t data)
90 switch (data & 0x0f) {
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// (fragment of GPUinit) initialize the renderer backend and point the
// frame/hblank counters at a dummy zero until the emu core registers
// its real counters.
113 ret |= renderer_init();
115 gpu.state.frame_count = &gpu.zero;
116 gpu.state.hcnt = &gpu.zero;
// Plugin API: shutdown - tear down the video output backend and return
// its status.
121 long GPUshutdown(void)
123 return vout_finish();
// Plugin API: GP1 control-port write. Dispatches on the command byte
// (data >> 24); the switch's case labels are elided in this view, so the
// groups below are annotated with the GP1 command they correspond to.
126 void GPUwriteStatus(uint32_t data)
// hres index comes from status bits 16-18, vres index from bits 19-20
128 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
129 static const short vres[4] = { 240, 480, 256, 480 };
130 uint32_t cmd = data >> 24;
// latch the raw write; repeated identical writes are ignored, except
// cmd 0 (reset) and cmd 5 (display start) which always take effect
132 if (cmd < ARRAY_SIZE(gpu.regs)) {
133 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
135 gpu.regs[cmd] = data;
138 gpu.state.fb_dirty = 1;
// GP1(0x03): display blanking on/off
145 gpu.status.blanking = data & 1;
// GP1(0x04): DMA direction
148 gpu.status.dma = data & 3;
// GP1(0x05): display start address; re-evaluate frameskip since a
// display-start change on a new frame counts as a buffer flip
151 gpu.screen.x = data & 0x3ff;
152 gpu.screen.y = (data >> 10) & 0x3ff;
153 if (gpu.frameskip.set) {
154 decide_frameskip_allow(gpu.ex_regs[3]);
155 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
157 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// GP1(0x06): horizontal display range
162 gpu.screen.x1 = data & 0xfff;
163 gpu.screen.x2 = (data >> 12) & 0xfff;
// GP1(0x07): vertical display range
167 gpu.screen.y1 = data & 0x3ff;
168 gpu.screen.y2 = (data >> 10) & 0x3ff;
// GP1(0x08): display mode - fold the mode bits into status bits 16-22,
// then refresh the cached resolution from the lookup tables above
172 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
173 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
174 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1(0x10..0x1f): GPU info request, answered via get_gpu_info()
179 if ((cmd & 0xf0) == 0x10)
// Number of extra parameter words (beyond the command word itself) for
// each GP0 command opcode; 0 also covers unused/no-parameter opcodes.
// Used by check_cmd() to step through packed command lists.
185 const unsigned char cmd_lengths[256] =
187 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
190 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
191 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
192 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
193 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
194 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
195 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x, y) in the 1024-pixel-wide 16bpp VRAM.
205 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l pixels between VRAM and mem.
// Direction: is_read != 0 means VRAM -> mem, otherwise mem -> VRAM.
207 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
209 uint16_t *vram = VRAM_MEM_XY(x, y);
211 memcpy(mem, vram, l * 2);
213 memcpy(vram, mem, l * 2);
// Transfer (part of) a rectangular VRAM block to/from the data stream,
// resuming a transfer previously set up by start_vram_transfer().
// Returns the number of 32-bit words consumed from data.
216 static int do_vram_io(uint32_t *data, int count, int is_read)
218 int count_initial = count;
219 uint16_t *sdata = (uint16_t *)data;
220 int x = gpu.dma.x, y = gpu.dma.y;
221 int w = gpu.dma.w, h = gpu.dma.h;
222 int o = gpu.dma.offset;
224 count *= 2; // operate in 16bpp pixels
// finish a row that was left partially transferred by a previous call
226 if (gpu.dma.offset) {
227 l = w - gpu.dma.offset;
231 do_vram_line(x + o, y, sdata, l, is_read);
// whole rows
244 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
246 do_vram_line(x, y, sdata, w, is_read);
// trailing partial row; remainder recorded in gpu.dma.offset (elided)
249 if (h > 0 && count > 0) {
251 do_vram_line(x, y, sdata, count, is_read);
259 return count_initial - count / 2;
// Set up a GP0(0xa0) VRAM write or GP0(0xc0) VRAM read transfer from the
// position and size command words; subsequent data-port traffic is then
// routed through do_vram_io().
262 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
265 log_anomaly("start_vram_transfer while old unfinished\n");
267 gpu.dma.x = pos_word & 1023;
268 gpu.dma.y = (pos_word >> 16) & 511;
269 gpu.dma.w = size_word & 0xffff; // ?
270 gpu.dma.h = size_word >> 16;
// make sure queued draws land in VRAM before we read it back
273 renderer_flush_queues();
276 // XXX: wrong for width 1
// prefetch the first two pixels into gp0 for the next GPUreadData()
277 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
// writes will bypass the renderer, so drop its cached view of the area
280 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
283 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
284 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Scan a GP0 command stream: route VRAM i/o words through do_vram_io(),
// shadow the e1-e7 state commands into gpu.ex_regs, hand drawing spans to
// the renderer (unless frameskipping), and keep the status register's
// texture-page bits in sync. Returns the number of words left unconsumed
// (an incomplete trailing command).
287 static int check_cmd(uint32_t *data, int count)
289 int len, cmd, start, pos;
293 for (start = pos = 0; pos < count; )
// an active VRAM transfer eats the stream first
299 pos += do_vram_io(data + pos, count - pos, 0);
305 // do look-ahead pass to detect SR changes and VRAM i/o
306 while (pos < count) {
307 uint32_t *list = data + pos;
309 len = 1 + cmd_lengths[cmd];
311 //printf(" %3d: %02x %d\n", pos, cmd, len);
// texture-page bits ride inside textured prims; mirror them into e1
312 if ((cmd & 0xf4) == 0x24) {
313 // flat textured prim
314 gpu.ex_regs[1] &= ~0x1ff;
315 gpu.ex_regs[1] |= list[4] & 0x1ff;
317 else if ((cmd & 0xf4) == 0x34) {
318 // shaded textured prim
319 gpu.ex_regs[1] &= ~0x1ff;
320 gpu.ex_regs[1] |= list[5] & 0x1ff;
322 else if (cmd == 0xe3)
323 decide_frameskip_allow(list[0]);
// 0x02..0xbf touch VRAM contents
325 if (2 <= cmd && cmd < 0xc0)
327 else if ((cmd & 0xf8) == 0xe0)
328 gpu.ex_regs[cmd & 7] = list[0];
330 if (pos + len > count) {
332 break; // incomplete cmd
334 if (cmd == 0xa0 || cmd == 0xc0)
// flush the scanned span to the renderer (unless this frame is skipped)
339 if (pos - start > 0) {
340 if (!gpu.frameskip.active || !gpu.frameskip.allow)
341 do_cmd_list(data + start, pos - start);
345 if (cmd == 0xa0 || cmd == 0xc0) {
346 // consume vram write/read cmd
347 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// refresh status bits 0-12 from the shadowed e1/e6 registers
354 gpu.status.reg &= ~0x1fff;
355 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
356 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// when skipping, the renderer still needs the state commands
358 if (gpu.frameskip.active)
359 renderer_sync_ecmds(gpu.ex_regs);
360 gpu.state.fb_dirty |= vram_dirty;
// Process the words buffered by GPUwriteData(); any unconsumed tail
// (an incomplete command) is moved to the front of the buffer.
365 void flush_cmd_buffer(void)
367 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
369 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Plugin API: bulk data-port write (DMA block transfer).
373 void GPUwriteDataMem(uint32_t *mem, int count)
377 log_io("gpu_dma_write %p %d\n", mem, count);
// drain any words buffered via GPUwriteData() first to keep ordering
379 if (unlikely(gpu.cmd_len > 0))
382 left = check_cmd(mem, count);
384 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Plugin API: single-word data-port write. Words are buffered and only
// parsed once the buffer fills (or another entry point forces a flush).
387 void GPUwriteData(uint32_t data)
389 log_io("gpu_write %08x\n", data);
390 gpu.cmd_buffer[gpu.cmd_len++] = data;
391 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Plugin API: walk a GP0 linked-list DMA chain in emulated RAM. Each node
// is a header word (next address in low 24 bits, payload length in the
// high byte) followed by payload command words. Loops are broken by
// temporarily setting bit 23 in visited headers. Returns cycle cost via
// cpu_cycles (tail of the function is elided in this view).
395 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
397 uint32_t addr, *list;
398 uint32_t *llist_entry = NULL;
399 int len, left, count;
402 if (unlikely(gpu.cmd_len > 0))
405 // ff7 sends its main list twice, detect this
// if the previous list was sent on this same frame (and was expensive),
// pre-mark its head so re-walking it terminates immediately
406 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
407 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
408 gpu.state.last_list.cycles > 2048)
410 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
411 *llist_entry |= 0x800000;
414 log_io("gpu_dma_chain\n");
415 addr = start_addr & 0xffffff;
416 for (count = 0; addr != 0xffffff; count++)
418 list = rambase + (addr & 0x1fffff) / 4;
420 addr = list[0] & 0xffffff;
423 cpu_cycles += 5 + len;
425 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
427 // loop detection marker
428 // (bit23 set causes DMA error on real machine, so
429 // unlikely to be ever set by the game)
433 left = check_cmd(list + 1, len);
435 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
442 // remove loop detection markers
443 addr = start_addr & 0x1fffff;
444 while (count-- > 0) {
445 list = rambase + addr / 4;
446 addr = list[0] & 0x1fffff;
447 list[0] &= ~0x800000;
// also clear the marker planted by the ff7 double-send detection
450 *llist_entry &= ~0x800000;
// remember this list so the next call can detect a re-send
452 gpu.state.last_list.frame = *gpu.state.frame_count;
453 gpu.state.last_list.hcnt = *gpu.state.hcnt;
454 gpu.state.last_list.cycles = cpu_cycles;
455 gpu.state.last_list.addr = start_addr;
// Plugin API: bulk data-port read (DMA) - serviced by an active VRAM
// read transfer.
460 void GPUreadDataMem(uint32_t *mem, int count)
462 log_io("gpu_dma_read %p %d\n", mem, count);
// flush buffered writes first so a pending 0xc0 command is processed
464 if (unlikely(gpu.cmd_len > 0))
468 do_vram_io(mem, count, 1);
// Plugin API: single-word data-port read; returns the next word of an
// active VRAM read transfer (or the value latched by get_gpu_info()).
471 uint32_t GPUreadData(void)
475 if (unlikely(gpu.cmd_len > 0))
480 do_vram_io(&ret, 1, 1);
482 log_io("gpu_read %08x\n", ret);
// Plugin API: status-port read; flushes buffered commands first so the
// returned status reflects everything written so far.
486 uint32_t GPUreadStatus(void)
490 if (unlikely(gpu.cmd_len > 0))
493 ret = gpu.status.reg;
494 log_io("gpu_read_status %08x\n", ret);
// (fragment of struct GPUFreeze) savestate layout shared with the main
// emulator - field sizes and order are ABI and must not change.
500 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
501 uint32_t ulStatus; // current gpu status
502 uint32_t ulControl[256]; // latest control register values
503 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Plugin API: save/restore GPU state for savestates. The type-dispatch
// branches are elided in this view; presumably the first group is the
// save path and the second is the load path -- TODO confirm. ex_regs are
// stashed in the unused ulControl slots starting at 0xe0.
506 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
514 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
515 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
516 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
517 freeze->ulStatus = gpu.status.reg;
// load path: VRAM is replaced wholesale, so drop renderer caches
520 renderer_invalidate_caches(0, 0, 1024, 512);
521 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
522 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
523 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
524 gpu.status.reg = freeze->ulStatus;
// replay control regs 1..8; the xor makes the stored value differ from
// the incoming write so GPUwriteStatus' same-value check doesn't skip it
525 for (i = 8; i > 0; i--) {
526 gpu.regs[i] ^= 1; // avoid reg change detection
527 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
529 renderer_sync_ecmds(gpu.ex_regs);
536 // vim:shiftwidth=2:expandtab