2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a true array (invalid on pointers/decayed parameters)
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// GCC branch-prediction hint: condition is expected to be false
16 #define unlikely(x) __builtin_expect((x), 0)
// prevent inlining of rarely-taken slow paths to keep hot callers small
17 #define noinline __attribute__((noinline))
// debug trace prefixed with frame counter and hcnt (uses GCC ##__VA_ARGS__ extension)
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// uncomment to route io logging through gpu_log
22 //#define log_io gpu_log
// uncomment to route anomaly logging through gpu_log
24 //#define log_anomaly gpu_log
// anomaly logging is compiled out by default
25 #define log_anomaly(...)
// global GPU state; 2048-byte alignment — presumably required by asm or
// cache-line code elsewhere in the project, TODO confirm
27 struct psx_gpu gpu __attribute__((aligned(2048)));
// reset GPU registers and screen state to power-on defaults
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
// NOTE(review): 0x14802000 looks like the post-reset GPUSTAT value
// (display-off + ready bits) — confirm against hardware docs
32 gpu.status.reg = 0x14802000;
// default display mode is 256x240
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
// derive the visible width from the x1/x2 horizontal display range
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
// nonsensical range: fall back to the full horizontal resolution
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
// otherwise scale: 2560 units correspond to a full-width scanline
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
// derive the visible height from the y1/y2 vertical display range
// (remainder of this function is not visible in this chunk)
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
// dheight = interlace double-height flag; its handling is elided here
52 if (gpu.status.dheight)
// decide whether the upcoming frame should be skipped
60 static noinline void decide_frameskip(void)
62 if (gpu.frameskip.active)
65 gpu.frameskip.cnt = 0;
66 gpu.frameskip.frame_ready = 1;
// external advice (pointer supplied by the emu core) can force skipping on
69 if (!gpu.frameskip.active && *gpu.frameskip.advice)
70 gpu.frameskip.active = 1;
// fixed setting: keep skipping until cnt reaches the configured 'set'
71 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
72 gpu.frameskip.active = 1;
74 gpu.frameskip.active = 0;
// given an e3 (draw area top-left) command word, decide whether skipping
// the frame is allowed: drawing inside the displayed region must not be
// dropped (except in interlace mode, which nearly always does that)
77 static noinline void decide_frameskip_allow(uint32_t cmd_e3)
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned-subtract trick: (x - screen.x) >= w is also true when x < screen.x
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// handle the GP1 "get GPU info" request: latch the selected internal
// register into gpu.gp0 for the next data-port read
// (switch body is partially elided in this chunk)
88 static noinline void get_gpu_info(uint32_t data)
90 switch (data & 0x0f) {
// e2..e5-style state registers, 20 significant bits
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): interior of the plugin init function — its header and the
// rest of its body are outside this view
113 ret |= renderer_init();
// point frame_count/hcnt at a dummy zero location until the emu core
// installs its real counters
115 gpu.state.frame_count = &gpu.zero;
116 gpu.state.hcnt = &gpu.zero;
// plugin shutdown entry point: tear down the video output layer
121 long GPUshutdown(void)
123 return vout_finish();
// GP1 control port write handler; command is in the top byte of 'data'
// (the dispatch switch is partially elided in this chunk)
126 void GPUwriteStatus(uint32_t data)
// resolution lookup tables indexed by the mode bits set below
128 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
129 static const short vres[4] = { 240, 480, 256, 480 };
130 uint32_t cmd = data >> 24;
// latch the raw write; skip redundant writes except cmds 0 and 5,
// which have side effects even when the value is unchanged
132 if (cmd < ARRAY_SIZE(gpu.regs)) {
133 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
135 gpu.regs[cmd] = data;
138 gpu.state.fb_dirty = 1;
// display enable/disable
145 gpu.status.blanking = data & 1;
// DMA direction
148 gpu.status.dma = data & 3;
// display start position (x: 10 bits, y: 10 bits)
151 gpu.screen.x = data & 0x3ff;
152 gpu.screen.y = (data >> 10) & 0x3ff;
153 if (gpu.frameskip.set) {
// re-evaluate skip permission against the current draw area (e3)
154 decide_frameskip_allow(gpu.ex_regs[3]);
// a flip on a new frame re-runs the frameskip decision
155 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
157 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range (12-bit fields)
162 gpu.screen.x1 = data & 0xfff;
163 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range (10-bit fields)
167 gpu.screen.y1 = data & 0x3ff;
168 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: fold mode bits into the status register, then look up
// the resulting resolution from the tables above
172 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
173 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
174 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// 0x10..0x1f: GPU info request (handled by get_gpu_info)
179 if ((cmd & 0xf0) == 0x10)
// number of extra parameter words following each GP0 command byte;
// 0 covers no-parameter commands, unused opcodes, and commands that
// are handled specially (e.g. 0xa0/0xc0 VRAM transfers)
185 const unsigned char cmd_lengths[256] =
187 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
190 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
191 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
192 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
193 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
194 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
195 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// address of the 16bpp pixel at (x, y); VRAM is 1024 pixels wide
205 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// copy one span of l pixels between VRAM and mem; direction chosen by
// is_read (nonzero = VRAM -> mem)
207 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
209 uint16_t *vram = VRAM_MEM_XY(x, y);
// l * 2: pixels are 2 bytes each
211 memcpy(mem, vram, l * 2);
213 memcpy(vram, mem, l * 2);
// transfer up to 'count' 32-bit words between 'data' and the active
// VRAM rectangle described by gpu.dma; supports resuming a transfer
// that was split across calls. Returns the number of words consumed.
216 static int do_vram_io(uint32_t *data, int count, int is_read)
218 int count_initial = count;
219 uint16_t *sdata = (uint16_t *)data;
220 int x = gpu.dma.x, y = gpu.dma.y;
221 int w = gpu.dma.w, h = gpu.dma.h;
// resume offset within the current (partially transferred) line
222 int o = gpu.dma.offset;
224 count *= 2; // operate in 16bpp pixels
// first finish the partially transferred line, if any
226 if (gpu.dma.offset) {
227 l = w - gpu.dma.offset;
231 do_vram_line(x + o, y, sdata, l, is_read);
// then whole lines while enough data remains
244 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
246 do_vram_line(x, y, sdata, w, is_read);
// finally a trailing partial line (leaves gpu.dma.offset set for resume)
249 if (h > 0 && count > 0) {
251 do_vram_line(x, y, sdata, count, is_read);
// convert remaining pixel count back to 32-bit word units
259 return count_initial - count / 2;
// begin a GP0 0xa0 (write) / 0xc0 (read) VRAM transfer: unpack the
// position and size words into gpu.dma
262 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
265 log_anomaly("start_vram_transfer while old unfinished\n");
// x wraps at 1024, y at 512 (VRAM dimensions)
267 gpu.dma.x = pos_word & 1023;
268 gpu.dma.y = (pos_word >> 16) & 511;
269 gpu.dma.w = size_word & 0xffff; // ?
270 gpu.dma.h = size_word >> 16;
// reads must observe up-to-date pixels: flush queued draws and drop
// any renderer caches covering the rectangle
276 renderer_flush_queues();
277 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
280 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
281 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// scan a GP0 command stream of 'count' words: track state-changing
// commands (e0-e7, texpage bits), hand drawing spans to the renderer,
// and start/continue VRAM i/o. Returns the number of leftover words
// belonging to an incomplete trailing command.
284 static int check_cmd(uint32_t *data, int count)
286 int len, cmd, start, pos;
290 for (start = pos = 0; pos < count; )
// continue an in-progress VRAM transfer before parsing more commands
296 pos += do_vram_io(data + pos, count - pos, 0);
302 // do look-ahead pass to detect SR changes and VRAM i/o
303 while (pos < count) {
304 uint32_t *list = data + pos;
// total command size = opcode word + its parameter words
306 len = 1 + cmd_lengths[cmd];
308 //printf(" %3d: %02x %d\n", pos, cmd, len);
309 if ((cmd & 0xf4) == 0x24) {
310 // flat textured prim
// texpage bits for these prims are mirrored into ex_regs[1]
311 gpu.ex_regs[1] &= ~0x1ff;
312 gpu.ex_regs[1] |= list[4] & 0x1ff;
314 else if ((cmd & 0xf4) == 0x34) {
315 // shaded textured prim
316 gpu.ex_regs[1] &= ~0x1ff;
317 gpu.ex_regs[1] |= list[5] & 0x1ff;
319 else if (cmd == 0xe3)
// drawing-area change may flip the frameskip permission
320 decide_frameskip_allow(list[0]);
// 0x02..0xbf are drawing commands
322 if (2 <= cmd && cmd < 0xc0)
// e0-e7 state registers are shadowed in ex_regs
324 else if ((cmd & 0xf8) == 0xe0)
325 gpu.ex_regs[cmd & 7] = list[0];
327 if (pos + len > count) {
329 break; // incomplete cmd
// 0xa0/0xc0 VRAM transfers interrupt the look-ahead span
331 if (cmd == 0xa0 || cmd == 0xc0)
// flush the parsed span to the renderer unless this frame is skipped
336 if (pos - start > 0) {
337 if (!gpu.frameskip.active || !gpu.frameskip.allow)
338 do_cmd_list(data + start, pos - start);
342 if (cmd == 0xa0 || cmd == 0xc0) {
343 // consume vram write/read cmd
344 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// mirror the tracked ex_regs state into the status register bits
351 gpu.status.reg &= ~0x1fff;
352 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
353 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// keep the renderer's shadow state in sync even while skipping frames
355 if (gpu.frameskip.active)
356 renderer_sync_ecmds(gpu.ex_regs);
357 gpu.state.fb_dirty |= vram_dirty;
// process buffered single-word GP0 writes; any incomplete trailing
// command is moved to the front of the buffer for the next call
362 void flush_cmd_buffer(void)
364 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
// left * 4: word count to byte count
366 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// bulk GP0 data write entry point (block DMA)
370 void GPUwriteDataMem(uint32_t *mem, int count)
374 log_io("gpu_dma_write %p %d\n", mem, count);
// drain any buffered single-word writes first to preserve ordering
376 if (unlikely(gpu.cmd_len > 0))
379 left = check_cmd(mem, count);
// a leftover here means the block ended mid-command; it is dropped
381 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// single-word GP0 write: buffer it, flush when the buffer fills
384 void GPUwriteData(uint32_t data)
386 log_io("gpu_write %08x\n", data);
387 gpu.cmd_buffer[gpu.cmd_len++] = data;
388 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// walk a GPU DMA linked list in main RAM, feeding each node's payload
// words to check_cmd; uses bit 23 of the node header as a loop-detection
// marker. Return value (cycle estimate) is not visible in this chunk.
392 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
394 uint32_t addr, *list;
395 uint32_t *llist_entry = NULL;
396 int len, left, count;
// drain buffered single-word writes first to preserve ordering
399 if (unlikely(gpu.cmd_len > 0))
402 // ff7 sends it's main list twice, detect this
403 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
404 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
405 gpu.state.last_list.cycles > 2048)
// pre-mark the previous list's head so the walk below stops early
407 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
408 *llist_entry |= 0x800000;
411 log_io("gpu_dma_chain\n");
// 0xffffff is the list terminator
412 addr = start_addr & 0xffffff;
413 for (count = 0; addr != 0xffffff; count++)
// node address is within the 2 MB RAM mirror
415 list = rambase + (addr & 0x1fffff) / 4;
// header word: next-node pointer in the low 24 bits
417 addr = list[0] & 0xffffff;
420 cpu_cycles += 5 + len;
422 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
424 // loop detection marker
425 // (bit23 set causes DMA error on real machine, so
426 // unlikely to be ever set by the game)
430 left = check_cmd(list + 1, len);
432 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
439 // remove loop detection markers
440 addr = start_addr & 0x1fffff;
441 while (count-- > 0) {
442 list = rambase + addr / 4;
443 addr = list[0] & 0x1fffff;
444 list[0] &= ~0x800000;
// also clear the marker set for the ff7 double-send detection, if any
447 *llist_entry &= ~0x800000;
// remember this list so the next call can detect a re-send
449 gpu.state.last_list.frame = *gpu.state.frame_count;
450 gpu.state.last_list.hcnt = *gpu.state.hcnt;
451 gpu.state.last_list.cycles = cpu_cycles;
452 gpu.state.last_list.addr = start_addr;
// bulk GP0 data read entry point (VRAM -> CPU DMA)
457 void GPUreadDataMem(uint32_t *mem, int count)
459 log_io("gpu_dma_read %p %d\n", mem, count);
// flush buffered writes so a pending read setup takes effect first
461 if (unlikely(gpu.cmd_len > 0))
465 do_vram_io(mem, count, 1);
// single-word GP0 read; refreshes gpu.gp0 from VRAM when a read
// transfer is active
468 uint32_t GPUreadData(void)
470 log_io("gpu_read\n");
// flush buffered writes so a pending read setup takes effect first
472 if (unlikely(gpu.cmd_len > 0))
476 do_vram_io(&gpu.gp0, 1, 1);
// GP1 status port read
481 uint32_t GPUreadStatus(void)
// flush pending writes so the status value reflects them
485 if (unlikely(gpu.cmd_len > 0))
488 ret = gpu.status.reg;
489 log_io("gpu_read_status %08x\n", ret);
// NOTE(review): savestate struct fields (layout shared with other PSX GPU
// plugins); the struct header/footer are outside this view
495 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
496 uint32_t ulStatus; // current gpu status
497 uint32_t ulControl[256]; // latest control register values
498 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// save/restore GPU state for savestates; 'type' selects the direction
// (exact type values are not visible in this chunk — TODO confirm)
501 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
// save path: dump VRAM, control regs, and e0-e7 shadow regs
509 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
510 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
// ex_regs are stashed in the upper part of ulControl (from index 0xe0)
511 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
512 freeze->ulStatus = gpu.status.reg;
// load path: restore state, then replay control regs 1..8 so derived
// state (screen geometry etc.) is rebuilt through GPUwriteStatus
515 renderer_invalidate_caches(0, 0, 1024, 512);
516 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
517 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
518 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
519 gpu.status.reg = freeze->ulStatus;
520 for (i = 8; i > 0; i--) {
// toggle one bit so GPUwriteStatus's same-value check can't skip the write
521 gpu.regs[i] ^= 1; // avoid reg change detection
522 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
524 renderer_sync_ecmds(gpu.ex_regs);
531 // vim:shiftwidth=2:expandtab