2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// element count of a real array (must not be used on pointer parameters)
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch-predictor hint: the condition is expected to be false
16 #define unlikely(x) __builtin_expect((x), 0)
// keep the function out of line even under heavy optimization
17 #define noinline __attribute__((noinline))
// debug log line tagged with the current frame count and hsync counter
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// uncomment to route i/o / anomaly traces through gpu_log
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
// anomaly logging is compiled out by default
25 #define log_anomaly(...)
// the single global GPU state block, aligned for cache friendliness
27 struct psx_gpu gpu __attribute__((aligned(64)));
// Reset the GPU to its power-on state: clear all GP1 control register
// shadows, restore the default status word and a 256x240 display.
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
// 0x14802000: power-on status value (field meanings per PSX GPU docs;
// not decoded here -- see GPUwriteStatus for how fields change later)
32 gpu.status.reg = 0x14802000;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the programmed horizontal display
// range (x1..x2). The range is scaled against 2560, presumably the
// GPU tick count of a full scanline -- TODO confirm against GPU docs.
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
// nonsensical range programmed by the game: fall back to nominal hres
44 gpu.screen.w = gpu.screen.hres;
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the vertical display range (y1..y2);
// NOTE(review): the remainder of the calculation is not visible in this
// excerpt -- dheight presumably doubles the height for interlace modes.
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
// Decide whether the upcoming frame should be skipped. frame_ready is
// the inverse of the active-skip flag, so the frontend only presents
// frames that were actually rendered.
60 static noinline void decide_frameskip(void)
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
// begin skipping when the frontend advises it or skipping is forced (set == 1)
64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65 gpu.frameskip.active = 1;
67 gpu.frameskip.active = 0;
// "Get GPU info" query (presumably GP1(0x10..0x1f) -- confirm against
// GPU docs): latch the requested internal value into gpu.gp0, to be
// returned by the next GPUreadData / GPUreadDataMem.
70 static noinline void get_gpu_info(uint32_t data)
72 switch (data & 0x0f) {
// shadowed e0-e7 settings, masked to their 20 significant bits
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): the enclosing function header (plugin init, presumably
// GPUinit) is not visible in this excerpt.
95 ret |= renderer_init();
97 gpu.state.frame_count = 0;
// hcnt points at a dummy zero counter until the emu core provides a real one
98 gpu.state.hcnt = &gpu.zero;
// Plugin shutdown entry point: tear down the video output backend.
103 long GPUshutdown(void)
105 return vout_finish();
// GP1 control port write. The high byte of `data` selects the command,
// the low 24 bits are its payload. NOTE(review): case labels are not
// visible in this excerpt; the GP1 command numbers below are inferred
// from the payload decoding -- confirm against PSX GPU documentation.
108 void GPUwriteStatus(uint32_t data)
// horizontal resolutions selected by the display mode status bits
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
// vertical resolutions selected by the display mode status bits
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
114 if (cmd < ARRAY_SIZE(gpu.regs)) {
// skip redundant rewrites of the same value, except for reset (0)
// and display start (5) which must always take effect
115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
117 gpu.regs[cmd] = data;
120 gpu.state.fb_dirty = 1;
// display blanking (presumably GP1(0x03))
127 gpu.status.blanking = data & 1;
// DMA direction (presumably GP1(0x04))
130 gpu.status.dma = data & 3;
// display start address in VRAM (presumably GP1(0x05))
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
135 if (gpu.frameskip.set)
// horizontal display range (presumably GP1(0x06))
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range (presumably GP1(0x07))
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode (presumably GP1(0x08)): fold payload into status
// bits 16-22, then refresh the cached resolution values
149 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1(0x10..0x1f): GPU info query, handled by get_gpu_info()
156 if ((cmd & 0xf0) == 0x10)
// Number of extra data words following each GP0 command word, indexed
// by opcode (0x20..0x3f polygons, 0x40..0x5f lines, 0x60..0x7f rects,
// 0x80/0xa0/0xc0 VRAM copy/write/read setup). Zero means the command
// is a single word (or unused).
162 const unsigned char cmd_lengths[256] =
164 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
167 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
168 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
169 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
170 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
171 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
172 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// address of the 16bpp pixel (x, y) in the 1024-pixel-wide VRAM
182 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal run of l pixels (l * 2 bytes) between VRAM and
// mem; direction is selected by is_read (read: VRAM -> mem).
184 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
186 uint16_t *vram = VRAM_MEM_XY(x, y);
188 memcpy(mem, vram, l * 2);
190 memcpy(vram, mem, l * 2);
// Feed `count` 32-bit words into an ongoing VRAM transfer set up by
// start_vram_transfer(). Works internally in 16bpp pixel units; a line
// left incomplete by a previous call is resumed via gpu.dma.offset.
// Returns the number of words actually consumed.
193 static int do_vram_io(uint32_t *data, int count, int is_read)
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
199 int o = gpu.dma.offset;
// one 32-bit word carries two 16bpp pixels
201 count *= 2; // operate in 16bpp pixels
// first finish the line the previous call left incomplete
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
208 do_vram_line(x + o, y, sdata, l, is_read);
// transfer whole lines while enough data remains
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
223 do_vram_line(x, y, sdata, w, is_read);
// trailing partial line; the remainder is picked up on the next call
226 if (h > 0 && count > 0) {
228 do_vram_line(x, y, sdata, count, is_read);
// convert leftover pixels back to whole words consumed
236 return count_initial - count / 2;
// Latch position/size of a VRAM write (GP0 0xa0) or read (GP0 0xc0)
// transfer into gpu.dma and invalidate renderer caches over the area.
// The actual data movement happens later in do_vram_io().
239 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
242 log_anomaly("start_vram_transfer while old unfinished\n");
// wrap the start coordinate into the 1024x512 VRAM
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Scan `count` words of GP0 command stream at `data`: mirror status-
// affecting bits into gpu.status, hand complete command spans to the
// renderer (unless a frame is being skipped) and set up VRAM transfers.
// Returns the number of trailing words left unprocessed (i.e. an
// incomplete command to be retried when more data arrives).
259 static int check_cmd(uint32_t *data, int count)
261 int len, cmd, start, pos;
265 for (start = pos = 0; pos < count; )
// an active VRAM transfer consumes data before command parsing
271 pos += do_vram_io(data + pos, count - pos, 0);
277 // do look-ahead pass to detect SR changes and VRAM i/o
278 while (pos < count) {
279 uint32_t *list = data + pos;
// total length = command word + its payload words
281 len = 1 + cmd_lengths[cmd];
283 //printf(" %3d: %02x %d\n", pos, cmd, len);
284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
// mirror the prim's texpage word into the low status bits
286 gpu.status.reg &= ~0x1ff;
287 gpu.status.reg |= list[4] & 0x1ff;
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
291 gpu.status.reg &= ~0x1ff;
292 gpu.status.reg |= list[5] & 0x1ff;
// presumably GP0(0xe1) texpage setting -- case label elided, confirm
297 gpu.status.reg &= ~0x7ff;
298 gpu.status.reg |= list[0] & 0x7ff;
// presumably GP0(0xe6) mask bit setting -- case label elided, confirm
301 gpu.status.reg &= ~0x1800;
302 gpu.status.reg |= (list[0] & 3) << 11;
// drawing commands (0x02..0xbf); body elided, presumably marks vram dirty
305 if (2 <= cmd && cmd < 0xc0)
// shadow the e0-e7 settings commands for the renderer / info queries
307 else if ((cmd & 0xf8) == 0xe0)
308 gpu.ex_regs[cmd & 7] = list[0];
310 if (pos + len > count) {
312 break; // incomplete cmd
// a VRAM access command terminates the current renderer span
314 if (cmd == 0xa0 || cmd == 0xc0)
// flush the accumulated span of drawing commands to the renderer
319 if (pos - start > 0) {
320 if (!gpu.frameskip.active)
321 do_cmd_list(data + start, pos - start);
325 if (cmd == 0xa0 || cmd == 0xc0) {
326 // consume vram write/read cmd
327 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// even when skipping, the renderer must see the latest e0-e7 state
334 if (gpu.frameskip.active)
335 renderer_sync_ecmds(gpu.ex_regs);
336 gpu.state.fb_dirty |= vram_dirty;
// Process the buffered GP0 words; any incomplete trailing command is
// moved to the start of the buffer to be completed by later writes.
341 static void flush_cmd_buffer(void)
343 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
345 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 write (DMA block): flush any partially buffered command
// first, then parse the block directly. Leftover words that do not
// form a complete command are discarded (logged as an anomaly).
349 void GPUwriteDataMem(uint32_t *mem, int count)
353 log_io("gpu_dma_write %p %d\n", mem, count);
355 if (unlikely(gpu.cmd_len > 0))
358 left = check_cmd(mem, count);
360 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: buffer the word and flush when the buffer is full.
363 void GPUwriteData(uint32_t data)
365 log_io("gpu_write %08x\n", data);
366 gpu.cmd_buffer[gpu.cmd_len++] = data;
367 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a GPU DMA linked list starting at start_addr (each node: header
// word with next-address in the low 24 bits, followed by payload words)
// and feed every node's payload through check_cmd(). Node headers get a
// temporary bit23 marker for loop detection, removed again at the end.
371 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
373 uint32_t addr, *list;
374 uint32_t *llist_entry = NULL;
375 int len, left, count;
378 if (unlikely(gpu.cmd_len > 0))
381 // ff7 sends its main list twice, detect this
// by marking the previous frame's list head so the walk below stops early
382 if (gpu.state.frame_count == gpu.state.last_list.frame &&
383 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
384 gpu.state.last_list.words > 1024)
386 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
387 *llist_entry |= 0x800000;
390 log_io("gpu_dma_chain\n");
// 0xffffff is the end-of-list terminator address
391 addr = start_addr & 0xffffff;
392 for (count = 0; addr != 0xffffff; count++)
394 list = rambase + (addr & 0x1fffff) / 4;
396 addr = list[0] & 0xffffff;
397 dma_words += 1 + len;
399 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
401 // loop detection marker
402 // (bit23 set causes DMA error on real machine, so
403 // unlikely to be ever set by the game)
407 left = check_cmd(list + 1, len);
409 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
416 // remove loop detection markers
417 addr = start_addr & 0x1fffff;
418 while (count-- > 0) {
419 list = rambase + addr / 4;
420 addr = list[0] & 0x1fffff;
421 list[0] &= ~0x800000;
// also clear the ff7 double-send marker if one was planted above
424 *llist_entry &= ~0x800000;
// remember this list's timing/size for next frame's double-send check
426 gpu.state.last_list.frame = gpu.state.frame_count;
427 gpu.state.last_list.hcnt = *gpu.state.hcnt;
428 gpu.state.last_list.words = dma_words;
429 gpu.state.last_list.addr = start_addr;
// Bulk GP0 read (DMA): flush pending commands, then satisfy the read
// from the active VRAM-to-CPU transfer.
434 void GPUreadDataMem(uint32_t *mem, int count)
436 log_io("gpu_dma_read %p %d\n", mem, count);
438 if (unlikely(gpu.cmd_len > 0))
442 do_vram_io(mem, count, 1);
// Single-word GPUREAD: flush pending commands, then pull one word of
// the active VRAM read into gpu.gp0 (also the info-query result latch).
445 uint32_t GPUreadData(void)
447 log_io("gpu_read\n");
449 if (unlikely(gpu.cmd_len > 0))
453 do_vram_io(&gpu.gp0, 1, 1);
// GPUSTAT read: flush pending commands first so the status register
// reflects everything written so far.
458 uint32_t GPUreadStatus(void)
462 if (unlikely(gpu.cmd_len > 0))
465 ret = gpu.status.reg;
466 log_io("gpu_read_status %08x\n", ret);
// Savestate layout shared across PSX GPU plugins (filled/consumed by
// GPUfreeze below; version is set by the main emulator).
470 typedef struct GPUFREEZETAG
472 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
473 uint32_t ulStatus; // current gpu status
474 uint32_t ulControl[256]; // latest control register values
475 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Savestate save/load entry point. NOTE(review): the `type` dispatch
// (presumably 1 = save, 2 = load per the PSX plugin API) is not visible
// in this excerpt -- confirm against the plugin interface spec.
478 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
// save path: snapshot VRAM, GP1 register shadows, e0-e7 shadows, status
486 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
487 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
// ex_regs ride in the unused ulControl slots starting at 0xe0
488 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
489 freeze->ulStatus = gpu.status.reg;
// load path: restore raw state, then replay control writes so all
// derived state (screen size, blanking, dma mode, ...) is rebuilt
492 renderer_invalidate_caches(0, 0, 1024, 512);
493 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
494 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
495 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
496 gpu.status.reg = freeze->ulStatus;
497 for (i = 8; i > 0; i--) {
// XOR the stored value so GPUwriteStatus's same-value skip
// (gpu.regs[cmd] == data) cannot ignore the replayed write
498 gpu.regs[i] ^= 1; // avoid reg change detection
499 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
501 renderer_sync_ecmds(gpu.ex_regs);
508 // vim:shiftwidth=2:expandtab