2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Generic helper macros.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: condition is expected to be false.
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Trace line tagged with the current frame count and hblank count; both are
// read through pointers in gpu.state so the core emu owns the counters.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// I/O and anomaly tracing are compiled out by default; uncomment the
// gpu_log aliases to enable them.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Single global GPU state instance, aligned to a 64-byte boundary.
27 struct psx_gpu gpu __attribute__((aligned(64)));
// Reset the GPU to power-on defaults: clear all control registers,
// set the default status word and a 256x240 display.
// NOTE(review): interior lines appear elided in this view.
29 static noinline void do_reset(void)
31   memset(gpu.regs, 0, sizeof(gpu.regs));
// 0x14802000: default status after reset -- TODO confirm individual bit meanings
32   gpu.status.reg = 0x14802000;
35   gpu.screen.hres = gpu.screen.w = 256;
36   gpu.screen.vres = gpu.screen.h = 240;
// Recompute the visible width from the horizontal display range (x1..x2).
// Out-of-range spans fall back to the full horizontal resolution; otherwise
// the span is scaled by hres/2560 (2560 presumably being the full hrange
// span in GPU clock units -- TODO confirm).
39 static noinline void update_width(void)
41   int sw = gpu.screen.x2 - gpu.screen.x1;
42   if (sw <= 0 || sw >= 2560)
// invalid/full range - use full resolution
44     gpu.screen.w = gpu.screen.hres;
46     gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute the visible height from the vertical display range (y1..y2),
// with special handling when the double-height (interlace) status bit is
// set. NOTE(review): most of the body is elided in this view.
49 static noinline void update_height(void)
51   int sh = gpu.screen.y2 - gpu.screen.y1;
52   if (gpu.status.dheight)
// Decide whether the next frame should be skipped. A frame is "ready" for
// display only if the previous one was not skipped; skipping activates when
// the core advises it (*advice) or when forced by frameskip.set == 1.
60 static noinline void decide_frameskip(void)
62   gpu.frameskip.frame_ready = !gpu.frameskip.active;
64   if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65     gpu.frameskip.active = 1;
67     gpu.frameskip.active = 0;
// Handle a GP1 "get GPU info" query: the low nibble of data selects which
// internal register to report; the 20-bit result is placed in gpu.gp0 to be
// picked up by the next GPUreadData. NOTE(review): case labels are elided
// in this view.
70 static noinline void get_gpu_info(uint32_t data)
72   switch (data & 0x0f) {
77       gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
80       gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): fragment of the plugin init path; the enclosing function
// header is not visible here. Initializes the renderer and points the
// frame/hblank counters at a zero dummy until the core provides real ones.
95   ret |= renderer_init();
97   gpu.state.frame_count = &gpu.zero;
98   gpu.state.hcnt = &gpu.zero;
// Plugin shutdown entry point; tears down video output and returns its
// status code.
103 long GPUshutdown(void)
105   return vout_finish();
// GP1 control-port write. The top byte of data is the command number, the
// low 24 bits its argument. NOTE(review): the switch/case labels are elided
// in this view; the visible statements belong to the individual command
// handlers (blanking, DMA mode, display start, h/v range, display mode,
// GPU info).
108 void GPUwriteStatus(uint32_t data)
// lookup tables mapping the 3-bit hres / 2-bit vres fields of the display
// mode command to pixel resolutions
110   static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111   static const short vres[4] = { 240, 480, 256, 480 };
112   uint32_t cmd = data >> 24;
114   if (cmd < ARRAY_SIZE(gpu.regs)) {
// ignore duplicate writes, except for reset (0) and display start (5)
// which must always take effect
115     if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
117     gpu.regs[cmd] = data;
120       gpu.state.fb_dirty = 1;
127       gpu.status.blanking = data & 1;
130       gpu.status.dma = data & 3;
// display start address in VRAM
133       gpu.screen.x = data & 0x3ff;
134       gpu.screen.y = (data >> 10) & 0x3ff;
// a display-start change is treated as a "flip"; at most one frameskip
// decision is made per frame
135       if (gpu.frameskip.set && gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
137         gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range
141       gpu.screen.x1 = data & 0xfff;
142       gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range
146       gpu.screen.y1 = data & 0x3ff;
147       gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode: fold the mode bits into the status register, then decode
// the resulting resolution fields
151       gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
152       gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
153       gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// GP1(0x10..0x1f): GPU info query
158       if ((cmd & 0xf0) == 0x10)
// Number of extra argument words for each GP0 command (the command word
// itself excluded; total length is 1 + cmd_lengths[cmd], see check_cmd).
// Zero entries cover single-word and unused opcodes.
164 const unsigned char cmd_lengths[256] =
166   0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
167   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
168   3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
169   5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
170   2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
171   3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
172   2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
173   1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
174   3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
175   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
177   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
179   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
181   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp pixel at (x, y) in VRAM (1024 pixels per row).
184 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy l 16bpp pixels of a single VRAM row starting at (x, y); is_read
// selects direction (read: vram -> mem, write: mem -> vram -- the selection
// branch itself is elided in this view). l * 2 converts pixels to bytes.
186 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
188   uint16_t *vram = VRAM_MEM_XY(x, y);
190     memcpy(mem, vram, l * 2);
192     memcpy(vram, mem, l * 2);
// Perform (part of) a VRAM image transfer previously set up in gpu.dma.
// data/count are in 32-bit words; internally the transfer operates on 16bpp
// pixels (count *= 2). The transfer is resumable: gpu.dma.offset tracks a
// partially-completed row so a later call can finish it. Returns the number
// of 32-bit words actually consumed.
195 static int do_vram_io(uint32_t *data, int count, int is_read)
197   int count_initial = count;
198   uint16_t *sdata = (uint16_t *)data;
199   int x = gpu.dma.x, y = gpu.dma.y;
200   int w = gpu.dma.w, h = gpu.dma.h;
201   int o = gpu.dma.offset;
203   count *= 2; // operate in 16bpp pixels
// finish a previously interrupted row first
205   if (gpu.dma.offset) {
206     l = w - gpu.dma.offset;
210     do_vram_line(x + o, y, sdata, l, is_read);
// transfer whole rows while enough data remains
223   for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
225     do_vram_line(x, y, sdata, w, is_read);
// partial final row: remember how far we got in gpu.dma.offset (elided)
228   if (h > 0 && count > 0) {
230     do_vram_line(x, y, sdata, count, is_read);
238   return count_initial - count / 2;
// Begin a VRAM read (GP0 0xc0) or write (GP0 0xa0) image transfer.
// pos_word packs x/y destination, size_word packs width/height; values are
// masked to the 1024x512 VRAM dimensions. For writes the renderer's caches
// covering the target rectangle are invalidated.
241 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
244     log_anomaly("start_vram_transfer while old unfinished\n");
246   gpu.dma.x = pos_word & 1023;
247   gpu.dma.y = (pos_word >> 16) & 511;
248   gpu.dma.w = size_word & 0xffff; // ?
249   gpu.dma.h = size_word >> 16;
255     renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
257   log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
258     gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Scan and dispatch a buffer of GP0 command words. Performs a look-ahead
// pass to mirror status-affecting state into gpu.ex_regs, detect VRAM i/o
// commands and decide frameskip applicability, then hands contiguous runs
// of draw commands to do_cmd_list() (skipped while frameskipping is active
// and allowed). Returns the number of unprocessed (incomplete) words left
// at the end of the buffer.
261 static int check_cmd(uint32_t *data, int count)
263   int len, cmd, start, pos;
267   for (start = pos = 0; pos < count; )
// continue an in-progress VRAM write, if any
273       pos += do_vram_io(data + pos, count - pos, 0);
279     // do look-ahead pass to detect SR changes and VRAM i/o
280     while (pos < count) {
281       uint32_t *list = data + pos;
// total command length = command word + its argument words
283       len = 1 + cmd_lengths[cmd];
285       //printf("  %3d: %02x %d\n", pos, cmd, len);
286       if ((cmd & 0xf4) == 0x24) {
287         // flat textured prim
// mirror the texpage bits (in the word holding the texpage attribute)
288         gpu.ex_regs[1] &= ~0x1ff;
289         gpu.ex_regs[1] |= list[4] & 0x1ff;
291       else if ((cmd & 0xf4) == 0x34) {
292         // shaded textured prim
293         gpu.ex_regs[1] &= ~0x1ff;
294         gpu.ex_regs[1] |= list[5] & 0x1ff;
296       else if (cmd == 0xe3)
298         // no frameskip if it decides to draw to display area,
299         // but not for interlace since it'll most likely always do that
300         uint32_t x = list[0] & 0x3ff;
301         uint32_t y = (list[0] >> 10) & 0x3ff;
// unsigned compare doubles as a >= 0 && < w/h range check
302         gpu.frameskip.allow = gpu.status.interlace ||
303           (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
304           (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
// 0x02..0xbf are rendering commands; 0xe0..0xe7 update ex_regs
306       if (2 <= cmd && cmd < 0xc0)
308       else if ((cmd & 0xf8) == 0xe0)
309         gpu.ex_regs[cmd & 7] = list[0];
311       if (pos + len > count) {
313         break; // incomplete cmd
// stop look-ahead at VRAM write/read so it can be consumed separately
315       if (cmd == 0xa0 || cmd == 0xc0)
// flush the accumulated draw commands to the renderer, unless this frame
// is being skipped
320     if (pos - start > 0) {
321       if (!gpu.frameskip.active || !gpu.frameskip.allow)
322         do_cmd_list(data + start, pos - start);
326     if (cmd == 0xa0 || cmd == 0xc0) {
327       // consume vram write/read cmd
328       start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// refresh the command-derived bits of the status register from ex_regs
335   gpu.status.reg &= ~0x1fff;
336   gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
337   gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// keep the renderer's copy of the E1..E6 state in sync even when drawing
// was skipped
339   if (gpu.frameskip.active)
340     renderer_sync_ecmds(gpu.ex_regs);
341   gpu.state.fb_dirty |= vram_dirty;
// Process the buffered command words; any incomplete trailing command is
// moved to the front of the buffer to be completed by later writes.
346 static void flush_cmd_buffer(void)
348   int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
350     memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Bulk GP0 data write (DMA block transfer). Flushes any partially buffered
// command first, then processes the words directly from mem; leftover
// (incomplete) words are logged as an anomaly since they cannot be kept.
354 void GPUwriteDataMem(uint32_t *mem, int count)
358   log_io("gpu_dma_write %p %d\n", mem, count);
360   if (unlikely(gpu.cmd_len > 0))
363   left = check_cmd(mem, count);
365     log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word GP0 write: accumulate into the command buffer and flush
// when it fills up.
368 void GPUwriteData(uint32_t data)
370   log_io("gpu_write %08x\n", data);
371   gpu.cmd_buffer[gpu.cmd_len++] = data;
372   if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk and execute a GPU DMA linked list starting at start_addr in PSX RAM.
// Each node is a header word (top byte = payload length, low 24 bits = next
// address, 0xffffff terminates) followed by GP0 command words. Bit 23 is
// temporarily set on visited headers as a loop-detection marker and cleared
// afterwards.
376 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
378   uint32_t addr, *list;
379   uint32_t *llist_entry = NULL;
380   int len, left, count;
383   if (unlikely(gpu.cmd_len > 0))
386   // ff7 sends it's main list twice, detect this
// same frame (or adjacent hblank) and a previously large list: pre-mark the
// old list's head so the walk below terminates early on the duplicate
387   if (*gpu.state.frame_count == gpu.state.last_list.frame &&
388       *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
389       gpu.state.last_list.words > 1024)
391     llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
392     *llist_entry |= 0x800000;
395   log_io("gpu_dma_chain\n");
396   addr = start_addr & 0xffffff;
397   for (count = 0; addr != 0xffffff; count++)
// 0x1fffff masks the address into the 2MB RAM mirror
399     list = rambase + (addr & 0x1fffff) / 4;
401     addr = list[0] & 0xffffff;
402     dma_words += 1 + len;
404     log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
406     // loop detection marker
407     // (bit23 set causes DMA error on real machine, so
408     // unlikely to be ever set by the game)
412     left = check_cmd(list + 1, len);
414       log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
421   // remove loop detection markers
422   addr = start_addr & 0x1fffff;
423   while (count-- > 0) {
424     list = rambase + addr / 4;
425     addr = list[0] & 0x1fffff;
426     list[0] &= ~0x800000;
// also clear the marker placed for the ff7 duplicate-list detection
429     *llist_entry &= ~0x800000;
// remember this list so a duplicate resend can be recognized next time
431   gpu.state.last_list.frame = *gpu.state.frame_count;
432   gpu.state.last_list.hcnt = *gpu.state.hcnt;
433   gpu.state.last_list.words = dma_words;
434   gpu.state.last_list.addr = start_addr;
// Bulk GPUREAD (DMA) transfer: flush pending commands, then read count
// words of the active VRAM->CPU image transfer into mem.
439 void GPUreadDataMem(uint32_t *mem, int count)
441   log_io("gpu_dma_read %p %d\n", mem, count);
443   if (unlikely(gpu.cmd_len > 0))
447     do_vram_io(mem, count, 1);
// Single-word GPUREAD: pulls the next word of an active VRAM read into
// gpu.gp0 (which also holds the reply to GPU-info queries) and returns it.
450 uint32_t GPUreadData(void)
452   log_io("gpu_read\n");
454   if (unlikely(gpu.cmd_len > 0))
458     do_vram_io(&gpu.gp0, 1, 1);
// Return the GPU status register; pending buffered commands are flushed
// first so the status reflects them.
463 uint32_t GPUreadStatus(void)
467   if (unlikely(gpu.cmd_len > 0))
470   ret = gpu.status.reg;
471   log_io("gpu_read_status %08x\n", ret);
// Savestate payload fields (the enclosing struct GPUFreeze declaration is
// not visible in this view).
477   uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
478   uint32_t ulStatus; // current gpu status
479   uint32_t ulControl[256]; // latest control register values
480   unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save (type selects direction; branch lines elided in this view) or
// restore the full GPU state. ex_regs are stashed in the unused upper part
// of ulControl (offset 0xe0). On restore, each control register is replayed
// through GPUwriteStatus; the xor trick below forces a mismatch so the
// duplicate-write filter in GPUwriteStatus does not ignore the replay.
483 long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
491     memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
492     memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
493     memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
494     freeze->ulStatus = gpu.status.reg;
// restore path: drop all cached renderer state for the whole 1024x512 VRAM
497     renderer_invalidate_caches(0, 0, 1024, 512);
498     memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
499     memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
500     memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
501     gpu.status.reg = freeze->ulStatus;
502     for (i = 8; i > 0; i--) {
503       gpu.regs[i] ^= 1; // avoid reg change detection
504       GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
506     renderer_sync_ecmds(gpu.ex_regs);
513 // vim:shiftwidth=2:expandtab