2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
27 struct psx_gpu gpu __attribute__((aligned(64)));
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
60 static noinline void decide_frameskip(void)
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
67 gpu.frameskip.active = 0;
70 static noinline void get_gpu_info(uint32_t data)
72 switch (data & 0x0f) {
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
95 ret |= renderer_init();
97 gpu.lcf_hc = &gpu.zero;
98 gpu.state.frame_count = 0;
99 gpu.state.hcnt = &gpu.zero;
104 long GPUshutdown(void)
106 return vout_finish();
109 void GPUwriteStatus(uint32_t data)
111 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
112 static const short vres[4] = { 240, 480, 256, 480 };
113 uint32_t cmd = data >> 24;
115 if (cmd < ARRAY_SIZE(gpu.regs)) {
116 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
118 gpu.regs[cmd] = data;
121 gpu.state.fb_dirty = 1;
128 gpu.status.blanking = data & 1;
131 gpu.status.dma = data & 3;
134 gpu.screen.x = data & 0x3ff;
135 gpu.screen.y = (data >> 10) & 0x3ff;
136 if (gpu.frameskip.enabled)
140 gpu.screen.x1 = data & 0xfff;
141 gpu.screen.x2 = (data >> 12) & 0xfff;
145 gpu.screen.y1 = data & 0x3ff;
146 gpu.screen.y2 = (data >> 10) & 0x3ff;
150 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
151 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
152 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
157 if ((cmd & 0xf0) == 0x10)
163 const unsigned char cmd_lengths[256] =
165 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
167 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
168 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
169 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
170 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
171 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
172 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
173 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
183 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
185 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
187 uint16_t *vram = VRAM_MEM_XY(x, y);
189 memcpy(mem, vram, l * 2);
191 memcpy(vram, mem, l * 2);
194 static int do_vram_io(uint32_t *data, int count, int is_read)
196 int count_initial = count;
197 uint16_t *sdata = (uint16_t *)data;
198 int x = gpu.dma.x, y = gpu.dma.y;
199 int w = gpu.dma.w, h = gpu.dma.h;
200 int o = gpu.dma.offset;
202 count *= 2; // operate in 16bpp pixels
204 if (gpu.dma.offset) {
205 l = w - gpu.dma.offset;
209 do_vram_line(x + o, y, sdata, l, is_read);
222 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
224 do_vram_line(x, y, sdata, w, is_read);
227 if (h > 0 && count > 0) {
229 do_vram_line(x, y, sdata, count, is_read);
237 return count_initial - count / 2;
240 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
243 log_anomaly("start_vram_transfer while old unfinished\n");
245 gpu.dma.x = pos_word & 1023;
246 gpu.dma.y = (pos_word >> 16) & 511;
247 gpu.dma.w = size_word & 0xffff; // ?
248 gpu.dma.h = size_word >> 16;
254 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
256 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
257 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
260 static int check_cmd(uint32_t *data, int count)
262 int len, cmd, start, pos;
266 for (start = pos = 0; pos < count; )
272 pos += do_vram_io(data + pos, count - pos, 0);
278 // do look-ahead pass to detect SR changes and VRAM i/o
279 while (pos < count) {
280 uint32_t *list = data + pos;
282 len = 1 + cmd_lengths[cmd];
284 //printf(" %3d: %02x %d\n", pos, cmd, len);
285 if ((cmd & 0xf4) == 0x24) {
286 // flat textured prim
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[4] & 0x1ff;
290 else if ((cmd & 0xf4) == 0x34) {
291 // shaded textured prim
292 gpu.status.reg &= ~0x1ff;
293 gpu.status.reg |= list[5] & 0x1ff;
298 gpu.status.reg &= ~0x7ff;
299 gpu.status.reg |= list[0] & 0x7ff;
302 gpu.status.reg &= ~0x1800;
303 gpu.status.reg |= (list[0] & 3) << 11;
306 if (2 <= cmd && cmd < 0xc0)
308 else if ((cmd & 0xf8) == 0xe0)
309 gpu.ex_regs[cmd & 7] = list[0];
311 if (pos + len > count) {
313 break; // incomplete cmd
315 if (cmd == 0xa0 || cmd == 0xc0)
320 if (pos - start > 0) {
321 if (!gpu.frameskip.active)
322 do_cmd_list(data + start, pos - start);
326 if (cmd == 0xa0 || cmd == 0xc0) {
327 // consume vram write/read cmd
328 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
335 if (gpu.frameskip.active)
336 renderer_sync_ecmds(gpu.ex_regs);
337 gpu.state.fb_dirty |= vram_dirty;
342 static void flush_cmd_buffer(void)
344 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
346 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
350 void GPUwriteDataMem(uint32_t *mem, int count)
354 log_io("gpu_dma_write %p %d\n", mem, count);
356 if (unlikely(gpu.cmd_len > 0))
359 left = check_cmd(mem, count);
361 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
364 void GPUwriteData(uint32_t data)
366 log_io("gpu_write %08x\n", data);
367 gpu.cmd_buffer[gpu.cmd_len++] = data;
368 if (gpu.cmd_len >= CMD_BUFFER_LEN)
372 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
374 uint32_t addr, *list;
375 uint32_t *llist_entry = NULL;
376 int len, left, count;
379 if (unlikely(gpu.cmd_len > 0))
382 // ff7 sends it's main list twice, detect this
383 if (gpu.state.frame_count == gpu.state.last_list.frame &&
384 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
385 gpu.state.last_list.words > 1024)
387 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
388 *llist_entry |= 0x800000;
391 log_io("gpu_dma_chain\n");
392 addr = start_addr & 0xffffff;
393 for (count = 0; addr != 0xffffff; count++)
395 list = rambase + (addr & 0x1fffff) / 4;
397 addr = list[0] & 0xffffff;
398 dma_words += 1 + len;
400 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
402 // loop detection marker
403 // (bit23 set causes DMA error on real machine, so
404 // unlikely to be ever set by the game)
408 left = check_cmd(list + 1, len);
410 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
417 // remove loop detection markers
418 addr = start_addr & 0x1fffff;
419 while (count-- > 0) {
420 list = rambase + addr / 4;
421 addr = list[0] & 0x1fffff;
422 list[0] &= ~0x800000;
425 *llist_entry &= ~0x800000;
427 gpu.state.last_list.frame = gpu.state.frame_count;
428 gpu.state.last_list.hcnt = *gpu.state.hcnt;
429 gpu.state.last_list.words = dma_words;
430 gpu.state.last_list.addr = start_addr;
435 void GPUreadDataMem(uint32_t *mem, int count)
437 log_io("gpu_dma_read %p %d\n", mem, count);
439 if (unlikely(gpu.cmd_len > 0))
443 do_vram_io(mem, count, 1);
446 uint32_t GPUreadData(void)
448 log_io("gpu_read\n");
450 if (unlikely(gpu.cmd_len > 0))
454 do_vram_io(&gpu.gp0, 1, 1);
459 uint32_t GPUreadStatus(void)
463 if (unlikely(gpu.cmd_len > 0))
466 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
467 log_io("gpu_read_status %08x\n", ret);
471 typedef struct GPUFREEZETAG
473 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
474 uint32_t ulStatus; // current gpu status
475 uint32_t ulControl[256]; // latest control register values
476 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
479 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
487 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
488 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
489 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
490 freeze->ulStatus = gpu.status.reg;
493 renderer_invalidate_caches(0, 0, 1024, 512);
494 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
495 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
496 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
497 gpu.status.reg = freeze->ulStatus;
498 for (i = 8; i > 0; i--) {
499 gpu.regs[i] ^= 1; // avoid reg change detection
500 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
502 renderer_sync_ecmds(gpu.ex_regs);
509 void GPUvBlank(int val, uint32_t *hcnt)
511 gpu.lcf_hc = &gpu.zero;
512 if (gpu.status.interlace) {
522 gpu.state.frame_count++;
524 gpu.state.hcnt = hcnt;
527 // vim:shiftwidth=2:expandtab