2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch-prediction hint: the condition is expected to be false.
16 #define unlikely(x) __builtin_expect((x), 0)
17 #define noinline __attribute__((noinline))
// Debug log line tagged with the current frame and hsync counters.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Per-category logging is compiled out by default; point these at
// gpu_log to enable the corresponding messages.
22 //#define log_io gpu_log
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// Single global GPU state instance; 64-byte aligned — presumably for
// cache-line-friendly access from asm/renderer code, TODO confirm.
27 struct psx_gpu gpu __attribute__((aligned(64)));
// Reset GPU to a power-on-like state: clear the GP1 control register
// shadow and select a 256x240 display.
// NOTE(review): braces and some statements are elided in this view.
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
// 0x14802000: default status value (display disabled / idle bits) —
// confirm individual bit meanings against the GPU status spec.
32 gpu.status.reg = 0x14802000;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
// Recompute visible width from the x1/x2 display range registers,
// scaled to the current horizontal resolution.
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
// Out-of-range value: fall back to the full resolution.
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
// assumes 2560 video-clock units per scanline — TODO confirm
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Recompute visible height from the y1/y2 display range registers.
// NOTE(review): the dheight branch's body is elided in this view;
// presumably it doubles the height for interlaced modes — confirm.
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
// Decide whether the next frame should be skipped and whether the
// previous one is ready to display.
60 static noinline void decide_frameskip(void)
// A frame is "ready" only if the one just finished was not skipped.
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
// Start skipping on external advice or when forced (set == 1);
// otherwise stop skipping.
64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65 gpu.frameskip.active = 1;
67 gpu.frameskip.active = 0;
// Handle GP1 "get GPU info": place the requested value in gpu.gp0
// so a following GPUreadData returns it.
// NOTE(review): switch cases are partially elided in this view.
70 static noinline void get_gpu_info(uint32_t data)
72 switch (data & 0x0f) {
// Low info indices read back the e0-e7 shadow registers.
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): the enclosing function header is elided in this view
// (presumably GPUinit) — these are its interior statements.
95 ret |= renderer_init();
// frame_count/hcnt point at a zero dummy until the emulator core
// supplies real counter locations.
97 gpu.state.frame_count = &gpu.zero;
98 gpu.state.hcnt = &gpu.zero;
// Plugin API: tear down video output; returns vout_finish()'s status.
103 long GPUshutdown(void)
105 return vout_finish();
// Plugin API: GP1 control-port write (display control commands).
// Top byte of `data` selects the command, low 24 bits are parameters.
// NOTE(review): switch/case framing is elided in this view.
108 void GPUwriteStatus(uint32_t data)
// Lookup tables for display mode (cmd 8): hres from status bits 16-18,
// vres from status bits 19-20.
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
114 if (cmd < ARRAY_SIZE(gpu.regs)) {
// Repeated identical writes are ignored; cmds 0 and 5 are exempt
// from this de-duplication.
115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
117 gpu.regs[cmd] = data;
120 gpu.state.fb_dirty = 1;
// cmd 3: display enable/blanking
127 gpu.status.blanking = data & 1;
// cmd 4: DMA direction
130 gpu.status.dma = data & 3;
// cmd 5: display start address in VRAM
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
// Display-start changes can indicate a completed frame when
// frameskip is enabled (handler body elided here).
135 if (gpu.frameskip.set)
// cmd 6: horizontal display range
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
// cmd 7: vertical display range
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
// cmd 8: display mode — repack the mode bits into status 16-22
146 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// cmds 0x10-0x1f: GPU info queries
156 if ((cmd & 0xf0) == 0x10)
// Number of extra data words following each GP0 command word
// (the command word itself not counted). The 0xa0/0xc0 VRAM
// transfer commands carry a variable payload handled separately.
162 const unsigned char cmd_lengths[256] =
164 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
167 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
168 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
169 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
170 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
171 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
172 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16bpp VRAM pixel at (x, y); VRAM is 1024 pixels wide.
182 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
// Copy one horizontal span of `l` pixels between VRAM and `mem`:
// is_read != 0 copies VRAM -> mem, otherwise mem -> VRAM.
// NOTE(review): the if/else framing is elided in this view.
184 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
186 uint16_t *vram = VRAM_MEM_XY(x, y);
188 memcpy(mem, vram, l * 2);
190 memcpy(vram, mem, l * 2);
// Transfer up to `count` 32-bit words between the data stream and the
// active VRAM rectangle described by gpu.dma. Returns the number of
// 32-bit words actually consumed/produced.
193 static int do_vram_io(uint32_t *data, int count, int is_read)
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
199 int o = gpu.dma.offset;
201 count *= 2; // operate in 16bpp pixels
// Finish a previously interrupted line first.
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
208 do_vram_line(x + o, y, sdata, l, is_read);
// Whole lines while enough data remains.
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
223 do_vram_line(x, y, sdata, w, is_read);
// Trailing partial line; the remainder presumably becomes
// gpu.dma.offset for the next call (elided in this view).
226 if (h > 0 && count > 0) {
228 do_vram_line(x, y, sdata, count, is_read);
// Convert the leftover pixel count back to 32-bit words.
236 return count_initial - count / 2;
// Begin a GP0 0xa0 (CPU->VRAM write) or 0xc0 (VRAM->CPU read)
// rectangle transfer; decodes position/size words into gpu.dma.
239 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
242 log_anomaly("start_vram_transfer while old unfinished\n");
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
// Incoming pixels may overwrite VRAM the renderer has cached —
// presumably only done on the write path, condition elided here.
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Scan a GP0 command stream: feed active VRAM transfers, shadow the
// 0xe1-0xe7 state commands, and pass renderable runs to do_cmd_list
// (skipped while frameskipping). Returns the number of words left
// over (an incomplete trailing command).
259 static int check_cmd(uint32_t *data, int count)
261 int len, cmd, start, pos;
265 for (start = pos = 0; pos < count; )
// An in-progress VRAM transfer consumes raw data words first.
271 pos += do_vram_io(data + pos, count - pos, 0);
277 // do look-ahead pass to detect SR changes and VRAM i/o
278 while (pos < count) {
279 uint32_t *list = data + pos;
281 len = 1 + cmd_lengths[cmd];
283 //printf(" %3d: %02x %d\n", pos, cmd, len);
284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
// Track the texpage bits so status/e1 shadow stays current.
286 gpu.ex_regs[1] &= ~0x1ff;
287 gpu.ex_regs[1] |= list[4] & 0x1ff;
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
291 gpu.ex_regs[1] &= ~0x1ff;
292 gpu.ex_regs[1] |= list[5] & 0x1ff;
// Drawing commands mark VRAM dirty; 0xe0-0xe7 update shadow regs.
294 if (2 <= cmd && cmd < 0xc0)
296 else if ((cmd & 0xf8) == 0xe0)
297 gpu.ex_regs[cmd & 7] = list[0];
299 if (pos + len > count) {
301 break; // incomplete cmd
// Stop the run before a VRAM transfer command; handled below.
303 if (cmd == 0xa0 || cmd == 0xc0)
// Hand the accumulated run of drawing commands to the renderer.
308 if (pos - start > 0) {
309 if (!gpu.frameskip.active)
310 do_cmd_list(data + start, pos - start);
314 if (cmd == 0xa0 || cmd == 0xc0) {
315 // consume vram write/read cmd
316 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// Mirror the e1/e6 shadow registers into the status word.
323 gpu.status.reg &= ~0x1fff;
324 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
325 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
// Even skipped frames must keep the renderer's e-cmd state in sync.
327 if (gpu.frameskip.active)
328 renderer_sync_ecmds(gpu.ex_regs);
329 gpu.state.fb_dirty |= vram_dirty;
// Process buffered GP0 words; move any incomplete trailing command
// back to the start of the buffer for the next write to complete.
334 static void flush_cmd_buffer(void)
336 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
338 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Plugin API: block transfer of GP0 data (DMA block mode).
342 void GPUwriteDataMem(uint32_t *mem, int count)
346 log_io("gpu_dma_write %p %d\n", mem, count);
// Words buffered by GPUwriteData must be processed first to
// preserve command ordering.
348 if (unlikely(gpu.cmd_len > 0))
351 left = check_cmd(mem, count);
// Incomplete commands in a block transfer cannot be buffered here.
353 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Plugin API: single GP0 word write; words are buffered and flushed
// once the buffer fills (flush call elided in this view).
356 void GPUwriteData(uint32_t data)
358 log_io("gpu_write %08x\n", data);
359 gpu.cmd_buffer[gpu.cmd_len++] = data;
360 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Plugin API: walk a GP0 linked list in emulated RAM (DMA2 chain
// mode). Each node is a header word (low 24 bits = next address)
// followed by `len` command words.
364 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
366 uint32_t addr, *list;
367 uint32_t *llist_entry = NULL;
368 int len, left, count;
// Flush any words buffered by GPUwriteData first.
371 if (unlikely(gpu.cmd_len > 0))
374 // ff7 sends its main list twice, detect this
375 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
376 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
377 gpu.state.last_list.words > 1024)
// Mark the previous list's head so the walk terminates early
// when the same list is resubmitted.
379 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
380 *llist_entry |= 0x800000;
383 log_io("gpu_dma_chain\n");
384 addr = start_addr & 0xffffff;
385 for (count = 0; addr != 0xffffff; count++)
387 list = rambase + (addr & 0x1fffff) / 4;
// Next-link pointer lives in the low 24 bits of the header word.
389 addr = list[0] & 0xffffff;
390 dma_words += 1 + len;
392 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
394 // loop detection marker
395 // (bit23 set causes DMA error on real machine, so
396 // unlikely to be ever set by the game)
400 left = check_cmd(list + 1, len);
402 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
409 // remove loop detection markers
410 addr = start_addr & 0x1fffff;
411 while (count-- > 0) {
412 list = rambase + addr / 4;
413 addr = list[0] & 0x1fffff;
414 list[0] &= ~0x800000;
// The duplicate-list marker set above may lie outside the walked
// range, so clear it explicitly.
417 *llist_entry &= ~0x800000;
// Remember this list for the duplicate detection on the next call.
419 gpu.state.last_list.frame = *gpu.state.frame_count;
420 gpu.state.last_list.hcnt = *gpu.state.hcnt;
421 gpu.state.last_list.words = dma_words;
422 gpu.state.last_list.addr = start_addr;
// Plugin API: block read of an active VRAM->CPU transfer into `mem`.
427 void GPUreadDataMem(uint32_t *mem, int count)
429 log_io("gpu_dma_read %p %d\n", mem, count);
// Flush buffered commands first so the transfer state is current.
431 if (unlikely(gpu.cmd_len > 0))
435 do_vram_io(mem, count, 1);
// Plugin API: single GP0 read — returns the next word of an active
// VRAM->CPU transfer (or the last info reply) via gpu.gp0.
438 uint32_t GPUreadData(void)
440 log_io("gpu_read\n");
442 if (unlikely(gpu.cmd_len > 0))
// Pull one 32-bit word of the ongoing VRAM read into gp0.
446 do_vram_io(&gpu.gp0, 1, 1);
// Plugin API: read the GPU status register (GPUSTAT).
451 uint32_t GPUreadStatus(void)
// Flush buffered commands so status-affecting e-cmds are applied.
455 if (unlikely(gpu.cmd_len > 0))
458 ret = gpu.status.reg;
459 log_io("gpu_read_status %08x\n", ret);
// Savestate payload layout — presumably shared with other PCSX GPU
// plugins for savestate compatibility, TODO confirm.
463 typedef struct GPUFREEZETAG
465 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
466 uint32_t ulStatus; // current gpu status
467 uint32_t ulControl[256]; // latest control register values
468 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Plugin API: save (`type` selects direction) or restore GPU state.
// NOTE(review): the switch framing is elided in this view — the first
// three memcpys are the save path, the rest the restore path.
471 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
// Save: dump VRAM, GP1 regs and the e0-e7 shadow regs (at +0xe0).
479 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
480 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
481 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
482 freeze->ulStatus = gpu.status.reg;
// Restore: the renderer's cached view of VRAM is stale after loading.
485 renderer_invalidate_caches(0, 0, 1024, 512);
486 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
487 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
488 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
489 gpu.status.reg = freeze->ulStatus;
// Replay control regs 8..1 through GPUwriteStatus to rebuild derived
// state; the XOR defeats its "same value is ignored" check.
490 for (i = 8; i > 0; i--) {
491 gpu.regs[i] ^= 1; // avoid reg change detection
492 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
494 renderer_sync_ecmds(gpu.ex_regs);
501 // vim:shiftwidth=2:expandtab