2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of an array: total size divided by the size of element 0.
// Only meaningful for real arrays, not for pointers.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch hint: tells the compiler the expression is expected to be 0.
16 #define unlikely(x) __builtin_expect((x), 0)
// Shorthand for the compiler's "never inline this function" attribute.
17 #define noinline __attribute__((noinline))
// printf wrapper that prefixes every message with the current frame counter
// and the value hcnt points to, formatted as "<frame>:<hcnt>: ".
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Disabled alternative that would route log_io through gpu_log.
// NOTE(review): log_io is used further down, so its active definition must be
// on a line not visible in this listing.
22 //#define log_io gpu_log
// Anomaly messages are routed through gpu_log; the commented-out line below
// is the variant that would silence them.
24 #define log_anomaly gpu_log
25 //#define log_anomaly(...)
// The single global GPU state object used by every function in this file,
// placed on a 64-byte boundary.
27 struct psx_gpu gpu __attribute__((aligned(64)));
// Reset the GPU register state: zero gpu.regs, load the status register with
// 0x14802000, and set screen.hres/w to 256 and screen.vres/h to 240.
// NOTE(review): this listing omits some lines of the function (at least the
// braces), so only the visible statements are described.
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
// Recompute gpu.screen.w from the horizontal display range (x2 - x1).
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
// A range that is empty, negative, or 2560 and above selects the full hres.
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
// Otherwise hres is scaled by sw / 2560.
// NOTE(review): the line between the two assignments (presumably `else`) is
// not visible in this listing.
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Starts from the vertical display range (y2 - y1) and tests status.dheight.
// NOTE(review): everything after the `if` (what is done when dheight is set
// and where the result is stored) is on lines missing from this listing.
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
// Update the frameskip state from the current state and the external advice
// flag that gpu.frameskip.advice points to.
60 static noinline void decide_frameskip(void)
// frame_ready is set exactly when frameskip was not active on entry.
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
// Skipping is switched on only when it was off and *advice is non-zero.
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
// NOTE(review): the line before this assignment (presumably `else`) is not
// visible in this listing.
67 gpu.frameskip.active = 0;
// Answer a GPU info request by loading a value into gpu.gp0.
// The request type is selected by the low 4 bits of `data`.
// NOTE(review): the case labels are not visible in this listing, so which
// request values map to which assignment cannot be stated from here.
70 static noinline void get_gpu_info(uint32_t data)
72 switch (data & 0x0f) {
// Returns the low 20 bits of the ex_regs entry selected by data's low 3 bits.
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// Returns the low 20 bits of ex_regs[5].
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
// NOTE(review): the header of this function is on a line missing from this
// listing; presumably this is the plugin's init entry point, to be confirmed.
// Visible work: initialise video output, point lcf_hc and hcnt at gpu.zero,
// and reset the frame counter.
93 int ret = vout_init();
95 gpu.lcf_hc = &gpu.zero;
96 gpu.state.frame_count = 0;
97 gpu.state.hcnt = &gpu.zero;
// Shut down the video output and pass vout_finish()'s result back to the
// caller.
101 long GPUshutdown(void)
103 return vout_finish();
// Handle a write to the GPU control port. The top byte of `data` is the
// command number; the remaining bits are its parameter.
// NOTE(review): the switch and its case labels are on lines missing from this
// listing, so the mapping from command number to the statements below is not
// visible here.
106 void GPUwriteStatus(uint32_t data)
// Resolution tables, indexed below by status bits 16-18 (hres) and 19-20 (vres).
108 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
109 static const short vres[4] = { 240, 480, 256, 480 };
110 uint32_t cmd = data >> 24;
// Commands that fit in gpu.regs are cached. Re-writing an unchanged value is
// detected here for every command except 0 (the action taken, presumably an
// early return, is not visible).
112 if (cmd < ARRAY_SIZE(gpu.regs)) {
113 if (cmd != 0 && gpu.regs[cmd] == data)
115 gpu.regs[cmd] = data;
118 gpu.state.fb_dirty = 1;
// Bit 0 of the parameter becomes the blanking flag.
125 gpu.status.blanking = data & 1;
// Low 2 bits of the parameter become the DMA mode.
128 gpu.status.dma = data & 3;
// Display start: x in bits 0-9, y in bits 10-19.
131 gpu.screen.x = data & 0x3ff;
132 gpu.screen.y = (data >> 10) & 0x3ff;
133 if (gpu.frameskip.enabled)
// Horizontal display range: x1 in bits 0-11, x2 in bits 12-23.
137 gpu.screen.x1 = data & 0xfff;
138 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical display range: y1 in bits 0-9, y2 in bits 10-19.
142 gpu.screen.y1 = data & 0x3ff;
143 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Display mode: parameter bits 0-5 land in status bits 17-22 and parameter
// bit 6 lands in status bit 16; the resolution is then looked up from status.
147 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
148 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
149 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// Commands 0x10-0x1f get extra handling on a line not visible here.
154 if ((cmd & 0xf0) == 0x10)
// Per-command length table, indexed by the command byte (0x00-0xff).
// check_cmd() computes a command's size as 1 + cmd_lengths[cmd], so each
// entry is the number of words that follow the command word itself.
// Each row covers 16 commands; the trailing comments give the first command
// number of every second row.
160 const unsigned char cmd_lengths[256] =
162 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
164 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
165 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
166 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
167 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
168 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
169 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
170 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
171 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the 16-bit VRAM pixel at column x, row y (rows are 1024 pixels
// wide). The whole expansion is parenthesized so the result behaves as a
// single pointer value when the macro is used inside a larger expression,
// e.g. VRAM_MEM_XY(x, y)[i].
#define VRAM_MEM_XY(x, y) (&gpu.vram[(y) * 1024 + (x)])
// Copy `l` 16-bit pixels (l * 2 bytes) between `mem` and the VRAM location
// (x, y). Both directions are visible below.
// NOTE(review): the `if (is_read)` / `else` lines that choose between the two
// copies are not visible in this listing; presumably is_read selects
// VRAM -> mem.
182 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
184 uint16_t *vram = VRAM_MEM_XY(x, y);
186 memcpy(mem, vram, l * 2);
188 memcpy(vram, mem, l * 2);
// Continue the VRAM transfer described by gpu.dma, moving up to `count`
// 32-bit words between `data` and VRAM (direction given by is_read).
// Returns the number of 32-bit words consumed from `data`.
// NOTE(review): several lines are missing from this listing (declaration of
// `l`, clamping, and presumably the write-back of the updated position into
// gpu.dma); only the visible steps are described.
191 static int do_vram_io(uint32_t *data, int count, int is_read)
193 int count_initial = count;
194 uint16_t *sdata = (uint16_t *)data;
195 int x = gpu.dma.x, y = gpu.dma.y;
196 int w = gpu.dma.w, h = gpu.dma.h;
197 int o = gpu.dma.offset;
199 count *= 2; // operate in 16bpp pixels
// Step 1: finish a row that an earlier call left partially transferred.
201 if (gpu.dma.offset) {
202 l = w - gpu.dma.offset;
206 do_vram_line(x + o, y, sdata, l, is_read);
// Step 2: transfer as many complete rows as the remaining data allows.
219 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
221 do_vram_line(x, y, sdata, w, is_read);
// Step 3: transfer the leftover pixels as the start of the next row.
224 if (h > 0 && count > 0) {
226 do_vram_line(x, y, sdata, count, is_read);
// `count` is in pixels at this point; halve it to get back to words.
234 return count_initial - count / 2;
// Set up gpu.dma for a new VRAM transfer.
//   pos_word  - x in bits 0-9, y in bits 16-24
//   size_word - width in bits 0-15, height in bits 16-31
//   is_read   - non-zero for a VRAM read, zero for a write (used in the log)
// NOTE(review): the condition guarding the anomaly message and any lines
// between the assignments and the final log call are not visible here.
237 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
240 log_anomaly("start_vram_transfer while old unfinished\n");
242 gpu.dma.x = pos_word & 1023;
243 gpu.dma.y = (pos_word >> 16) & 511;
244 gpu.dma.w = size_word & 0xffff; // ?
245 gpu.dma.h = size_word >> 16;
251 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
252 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Scan `count` command words in `data`: feed pending VRAM transfers, mirror
// state-changing commands into the status register, pass complete commands to
// do_cmd_list(), and start new VRAM transfers.
// Callers treat the return value as the number of trailing words that were
// not processed.
// NOTE(review): many lines of this function are missing from this listing
// (several conditions, the fetch of `cmd`, position updates, and the return
// statement); comments below cover only what is visible.
255 static int check_cmd(uint32_t *data, int count)
257 int len, cmd, start, pos;
261 for (start = pos = 0; pos < count; )
// Words are consumed by the VRAM transfer first; is_read is 0 (a write).
// The guarding condition is not visible here.
267 pos += do_vram_io(data + pos, count - pos, 0);
273 // do look-ahead pass to detect SR changes and VRAM i/o
274 while (pos < count) {
275 uint32_t *list = data + pos;
// Total size of the command in words, including the command word itself.
277 len = 1 + cmd_lengths[cmd];
279 //printf(" %3d: %02x %d\n", pos, cmd, len);
280 if ((cmd & 0xf4) == 0x24) {
281 // flat textured prim
// Status bits 0-8 are taken from word 4 of the primitive.
282 gpu.status.reg &= ~0x1ff;
283 gpu.status.reg |= list[4] & 0x1ff;
285 else if ((cmd & 0xf4) == 0x34) {
286 // shaded textured prim
// Status bits 0-8 are taken from word 5 of the primitive.
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[5] & 0x1ff;
// Status bits 0-10 are replaced from the command word (condition not visible).
293 gpu.status.reg &= ~0x7ff;
294 gpu.status.reg |= list[0] & 0x7ff;
// Status bits 11-12 are replaced from the command word's low 2 bits
// (condition not visible).
297 gpu.status.reg &= ~0x1800;
298 gpu.status.reg |= (list[0] & 3) << 11;
// NOTE(review): the statement guarded by this test is not visible; presumably
// it flags VRAM as modified.
301 if (2 <= cmd && cmd < 0xc0)
// Commands 0xe0-0xe7 are remembered in ex_regs, indexed by their low 3 bits.
303 else if ((cmd & 0xf8) == 0xe0)
304 gpu.ex_regs[cmd & 7] = list[0];
// Stop when the buffer ends before the command does.
306 if (pos + len > count) {
308 break; // incomplete cmd
// 0xa0 / 0xc0 are the VRAM write / read commands handled after the loop.
310 if (cmd == 0xa0 || cmd == 0xc0)
// Dispatch the commands collected so far, unless this frame is being skipped.
315 if (pos - start > 0) {
316 if (!gpu.frameskip.active)
317 do_cmd_list(data + start, pos - start);
321 if (cmd == 0xa0 || cmd == 0xc0) {
322 // consume vram write/read cmd
// The two words after the command carry position and size; 0xc0 is a read.
323 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
331 gpu.state.fb_dirty |= vram_dirty;
// Process everything queued in gpu.cmd_buffer and keep the unprocessed tail.
// NOTE(review): the guard around the memmove and the update of gpu.cmd_len
// are on lines missing from this listing.
336 static void flush_cmd_buffer(void)
338 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
// Move the last `left` words (4 bytes each) to the start of the buffer;
// memmove is used because source and destination may overlap.
340 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Feed a block of `count` command/data words from `mem` to the GPU.
// NOTE(review): the declaration of `left`, the statement run when the command
// buffer is non-empty (presumably a flush), and the condition on the final
// log message are not visible in this listing.
344 void GPUwriteDataMem(uint32_t *mem, int count)
348 log_io("gpu_dma_write %p %d\n", mem, count);
// Words already queued through GPUwriteData are pending at this point.
350 if (unlikely(gpu.cmd_len > 0))
353 left = check_cmd(mem, count);
// Words that check_cmd did not process are reported here and not retried.
355 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Queue a single word written to the GPU data port in gpu.cmd_buffer.
// NOTE(review): the statement executed once the buffer holds CMD_BUFFER_LEN
// words is not visible here; presumably it flushes the buffer.
358 void GPUwriteData(uint32_t data)
360 log_io("gpu_write %08x\n", data);
361 gpu.cmd_buffer[gpu.cmd_len++] = data;
362 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked list of GPU command packets in emulated RAM and process each
// packet's payload.
//   rambase    - host pointer to the start of emulated RAM
//   start_addr - address of the first list entry (low 24 bits used)
// Every list entry starts with a header word whose low 24 bits hold the
// address of the next entry; 0xffffff terminates the list.
// NOTE(review): several lines are missing from this listing (declaration of
// dma_words, extraction of `len`, the loop-marker logic, some conditions, and
// the return statement); comments below cover only what is visible.
366 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
368 uint32_t addr, *list;
369 uint32_t *llist_entry = NULL;
370 int len, left, count;
// Words queued through GPUwriteData are pending at this point.
373 if (unlikely(gpu.cmd_len > 0))
376 // ff7 sends its main list twice, detect this
// If a list of more than 1024 words was already sent in this frame at most
// one hcnt tick ago, its first entry gets bit 23 set as a marker.
377 if (gpu.state.frame_count == gpu.state.last_list.frame &&
378 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
379 gpu.state.last_list.words > 1024)
381 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
382 *llist_entry |= 0x800000;
385 log_io("gpu_dma_chain\n");
386 addr = start_addr & 0xffffff;
387 for (count = 0; addr != 0xffffff; count++)
// Addresses are wrapped to 21 bits before being used as a RAM offset.
389 list = rambase + (addr & 0x1fffff) / 4;
391 addr = list[0] & 0xffffff;
// Counts the header word plus the payload of every entry.
392 dma_words += 1 + len;
394 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
396 // loop detection marker
397 // (bit23 set causes DMA error on real machine, so
398 // unlikely to be ever set by the game)
// The payload starts right after the header word.
402 left = check_cmd(list + 1, len);
404 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
411 // remove loop detection markers
// Re-walk the same `count` entries and clear bit 23 in each header.
412 addr = start_addr & 0x1fffff;
413 while (count-- > 0) {
414 list = rambase + addr / 4;
415 addr = list[0] & 0x1fffff;
416 list[0] &= ~0x800000;
// NOTE(review): llist_entry starts as NULL; the guard for this dereference is
// presumably on a line not visible here.
419 *llist_entry &= ~0x800000;
// Remember this list so a repeat in the same frame can be recognised.
421 gpu.state.last_list.frame = gpu.state.frame_count;
422 gpu.state.last_list.hcnt = *gpu.state.hcnt;
423 gpu.state.last_list.words = dma_words;
424 gpu.state.last_list.addr = start_addr;
// Read `count` words of the current VRAM transfer into `mem`.
// NOTE(review): the statement run when commands are still queued, and any
// guard around the do_vram_io call, are not visible in this listing.
429 void GPUreadDataMem(uint32_t *mem, int count)
431 log_io("gpu_dma_read %p %d\n", mem, count);
433 if (unlikely(gpu.cmd_len > 0))
// The last argument (is_read = 1) selects the VRAM -> memory direction.
437 do_vram_io(mem, count, 1);
// Read one word from the GPU data port.
// NOTE(review): the statement run when commands are still queued, any guard
// around the do_vram_io call, and the return statement are not visible in
// this listing; presumably gpu.gp0 is returned.
440 uint32_t GPUreadData(void)
442 log_io("gpu_read\n");
444 if (unlikely(gpu.cmd_len > 0))
// Fetch a single word of the current VRAM read transfer into gpu.gp0.
448 do_vram_io(&gpu.gp0, 1, 1);
// Read the GPU status register.
// NOTE(review): the declaration of `ret`, the statement run when commands are
// still queued, and the return statement are not visible in this listing.
453 uint32_t GPUreadStatus(void)
457 if (unlikely(gpu.cmd_len > 0))
// Bit 31 of the result is supplied by the value gpu.lcf_hc points to.
460 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
461 log_io("gpu_read_status %08x\n", ret);
// Save-state container exchanged with the main emulator by GPUfreeze().
// NOTE(review): the closing line with the typedef name is not visible here;
// GPUfreeze() refers to this type as GPUFreeze_t.
465 typedef struct GPUFREEZETAG
467 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
468 uint32_t ulStatus; // current gpu status
469 uint32_t ulControl[256]; // latest control register values
470 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save the GPU state into `freeze` or restore it from `freeze`.
// NOTE(review): the switch on `type`, the declaration of `i`, and the return
// statement are on lines missing from this listing, so which `type` value
// selects which direction is not visible here.
473 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
// Save direction: VRAM, control registers, ex_regs (stored at index 0xe0 of
// ulControl) and the status register are copied into the freeze structure.
481 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
482 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
483 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
484 freeze->ulStatus = gpu.status.reg;
// Restore direction: the same fields are copied back from the structure.
487 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
488 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
489 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
490 gpu.status.reg = freeze->ulStatus;
// Replay control registers 8 down to 1 through GPUwriteStatus so the derived
// state is rebuilt. Bit 0 of the stored value is flipped first because
// GPUwriteStatus ignores a write whose value equals the stored one; the
// second flip passes the original value as the parameter.
491 for (i = 8; i > 0; i--) {
492 gpu.regs[i] ^= 1; // avoid reg change detection
493 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
// Vertical-blank notification from the emulator core.
//   val  - not used in the lines visible here
//   hcnt - pointer stored in gpu.state.hcnt for later reads
// NOTE(review): the body of the interlace branch and any condition around the
// frame counter increment are on lines missing from this listing.
501 void GPUvBlank(int val, uint32_t *hcnt)
// Default: lcf_hc points at gpu.zero.
503 gpu.lcf_hc = &gpu.zero;
504 if (gpu.status.interlace) {
514 gpu.state.frame_count++;
516 gpu.state.hcnt = hcnt;
519 // vim:shiftwidth=2:expandtab