2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of a real array; gives a wrong answer if x is a pointer.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch hint (GCC builtin): the condition is expected to evaluate to 0.
16 #define unlikely(x) __builtin_expect((x), 0)
// Shorthand for the GCC attribute that forbids inlining a function.
17 #define noinline __attribute__((noinline))
// printf wrapper that prefixes each message with gpu.state.frame_count and
// the value gpu.state.hcnt points at. Uses the GNU ##__VA_ARGS__ extension.
19 #define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
// Debug switch: enabling this line routes log_io() through gpu_log.
22 //#define log_io gpu_log
// Debug switch: enabling this line routes log_anomaly() through gpu_log;
// the active definition below expands to nothing.
24 //#define log_anomaly gpu_log
25 #define log_anomaly(...)
// The single global GPU state object used throughout this file; placed on a
// 64-byte boundary via the GCC aligned attribute.
27 struct psx_gpu gpu __attribute__((aligned(64)));
/*
 * Reset GPU state: zero all of gpu.regs, load the status register with
 * 0x14802000 and set the screen to 256x240.
 * NOTE(review): some lines of this function are not visible in this excerpt.
 */
29 static noinline void do_reset(void)
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
// hres/vres and w/h are initialised to the same 256x240 values
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
/*
 * Recompute gpu.screen.w from the horizontal range gpu.screen.x1..x2.
 * A span that is <= 0 or >= 2560 uses gpu.screen.hres as the width; the
 * other assignment scales hres by span/2560.
 * NOTE(review): the `else` joining the two assignments is not visible in
 * this excerpt - presumably they are alternatives; confirm.
 */
39 static noinline void update_width(void)
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
44 gpu.screen.w = gpu.screen.hres;
// integer arithmetic: multiply before dividing to keep precision
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
/*
 * Recompute the displayed height, starting from the vertical range
 * gpu.screen.y2 - gpu.screen.y1 and taking gpu.status.dheight into account.
 * NOTE(review): the body after the dheight test is not visible in this
 * excerpt, so the exact adjustment and the final store cannot be described.
 */
49 static noinline void update_height(void)
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
/*
 * Update frameskip state.
 * frame_ready is set when skipping was NOT active on entry. Skipping is then
 * switched on only if it was off and *gpu.frameskip.advice is non-zero.
 * NOTE(review): the `else` before the final assignment is not visible in
 * this excerpt - presumably active is cleared in every other case; confirm.
 */
60 static noinline void decide_frameskip(void)
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
67 gpu.frameskip.active = 0;
/*
 * Answer an info request: the low 4 bits of `data` select what is reported
 * and the result is stored in gpu.gp0.
 * NOTE(review): the case labels are not visible in this excerpt, so which
 * selector values map to which assignment cannot be stated here.
 */
70 static noinline void get_gpu_info(uint32_t data)
72 switch (data & 0x0f) {
// report the ex_regs entry picked by the low 3 bits, limited to 20 bits
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
// report ex_regs[5], limited to 20 bits
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
95 ret |= renderer_init();
97 gpu.lcf_hc = &gpu.zero;
98 gpu.state.frame_count = 0;
99 gpu.state.hcnt = &gpu.zero;
/* Shut the plugin down; the return value is whatever vout_finish() returns. */
104 long GPUshutdown(void)
106 return vout_finish();
/*
 * Handle a write to the GPU control port.
 * The top byte of `data` is the command number; the remaining bits are its
 * argument.
 * NOTE(review): the switch statement and its case labels are not visible in
 * this excerpt, so the comments below describe each visible statement
 * without naming the command it belongs to.
 */
109 void GPUwriteStatus(uint32_t data)
// resolution lookup tables, indexed by bit fields of the status register
111 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
112 static const short vres[4] = { 240, 480, 256, 480 };
113 uint32_t cmd = data >> 24;
// remember the last value written per command; a repeated identical write
// is detected here, except for commands 0 and 5
// NOTE(review): the action on a match is not visible - presumably an early
// return; confirm
115 if (cmd < ARRAY_SIZE(gpu.regs)) {
116 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
118 gpu.regs[cmd] = data;
121 gpu.state.fb_dirty = 1;
// bit 0 of the argument -> blanking flag
128 gpu.status.blanking = data & 1;
// low 2 bits of the argument -> dma field
131 gpu.status.dma = data & 3;
// screen origin: x from bits 0-9, y from bits 10-19
134 gpu.screen.x = data & 0x3ff;
135 gpu.screen.y = (data >> 10) & 0x3ff;
136 if (gpu.frameskip.enabled)
// horizontal range: x1 from bits 0-11, x2 from bits 12-23
140 gpu.screen.x1 = data & 0xfff;
141 gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical range: y1 from bits 0-9, y2 from bits 10-19
145 gpu.screen.y1 = data & 0x3ff;
146 gpu.screen.y2 = (data >> 10) & 0x3ff;
// argument bits 0-5 go to status bits 17-22 and argument bit 6 to status
// bit 16; hres/vres are then looked up from status bits 16-18 and 19-20
150 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
151 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
152 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// commands 0x10-0x1f are tested for here
// NOTE(review): the body is not visible in this excerpt
157 if ((cmd & 0xf0) == 0x10)
/*
 * Per-command length table, indexed by the command byte.
 * check_cmd() computes a command's total size as 1 + cmd_lengths[cmd], so
 * each entry is the number of words that follow the command word itself.
 * Each row holds 16 entries; the trailing hex comments give the command
 * number of the first entry in that row.
 */
163 const unsigned char cmd_lengths[256] =
165 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
167 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
168 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
169 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
170 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
171 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
172 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
173 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the gpu.vram element at column x, row y, with rows 1024
// elements apart.
// NOTE(review): the expansion is not wrapped in parentheses, so it is only
// safe where the macro is the whole expression (as in do_vram_line);
// consider (&gpu.vram[...]).
183 #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
/*
 * Copy `l` 16-bit pixels (l * 2 bytes) between `mem` and VRAM at (x, y).
 * One memcpy goes VRAM -> mem, the other mem -> VRAM.
 * NOTE(review): the if/else choosing between them is not visible in this
 * excerpt - presumably it tests is_read (non-zero = read from VRAM); confirm.
 * No clipping or wrap-around is done in the visible lines.
 */
185 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
187 uint16_t *vram = VRAM_MEM_XY(x, y);
189 memcpy(mem, vram, l * 2);
191 memcpy(vram, mem, l * 2);
/*
 * Continue the VRAM transfer described by gpu.dma, moving up to `count`
 * 32-bit words between `data` and VRAM (direction given by is_read, which is
 * passed through to do_vram_line).
 * Returns the number of 32-bit words consumed from `data`.
 * NOTE(review): several lines are not visible in this excerpt (declaration
 * of `l`, clamping of the first partial row, write-back of the updated
 * position to gpu.dma), so only the visible steps are described.
 */
194 static int do_vram_io(uint32_t *data, int count, int is_read)
196 int count_initial = count;
197 uint16_t *sdata = (uint16_t *)data;
198 int x = gpu.dma.x, y = gpu.dma.y;
199 int w = gpu.dma.w, h = gpu.dma.h;
200 int o = gpu.dma.offset;
202 count *= 2; // operate in 16bpp pixels
// a non-zero offset means a row is partly done; handle what is left of it
204 if (gpu.dma.offset) {
205 l = w - gpu.dma.offset;
209 do_vram_line(x + o, y, sdata, l, is_read);
// transfer whole rows while both rows and enough pixels remain
222 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
224 do_vram_line(x, y, sdata, w, is_read);
// fewer than w pixels left: transfer a partial row
227 if (h > 0 && count > 0) {
229 do_vram_line(x, y, sdata, count, is_read);
// count is in pixels here; halve it to get back to 32-bit words
237 return count_initial - count / 2;
/*
 * Set up gpu.dma for a new VRAM transfer.
 * pos_word:  x in bits 0-9, y in bits 16-24.
 * size_word: width in the low 16 bits, height in the high 16 bits.
 * is_read:   non-zero for a read; used for the log message in the visible
 *            lines.
 * NOTE(review): the conditions guarding the anomaly log and the cache
 * invalidation are not visible in this excerpt.
 */
240 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
243 log_anomaly("start_vram_transfer while old unfinished\n");
// x is reduced to 0..1023 and y to 0..511
245 gpu.dma.x = pos_word & 1023;
246 gpu.dma.y = (pos_word >> 16) & 511;
247 gpu.dma.w = size_word & 0xffff; // ?
248 gpu.dma.h = size_word >> 16;
254 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
256 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
257 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
/*
 * Scan `count` words of command data at `data`, mirror state-changing
 * commands into gpu.status / gpu.ex_regs, hand runs of commands to
 * do_cmd_list() and start VRAM transfers.
 * Callers treat the return value as the number of words left unprocessed.
 * NOTE(review): many lines (extraction of `cmd`, several conditions, the
 * return statement) are not visible in this excerpt; comments below are
 * limited to what the visible lines show.
 */
260 static int check_cmd(uint32_t *data, int count)
262 int len, cmd, start, pos;
266 for (start = pos = 0; pos < count; )
// feed incoming words to a VRAM write (is_read = 0)
// NOTE(review): the guarding condition is not visible
272 pos += do_vram_io(data + pos, count - pos, 0);
278 // do look-ahead pass to detect SR changes and VRAM i/o
279 while (pos < count) {
280 uint32_t *list = data + pos;
// total size of this command: the command word plus its parameter words
282 len = 1 + cmd_lengths[cmd];
284 //printf(" %3d: %02x %d\n", pos, cmd, len);
285 if ((cmd & 0xf4) == 0x24) {
286 // flat textured prim
// low 9 status bits are taken from word 4 of the primitive
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[4] & 0x1ff;
290 else if ((cmd & 0xf4) == 0x34) {
291 // shaded textured prim
// low 9 status bits are taken from word 5 of the primitive
292 gpu.status.reg &= ~0x1ff;
293 gpu.status.reg |= list[5] & 0x1ff;
// low 11 status bits replaced from the command word
// NOTE(review): the condition selecting this branch is not visible
298 gpu.status.reg &= ~0x7ff;
299 gpu.status.reg |= list[0] & 0x7ff;
// status bits 11-12 replaced from the low 2 bits of the command word
// NOTE(review): the condition selecting this branch is not visible
302 gpu.status.reg &= ~0x1800;
303 gpu.status.reg |= (list[0] & 3) << 11;
// commands 0x02..0xbf
// NOTE(review): the body is not visible - presumably marks VRAM as
// modified; confirm
306 if (2 <= cmd && cmd < 0xc0)
// commands 0xe0..0xe7 are recorded in ex_regs
308 else if ((cmd & 0xf8) == 0xe0)
309 gpu.ex_regs[cmd & 7] = list[0];
311 if (pos + len > count) {
313 break; // incomplete cmd
// 0xa0 / 0xc0 are the VRAM write / read commands handled further down
315 if (cmd == 0xa0 || cmd == 0xc0)
// pass the commands gathered since `start` to the renderer, unless this
// frame is being skipped
320 if (pos - start > 0) {
321 if (!gpu.frameskip.active)
322 do_cmd_list(data + start, pos - start);
326 if (cmd == 0xa0 || cmd == 0xc0) {
327 // consume vram write/read cmd
// words 1 and 2 hold position and size; 0xc0 is the read variant
328 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
336 gpu.state.fb_dirty |= vram_dirty;
/*
 * Run check_cmd() over the buffered command words and move any words it left
 * unprocessed to the front of gpu.cmd_buffer (4 bytes per word).
 * NOTE(review): the condition around the memmove and the update of
 * gpu.cmd_len are not visible in this excerpt.
 */
341 static void flush_cmd_buffer(void)
343 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
// memmove because source and destination lie in the same buffer
345 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
/*
 * Process `count` command/data words from `mem` via check_cmd().
 * Words that check_cmd() leaves unprocessed are reported through
 * log_anomaly as discarded.
 * NOTE(review): the action taken when gpu.cmd_len > 0 and the condition
 * around the anomaly log are not visible in this excerpt.
 */
349 void GPUwriteDataMem(uint32_t *mem, int count)
353 log_io("gpu_dma_write %p %d\n", mem, count);
// previously buffered words are pending (expected to be rare)
355 if (unlikely(gpu.cmd_len > 0))
358 left = check_cmd(mem, count);
360 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
/*
 * Accept a single word: append it to gpu.cmd_buffer and test whether the
 * buffer has reached CMD_BUFFER_LEN words.
 * NOTE(review): the statement executed when the buffer is full is not
 * visible in this excerpt - presumably a flush; confirm.
 */
363 void GPUwriteData(uint32_t data)
365 log_io("gpu_write %08x\n", data);
366 gpu.cmd_buffer[gpu.cmd_len++] = data;
367 if (gpu.cmd_len >= CMD_BUFFER_LEN)
/*
 * Walk a linked list of command packets in emulated RAM and pass each
 * packet's payload to check_cmd().
 * rambase:    host pointer to the start of emulated RAM.
 * start_addr: address of the first list node; 0xffffff in the low 24 bits of
 *             a node's link field ends the list.
 * NOTE(review): several lines (extraction of `len`, the loop-marker test,
 * the guard before un-marking llist_entry, the return statement) are not
 * visible in this excerpt, so the return value is not described here.
 */
371 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
373 uint32_t addr, *list;
374 uint32_t *llist_entry = NULL;
375 int len, left, count;
// previously buffered words are pending (expected to be rare)
378 if (unlikely(gpu.cmd_len > 0))
381 // ff7 sends its main list twice, detect this
// conditions: same frame as the previous list, at most one hcnt tick later,
// and the previous list was longer than 1024 words
382 if (gpu.state.frame_count == gpu.state.last_list.frame &&
383 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
384 gpu.state.last_list.words > 1024)
// set bit 23 in the head node of the previous list
386 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
387 *llist_entry |= 0x800000;
390 log_io("gpu_dma_chain\n");
391 addr = start_addr & 0xffffff;
392 for (count = 0; addr != 0xffffff; count++)
// the address is masked to 21 bits before being used as a RAM offset
394 list = rambase + (addr & 0x1fffff) / 4;
// low 24 bits of the node header are the link to the next node
396 addr = list[0] & 0xffffff;
// each node counts as its header word plus `len` payload words
397 dma_words += 1 + len;
399 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
401 // loop detection marker
402 // (bit23 set causes DMA error on real machine, so
403 // unlikely to be ever set by the game)
// payload starts right after the header word
407 left = check_cmd(list + 1, len);
409 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
416 // remove loop detection markers
// re-walk the same `count` nodes, clearing bit 23 in each header
417 addr = start_addr & 0x1fffff;
418 while (count-- > 0) {
419 list = rambase + addr / 4;
420 addr = list[0] & 0x1fffff;
421 list[0] &= ~0x800000;
// NOTE(review): llist_entry starts as NULL; the guard before this
// dereference is not visible in this excerpt - confirm it exists
424 *llist_entry &= ~0x800000;
// remember this list so the next call can recognise an immediate resend
426 gpu.state.last_list.frame = gpu.state.frame_count;
427 gpu.state.last_list.hcnt = *gpu.state.hcnt;
428 gpu.state.last_list.words = dma_words;
429 gpu.state.last_list.addr = start_addr;
/*
 * Read `count` words of the current VRAM transfer into `mem` by calling
 * do_vram_io() with is_read = 1.
 * NOTE(review): the action taken when gpu.cmd_len > 0 and any condition
 * around the do_vram_io call are not visible in this excerpt.
 */
434 void GPUreadDataMem(uint32_t *mem, int count)
436 log_io("gpu_dma_read %p %d\n", mem, count);
// previously buffered words are pending (expected to be rare)
438 if (unlikely(gpu.cmd_len > 0))
442 do_vram_io(mem, count, 1);
/*
 * Read one word from the GPU data port: the visible call reads one word of
 * the current VRAM transfer into gpu.gp0.
 * NOTE(review): the action taken when gpu.cmd_len > 0, any condition around
 * the do_vram_io call and the return statement are not visible in this
 * excerpt - presumably gpu.gp0 is returned; confirm.
 */
445 uint32_t GPUreadData(void)
447 log_io("gpu_read\n");
// previously buffered words are pending (expected to be rare)
449 if (unlikely(gpu.cmd_len > 0))
453 do_vram_io(&gpu.gp0, 1, 1);
/*
 * Build the status value: gpu.status.reg with bit 31 supplied by the value
 * gpu.lcf_hc points at.
 * NOTE(review): the declaration of `ret`, the action taken when
 * gpu.cmd_len > 0 and the return statement are not visible in this excerpt.
 */
458 uint32_t GPUreadStatus(void)
// previously buffered words are pending (expected to be rare)
462 if (unlikely(gpu.cmd_len > 0))
// only the lowest bit of *gpu.lcf_hc survives the shift into bit 31
465 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
466 log_io("gpu_read_status %08x\n", ret);
/*
 * Save-state record exchanged with the main emulator through GPUfreeze().
 * NOTE(review): the closing line of the typedef is not visible in this
 * excerpt; GPUfreeze() refers to the type as GPUFreeze_t.
 */
470 typedef struct GPUFREEZETAG
472 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
473 uint32_t ulStatus; // current gpu status
474 uint32_t ulControl[256]; // latest control register values
475 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
/*
 * Save GPU state into `freeze` or restore it from `freeze`.
 * NOTE(review): the switch on `type`, its case labels, the declaration of
 * `i` and the return statement are not visible in this excerpt; the first
 * group of copies writes into `freeze` (save), the second reads from it
 * (load).
 */
478 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
// save: VRAM, control registers, ex_regs (stored from ulControl[0xe0]
// onward) and the status register
486 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
487 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
488 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
489 freeze->ulStatus = gpu.status.reg;
// load: invalidate renderer caches for the full 1024x512 area, then copy
// everything back
492 renderer_invalidate_caches(0, 0, 1024, 512);
493 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
494 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
495 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
496 gpu.status.reg = freeze->ulStatus;
// replay control registers 8..1 through GPUwriteStatus so derived state is
// rebuilt; bit 0 of the stored value is flipped first because
// GPUwriteStatus compares an incoming value with gpu.regs[cmd]
497 for (i = 8; i > 0; i--) {
498 gpu.regs[i] ^= 1; // avoid reg change detection
499 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
/*
 * Vertical-blank notification from the emulator core.
 * val:  NOTE(review): its use is not visible in this excerpt.
 * hcnt: pointer stored in gpu.state.hcnt; gpu_log and GPUdmaChain
 *       dereference it later, so it must stay valid after this call.
 * NOTE(review): the body of the interlace branch and any conditions around
 * the frame_count increment are not visible in this excerpt.
 */
507 void GPUvBlank(int val, uint32_t *hcnt)
// default: point lcf_hc at gpu.zero, the same target the init code uses
509 gpu.lcf_hc = &gpu.zero;
510 if (gpu.status.interlace) {
520 gpu.state.frame_count++;
522 gpu.state.hcnt = hcnt;
525 // vim:shiftwidth=2:expandtab