2 * (C) Gražvydas "notaz" Ignotas, 2011
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
// Element count of a true array, computed from sizeof; not meaningful when
// x is a pointer rather than an array.
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Branch hint: tells the compiler that x is expected to evaluate to 0.
16 #define unlikely(x) __builtin_expect((x), 0)
// Shorthand for the compiler attribute that prevents a function from being inlined.
17 #define noinline __attribute__((noinline))
// Uncomment to route log_io tracing to printf (the definition of log_io that is
// actually in effect is not visible in this listing).
19 //#define log_io printf
// Anomaly messages are sent straight to printf.
21 #define log_anomaly printf
// The single global GPU state object, aligned to a 64-byte boundary.
23 struct psx_gpu gpu __attribute__((aligned(64)));
// Initialization sequence. The enclosing function header is not visible in this
// listing (presumably GPUinit -- TODO confirm).
// Calls vout_init() and keeps its result in ret.
27 int ret = vout_init();
// Initial status register value; GPUwriteStatus stores the same constant in one
// of its branches.
28 gpu.status.reg = 0x14802000;
// Point lcf_hc at gpu.zero until something else is supplied.
29 gpu.lcf_hc = &gpu.zero;
// Shutdown entry point: takes no arguments and returns a long. The body is not
// visible in this listing.
33 long GPUshutdown(void)
// Recompute gpu.screen.w from the horizontal range gpu.screen.x1..x2 and the
// current gpu.screen.hres. Marked noinline.
38 static noinline void update_width(void)
// Span of the horizontal range.
40 int sw = gpu.screen.x2 - gpu.screen.x1;
// A non-positive span, or one of 2560 or more, is treated as out of range.
41 if (sw <= 0 || sw >= 2560)
// Out-of-range case: use the full horizontal resolution.
43 gpu.screen.w = gpu.screen.hres;
// Otherwise (the `else` line is presumably elided from this listing): scale
// hres by sw / 2560 using integer arithmetic.
45 gpu.screen.w = sw * gpu.screen.hres / 2560;
// Height counterpart of update_width. Only the start of the body is visible in
// this listing; presumably it ends by storing a screen height -- TODO confirm.
48 static noinline void update_height(void)
// Span of the vertical range.
50 int sh = gpu.screen.y2 - gpu.screen.y1;
// The dheight status flag selects special handling; the guarded statements are
// not visible here.
51 if (gpu.status.dheight)
// Handle a write to the GPU status/control port. The top byte of `data` selects
// the command and the lower bits carry its argument. The per-command case
// labels are not visible in this listing, so each group below is described only
// by what its visible statements do.
59 void GPUwriteStatus(uint32_t data)
// Resolution lookup tables, indexed by status-register bit fields (see the
// last two statements of this function).
61 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
62 static const short vres[4] = { 240, 480, 256, 480 };
// The command number is the top 8 bits of the written word.
63 uint32_t cmd = data >> 24;
// Bounds guard against gpu.regs; the guarded statement is not visible
// (presumably it records `data` in gpu.regs[cmd] -- TODO confirm).
65 if (cmd < ARRAY_SIZE(gpu.regs))
// Load the same status value that is used at initialization and set the
// blanking flag.
70 gpu.status.reg = 0x14802000;
71 gpu.status.blanking = 1;
// Blanking flag taken from argument bit 0.
74 gpu.status.blanking = data & 1;
// DMA field taken from the low two argument bits.
77 gpu.status.dma = data & 3;
// Screen origin: x from bits 0-9, y from bits 10-19.
80 gpu.screen.x = data & 0x3ff;
81 gpu.screen.y = (data >> 10) & 0x3ff;
// Horizontal range: x1 from bits 0-11, x2 from bits 12-23.
84 gpu.screen.x1 = data & 0xfff;
85 gpu.screen.x2 = (data >> 12) & 0xfff;
// Vertical range: y1 from bits 0-9, y2 from bits 10-19.
89 gpu.screen.y1 = data & 0x3ff;
90 gpu.screen.y2 = (data >> 10) & 0x3ff;
// Replace status bits 16-22: argument bits 0-5 land in status bits 17-22 and
// argument bit 6 lands in status bit 16.
94 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
// Derive the resolutions from the updated status word: hres is selected by
// status bits 16-18 and vres by status bits 19-20.
95 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
96 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
// Length table indexed by command byte (256 entries, 16 per row). check_cmd
// sizes a command as 1 + cmd_lengths[cmd], so each entry is the number of words
// that follow the command word itself. The trailing `// NN` comments give the
// hexadecimal index of the first entry in that row.
103 const unsigned char cmd_lengths[256] =
105 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
107 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
108 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
109 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
110 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
111 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
112 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
113 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
114 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
116 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
118 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
120 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of the gpu.vram element at column x, row y; consecutive rows are
// 1024 elements apart. The whole expansion is parenthesized so that a postfix
// operator written after the macro at a call site applies to the resulting
// pointer rather than binding to gpu.vram[...] first.
123 #define VRAM_MEM_XY(x, y) (&gpu.vram[(y) * 1024 + (x)])
// Copy one run of `l` 16-bit pixels between the caller's buffer `mem` and VRAM
// starting at (x, y). The branch lines are not visible in this listing; the two
// memcpy calls are presumably selected by is_read -- TODO confirm.
125 static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
// VRAM address of the first pixel of the run.
127 uint16_t *vram = VRAM_MEM_XY(x, y);
// VRAM -> buffer; l pixels of 2 bytes each.
129 memcpy(mem, vram, l * 2);
// Buffer -> VRAM; l pixels of 2 bytes each.
131 memcpy(vram, mem, l * 2);
// Move pixel data between a word buffer and VRAM for the transfer rectangle
// held in gpu.dma (x, y, w, h, offset). `count` is in 32-bit words; the return
// value is the number of words consumed. is_read is forwarded to do_vram_line.
// Several statements are elided from this listing (for example the declaration
// of `l`), so only the visible steps are described.
134 static int do_vram_io(uint32_t *data, int count, int is_read)
// Remember the word count so the consumed amount can be computed at the end.
136 int count_initial = count;
// View the word buffer as 16-bit pixels.
137 uint16_t *sdata = (uint16_t *)data;
138 int x = gpu.dma.x, y = gpu.dma.y;
139 int w = gpu.dma.w, h = gpu.dma.h;
140 int o = gpu.dma.offset;
142 count *= 2; // operate in 16bpp pixels
// A non-zero offset means the current line was only partly transferred:
// handle the remaining w - offset pixels of that line first.
144 if (gpu.dma.offset) {
145 l = w - gpu.dma.offset;
149 do_vram_line(x + o, y, sdata, l, is_read);
// Transfer whole lines while rows remain and at least one full line of pixels
// is available.
162 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
164 do_vram_line(x, y, sdata, w, is_read);
// Fewer than w pixels left but rows remain: transfer a partial line.
167 if (h > 0 && count > 0) {
169 do_vram_line(x, y, sdata, count, is_read);
// Convert the leftover pixel count back to words (rounding up) and report how
// many of the initial words were used.
177 return count_initial - (count + 1) / 2;
// Set up gpu.dma for a new VRAM transfer from the two parameter words of the
// command. Statements between the visible lines (including the condition
// guarding the anomaly message and any use of is_read) are not shown here.
180 static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
// Reported when a previous transfer is still pending (the guarding condition is
// not visible).
183 log_anomaly("start_vram_transfer while old unfinished\n");
// x is the low 10 bits of pos_word; y is 9 bits taken from bit 16 upward.
185 gpu.dma.x = pos_word & 1023;
186 gpu.dma.y = (pos_word >> 16) & 511;
// Width is the low half of size_word, height the high half.
187 gpu.dma.w = size_word & 0xffff; // ?
188 gpu.dma.h = size_word >> 16;
194 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
195 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
// Walk a buffer of `count` command words: feed pending VRAM-write data to
// do_vram_io, scan ahead over complete commands while mirroring their effect on
// gpu.status.reg, hand each scanned run to do_cmd_list, and start a VRAM
// transfer when an 0xa0/0xc0 command is reached. Many lines (conditions, the
// assignment of `cmd`, the return statement) are elided from this listing;
// callers treat the return value as the number of words left unprocessed.
198 static int check_cmd(uint32_t *data, int count)
200 int len, cmd, start, pos;
// Outer loop: `start` marks the beginning of the run not yet handed to
// do_cmd_list, `pos` is the scan position.
203 for (start = pos = 0; pos < count; )
// Data for an in-progress VRAM write (is_read == 0) is consumed here; the
// guarding condition is not visible.
209 pos += do_vram_io(data + pos, count - pos, 0);
215 // do look-ahead pass to detect SR changes and VRAM i/o
216 while (pos < count) {
217 uint32_t *list = data + pos;
// Total command size: the command word plus its table-defined parameter words.
219 len = 1 + cmd_lengths[cmd];
221 //printf(" %3d: %02x %d\n", pos, cmd, len);
// NOTE(review): list[4] / list[5] below are read before the visible
// `pos + len > count` completeness check -- confirm the buffer cannot end
// mid-command at this point.
222 if ((cmd & 0xf4) == 0x24) {
223 // flat textured prim
// Low 9 status bits are replaced from the command's word 4.
224 gpu.status.reg &= ~0x1ff;
225 gpu.status.reg |= list[4] & 0x1ff;
227 else if ((cmd & 0xf4) == 0x34) {
228 // shaded textured prim
// Low 9 status bits are replaced from the command's word 5.
229 gpu.status.reg &= ~0x1ff;
230 gpu.status.reg |= list[5] & 0x1ff;
// Low 11 status bits are replaced from the command word itself (the selecting
// condition is not visible).
235 gpu.status.reg &= ~0x7ff;
236 gpu.status.reg |= list[0] & 0x7ff;
// Status bits 11-12 are replaced from the low two bits of the command word
// (the selecting condition is not visible).
239 gpu.status.reg &= ~0x1800;
240 gpu.status.reg |= (list[0] & 3) << 11;
// Stop scanning when the buffer ends before the command does.
244 if (pos + len > count) {
246 break; // incomplete cmd
// VRAM write (0xa0) / read (0xc0) commands end the look-ahead run; the guarded
// statement is not visible.
248 if (cmd == 0xa0 || cmd == 0xc0)
// Hand the scanned run, if non-empty, to do_cmd_list.
253 if (pos - start > 0) {
254 do_cmd_list(data + start, pos - start);
258 if (cmd == 0xa0 || cmd == 0xc0) {
259 // consume vram write/read cmd
// The two words after the command give position and size; 0xc0 selects a read.
260 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
// Run the buffered command words through check_cmd and keep whatever it left
// unprocessed at the front of gpu.cmd_buffer. Statements between the visible
// lines (e.g. any update of gpu.cmd_len) are not shown in this listing.
271 static void flush_cmd_buffer(void)
273 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
// Move the trailing `left` words (4 bytes each) to the start of the buffer;
// memmove is used because source and destination may overlap.
275 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
// Block write of `count` words from `mem` to the GPU data port. Some lines
// (the declaration of `left`, the guarded statements) are elided from this
// listing.
279 void GPUwriteDataMem(uint32_t *mem, int count)
283 log_io("gpu_dma_write %p %d\n", mem, count);
// Previously buffered single-word writes are pending; the guarded statement is
// not visible (presumably a flush of the command buffer -- TODO confirm).
285 if (unlikely(gpu.cmd_len > 0))
// Process the block directly; `left` is what check_cmd did not handle.
288 left = check_cmd(mem, count);
// Reports the words that were not processed (the guarding condition is not
// visible).
290 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Single-word write to the GPU data port: the word is appended to
// gpu.cmd_buffer rather than processed immediately.
293 void GPUwriteData(uint32_t data)
295 log_io("gpu_write %08x\n", data);
296 gpu.cmd_buffer[gpu.cmd_len++] = data;
// Buffer full; the guarded statement is not visible in this listing
// (presumably the buffer is flushed -- TODO confirm).
297 if (gpu.cmd_len >= CMD_BUFFER_LEN)
// Walk a linked list of command packets in emulated RAM (`rambase`), starting
// at `start_addr`, and pass each packet's payload to check_cmd. Each node's
// first word holds the next address in its low 24 bits; 0xffffff ends the
// chain. Several lines (the computation of `len`, the setting of the loop
// marker, the return value) are elided from this listing.
301 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
303 uint32_t addr, *list;
304 int len, left, count;
// Buffered single-word writes are pending; the guarded statement is not
// visible (presumably a flush -- TODO confirm).
306 if (unlikely(gpu.cmd_len > 0))
309 log_io("gpu_dma_chain\n");
310 addr = start_addr & 0xffffff;
// `count` tallies visited nodes so the cleanup pass below can revisit them.
311 for (count = 0; addr != 0xffffff; count++)
313 log_io(".chain %08x\n", addr);
// Node location: the address is masked to 0x1fffff and converted from bytes to
// a word index.
315 list = rambase + (addr & 0x1fffff) / 4;
// Next-node address from the header word.
317 addr = list[0] & 0xffffff;
319 // loop detection marker
320 // (bit23 set causes DMA error on real machine, so
321 // unlikely to be ever set by the game)
// Payload starts right after the header word.
325 left = check_cmd(list + 1, len);
// NOTE(review): the message names GPUwriteDataMem although this is
// GPUdmaChain -- confirm whether that is intentional.
327 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
334 // remove loop detection markers
// Second pass over the same `count` nodes, clearing bit 23 of each header.
335 addr = start_addr & 0x1fffff;
336 while (count-- > 0) {
337 list = rambase + addr / 4;
338 addr = list[0] & 0x1fffff;
339 list[0] &= ~0x800000;
// Block read of `count` words from the GPU into `mem`.
345 void GPUreadDataMem(uint32_t *mem, int count)
347 log_io("gpu_dma_read %p %d\n", mem, count);
// Buffered writes are pending; the guarded statement is not visible in this
// listing (presumably a flush so reads see up-to-date state -- TODO confirm).
349 if (unlikely(gpu.cmd_len > 0))
// Read direction (is_read = 1). Any condition guarding this call is not visible.
353 do_vram_io(mem, count, 1);
// Single-word read from the GPU data port. The declaration of `v` and the
// return statement are not visible in this listing.
356 uint32_t GPUreadData(void)
360 log_io("gpu_read\n");
// Buffered writes are pending; the guarded statement is not visible
// (presumably a flush -- TODO confirm).
362 if (unlikely(gpu.cmd_len > 0))
// Read one word (is_read = 1) into v.
366 do_vram_io(&v, 1, 1);
// Read the GPU status word. The declaration of `ret` and the return statement
// are not visible in this listing.
371 uint32_t GPUreadStatus(void)
// Buffered writes are pending; the guarded statement is not visible
// (presumably a flush so status reflects them -- TODO confirm).
375 if (unlikely(gpu.cmd_len > 0))
// Status register ORed with the value at gpu.lcf_hc shifted left by 31, so for
// a 32-bit operand only that value's lowest bit contributes, as bit 31.
378 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
379 log_io("gpu_read_status %08x\n", ret);
// Save-state record exchanged with the caller of GPUfreeze. The closing line of
// the typedef is not visible in this listing; GPUfreeze refers to the type as
// GPUFreeze_t.
383 typedef struct GPUFREEZETAG
385 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
386 uint32_t ulStatus; // current gpu status
387 uint32_t ulControl[256]; // latest control register values
388 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
// Save or restore GPU state through `freeze`. The dispatch on `type` and the
// return value are not visible in this listing; the two groups of statements
// below are the save and restore paths respectively.
391 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
// Save: copy VRAM, the control registers and the status word out. The copy
// sizes come from the gpu arrays, not from the freeze buffers.
397 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
398 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
399 freeze->ulStatus = gpu.status.reg;
// Restore: copy the same three items back in.
402 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
403 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
404 gpu.status.reg = freeze->ulStatus;
// Replay control commands 5, 7 and 8 from the restored registers so that
// GPUwriteStatus recomputes the state it derives from them.
405 GPUwriteStatus((5 << 24) | gpu.regs[5]);
406 GPUwriteStatus((7 << 24) | gpu.regs[7]);
407 GPUwriteStatus((8 << 24) | gpu.regs[8]);
// Vertical-blank notification. Only the start of the body is visible in this
// listing; how `val` and `hcnt` are used is not shown.
414 void GPUvBlank(int val, uint32_t *hcnt)
// Default: lcf_hc points at gpu.zero.
416 gpu.lcf_hc = &gpu.zero;
// Interlaced mode gets special handling; the guarded statements are not visible.
417 if (gpu.status.interlace) {
428 // vim:shiftwidth=2:expandtab