gpu_neon: vram fixes, list loop detection
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
1ab64c54 17
56f08d83 18//#define log_io printf
19#define log_io(...)
20#define log_anomaly printf
21
22struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54
GI
23
24long GPUinit(void)
25{
56f08d83 26 int ret = vout_init();
d30279e2 27 gpu.status.reg = 0x14802000;
56f08d83 28 gpu.lcf_hc = &gpu.zero;
29 return ret;
1ab64c54
GI
30}
31
/*
 * Shut the plugin down by stopping video output.
 *
 * Returns the result of vout_finish().
 */
long GPUshutdown(void)
{
  long vout_result = vout_finish();

  return vout_result;
}
36
1ab64c54
GI
37void GPUwriteStatus(uint32_t data)
38{
39 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
40 static const short vres[4] = { 240, 480, 256, 480 };
41 uint32_t cmd = data >> 24;
42
43 switch (data >> 24) {
44 case 0x00:
d30279e2 45 gpu.status.reg = 0x14802000;
1ab64c54
GI
46 break;
47 case 0x03:
d30279e2 48 gpu.status.blanking = data & 1;
1ab64c54
GI
49 break;
50 case 0x04:
51 gpu.status.dma = data & 3;
52 break;
53 case 0x05:
54 gpu.screen.x = data & 0x3ff;
55 gpu.screen.y = (data >> 10) & 0x3ff;
56 break;
57 case 0x07:
58 gpu.screen.y1 = data & 0x3ff;
59 gpu.screen.y2 = (data >> 10) & 0x3ff;
60 break;
61 case 0x08:
62 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
63 gpu.screen.w = hres[(gpu.status.reg >> 16) & 7];
64 gpu.screen.h = vres[(gpu.status.reg >> 19) & 3];
65 break;
66 }
67
68 if (cmd < ARRAY_SIZE(gpu.regs))
69 gpu.regs[cmd] = data;
70}
71
/*
 * Number of parameter words that follow the command word, indexed by
 * the command byte (bits 24-31 of the first word).  check_cmd() adds 1
 * for the command word itself.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
91
d30279e2
GI
92#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
93
94static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 95{
d30279e2
GI
96 uint16_t *vram = VRAM_MEM_XY(x, y);
97 if (is_read)
98 memcpy(mem, vram, l * 2);
99 else
100 memcpy(vram, mem, l * 2);
101}
102
103static int do_vram_io(uint32_t *data, int count, int is_read)
104{
105 int count_initial = count;
106 uint16_t *sdata = (uint16_t *)data;
107 int x = gpu.dma.x, y = gpu.dma.y;
108 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 109 int o = gpu.dma.offset;
d30279e2
GI
110 int l;
111 count *= 2; // operate in 16bpp pixels
112
113 if (gpu.dma.offset) {
114 l = w - gpu.dma.offset;
ddd56f6e 115 if (count < l)
d30279e2 116 l = count;
ddd56f6e 117
118 do_vram_line(x + o, y, sdata, l, is_read);
119
120 if (o + l < w)
121 o += l;
122 else {
123 o = 0;
124 y++;
125 h--;
126 }
d30279e2
GI
127 sdata += l;
128 count -= l;
d30279e2
GI
129 }
130
131 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
132 y &= 511;
133 do_vram_line(x, y, sdata, w, is_read);
134 }
135
136 if (h > 0 && count > 0) {
137 y &= 511;
138 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 139 o = count;
d30279e2
GI
140 count = 0;
141 }
d30279e2
GI
142 gpu.dma.y = y;
143 gpu.dma.h = h;
ddd56f6e 144 gpu.dma.offset = o;
d30279e2
GI
145
146 return count_initial - (count + 1) / 2;
147}
148
149static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
150{
ddd56f6e 151 if (gpu.dma.h)
152 log_anomaly("start_vram_transfer while old unfinished\n");
153
d30279e2
GI
154 gpu.dma.x = pos_word & 1023;
155 gpu.dma.y = (pos_word >> 16) & 511;
156 gpu.dma.w = size_word & 0xffff; // ?
157 gpu.dma.h = size_word >> 16;
158 gpu.dma.offset = 0;
159
160 if (is_read)
161 gpu.status.img = 1;
162
163 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
164 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
165}
166
167static int check_cmd(uint32_t *data, int count)
168{
169 int len, cmd, start, pos;
170
d30279e2 171 // process buffer
ddd56f6e 172 for (start = pos = 0; pos < count; )
d30279e2
GI
173 {
174 cmd = -1;
175 len = 0;
176
177 if (gpu.dma.h) {
178 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 179 if (pos == count)
180 break;
d30279e2
GI
181 start = pos;
182 }
183
ddd56f6e 184 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 185 while (pos < count) {
56f08d83 186 uint32_t *list = data + pos;
187 cmd = list[0] >> 24;
d30279e2 188 len = 1 + cmd_lengths[cmd];
56f08d83 189
d30279e2 190 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 191 if ((cmd & 0xf4) == 0x24) {
192 // flat textured prim
193 gpu.status.reg &= ~0x1ff;
194 gpu.status.reg |= list[4] & 0x1ff;
195 }
196 else if ((cmd & 0xf4) == 0x34) {
197 // shaded textured prim
198 gpu.status.reg &= ~0x1ff;
199 gpu.status.reg |= list[5] & 0x1ff;
200 }
201 else switch (cmd)
202 {
203 case 0xe1:
204 gpu.status.reg &= ~0x7ff;
205 gpu.status.reg |= list[0] & 0x7ff;
206 break;
207 case 0xe6:
208 gpu.status.reg &= ~0x1800;
209 gpu.status.reg |= (list[0] & 3) << 11;
210 break;
211 }
212
d30279e2
GI
213 if (pos + len > count) {
214 cmd = -1;
215 break; // incomplete cmd
216 }
217 if (cmd == 0xa0 || cmd == 0xc0)
218 break; // image i/o
219 pos += len;
220 }
221
222 if (pos - start > 0) {
56f08d83 223 do_cmd_list(data + start, pos - start);
d30279e2
GI
224 start = pos;
225 }
226
227 if (cmd == 0xa0 || cmd == 0xc0) {
228 // consume vram write/read cmd
229 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
230 pos += len;
231 }
232
ddd56f6e 233 if (cmd == -1)
234 break;
d30279e2 235 }
ddd56f6e 236
237 return count - pos;
d30279e2
GI
238}
239
240static void flush_cmd_buffer(void)
241{
242 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
243 if (left > 0)
244 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
245 gpu.cmd_len = left;
1ab64c54
GI
246}
247
248void GPUwriteDataMem(uint32_t *mem, int count)
249{
d30279e2
GI
250 int left;
251
56f08d83 252 log_io("gpu_dma_write %p %d\n", mem, count);
253
d30279e2
GI
254 if (unlikely(gpu.cmd_len > 0))
255 flush_cmd_buffer();
56f08d83 256
d30279e2
GI
257 left = check_cmd(mem, count);
258 if (left)
56f08d83 259 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
260}
261
d30279e2 262void GPUwriteData(uint32_t data)
1ab64c54 263{
56f08d83 264 log_io("gpu_write %08x\n", data);
d30279e2
GI
265 gpu.cmd_buffer[gpu.cmd_len++] = data;
266 if (gpu.cmd_len >= CMD_BUFFER_LEN)
267 flush_cmd_buffer();
1ab64c54
GI
268}
269
ddd56f6e 270long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 271{
ddd56f6e 272 uint32_t addr, *list;
273 int len, left, count;
d30279e2
GI
274
275 if (unlikely(gpu.cmd_len > 0))
276 flush_cmd_buffer();
277
56f08d83 278 log_io("gpu_dma_chain\n");
ddd56f6e 279 addr = start_addr & 0xffffff;
280 for (count = 0; addr != 0xffffff; count++)
281 {
56f08d83 282 log_io(".chain %08x\n", addr);
283
ddd56f6e 284 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
285 len = list[0] >> 24;
286 addr = list[0] & 0xffffff;
ddd56f6e 287
288 // loop detection marker
289 // (bit23 set causes DMA error on real machine, so
290 // unlikely to be ever set by the game)
291 list[0] |= 0x800000;
292
56f08d83 293 if (len) {
294 left = check_cmd(list + 1, len);
295 if (left)
296 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
297 }
ddd56f6e 298
299 if (addr & 0x800000)
300 break;
301 }
302
303 // remove loop detection markers
304 addr = start_addr & 0x1fffff;
305 while (count-- > 0) {
306 list = rambase + addr / 4;
307 addr = list[0] & 0x1fffff;
308 list[0] &= ~0x800000;
d30279e2
GI
309 }
310
1ab64c54
GI
311 return 0;
312}
313
d30279e2
GI
314void GPUreadDataMem(uint32_t *mem, int count)
315{
56f08d83 316 log_io("gpu_dma_read %p %d\n", mem, count);
317
d30279e2
GI
318 if (unlikely(gpu.cmd_len > 0))
319 flush_cmd_buffer();
56f08d83 320
d30279e2
GI
321 if (gpu.dma.h)
322 do_vram_io(mem, count, 1);
323}
324
325uint32_t GPUreadData(void)
326{
327 uint32_t v = 0;
56f08d83 328
329 log_io("gpu_read\n");
330
331 if (unlikely(gpu.cmd_len > 0))
332 flush_cmd_buffer();
333
334 if (gpu.dma.h)
335 do_vram_io(&v, 1, 1);
336
d30279e2
GI
337 return v;
338}
339
340uint32_t GPUreadStatus(void)
341{
ddd56f6e 342 uint32_t ret;
56f08d83 343
d30279e2
GI
344 if (unlikely(gpu.cmd_len > 0))
345 flush_cmd_buffer();
346
ddd56f6e 347 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
348 log_io("gpu_read_status %08x\n", ret);
349 return ret;
d30279e2
GI
350}
351
1ab64c54
GI
/* Savestate record exchanged with the caller of GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
359
360long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
361{
362 switch (type) {
363 case 1: // save
d30279e2
GI
364 if (gpu.cmd_len > 0)
365 flush_cmd_buffer();
1ab64c54
GI
366 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
367 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
368 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
369 break;
370 case 0: // load
371 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
372 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
373 gpu.status.reg = freeze->ulStatus;
1ab64c54
GI
374 GPUwriteStatus((5 << 24) | gpu.regs[5]);
375 GPUwriteStatus((7 << 24) | gpu.regs[7]);
376 GPUwriteStatus((8 << 24) | gpu.regs[8]);
377 break;
378 }
379
380 return 1;
381}
382
d30279e2 383void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 384{
d30279e2
GI
385 gpu.lcf_hc = &gpu.zero;
386 if (gpu.status.interlace) {
387 if (val)
388 gpu.status.lcf ^= 1;
389 }
390 else {
391 gpu.status.lcf = 0;
392 if (!val)
393 gpu.lcf_hc = hcnt;
394 }
1ab64c54
GI
395}
396
1ab64c54 397// vim:shiftwidth=2:expandtab