gpu_neon: vram fixes, list loop detection
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
/* Element count of a real array (gives nonsense when handed a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Branch hint: the condition is expected to be false.
 * The argument is collapsed to 0/1 with !! first, because
 * __builtin_expect() takes a long: without it a wider value could lose
 * its set bits in the conversion and pointers would not be accepted.
 */
#define unlikely(x) __builtin_expect(!!(x), 0)

/* I/O tracing is compiled out; swap the two log_io lines to enable it. */
//#define log_io printf
#define log_io(...)
/* Unexpected-but-survivable conditions are reported on stdout. */
#define log_anomaly printf
21
22struct psx_gpu gpu __attribute__((aligned(64)));
23
24long GPUinit(void)
25{
26 int ret = vout_init();
27 gpu.status.reg = 0x14802000;
28 gpu.lcf_hc = &gpu.zero;
29 return ret;
30}
31
/*
 * Shut the video output layer down.
 *
 * Returns the value reported by vout_finish().
 */
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
36
37void GPUwriteStatus(uint32_t data)
38{
39 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
40 static const short vres[4] = { 240, 480, 256, 480 };
41 uint32_t cmd = data >> 24;
42
43 switch (data >> 24) {
44 case 0x00:
45 gpu.status.reg = 0x14802000;
46 break;
47 case 0x03:
48 gpu.status.blanking = data & 1;
49 break;
50 case 0x04:
51 gpu.status.dma = data & 3;
52 break;
53 case 0x05:
54 gpu.screen.x = data & 0x3ff;
55 gpu.screen.y = (data >> 10) & 0x3ff;
56 break;
57 case 0x07:
58 gpu.screen.y1 = data & 0x3ff;
59 gpu.screen.y2 = (data >> 10) & 0x3ff;
60 break;
61 case 0x08:
62 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
63 gpu.screen.w = hres[(gpu.status.reg >> 16) & 7];
64 gpu.screen.h = vres[(gpu.status.reg >> 19) & 3];
65 break;
66 }
67
68 if (cmd < ARRAY_SIZE(gpu.regs))
69 gpu.regs[cmd] = data;
70}
71
/*
 * Number of words that follow the command word, indexed by the command
 * byte (top 8 bits of the first word). check_cmd() takes
 * 1 + cmd_lengths[cmd] as the full length of a command.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 20 */  3,  3,  3,  3,  6,  6,  6,  6,  4,  4,  4,  4,  8,  8,  8,  8,
  /* 30 */  5,  5,  5,  5,  8,  8,  8,  8,  7,  7,  7,  7, 11, 11, 11, 11,
  /* 40 */  2,  2,  2,  2,  0,  0,  0,  0,  3,  3,  3,  3,  3,  3,  3,  3,
  /* 50 */  3,  3,  3,  3,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,
  /* 60 */  2,  2,  2,  2,  3,  3,  3,  3,  1,  1,  1,  1,  2,  2,  2,  2,
  /* 70 */  1,  1,  1,  1,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,  2,  2,
  /* 80 */  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 90 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* a0 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* b0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* c0 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* d0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* e0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* f0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
91
92#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
93
94static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
95{
96 uint16_t *vram = VRAM_MEM_XY(x, y);
97 if (is_read)
98 memcpy(mem, vram, l * 2);
99 else
100 memcpy(vram, mem, l * 2);
101}
102
103static int do_vram_io(uint32_t *data, int count, int is_read)
104{
105 int count_initial = count;
106 uint16_t *sdata = (uint16_t *)data;
107 int x = gpu.dma.x, y = gpu.dma.y;
108 int w = gpu.dma.w, h = gpu.dma.h;
109 int o = gpu.dma.offset;
110 int l;
111 count *= 2; // operate in 16bpp pixels
112
113 if (gpu.dma.offset) {
114 l = w - gpu.dma.offset;
115 if (count < l)
116 l = count;
117
118 do_vram_line(x + o, y, sdata, l, is_read);
119
120 if (o + l < w)
121 o += l;
122 else {
123 o = 0;
124 y++;
125 h--;
126 }
127 sdata += l;
128 count -= l;
129 }
130
131 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
132 y &= 511;
133 do_vram_line(x, y, sdata, w, is_read);
134 }
135
136 if (h > 0 && count > 0) {
137 y &= 511;
138 do_vram_line(x, y, sdata, count, is_read);
139 o = count;
140 count = 0;
141 }
142 gpu.dma.y = y;
143 gpu.dma.h = h;
144 gpu.dma.offset = o;
145
146 return count_initial - (count + 1) / 2;
147}
148
149static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
150{
151 if (gpu.dma.h)
152 log_anomaly("start_vram_transfer while old unfinished\n");
153
154 gpu.dma.x = pos_word & 1023;
155 gpu.dma.y = (pos_word >> 16) & 511;
156 gpu.dma.w = size_word & 0xffff; // ?
157 gpu.dma.h = size_word >> 16;
158 gpu.dma.offset = 0;
159
160 if (is_read)
161 gpu.status.img = 1;
162
163 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
164 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
165}
166
167static int check_cmd(uint32_t *data, int count)
168{
169 int len, cmd, start, pos;
170
171 // process buffer
172 for (start = pos = 0; pos < count; )
173 {
174 cmd = -1;
175 len = 0;
176
177 if (gpu.dma.h) {
178 pos += do_vram_io(data + pos, count - pos, 0);
179 if (pos == count)
180 break;
181 start = pos;
182 }
183
184 // do look-ahead pass to detect SR changes and VRAM i/o
185 while (pos < count) {
186 uint32_t *list = data + pos;
187 cmd = list[0] >> 24;
188 len = 1 + cmd_lengths[cmd];
189
190 //printf(" %3d: %02x %d\n", pos, cmd, len);
191 if ((cmd & 0xf4) == 0x24) {
192 // flat textured prim
193 gpu.status.reg &= ~0x1ff;
194 gpu.status.reg |= list[4] & 0x1ff;
195 }
196 else if ((cmd & 0xf4) == 0x34) {
197 // shaded textured prim
198 gpu.status.reg &= ~0x1ff;
199 gpu.status.reg |= list[5] & 0x1ff;
200 }
201 else switch (cmd)
202 {
203 case 0xe1:
204 gpu.status.reg &= ~0x7ff;
205 gpu.status.reg |= list[0] & 0x7ff;
206 break;
207 case 0xe6:
208 gpu.status.reg &= ~0x1800;
209 gpu.status.reg |= (list[0] & 3) << 11;
210 break;
211 }
212
213 if (pos + len > count) {
214 cmd = -1;
215 break; // incomplete cmd
216 }
217 if (cmd == 0xa0 || cmd == 0xc0)
218 break; // image i/o
219 pos += len;
220 }
221
222 if (pos - start > 0) {
223 do_cmd_list(data + start, pos - start);
224 start = pos;
225 }
226
227 if (cmd == 0xa0 || cmd == 0xc0) {
228 // consume vram write/read cmd
229 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
230 pos += len;
231 }
232
233 if (cmd == -1)
234 break;
235 }
236
237 return count - pos;
238}
239
240static void flush_cmd_buffer(void)
241{
242 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
243 if (left > 0)
244 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
245 gpu.cmd_len = left;
246}
247
248void GPUwriteDataMem(uint32_t *mem, int count)
249{
250 int left;
251
252 log_io("gpu_dma_write %p %d\n", mem, count);
253
254 if (unlikely(gpu.cmd_len > 0))
255 flush_cmd_buffer();
256
257 left = check_cmd(mem, count);
258 if (left)
259 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
260}
261
262void GPUwriteData(uint32_t data)
263{
264 log_io("gpu_write %08x\n", data);
265 gpu.cmd_buffer[gpu.cmd_len++] = data;
266 if (gpu.cmd_len >= CMD_BUFFER_LEN)
267 flush_cmd_buffer();
268}
269
270long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
271{
272 uint32_t addr, *list;
273 int len, left, count;
274
275 if (unlikely(gpu.cmd_len > 0))
276 flush_cmd_buffer();
277
278 log_io("gpu_dma_chain\n");
279 addr = start_addr & 0xffffff;
280 for (count = 0; addr != 0xffffff; count++)
281 {
282 log_io(".chain %08x\n", addr);
283
284 list = rambase + (addr & 0x1fffff) / 4;
285 len = list[0] >> 24;
286 addr = list[0] & 0xffffff;
287
288 // loop detection marker
289 // (bit23 set causes DMA error on real machine, so
290 // unlikely to be ever set by the game)
291 list[0] |= 0x800000;
292
293 if (len) {
294 left = check_cmd(list + 1, len);
295 if (left)
296 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
297 }
298
299 if (addr & 0x800000)
300 break;
301 }
302
303 // remove loop detection markers
304 addr = start_addr & 0x1fffff;
305 while (count-- > 0) {
306 list = rambase + addr / 4;
307 addr = list[0] & 0x1fffff;
308 list[0] &= ~0x800000;
309 }
310
311 return 0;
312}
313
314void GPUreadDataMem(uint32_t *mem, int count)
315{
316 log_io("gpu_dma_read %p %d\n", mem, count);
317
318 if (unlikely(gpu.cmd_len > 0))
319 flush_cmd_buffer();
320
321 if (gpu.dma.h)
322 do_vram_io(mem, count, 1);
323}
324
325uint32_t GPUreadData(void)
326{
327 uint32_t v = 0;
328
329 log_io("gpu_read\n");
330
331 if (unlikely(gpu.cmd_len > 0))
332 flush_cmd_buffer();
333
334 if (gpu.dma.h)
335 do_vram_io(&v, 1, 1);
336
337 return v;
338}
339
340uint32_t GPUreadStatus(void)
341{
342 uint32_t ret;
343
344 if (unlikely(gpu.cmd_len > 0))
345 flush_cmd_buffer();
346
347 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
348 log_io("gpu_read_status %08x\n", ret);
349 return ret;
350}
351
/* Save-state record exchanged with the emulator core through GPUfreeze(). */
struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;              // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                     // current gpu status
  uint32_t ulControl[256];               // latest control register values
  unsigned char psxVRam[1024 * 1024 * 2]; // current VRam image (full 2 MB for ZN)
};

typedef struct GPUFREEZETAG GPUFreeze_t;
359
360long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
361{
362 switch (type) {
363 case 1: // save
364 if (gpu.cmd_len > 0)
365 flush_cmd_buffer();
366 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
367 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
368 freeze->ulStatus = gpu.status.reg;
369 break;
370 case 0: // load
371 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
372 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
373 gpu.status.reg = freeze->ulStatus;
374 GPUwriteStatus((5 << 24) | gpu.regs[5]);
375 GPUwriteStatus((7 << 24) | gpu.regs[7]);
376 GPUwriteStatus((8 << 24) | gpu.regs[8]);
377 break;
378 }
379
380 return 1;
381}
382
383void GPUvBlank(int val, uint32_t *hcnt)
384{
385 gpu.lcf_hc = &gpu.zero;
386 if (gpu.status.interlace) {
387 if (val)
388 gpu.status.lcf ^= 1;
389 }
390 else {
391 gpu.status.lcf = 0;
392 if (!val)
393 gpu.lcf_hc = hcnt;
394 }
395}
396
397// vim:shiftwidth=2:expandtab