gpu_neon: do list resubmit detection
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
23#define log_io(...)
24#define log_anomaly gpu_log
25//#define log_anomaly(...)
26
27struct psx_gpu gpu __attribute__((aligned(64)));
28
29static noinline void do_reset(void)
30{
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
33 gpu.gp0 = 0;
34 gpu.regs[3] = 1;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
37}
38
39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
93 int ret = vout_init();
94 do_reset();
95 gpu.lcf_hc = &gpu.zero;
96 gpu.state.frame_count = 0;
97 gpu.state.hcnt = &gpu.zero;
98 return ret;
99}
100
/* Plugin entry point: shut down video output and return its result. */
long GPUshutdown(void)
{
  long ret = vout_finish();

  return ret;
}
105
106void GPUwriteStatus(uint32_t data)
107{
108 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
109 static const short vres[4] = { 240, 480, 256, 480 };
110 uint32_t cmd = data >> 24;
111
112 if (cmd < ARRAY_SIZE(gpu.regs)) {
113 if (cmd != 0 && gpu.regs[cmd] == data)
114 return;
115 gpu.regs[cmd] = data;
116 }
117
118 gpu.state.fb_dirty = 1;
119
120 switch (cmd) {
121 case 0x00:
122 do_reset();
123 break;
124 case 0x03:
125 gpu.status.blanking = data & 1;
126 break;
127 case 0x04:
128 gpu.status.dma = data & 3;
129 break;
130 case 0x05:
131 gpu.screen.x = data & 0x3ff;
132 gpu.screen.y = (data >> 10) & 0x3ff;
133 if (gpu.frameskip.enabled)
134 decide_frameskip();
135 break;
136 case 0x06:
137 gpu.screen.x1 = data & 0xfff;
138 gpu.screen.x2 = (data >> 12) & 0xfff;
139 update_width();
140 break;
141 case 0x07:
142 gpu.screen.y1 = data & 0x3ff;
143 gpu.screen.y2 = (data >> 10) & 0x3ff;
144 update_height();
145 break;
146 case 0x08:
147 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
148 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
149 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
150 update_width();
151 update_height();
152 break;
153 default:
154 if ((cmd & 0xf0) == 0x10)
155 get_gpu_info(data);
156 break;
157 }
158}
159
/*
 * Number of words that follow the command word, indexed by command number
 * (the top byte of the first word). check_cmd() advances by
 * 1 + cmd_lengths[cmd]. Commands not listed here have length 0.
 */
const unsigned char cmd_lengths[256] =
{
  [0x02]          = 2,

  [0x20 ... 0x23] = 3,
  [0x24 ... 0x27] = 6,
  [0x28 ... 0x2b] = 4,
  [0x2c ... 0x2f] = 8,

  [0x30 ... 0x33] = 5,
  [0x34 ... 0x37] = 8,
  [0x38 ... 0x3b] = 7,
  [0x3c ... 0x3f] = 11,

  [0x40 ... 0x43] = 2,
  [0x48 ... 0x4f] = 3,

  [0x50 ... 0x53] = 3,
  [0x58 ... 0x5f] = 4,

  [0x60 ... 0x63] = 2,
  [0x64 ... 0x67] = 3,
  [0x68 ... 0x6b] = 1,
  [0x6c ... 0x6f] = 2,

  [0x70 ... 0x73] = 1,
  [0x74 ... 0x77] = 2,
  [0x78 ... 0x7b] = 1,
  [0x7c ... 0x7f] = 2,

  [0x80]          = 3,
  [0xa0]          = 2,
  [0xc0]          = 2,
};
179
180#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
181
182static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
183{
184 uint16_t *vram = VRAM_MEM_XY(x, y);
185 if (is_read)
186 memcpy(mem, vram, l * 2);
187 else
188 memcpy(vram, mem, l * 2);
189}
190
191static int do_vram_io(uint32_t *data, int count, int is_read)
192{
193 int count_initial = count;
194 uint16_t *sdata = (uint16_t *)data;
195 int x = gpu.dma.x, y = gpu.dma.y;
196 int w = gpu.dma.w, h = gpu.dma.h;
197 int o = gpu.dma.offset;
198 int l;
199 count *= 2; // operate in 16bpp pixels
200
201 if (gpu.dma.offset) {
202 l = w - gpu.dma.offset;
203 if (count < l)
204 l = count;
205
206 do_vram_line(x + o, y, sdata, l, is_read);
207
208 if (o + l < w)
209 o += l;
210 else {
211 o = 0;
212 y++;
213 h--;
214 }
215 sdata += l;
216 count -= l;
217 }
218
219 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
220 y &= 511;
221 do_vram_line(x, y, sdata, w, is_read);
222 }
223
224 if (h > 0 && count > 0) {
225 y &= 511;
226 do_vram_line(x, y, sdata, count, is_read);
227 o = count;
228 count = 0;
229 }
230 gpu.dma.y = y;
231 gpu.dma.h = h;
232 gpu.dma.offset = o;
233
234 return count_initial - count / 2;
235}
236
237static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
238{
239 if (gpu.dma.h)
240 log_anomaly("start_vram_transfer while old unfinished\n");
241
242 gpu.dma.x = pos_word & 1023;
243 gpu.dma.y = (pos_word >> 16) & 511;
244 gpu.dma.w = size_word & 0xffff; // ?
245 gpu.dma.h = size_word >> 16;
246 gpu.dma.offset = 0;
247
248 if (is_read)
249 gpu.status.img = 1;
250
251 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
252 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
253}
254
255static int check_cmd(uint32_t *data, int count)
256{
257 int len, cmd, start, pos;
258 int vram_dirty = 0;
259
260 // process buffer
261 for (start = pos = 0; pos < count; )
262 {
263 cmd = -1;
264 len = 0;
265
266 if (gpu.dma.h) {
267 pos += do_vram_io(data + pos, count - pos, 0);
268 if (pos == count)
269 break;
270 start = pos;
271 }
272
273 // do look-ahead pass to detect SR changes and VRAM i/o
274 while (pos < count) {
275 uint32_t *list = data + pos;
276 cmd = list[0] >> 24;
277 len = 1 + cmd_lengths[cmd];
278
279 //printf(" %3d: %02x %d\n", pos, cmd, len);
280 if ((cmd & 0xf4) == 0x24) {
281 // flat textured prim
282 gpu.status.reg &= ~0x1ff;
283 gpu.status.reg |= list[4] & 0x1ff;
284 }
285 else if ((cmd & 0xf4) == 0x34) {
286 // shaded textured prim
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[5] & 0x1ff;
289 }
290 else switch (cmd)
291 {
292 case 0xe1:
293 gpu.status.reg &= ~0x7ff;
294 gpu.status.reg |= list[0] & 0x7ff;
295 break;
296 case 0xe6:
297 gpu.status.reg &= ~0x1800;
298 gpu.status.reg |= (list[0] & 3) << 11;
299 break;
300 }
301 if (2 <= cmd && cmd < 0xc0)
302 vram_dirty = 1;
303 else if ((cmd & 0xf8) == 0xe0)
304 gpu.ex_regs[cmd & 7] = list[0];
305
306 if (pos + len > count) {
307 cmd = -1;
308 break; // incomplete cmd
309 }
310 if (cmd == 0xa0 || cmd == 0xc0)
311 break; // image i/o
312 pos += len;
313 }
314
315 if (pos - start > 0) {
316 if (!gpu.frameskip.active)
317 do_cmd_list(data + start, pos - start);
318 start = pos;
319 }
320
321 if (cmd == 0xa0 || cmd == 0xc0) {
322 // consume vram write/read cmd
323 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
324 pos += len;
325 }
326
327 if (cmd == -1)
328 break;
329 }
330
331 gpu.state.fb_dirty |= vram_dirty;
332
333 return count - pos;
334}
335
336static void flush_cmd_buffer(void)
337{
338 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
339 if (left > 0)
340 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
341 gpu.cmd_len = left;
342}
343
344void GPUwriteDataMem(uint32_t *mem, int count)
345{
346 int left;
347
348 log_io("gpu_dma_write %p %d\n", mem, count);
349
350 if (unlikely(gpu.cmd_len > 0))
351 flush_cmd_buffer();
352
353 left = check_cmd(mem, count);
354 if (left)
355 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
356}
357
358void GPUwriteData(uint32_t data)
359{
360 log_io("gpu_write %08x\n", data);
361 gpu.cmd_buffer[gpu.cmd_len++] = data;
362 if (gpu.cmd_len >= CMD_BUFFER_LEN)
363 flush_cmd_buffer();
364}
365
366long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
367{
368 uint32_t addr, *list;
369 uint32_t *llist_entry = NULL;
370 int len, left, count;
371 long dma_words = 0;
372
373 if (unlikely(gpu.cmd_len > 0))
374 flush_cmd_buffer();
375
376 // ff7 sends it's main list twice, detect this
377 if (gpu.state.frame_count == gpu.state.last_list.frame &&
378 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
379 gpu.state.last_list.words > 1024)
380 {
381 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
382 *llist_entry |= 0x800000;
383 }
384
385 log_io("gpu_dma_chain\n");
386 addr = start_addr & 0xffffff;
387 for (count = 0; addr != 0xffffff; count++)
388 {
389 list = rambase + (addr & 0x1fffff) / 4;
390 len = list[0] >> 24;
391 addr = list[0] & 0xffffff;
392 dma_words += 1 + len;
393
394 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
395
396 // loop detection marker
397 // (bit23 set causes DMA error on real machine, so
398 // unlikely to be ever set by the game)
399 list[0] |= 0x800000;
400
401 if (len) {
402 left = check_cmd(list + 1, len);
403 if (left)
404 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
405 }
406
407 if (addr & 0x800000)
408 break;
409 }
410
411 // remove loop detection markers
412 addr = start_addr & 0x1fffff;
413 while (count-- > 0) {
414 list = rambase + addr / 4;
415 addr = list[0] & 0x1fffff;
416 list[0] &= ~0x800000;
417 }
418 if (llist_entry)
419 *llist_entry &= ~0x800000;
420
421 gpu.state.last_list.frame = gpu.state.frame_count;
422 gpu.state.last_list.hcnt = *gpu.state.hcnt;
423 gpu.state.last_list.words = dma_words;
424 gpu.state.last_list.addr = start_addr;
425
426 return dma_words;
427}
428
429void GPUreadDataMem(uint32_t *mem, int count)
430{
431 log_io("gpu_dma_read %p %d\n", mem, count);
432
433 if (unlikely(gpu.cmd_len > 0))
434 flush_cmd_buffer();
435
436 if (gpu.dma.h)
437 do_vram_io(mem, count, 1);
438}
439
440uint32_t GPUreadData(void)
441{
442 log_io("gpu_read\n");
443
444 if (unlikely(gpu.cmd_len > 0))
445 flush_cmd_buffer();
446
447 if (gpu.dma.h)
448 do_vram_io(&gpu.gp0, 1, 1);
449
450 return gpu.gp0;
451}
452
453uint32_t GPUreadStatus(void)
454{
455 uint32_t ret;
456
457 if (unlikely(gpu.cmd_len > 0))
458 flush_cmd_buffer();
459
460 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
461 log_io("gpu_read_status %08x\n", ret);
462 return ret;
463}
464
/*
 * Save-state record exchanged with the main emulator through GPUfreeze().
 * The layout is fixed; do not reorder or resize the members.
 */
typedef struct GPUFREEZETAG
{
  /* format version, set by the main emulator (expected to be 1 for now) */
  uint32_t ulFreezeVersion;
  /* GPU status word at the time of the save */
  uint32_t ulStatus;
  /* most recent control register values */
  uint32_t ulControl[256];
  /* VRAM image (a full 2 MB, for ZN) */
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
472
473long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
474{
475 int i;
476
477 switch (type) {
478 case 1: // save
479 if (gpu.cmd_len > 0)
480 flush_cmd_buffer();
481 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
482 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
483 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
484 freeze->ulStatus = gpu.status.reg;
485 break;
486 case 0: // load
487 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
488 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
489 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
490 gpu.status.reg = freeze->ulStatus;
491 for (i = 8; i > 0; i--) {
492 gpu.regs[i] ^= 1; // avoid reg change detection
493 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
494 }
495 break;
496 }
497
498 return 1;
499}
500
501void GPUvBlank(int val, uint32_t *hcnt)
502{
503 gpu.lcf_hc = &gpu.zero;
504 if (gpu.status.interlace) {
505 if (val)
506 gpu.status.lcf ^= 1;
507 }
508 else {
509 gpu.status.lcf = 0;
510 if (!val)
511 gpu.lcf_hc = hcnt;
512 }
513 if (!val)
514 gpu.state.frame_count++;
515
516 gpu.state.hcnt = hcnt;
517}
518
519// vim:shiftwidth=2:expandtab