gpu_neon: do list resubmit detection
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
deb18d24 24#define log_anomaly gpu_log
25//#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
93 int ret = vout_init();
94 do_reset();
95 gpu.lcf_hc = &gpu.zero;
deb18d24 96 gpu.state.frame_count = 0;
97 gpu.state.hcnt = &gpu.zero;
6e9bdaef 98 return ret;
99}
100
/*
 * Plugin entry point: shut down video output.
 * Returns whatever vout_finish() returned.
 */
long GPUshutdown(void)
{
  long vout_ret = vout_finish();
  return vout_ret;
}
105
1ab64c54
GI
106void GPUwriteStatus(uint32_t data)
107{
108 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
109 static const short vres[4] = { 240, 480, 256, 480 };
110 uint32_t cmd = data >> 24;
111
fc84f618 112 if (cmd < ARRAY_SIZE(gpu.regs)) {
113 if (cmd != 0 && gpu.regs[cmd] == data)
114 return;
8dd855cd 115 gpu.regs[cmd] = data;
fc84f618 116 }
117
118 gpu.state.fb_dirty = 1;
8dd855cd 119
120 switch (cmd) {
1ab64c54 121 case 0x00:
6e9bdaef 122 do_reset();
1ab64c54
GI
123 break;
124 case 0x03:
d30279e2 125 gpu.status.blanking = data & 1;
1ab64c54
GI
126 break;
127 case 0x04:
128 gpu.status.dma = data & 3;
129 break;
130 case 0x05:
131 gpu.screen.x = data & 0x3ff;
132 gpu.screen.y = (data >> 10) & 0x3ff;
fc84f618 133 if (gpu.frameskip.enabled)
134 decide_frameskip();
1ab64c54 135 break;
8dd855cd 136 case 0x06:
137 gpu.screen.x1 = data & 0xfff;
138 gpu.screen.x2 = (data >> 12) & 0xfff;
139 update_width();
140 break;
1ab64c54
GI
141 case 0x07:
142 gpu.screen.y1 = data & 0x3ff;
143 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 144 update_height();
1ab64c54
GI
145 break;
146 case 0x08:
147 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 148 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
149 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
150 update_width();
151 update_height();
1ab64c54 152 break;
deb18d24 153 default:
154 if ((cmd & 0xf0) == 0x10)
155 get_gpu_info(data);
6e9bdaef 156 break;
1ab64c54 157 }
1ab64c54
GI
158}
159
/*
 * Number of parameter words that follow the command word, indexed by the
 * command byte (top 8 bits of the first word). A full command occupies
 * 1 + cmd_lengths[cmd] words. One row per high nibble.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
179
d30279e2
GI
180#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
181
182static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 183{
d30279e2
GI
184 uint16_t *vram = VRAM_MEM_XY(x, y);
185 if (is_read)
186 memcpy(mem, vram, l * 2);
187 else
188 memcpy(vram, mem, l * 2);
189}
190
191static int do_vram_io(uint32_t *data, int count, int is_read)
192{
193 int count_initial = count;
194 uint16_t *sdata = (uint16_t *)data;
195 int x = gpu.dma.x, y = gpu.dma.y;
196 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 197 int o = gpu.dma.offset;
d30279e2
GI
198 int l;
199 count *= 2; // operate in 16bpp pixels
200
201 if (gpu.dma.offset) {
202 l = w - gpu.dma.offset;
ddd56f6e 203 if (count < l)
d30279e2 204 l = count;
ddd56f6e 205
206 do_vram_line(x + o, y, sdata, l, is_read);
207
208 if (o + l < w)
209 o += l;
210 else {
211 o = 0;
212 y++;
213 h--;
214 }
d30279e2
GI
215 sdata += l;
216 count -= l;
d30279e2
GI
217 }
218
219 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
220 y &= 511;
221 do_vram_line(x, y, sdata, w, is_read);
222 }
223
224 if (h > 0 && count > 0) {
225 y &= 511;
226 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 227 o = count;
d30279e2
GI
228 count = 0;
229 }
d30279e2
GI
230 gpu.dma.y = y;
231 gpu.dma.h = h;
ddd56f6e 232 gpu.dma.offset = o;
d30279e2 233
6e9bdaef 234 return count_initial - count / 2;
d30279e2
GI
235}
236
237static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
238{
ddd56f6e 239 if (gpu.dma.h)
240 log_anomaly("start_vram_transfer while old unfinished\n");
241
d30279e2
GI
242 gpu.dma.x = pos_word & 1023;
243 gpu.dma.y = (pos_word >> 16) & 511;
244 gpu.dma.w = size_word & 0xffff; // ?
245 gpu.dma.h = size_word >> 16;
246 gpu.dma.offset = 0;
247
248 if (is_read)
249 gpu.status.img = 1;
250
6e9bdaef 251 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
252 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
253}
254
255static int check_cmd(uint32_t *data, int count)
256{
257 int len, cmd, start, pos;
fc84f618 258 int vram_dirty = 0;
d30279e2 259
d30279e2 260 // process buffer
ddd56f6e 261 for (start = pos = 0; pos < count; )
d30279e2
GI
262 {
263 cmd = -1;
264 len = 0;
265
266 if (gpu.dma.h) {
267 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 268 if (pos == count)
269 break;
d30279e2
GI
270 start = pos;
271 }
272
ddd56f6e 273 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 274 while (pos < count) {
56f08d83 275 uint32_t *list = data + pos;
276 cmd = list[0] >> 24;
d30279e2 277 len = 1 + cmd_lengths[cmd];
56f08d83 278
d30279e2 279 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 280 if ((cmd & 0xf4) == 0x24) {
281 // flat textured prim
282 gpu.status.reg &= ~0x1ff;
283 gpu.status.reg |= list[4] & 0x1ff;
284 }
285 else if ((cmd & 0xf4) == 0x34) {
286 // shaded textured prim
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[5] & 0x1ff;
289 }
290 else switch (cmd)
291 {
292 case 0xe1:
293 gpu.status.reg &= ~0x7ff;
294 gpu.status.reg |= list[0] & 0x7ff;
295 break;
296 case 0xe6:
297 gpu.status.reg &= ~0x1800;
298 gpu.status.reg |= (list[0] & 3) << 11;
299 break;
300 }
fc84f618 301 if (2 <= cmd && cmd < 0xc0)
302 vram_dirty = 1;
6e9bdaef 303 else if ((cmd & 0xf8) == 0xe0)
304 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 305
d30279e2
GI
306 if (pos + len > count) {
307 cmd = -1;
308 break; // incomplete cmd
309 }
310 if (cmd == 0xa0 || cmd == 0xc0)
311 break; // image i/o
312 pos += len;
313 }
314
315 if (pos - start > 0) {
fc84f618 316 if (!gpu.frameskip.active)
317 do_cmd_list(data + start, pos - start);
d30279e2
GI
318 start = pos;
319 }
320
321 if (cmd == 0xa0 || cmd == 0xc0) {
322 // consume vram write/read cmd
323 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
324 pos += len;
325 }
326
ddd56f6e 327 if (cmd == -1)
328 break;
d30279e2 329 }
ddd56f6e 330
fc84f618 331 gpu.state.fb_dirty |= vram_dirty;
332
ddd56f6e 333 return count - pos;
d30279e2
GI
334}
335
336static void flush_cmd_buffer(void)
337{
338 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
339 if (left > 0)
340 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
341 gpu.cmd_len = left;
1ab64c54
GI
342}
343
344void GPUwriteDataMem(uint32_t *mem, int count)
345{
d30279e2
GI
346 int left;
347
56f08d83 348 log_io("gpu_dma_write %p %d\n", mem, count);
349
d30279e2
GI
350 if (unlikely(gpu.cmd_len > 0))
351 flush_cmd_buffer();
56f08d83 352
d30279e2
GI
353 left = check_cmd(mem, count);
354 if (left)
56f08d83 355 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
356}
357
d30279e2 358void GPUwriteData(uint32_t data)
1ab64c54 359{
56f08d83 360 log_io("gpu_write %08x\n", data);
d30279e2
GI
361 gpu.cmd_buffer[gpu.cmd_len++] = data;
362 if (gpu.cmd_len >= CMD_BUFFER_LEN)
363 flush_cmd_buffer();
1ab64c54
GI
364}
365
ddd56f6e 366long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 367{
ddd56f6e 368 uint32_t addr, *list;
deb18d24 369 uint32_t *llist_entry = NULL;
ddd56f6e 370 int len, left, count;
deb18d24 371 long dma_words = 0;
d30279e2
GI
372
373 if (unlikely(gpu.cmd_len > 0))
374 flush_cmd_buffer();
375
deb18d24 376 // ff7 sends it's main list twice, detect this
377 if (gpu.state.frame_count == gpu.state.last_list.frame &&
378 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
379 gpu.state.last_list.words > 1024)
380 {
381 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
382 *llist_entry |= 0x800000;
383 }
384
56f08d83 385 log_io("gpu_dma_chain\n");
ddd56f6e 386 addr = start_addr & 0xffffff;
387 for (count = 0; addr != 0xffffff; count++)
388 {
ddd56f6e 389 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
390 len = list[0] >> 24;
391 addr = list[0] & 0xffffff;
deb18d24 392 dma_words += 1 + len;
393
394 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 395
396 // loop detection marker
397 // (bit23 set causes DMA error on real machine, so
398 // unlikely to be ever set by the game)
399 list[0] |= 0x800000;
400
56f08d83 401 if (len) {
402 left = check_cmd(list + 1, len);
403 if (left)
deb18d24 404 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 405 }
ddd56f6e 406
407 if (addr & 0x800000)
408 break;
409 }
410
411 // remove loop detection markers
412 addr = start_addr & 0x1fffff;
413 while (count-- > 0) {
414 list = rambase + addr / 4;
415 addr = list[0] & 0x1fffff;
416 list[0] &= ~0x800000;
d30279e2 417 }
deb18d24 418 if (llist_entry)
419 *llist_entry &= ~0x800000;
d30279e2 420
deb18d24 421 gpu.state.last_list.frame = gpu.state.frame_count;
422 gpu.state.last_list.hcnt = *gpu.state.hcnt;
423 gpu.state.last_list.words = dma_words;
424 gpu.state.last_list.addr = start_addr;
425
426 return dma_words;
1ab64c54
GI
427}
428
d30279e2
GI
429void GPUreadDataMem(uint32_t *mem, int count)
430{
56f08d83 431 log_io("gpu_dma_read %p %d\n", mem, count);
432
d30279e2
GI
433 if (unlikely(gpu.cmd_len > 0))
434 flush_cmd_buffer();
56f08d83 435
d30279e2
GI
436 if (gpu.dma.h)
437 do_vram_io(mem, count, 1);
438}
439
440uint32_t GPUreadData(void)
441{
56f08d83 442 log_io("gpu_read\n");
443
444 if (unlikely(gpu.cmd_len > 0))
445 flush_cmd_buffer();
446
447 if (gpu.dma.h)
6e9bdaef 448 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 449
6e9bdaef 450 return gpu.gp0;
d30279e2
GI
451}
452
453uint32_t GPUreadStatus(void)
454{
ddd56f6e 455 uint32_t ret;
56f08d83 456
d30279e2
GI
457 if (unlikely(gpu.cmd_len > 0))
458 flush_cmd_buffer();
459
ddd56f6e 460 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
461 log_io("gpu_read_status %08x\n", ret);
462 return ret;
d30279e2
GI
463}
464
1ab64c54
GI
/*
 * Save-state record exchanged with the main emulator through GPUfreeze().
 */
typedef struct GPUFREEZETAG
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
472
473long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
474{
fc84f618 475 int i;
476
1ab64c54
GI
477 switch (type) {
478 case 1: // save
d30279e2
GI
479 if (gpu.cmd_len > 0)
480 flush_cmd_buffer();
1ab64c54
GI
481 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
482 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 483 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 484 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
485 break;
486 case 0: // load
487 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
488 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 489 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 490 gpu.status.reg = freeze->ulStatus;
fc84f618 491 for (i = 8; i > 0; i--) {
492 gpu.regs[i] ^= 1; // avoid reg change detection
493 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
494 }
1ab64c54
GI
495 break;
496 }
497
498 return 1;
499}
500
d30279e2 501void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 502{
d30279e2
GI
503 gpu.lcf_hc = &gpu.zero;
504 if (gpu.status.interlace) {
505 if (val)
506 gpu.status.lcf ^= 1;
507 }
508 else {
509 gpu.status.lcf = 0;
510 if (!val)
511 gpu.lcf_hc = hcnt;
512 }
deb18d24 513 if (!val)
514 gpu.state.frame_count++;
515
516 gpu.state.hcnt = hcnt;
1ab64c54
GI
517}
518
1ab64c54 519// vim:shiftwidth=2:expandtab