psx_gpu: flush render buffer before move/cppy/fill
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
3ece2f0c 20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
7d993ee2 27struct psx_gpu gpu __attribute__((aligned(2048)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
// Frameskip state machine, advanced once per display flip:
// count consecutive skipped frames and decide whether the
// next frame should be skipped or rendered.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    // previous frame was actually rendered - mark it presentable
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // start skipping when externally advised (advice is a flag provided
  // by the emu core), or keep skipping until 'set' consecutive frames
  // have been dropped; otherwise render the next frame
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}
76
9fe27e25 77static noinline void decide_frameskip_allow(uint32_t cmd_e3)
78{
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
86}
87
6e9bdaef 88static noinline void get_gpu_info(uint32_t data)
89{
90 switch (data & 0x0f) {
91 case 0x02:
92 case 0x03:
93 case 0x04:
94 case 0x05:
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
96 break;
97 case 0x06:
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
99 break;
100 case 0x07:
101 gpu.gp0 = 2;
102 break;
103 default:
104 gpu.gp0 = 0;
105 break;
106 }
107}
108
109long GPUinit(void)
110{
9394ada5 111 int ret;
112 ret = vout_init();
113 ret |= renderer_init();
114
3ece2f0c 115 gpu.state.frame_count = &gpu.zero;
deb18d24 116 gpu.state.hcnt = &gpu.zero;
9394ada5 117 do_reset();
6e9bdaef 118 return ret;
119}
120
// Plugin teardown; only the video output needs explicit shutdown here.
long GPUshutdown(void)
{
  return vout_finish();
}
125
1ab64c54
GI
// GP1 control port write: top byte of 'data' selects the command,
// the rest is its payload.
void GPUwriteStatus(uint32_t data)
{
  // hres index is bits 16-18 of the status reg, vres index bits 19-20
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore no-op rewrites, except reset and display-start which
    // have side effects even when the value is unchanged
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:          // reset GPU
    do_reset();
    break;
  case 0x03:          // display enable/disable
    gpu.status.blanking = data & 1;
    break;
  case 0x04:          // DMA direction
    gpu.status.dma = data & 3;
    break;
  case 0x05:          // start of display area (flip)
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // advance frameskip at most once per emulated frame,
      // even if the game flips several times
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:          // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:          // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:          // display mode
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)   // 0x10-0x1f: info queries
      get_gpu_info(data);
    break;
  }
}
184
// Number of parameter words that follow each GP0 command word;
// the total command size is 1 + cmd_lengths[cmd] (see check_cmd).
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
204
d30279e2
GI
205#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
206
207static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 208{
d30279e2
GI
209 uint16_t *vram = VRAM_MEM_XY(x, y);
210 if (is_read)
211 memcpy(mem, vram, l * 2);
212 else
213 memcpy(vram, mem, l * 2);
214}
215
// Advance the active VRAM transfer (set up by start_vram_transfer)
// by up to 'count' 32-bit words. The gpu.dma state tracks progress:
// y/h are the current row and rows remaining, 'offset' is how far
// into a partially transferred row we are.
// Returns the number of 32-bit words actually consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // first finish the row left incomplete by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;         // row still not finished
    else {
      o = 0;          // row completed, move to the next one
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows; y wraps at the 512-line VRAM boundary
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing partial row - remember how far we got in 'offset'
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
261
// Begin a CPU<->VRAM image transfer (GP0 0xa0 write / 0xc0 read).
// pos_word packs the destination x/y, size_word packs width/height;
// the data itself flows through do_vram_io on subsequent calls.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 1023;
  gpu.dma.y = (pos_word >> 16) & 511;
  gpu.dma.w = size_word & 0xffff; // ? (width is not masked like x/y - verify range)
  gpu.dma.h = size_word >> 16;
  gpu.dma.offset = 0;

  if (is_read)
    // flag image data as available for GPUREAD
    gpu.status.img = 1;
  else
    // CPU is about to overwrite this region - drop cached texture data
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
281
282static int check_cmd(uint32_t *data, int count)
283{
284 int len, cmd, start, pos;
fc84f618 285 int vram_dirty = 0;
d30279e2 286
d30279e2 287 // process buffer
ddd56f6e 288 for (start = pos = 0; pos < count; )
d30279e2
GI
289 {
290 cmd = -1;
291 len = 0;
292
293 if (gpu.dma.h) {
294 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 295 if (pos == count)
296 break;
d30279e2
GI
297 start = pos;
298 }
299
ddd56f6e 300 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 301 while (pos < count) {
56f08d83 302 uint32_t *list = data + pos;
303 cmd = list[0] >> 24;
d30279e2 304 len = 1 + cmd_lengths[cmd];
56f08d83 305
d30279e2 306 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 307 if ((cmd & 0xf4) == 0x24) {
308 // flat textured prim
a3a9f519 309 gpu.ex_regs[1] &= ~0x1ff;
310 gpu.ex_regs[1] |= list[4] & 0x1ff;
56f08d83 311 }
312 else if ((cmd & 0xf4) == 0x34) {
313 // shaded textured prim
a3a9f519 314 gpu.ex_regs[1] &= ~0x1ff;
315 gpu.ex_regs[1] |= list[5] & 0x1ff;
56f08d83 316 }
fb4c6fba 317 else if (cmd == 0xe3)
9fe27e25 318 decide_frameskip_allow(list[0]);
319
fc84f618 320 if (2 <= cmd && cmd < 0xc0)
321 vram_dirty = 1;
6e9bdaef 322 else if ((cmd & 0xf8) == 0xe0)
323 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 324
d30279e2
GI
325 if (pos + len > count) {
326 cmd = -1;
327 break; // incomplete cmd
328 }
329 if (cmd == 0xa0 || cmd == 0xc0)
330 break; // image i/o
331 pos += len;
332 }
333
334 if (pos - start > 0) {
fb4c6fba 335 if (!gpu.frameskip.active || !gpu.frameskip.allow)
fc84f618 336 do_cmd_list(data + start, pos - start);
d30279e2
GI
337 start = pos;
338 }
339
340 if (cmd == 0xa0 || cmd == 0xc0) {
341 // consume vram write/read cmd
342 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
343 pos += len;
344 }
5b745e5b 345 else if (cmd == -1)
ddd56f6e 346 break;
d30279e2 347 }
ddd56f6e 348
a3a9f519 349 gpu.status.reg &= ~0x1fff;
350 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
351 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
352
5b745e5b 353 if (gpu.frameskip.active)
354 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 355 gpu.state.fb_dirty |= vram_dirty;
356
ddd56f6e 357 return count - pos;
d30279e2
GI
358}
359
360static void flush_cmd_buffer(void)
361{
362 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
363 if (left > 0)
364 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
365 gpu.cmd_len = left;
1ab64c54
GI
366}
367
368void GPUwriteDataMem(uint32_t *mem, int count)
369{
d30279e2
GI
370 int left;
371
56f08d83 372 log_io("gpu_dma_write %p %d\n", mem, count);
373
d30279e2
GI
374 if (unlikely(gpu.cmd_len > 0))
375 flush_cmd_buffer();
56f08d83 376
d30279e2
GI
377 left = check_cmd(mem, count);
378 if (left)
56f08d83 379 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
380}
381
d30279e2 382void GPUwriteData(uint32_t data)
1ab64c54 383{
56f08d83 384 log_io("gpu_write %08x\n", data);
d30279e2
GI
385 gpu.cmd_buffer[gpu.cmd_len++] = data;
386 if (gpu.cmd_len >= CMD_BUFFER_LEN)
387 flush_cmd_buffer();
1ab64c54
GI
388}
389
// Walk a GPU DMA linked list starting at start_addr and execute each
// packet's commands. Each list entry is: 1 header word (len << 24 |
// next_addr) followed by 'len' command words.
// Returns the total number of words traversed.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this: if the previous large
  // list was sent on the same frame (or adjacent hcnt), mark its head
  // so the walk below terminates as soon as it re-enters that list
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // marker seen in the next address => we have looped
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers by re-walking the visited entries
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list for the double-send detection above
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}
452
d30279e2
GI
453void GPUreadDataMem(uint32_t *mem, int count)
454{
56f08d83 455 log_io("gpu_dma_read %p %d\n", mem, count);
456
d30279e2
GI
457 if (unlikely(gpu.cmd_len > 0))
458 flush_cmd_buffer();
56f08d83 459
d30279e2
GI
460 if (gpu.dma.h)
461 do_vram_io(mem, count, 1);
462}
463
464uint32_t GPUreadData(void)
465{
56f08d83 466 log_io("gpu_read\n");
467
468 if (unlikely(gpu.cmd_len > 0))
469 flush_cmd_buffer();
470
471 if (gpu.dma.h)
6e9bdaef 472 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 473
6e9bdaef 474 return gpu.gp0;
d30279e2
GI
475}
476
477uint32_t GPUreadStatus(void)
478{
ddd56f6e 479 uint32_t ret;
56f08d83 480
d30279e2
GI
481 if (unlikely(gpu.cmd_len > 0))
482 flush_cmd_buffer();
483
24de2dd4 484 ret = gpu.status.reg;
ddd56f6e 485 log_io("gpu_read_status %08x\n", ret);
486 return ret;
d30279e2
GI
487}
488
// Savestate image shared across PCSX GPU plugins; field order and
// sizes form the on-disk format and must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 496
// Save (type 1) or load (type 0) the GPU state to/from 'freeze'.
// The shadowed e-regs are stashed in the unused 0xe0+ slots of
// ulControl. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();       // don't leave half-parsed commands behind
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    // VRAM is replaced wholesale - drop everything the renderer cached
    renderer_invalidate_caches(0, 0, 1024, 512);
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    // replay control regs 1..8 so derived state (screen mode etc.)
    // is recomputed; the xor defeats GPUwriteStatus' no-op filter
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    break;
  }

  return 1;
}
526
1ab64c54 527// vim:shiftwidth=2:expandtab