gpu_neon: flush queues on upper level too
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
3ece2f0c 20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
7d993ee2 27struct psx_gpu gpu __attribute__((aligned(2048)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
9fe27e25 62 if (gpu.frameskip.active)
63 gpu.frameskip.cnt++;
64 else {
65 gpu.frameskip.cnt = 0;
66 gpu.frameskip.frame_ready = 1;
67 }
fc84f618 68
9fe27e25 69 if (!gpu.frameskip.active && *gpu.frameskip.advice)
70 gpu.frameskip.active = 1;
71 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 72 gpu.frameskip.active = 1;
73 else
74 gpu.frameskip.active = 0;
75}
76
9fe27e25 77static noinline void decide_frameskip_allow(uint32_t cmd_e3)
78{
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
86}
87
6e9bdaef 88static noinline void get_gpu_info(uint32_t data)
89{
90 switch (data & 0x0f) {
91 case 0x02:
92 case 0x03:
93 case 0x04:
94 case 0x05:
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
96 break;
97 case 0x06:
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
99 break;
100 case 0x07:
101 gpu.gp0 = 2;
102 break;
103 default:
104 gpu.gp0 = 0;
105 break;
106 }
107}
108
109long GPUinit(void)
110{
9394ada5 111 int ret;
112 ret = vout_init();
113 ret |= renderer_init();
114
3ece2f0c 115 gpu.state.frame_count = &gpu.zero;
deb18d24 116 gpu.state.hcnt = &gpu.zero;
9394ada5 117 do_reset();
6e9bdaef 118 return ret;
119}
120
// Plugin entry point: tear down the video output backend.
long GPUshutdown(void)
{
  long ret = vout_finish();
  return ret;
}
125
1ab64c54
GI
// GP1 (control) port write: display and DMA control commands.
void GPUwriteStatus(uint32_t data)
{
  // resolution tables indexed by the mode bits set via command 08h
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // drop writes that change nothing; reset (0) and display address (5)
    // always take effect
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:  // reset GPU
      do_reset();
      break;
    case 0x03:  // display blanking on/off
      gpu.status.blanking = data & 1;
      break;
    case 0x04:  // DMA direction
      gpu.status.dma = data & 3;
      break;
    case 0x05:  // start of display area in VRAM
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        // re-evaluate skip safety against the current draw area (ex_regs[3]
        // holds the last E3h word), and treat this "flip" as the per-frame
        // point where the skip decision is made - at most once per frame
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:  // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:  // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:  // display mode: update status bits 16-23, then sizes
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)  // 10h-1Fh: GPU info requests
        get_gpu_info(data);
      break;
  }
}
184
// Number of parameter words each GP0 command carries beyond the command
// word itself (total length = 1 + cmd_lengths[cmd]).  Unhandled and
// no-parameter commands are 0.  Row comments mark the command base.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
204
d30279e2
GI
205#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
206
207static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 208{
d30279e2
GI
209 uint16_t *vram = VRAM_MEM_XY(x, y);
210 if (is_read)
211 memcpy(mem, vram, l * 2);
212 else
213 memcpy(vram, mem, l * 2);
214}
215
// Perform (part of) an ongoing VRAM transfer set up by start_vram_transfer.
// The transfer is resumable: progress (current line, remaining lines and
// offset into a partial line) is kept in gpu.dma between calls.
// Returns the number of 32-bit words consumed from/produced into data.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish the partial line left over from the previous call, if any
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    // advance within the line, or move to the next one when complete
    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // full lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // wrap within the 512-line VRAM
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing partial line; remember the offset so the next call resumes it
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }

  // persist transfer progress
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
261
262static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
263{
ddd56f6e 264 if (gpu.dma.h)
265 log_anomaly("start_vram_transfer while old unfinished\n");
266
d30279e2
GI
267 gpu.dma.x = pos_word & 1023;
268 gpu.dma.y = (pos_word >> 16) & 511;
269 gpu.dma.w = size_word & 0xffff; // ?
270 gpu.dma.h = size_word >> 16;
271 gpu.dma.offset = 0;
272
273 if (is_read)
274 gpu.status.img = 1;
c64af26f 275 else {
276 renderer_flush_queues();
9394ada5 277 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
c64af26f 278 }
d30279e2 279
6e9bdaef 280 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
281 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
282}
283
284static int check_cmd(uint32_t *data, int count)
285{
286 int len, cmd, start, pos;
fc84f618 287 int vram_dirty = 0;
d30279e2 288
d30279e2 289 // process buffer
ddd56f6e 290 for (start = pos = 0; pos < count; )
d30279e2
GI
291 {
292 cmd = -1;
293 len = 0;
294
295 if (gpu.dma.h) {
296 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 297 if (pos == count)
298 break;
d30279e2
GI
299 start = pos;
300 }
301
ddd56f6e 302 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 303 while (pos < count) {
56f08d83 304 uint32_t *list = data + pos;
305 cmd = list[0] >> 24;
d30279e2 306 len = 1 + cmd_lengths[cmd];
56f08d83 307
d30279e2 308 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 309 if ((cmd & 0xf4) == 0x24) {
310 // flat textured prim
a3a9f519 311 gpu.ex_regs[1] &= ~0x1ff;
312 gpu.ex_regs[1] |= list[4] & 0x1ff;
56f08d83 313 }
314 else if ((cmd & 0xf4) == 0x34) {
315 // shaded textured prim
a3a9f519 316 gpu.ex_regs[1] &= ~0x1ff;
317 gpu.ex_regs[1] |= list[5] & 0x1ff;
56f08d83 318 }
fb4c6fba 319 else if (cmd == 0xe3)
9fe27e25 320 decide_frameskip_allow(list[0]);
321
fc84f618 322 if (2 <= cmd && cmd < 0xc0)
323 vram_dirty = 1;
6e9bdaef 324 else if ((cmd & 0xf8) == 0xe0)
325 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 326
d30279e2
GI
327 if (pos + len > count) {
328 cmd = -1;
329 break; // incomplete cmd
330 }
331 if (cmd == 0xa0 || cmd == 0xc0)
332 break; // image i/o
333 pos += len;
334 }
335
336 if (pos - start > 0) {
fb4c6fba 337 if (!gpu.frameskip.active || !gpu.frameskip.allow)
fc84f618 338 do_cmd_list(data + start, pos - start);
d30279e2
GI
339 start = pos;
340 }
341
342 if (cmd == 0xa0 || cmd == 0xc0) {
343 // consume vram write/read cmd
344 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
345 pos += len;
346 }
5b745e5b 347 else if (cmd == -1)
ddd56f6e 348 break;
d30279e2 349 }
ddd56f6e 350
a3a9f519 351 gpu.status.reg &= ~0x1fff;
352 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
353 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
354
5b745e5b 355 if (gpu.frameskip.active)
356 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 357 gpu.state.fb_dirty |= vram_dirty;
358
ddd56f6e 359 return count - pos;
d30279e2
GI
360}
361
362static void flush_cmd_buffer(void)
363{
364 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
365 if (left > 0)
366 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
367 gpu.cmd_len = left;
1ab64c54
GI
368}
369
370void GPUwriteDataMem(uint32_t *mem, int count)
371{
d30279e2
GI
372 int left;
373
56f08d83 374 log_io("gpu_dma_write %p %d\n", mem, count);
375
d30279e2
GI
376 if (unlikely(gpu.cmd_len > 0))
377 flush_cmd_buffer();
56f08d83 378
d30279e2
GI
379 left = check_cmd(mem, count);
380 if (left)
56f08d83 381 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
382}
383
d30279e2 384void GPUwriteData(uint32_t data)
1ab64c54 385{
56f08d83 386 log_io("gpu_write %08x\n", data);
d30279e2
GI
387 gpu.cmd_buffer[gpu.cmd_len++] = data;
388 if (gpu.cmd_len >= CMD_BUFFER_LEN)
389 flush_cmd_buffer();
1ab64c54
GI
390}
391
// Walk a GPU DMA linked list in emulated RAM and execute each packet.
// Returns the total number of words processed (used by the core for timing).
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends it's main list twice, detect this
  // (same frame or adjacent hcnt with a large previous list) and mark the
  // old list head so the walk below terminates early on it
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    // header word: packet length in the top byte, next address below
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // hit a marked entry - we've been here before, stop
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list for the double-send detection above
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}
454
d30279e2
GI
455void GPUreadDataMem(uint32_t *mem, int count)
456{
56f08d83 457 log_io("gpu_dma_read %p %d\n", mem, count);
458
d30279e2
GI
459 if (unlikely(gpu.cmd_len > 0))
460 flush_cmd_buffer();
56f08d83 461
d30279e2
GI
462 if (gpu.dma.h)
463 do_vram_io(mem, count, 1);
464}
465
466uint32_t GPUreadData(void)
467{
56f08d83 468 log_io("gpu_read\n");
469
470 if (unlikely(gpu.cmd_len > 0))
471 flush_cmd_buffer();
472
473 if (gpu.dma.h)
6e9bdaef 474 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 475
6e9bdaef 476 return gpu.gp0;
d30279e2
GI
477}
478
479uint32_t GPUreadStatus(void)
480{
ddd56f6e 481 uint32_t ret;
56f08d83 482
d30279e2
GI
483 if (unlikely(gpu.cmd_len > 0))
484 flush_cmd_buffer();
485
24de2dd4 486 ret = gpu.status.reg;
ddd56f6e 487 log_io("gpu_read_status %08x\n", ret);
488 return ret;
d30279e2
GI
489}
490
// Savestate image exchanged with the emu core; presumably shared with
// the other PSX GPU plugins, so the layout must not change - confirm.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 498
// Save (type 1) or load (type 0) the GPU state for savestates.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      // make sure buffered commands have reached VRAM before snapshotting
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      // ex_regs (E1h-E7h state) ride in the unused tail of ulControl
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      // the whole VRAM is about to be replaced - drop renderer caches
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      // replay GP1 regs 1..8 to rebuild derived state (screen size etc.);
      // flip a bit in the stored copy first so GPUwriteStatus' no-change
      // filter doesn't drop the write
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      break;
  }

  return 1;
}
528
1ab64c54 529// vim:shiftwidth=2:expandtab