gpu_neon: rework buffering to reduce flickering
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
abf09485 15#include "../../libpcsxcore/gpu.h" // meh
1ab64c54
GI
16
17#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 18#ifdef __GNUC__
d30279e2 19#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 20#define preload __builtin_prefetch
8dd855cd 21#define noinline __attribute__((noinline))
8f5f2dd5 22#else
23#define unlikely(x)
24#define preload(...)
25#define noinline
8f5f2dd5 26#endif
1ab64c54 27
deb18d24 28//#define log_io gpu_log
56f08d83 29#define log_io(...)
56f08d83 30
9ee0fd5b 31struct psx_gpu gpu;
1ab64c54 32
48f3d210 33static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 34static void finish_vram_transfer(int is_read);
48f3d210 35
// GP1(0x01) command reset: drain any buffered GP0 words and abort an
// in-flight VRAM transfer so no stale state survives the reset.
static noinline void do_cmd_reset(void)
{
  // flush pending commands first so their side effects still happen
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // terminate a partially-done VRAM read/write cleanly
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
46
// Full GP1(0x00) reset: restore registers, status and screen geometry
// to power-on defaults and resync the renderer.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs hold the last 0xE0..0xE7 draw-environment commands
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // power-on status word
  gpu.gp0 = 0;
  gpu.regs[3] = 1;          // display disabled (blanked)
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
65
// Recompute visible width/x from the GP1(0x06) horizontal display range
// and the resolution bits in gpu.status (bits 16-18 index the tables).
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // display range in dotclocks
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // manual: keep x computed above
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
110
// Recompute visible height/y from the GP1(0x07) vertical display range,
// PAL/NTSC mode and the double-height (interlace) bit.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced: everything doubles
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // manual: keep y computed above
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
148
// Called once per displayed frame flip: advance the frameskip state
// machine and, when a skip run ends, replay a deferred fill command.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // set < 0 means "auto" - follow the frontend's advice flag
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (0x02) postponed while skipping must still hit vram now
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
171
// Decide whether skipping is safe given the current drawing area (from
// the 0xe3 draw-area command): skipping draws that land inside the
// displayed region would be visible, so disallow it then.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // unsigned subtraction doubles as a "within [src, src+size)" test
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
183
5fe1a2b1 184static void flush_cmd_buffer(void);
185
// GP1(0x10..0x1f) "get GPU info": latch the requested value into gpu.gp0
// for a subsequent GPUREAD.
static noinline void get_gpu_info(uint32_t data)
{
  // buffered commands may still modify ex_regs; apply them first
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:  // texture window
    case 0x03:  // draw area top-left
    case 0x04:  // draw area bottom-right
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:  // draw offset (11-bit x/y pair)
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:  // GPU version
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}
207
9ee0fd5b 208// double, for overdraw guard
209#define VRAM_SIZE (1024 * 512 * 2 * 2)
210
211static int map_vram(void)
212{
213 gpu.vram = gpu.mmap(VRAM_SIZE);
214 if (gpu.vram != NULL) {
215 gpu.vram += 4096 / 2;
216 return 0;
217 }
218 else {
219 fprintf(stderr, "could not map vram, expect crashes\n");
220 return -1;
221 }
222}
223
// Plugin entry point: initialize video-out and renderer backends, clear
// emulation state and perform a cold reset. Returns 0 on success.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point counters at a harmless dummy until the frontend wires real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // mmap callback may not be set yet; GPUrearmedCallbacks retries later
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
244
// Plugin teardown: release renderer/vout resources and unmap VRAM
// (undoing the base offset applied in map_vram()).
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
259
1ab64c54
GI
// GP1 control-port write: dispatch on the command byte (data >> 24).
// Updates the shadow register file, screen geometry and frameskip state.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore redundant writes, except reset(0/1) and scanout(5)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:  // full reset
      do_reset();
      break;
    case 0x01:  // command buffer reset
      do_cmd_reset();
      break;
    case 0x03:  // display enable/blank
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:  // DMA direction
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:  // display (scanout) start address
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          // treat a scanout change as a frame flip, once per frame
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:  // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:  // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:  // display mode
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
333
// Number of extra parameter words following each GP0 command byte
// (total packet length = 1 + cmd_lengths[cmd]); variable-length
// poly-lines (0x48-0x5f) are extended at parse time.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
353
d30279e2
GI
354#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
355
// Copy l pixels while ORing the mask-set bit (msb) into each one.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
362
// Transfer one horizontal span of l pixels between VRAM at (x, y) and
// mem; direction chosen by is_read, msb is ORed in on writes when set.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read)) {
    memcpy(mem, vram, l * 2);
    return;
  }
  if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
374
// Stream count 32-bit words into/out of the active VRAM transfer
// rectangle (gpu.dma), resuming at a mid-row offset if needed.
// Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // mask-set bit from GP0(0xe6)
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish a row that a previous call left partially done
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows; y wraps at VRAM height
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      // partial row left over - remember the offset for the next call
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
425
// Begin a GP0 VRAM write (0xa0) or read (0xc0) transfer; decodes the
// destination rectangle and snapshots it in gpu.dma_start.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // 0 encodes the maximum size (1024x512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
452
// Complete the current VRAM transfer: clear the IMG flag on reads, or
// mark the framebuffer dirty and invalidate renderer caches on writes.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
465
// GP0(0x80) VRAM-to-VRAM copy. Fast path uses row memcpy; the buffered
// slow path handles forward overlap, horizontal wrap past 1024, and the
// mask-set bit. Both axes wrap (x & 0x3ff, y & 0x1ff).
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  // copy onto itself with no mask bit is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    // slow path: stage each chunk through lbuf so overlapping
    // source/destination rows copy correctly
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
510
// Parse a command stream while a frame is being skipped: execute only
// state-affecting commands (0xE0.., fills when large), defer small
// fills, and stop as soon as skipping becomes disallowed or an image
// i/o command is reached. Returns words consumed; *last_cmd gets the
// last command seen (-1 for an incomplete one).
// Note: uses GCC case-range extension (0x24 ... 0x27).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        // textured polys update the texture page register (ex_regs[1])
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        // variable-length poly-line: scan for the 0x5555,0x5555 terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        // shaded poly-line: two words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
576
// Core GP0 processing loop: feed an active VRAM transfer, recognize
// VRAM write/read (0xa0/0xc0) and VRAM copy (0x80) packets, and hand
// everything else to the renderer (or the skip parser while skipping).
// Returns the number of words NOT consumed (incomplete trailing cmd).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror texpage/mask bits from ex_regs into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
641
// Drain gpu.cmd_buffer through do_cmd_buffer(), keeping any incomplete
// trailing command at the front of the buffer for the next write.
static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed - tell the frontend drawing has started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
653
654void GPUwriteDataMem(uint32_t *mem, int count)
655{
d30279e2
GI
656 int left;
657
56f08d83 658 log_io("gpu_dma_write %p %d\n", mem, count);
659
d30279e2
GI
660 if (unlikely(gpu.cmd_len > 0))
661 flush_cmd_buffer();
56f08d83 662
48f3d210 663 left = do_cmd_buffer(mem, count);
d30279e2 664 if (left)
56f08d83 665 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
666}
667
// Single GP0 word write: buffer it (stored little-endian) and flush
// when the buffer fills.
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
675
// Walk a GP0 DMA linked list in RAM. Each node: header word with
// payload length (top byte) and next address (low 24 bits); the list
// ends when bit 23 of the address is set. Marks visited nodes with
// bit 23 after LD_THRESHOLD entries to break infinite loops, and
// removes the markers afterwards. Returns an emulated cycle cost.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // a previous node left an incomplete command; append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      // caller wants incremental processing: one node per call
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
758
d30279e2
GI
// Bulk GPUREAD (DMA): copy count words out of an active VRAM read
// transfer; a no-op if no transfer is in progress.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
769
// Single GPUREAD: returns the next VRAM word of an active read
// transfer, or the latched gpu.gp0 value (e.g. from get_gpu_info).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data in memory
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
787
788uint32_t GPUreadStatus(void)
789{
ddd56f6e 790 uint32_t ret;
56f08d83 791
d30279e2
GI
792 if (unlikely(gpu.cmd_len > 0))
793 flush_cmd_buffer();
794
f23b103c 795 ret = gpu.status;
ddd56f6e 796 log_io("gpu_read_status %08x\n", ret);
797 return ret;
d30279e2
GI
798}
799
// Savestate layout shared with other PSX GPU plugins (PSEmu Pro style).
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];  // current VRam image (full 2 MB for ZN)
};
1ab64c54 807
// Savestate entry point: type 1 saves, type 0 loads. On load, control
// registers are replayed through GPUwriteStatus to rebuild derived
// state. Always returns 1 (success).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stashed in the upper part of ulControl
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
838
// Per-vsync hook: flush pending work and present a frame, honoring
// blanking and the frameskip state machine.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // give up skipping if no flip happened for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement toggled on: renderer caches are stale for the new mode
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
875
// vblank hook: decide whether interlaced rendering should be on and
// notify the renderer of the current field (lcf) when it changes.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
897
// Report the display start line and base vertical resolution.
// NOTE(review): *base_hres is filled from gpu.screen.vres (vertical
// resolution, halved in double-height mode) despite the "hres" name -
// presumably the caller wants scanline count; confirm against callers.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
905
5440b88e 906#include "../../frontend/plugin_lib.h"
907
// Frontend configuration hook: copy runtime options and callback
// pointers from the frontend, and (re)apply screen centering settings.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
941
1ab64c54 942// vim:shiftwidth=2:expandtab