gpulib: fix frameskip on weird lists
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
8f8ade9c 21#ifndef ARRAY_SIZE
1ab64c54 22#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f8ade9c 23#endif
8f5f2dd5 24#ifdef __GNUC__
d30279e2 25#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 26#define preload __builtin_prefetch
8dd855cd 27#define noinline __attribute__((noinline))
8f5f2dd5 28#else
29#define unlikely(x)
30#define preload(...)
31#define noinline
8f5f2dd5 32#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
// GP1(01h) command-buffer reset: drain any partially-buffered GP0 data
// and abort an in-flight VRAM transfer so no stale state survives.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  // flush whatever was queued before discarding the buffer
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  // close out a half-done VRAM read/write so caches stay consistent
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
55
// GP1(00h) full GPU reset: clears registers, restores power-on status and
// default 256x240 display mode, and notifies the renderer.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs mirror the last E1h..E6h commands; seed them with their opcodes
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // power-on GPUSTAT value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;          // display disabled (GP1(03h) bit)
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
74
// Recompute the horizontal display window (gpu.screen.x/w/hres) from the
// GP1 display range (x1/x2) and the dot-clock divider encoded in GPUSTAT,
// applying the configured screen-centering policy.
static noinline void update_width(void)
{
  // hres/divider tables indexed by GPUSTAT bits 16-18 (hres1/hres2)
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // width in dot-clock ticks
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
122
// Recompute the vertical display window (gpu.screen.y/h/vres) from the
// GP1 display range (y1/y2), PAL/NTSC and double-height (interlace) flags,
// applying the configured screen-centering policy.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced mode doubles everything
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
163
// Called on display-address flips: decide whether the upcoming frame will
// be skipped, honoring forced skip, frontend advice, and the fixed skip
// pattern (skip N frames per shown frame when frameskip.set > 0).
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // if a fill (cmd 02h) was deferred while skipping, replay it now so the
  // frame we are about to render starts from the correct background
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
190
b243416b 191static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 192{
193 // no frameskip if it decides to draw to display area,
194 // but not for interlace since it'll most likely always do that
195 uint32_t x = cmd_e3 & 0x3ff;
196 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 197 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 198 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
199 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 200 return gpu.frameskip.allow;
9fe27e25 201}
202
01ff3105 203static void flush_cmd_buffer(void);
204
6e9bdaef 205static noinline void get_gpu_info(uint32_t data)
206{
01ff3105 207 if (unlikely(gpu.cmd_len > 0))
208 flush_cmd_buffer();
6e9bdaef 209 switch (data & 0x0f) {
210 case 0x02:
211 case 0x03:
212 case 0x04:
6e9bdaef 213 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
214 break;
08b33377 215 case 0x05:
216 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 217 break;
218 case 0x07:
219 gpu.gp0 = 2;
220 break;
221 default:
08b33377 222 // gpu.gp0 unchanged
6e9bdaef 223 break;
224 }
225}
226
5bd33f52 227#ifndef max
228#define max(a, b) (((a) > (b)) ? (a) : (b))
229#endif
12367ad0 230
231// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
232// renderer/downscaler it uses in high res modes:
233#ifdef GCW_ZERO
234 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
235 // fills. (Will change this value if it ever gets large page support)
236 #define VRAM_ALIGN 8192
237#else
238 #define VRAM_ALIGN 16
239#endif
240
5bd33f52 241// double, for overdraw guard + at least 1 page before
242#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
243
12367ad0 244// vram ptr received from mmap/malloc/alloc (will deallocate using this)
245static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 246
e34ef5ac 247#ifndef GPULIB_USE_MMAP
248# ifdef __linux__
249# define GPULIB_USE_MMAP 1
250# else
251# define GPULIB_USE_MMAP 0
252# endif
253#endif
// Allocate the emulated VRAM (with guard page in front and VRAM_ALIGN
// alignment) via the frontend's mmap callback or plain calloc.
// Returns 0 on success, -1 on failure (gpu.vram left unusable).
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  // mmap-style callbacks may return MAP_FAILED (-1) instead of NULL
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
273
// Plugin entry point: initialize video-out and renderer, zero all state
// and perform a full GPU reset. Returns 0 on success, non-zero otherwise.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point counters at a harmless zero until the frontend provides real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // VRAM mapping is deferred to GPUrearmedCallbacks (mmap cb not set yet)
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
294
// Plugin teardown: stop renderer/vout and release the VRAM allocation
// using the same mechanism (mmap or malloc) that created it.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
313
1ab64c54
GI
// GP1 (control) register write: dispatch on the command byte in bits 24-31.
// Handles reset, display enable, DMA mode, display address/range and video
// mode; 10h-1Fh are "get info" queries.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore repeated writes of the same value (except reset cmds and 05h,
    // which have side effects even when the value is unchanged)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    // display enable/disable (blanking)
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    // DMA direction / data request mode
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    // start of display area (scanout origin in VRAM)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // a flip marks a new frame; decide skip at most once per frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // display mode: map cmd bits into GPUSTAT bits 16-22
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
387
// Number of parameter words following each GP0 command byte (the command
// word itself is not counted). Variable-length commands (polylines
// 48h-5Fh, VRAM transfers) carry placeholder values and are handled
// specially by the command parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
407
d30279e2
GI
408#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
409
// Copy l halfwords from src to dst, OR-ing the mask-bit value into each
// pixel (used when the "force mask bit" flag, ex_regs[6] bit 0, is set).
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int n = 0;
  while (n < l) {
    dst[n] = (uint16_t)(src[n] | msb);
    n++;
  }
}
416
// Transfer one horizontal span of l pixels between VRAM at (x, y) and mem.
// is_read selects VRAM->mem; otherwise mem->VRAM, with the mask bit OR-ed
// in when msb is non-zero.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
                 int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);

  if (unlikely(is_read)) {
    memcpy(mem, vram, l * 2);
    return;
  }
  if (unlikely(msb)) {
    cpy_msb(vram, mem, l, msb);
    return;
  }
  memcpy(vram, mem, l * 2);
}
428
// Feed `count` words of an in-progress VRAM read/write transfer.
// Handles a partial first line left over from a previous call
// (gpu.dma.offset), whole lines, and a trailing partial line; finishes the
// transfer when the full rectangle has been covered.
// Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // "force mask bit" setting
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // resume a partially transferred line first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // VRAM is 512 lines, wrap
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // transfer not finished; stash a partial line offset if data remains
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
481
// Begin a VRAM transfer (GP0 A0h write / C0h read): latch the destination
// rectangle into gpu.dma and, for reads, raise the IMG status bit and
// pre-fill gp0 with the first word for GPUREAD.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // hardware treats size 0 as max (1024/512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;  // keep the original rect for cache updates

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
508
// Complete a VRAM transfer: clear the IMG bit after a read, or mark the
// framebuffer dirty and invalidate renderer caches over the written
// rectangle after a write.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
521
// GP0(80h) VRAM-to-VRAM copy. Fast path uses per-line memcpy; the slow
// buffered path handles horizontally overlapping rectangles, x-coordinate
// wrap-around past 1024, and the forced mask bit.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;   // 0 means max
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;  // no-op copy

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    // slow path: stage each chunk through lbuf so overlap/wrap is safe
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
567
// Walk a GP0 command list while frameskip is active: consume and discard
// drawing commands, but still track state that must survive a skipped
// frame (texture page from textured prims, e-regs, pending fills).
// Stops when skipping becomes disallowed or at image i/o commands.
// Returns words consumed; *last_cmd is the last cmd seen (-1 = incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          // small fill: defer it; decide_frameskip() replays if needed
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:  // textured polygons: keep texpage up to date
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:  // polylines: scan for the 5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:  // shaded polylines: terminator every 2 words
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          // draw area change may make skipping unsafe
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
632
// Core GP0 dispatcher: consume `count` words from `data`, routing them to
// VRAM i/o, VRAM copy, the renderer's command-list parser, or the skip
// parser when frameskip is active. Accumulates emulated GPU cycles into
// *cycles_sum/*cycles_last. Returns the number of words NOT consumed
// (non-zero only for a trailing incomplete command).
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active VRAM write consumes the data stream first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      // vram-to-vram copy, 4 words total
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the latched texpage/mask settings into GPUSTAT bits 0-12
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area may have changed; re-evaluate skip permission
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
706
// Drain the staging command buffer; any trailing incomplete command is
// moved to the front and kept for the next write.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed: notify the core a primitive run started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
719
// Block GP0 write (DMA): process `count` words directly; unlike the chain
// path, leftover incomplete words are discarded with a logged anomaly.
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
733
d30279e2 734void GPUwriteData(uint32_t data)
1ab64c54 735{
56f08d83 736 log_io("gpu_write %08x\n", data);
db215a72 737 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
738 if (gpu.cmd_len >= CMD_BUFFER_LEN)
739 flush_cmd_buffer();
1ab64c54
GI
740}
741
// Walk a GP0 DMA linked list starting at start_addr. Each node is a header
// word (next-address in bits 0-23, payload length in bits 24-31) followed
// by payload words. Uses Brent-style doubling (ld_addr/ld_count) to detect
// list loops. If progress_addr is non-NULL, processes one node and reports
// the next address instead of walking the whole list.
// Returns accumulated GPU cycles; *cycles_last_cmd gets the cost of the
// still-unfinished last command.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
  uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit 23 = terminator
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      // previous node left an incomplete command; append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        // stash the incomplete tail for the next node
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      // advance the loop-detection anchor, doubling the interval
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
813
d30279e2
GI
814void GPUreadDataMem(uint32_t *mem, int count)
815{
56f08d83 816 log_io("gpu_dma_read %p %d\n", mem, count);
817
d30279e2
GI
818 if (unlikely(gpu.cmd_len > 0))
819 flush_cmd_buffer();
56f08d83 820
d30279e2
GI
821 if (gpu.dma.h)
822 do_vram_io(mem, count, 1);
823}
824
// Single-word GPUREAD: returns the latched gp0 value, or the next word of
// an active VRAM read transfer (converted through LE since do_vram_io
// works on raw VRAM bytes).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
842
843uint32_t GPUreadStatus(void)
844{
ddd56f6e 845 uint32_t ret;
56f08d83 846
d30279e2
GI
847 if (unlikely(gpu.cmd_len > 0))
848 flush_cmd_buffer();
849
61124a6d 850 ret = gpu.status;
ddd56f6e 851 log_io("gpu_read_status %08x\n", ret);
852 return ret;
d30279e2
GI
853}
854
// Save-state snapshot layout shared with the emulator core; field order
// and sizes are part of the save-state format and must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];    // current VRam image (full 2 MB for ZN)
};
1ab64c54 862
// Save (type 1) or load (type 0) the GPU state to/from a GPUFreeze
// snapshot. On load, control regs are replayed through GPUwriteStatus to
// rebuild derived state. Returns 1 (the plugin API's success value).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stored at offset 0xe0, mirroring their command numbers
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 8..1 so display state is reconstructed
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
896
// Per-frame vout update: handles blanking, skips output when the
// framebuffer is clean, applies the frameskip frame-ready gate (with a
// 9-frame stall guard so a stuck skip cannot freeze the picture), and
// pushes the frame to the video backend.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // give up waiting after ~9 frames so output can't stall forever
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement toggled on: renderer caches refer to stale scaled data
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
936
// VBlank hook: compute the effective interlace setting and pass the field
// (lcf) to the renderer when it changes or while interlace is active.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
958
80bc1426 959void GPUgetScreenInfo(int *y, int *base_hres)
960{
961 *y = gpu.screen.y;
962 *base_hres = gpu.screen.vres;
963 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
964 *base_hres >>= 1;
965}
966
// Accept the frontend's callback/config structure: wire up frameskip
// pointers, counters, memory-mapping callbacks, centering config, and
// perform the deferred VRAM mapping (gpu.mmap is only available here).
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute display window only when centering config actually changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
1003
1ab64c54 1004// vim:shiftwidth=2:expandtab