gpulib: maybe better loop detection
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
#ifndef ARRAY_SIZE
/* Element count of a real array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

/*
 * Compiler hint helpers.
 *  unlikely(x) - branch prediction hint, evaluates to the condition
 *  preload     - cache prefetch hint
 *  noinline    - keep a function out of line
 */
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
/* the fallback must still yield the condition, or `if (unlikely(c))`
 * would expand to the invalid `if ()` */
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
43static noinline void do_cmd_reset(void)
44{
f99193c2 45 int dummy = 0;
c765eb86 46 renderer_sync();
48f3d210 47 if (unlikely(gpu.cmd_len > 0))
d02ab9fc 48 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
48f3d210 49 gpu.cmd_len = 0;
05740673 50
51 if (unlikely(gpu.dma.h > 0))
52 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 53 gpu.dma.h = 0;
54}
55
6e9bdaef 56static noinline void do_reset(void)
1ab64c54 57{
7841712d 58 unsigned int i;
5b568098 59
48f3d210 60 do_cmd_reset();
61
6e9bdaef 62 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 63 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
64 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 65 gpu.status = 0x14802000;
6e9bdaef 66 gpu.gp0 = 0;
fc84f618 67 gpu.regs[3] = 1;
6e9bdaef 68 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 69 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 70 gpu.screen.x = gpu.screen.y = 0;
01ff3105 71 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 72 renderer_notify_res_change();
1ab64c54
GI
73}
74
8dd855cd 75static noinline void update_width(void)
76{
5bbe183f 77 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
78 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
79 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
80 int hres = hres_all[(gpu.status >> 16) & 7];
81 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 82 int sw = gpu.screen.x2 - gpu.screen.x1;
b3ff74ba 83 int type = gpu.state.screen_centering_type;
5bbe183f 84 int x = 0, x_auto;
b3ff74ba 85 if (type == C_AUTO)
86 type = gpu.state.screen_centering_type_default;
5bbe183f 87 if (sw <= 0)
88 /* nothing displayed? */;
89 else {
90 int s = pal ? 656 : 608; // or 600? pal is just a guess
91 x = (gpu.screen.x1 - s) / hdiv;
92 x = (x + 1) & ~1; // blitter limitation
93 sw /= hdiv;
94 sw = (sw + 2) & ~3; // according to nocash
b3ff74ba 95 switch (type) {
8f8ade9c 96 case C_INGAME:
5bbe183f 97 break;
8f8ade9c 98 case C_MANUAL:
5bbe183f 99 x = gpu.state.screen_centering_x;
100 break;
101 default:
102 // correct if slightly miscentered
103 x_auto = (hres - sw) / 2 & ~3;
104 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
105 x = x_auto;
106 }
107 if (x + sw > hres)
108 sw = hres - x;
109 // .x range check is done in vout_update()
110 }
111 // reduce the unpleasant right border that a few games have
112 if (gpu.state.screen_centering_type == 0
113 && x <= 4 && hres - (x + sw) >= 4)
114 hres -= 4;
115 gpu.screen.x = x;
116 gpu.screen.w = sw;
117 gpu.screen.hres = hres;
118 gpu.state.dims_changed = 1;
119 //printf("xx %d %d -> %2d, %d / %d\n",
120 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
8dd855cd 121}
122
123static noinline void update_height(void)
124{
5bbe183f 125 int pal = gpu.status & PSX_GPU_STATUS_PAL;
126 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
127 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
8dd855cd 128 int sh = gpu.screen.y2 - gpu.screen.y1;
5bbe183f 129 int center_tol = 16;
130 int vres = 240;
131
132 if (pal && (sh > 240 || gpu.screen.vres == 256))
133 vres = 256;
134 if (dheight)
135 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
136 if (sh <= 0)
137 /* nothing displayed? */;
138 else {
139 switch (gpu.state.screen_centering_type) {
8f8ade9c 140 case C_INGAME:
141 break;
142 case C_BORDERLESS:
143 y = 0;
5bbe183f 144 break;
8f8ade9c 145 case C_MANUAL:
5bbe183f 146 y = gpu.state.screen_centering_y;
147 break;
148 default:
149 // correct if slightly miscentered
150 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
151 y = 0;
152 }
153 if (y + sh > vres)
154 sh = vres - y;
155 }
156 gpu.screen.y = y;
8dd855cd 157 gpu.screen.h = sh;
5bbe183f 158 gpu.screen.vres = vres;
159 gpu.state.dims_changed = 1;
160 //printf("yy %d %d -> %d, %d / %d\n",
161 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
8dd855cd 162}
163
fc84f618 164static noinline void decide_frameskip(void)
165{
5eaa13f1
A
166 *gpu.frameskip.dirty = 1;
167
9fe27e25 168 if (gpu.frameskip.active)
169 gpu.frameskip.cnt++;
170 else {
171 gpu.frameskip.cnt = 0;
172 gpu.frameskip.frame_ready = 1;
173 }
fc84f618 174
5eaa13f1
A
175 if (*gpu.frameskip.force)
176 gpu.frameskip.active = 1;
177 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 178 gpu.frameskip.active = 1;
179 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 180 gpu.frameskip.active = 1;
181 else
182 gpu.frameskip.active = 0;
fbb4bfff 183
184 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
f99193c2 185 int dummy = 0;
d02ab9fc 186 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
fbb4bfff 187 gpu.frameskip.pending_fill[0] = 0;
188 }
fc84f618 189}
190
b243416b 191static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 192{
193 // no frameskip if it decides to draw to display area,
194 // but not for interlace since it'll most likely always do that
195 uint32_t x = cmd_e3 & 0x3ff;
196 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 197 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 198 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
199 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 200 return gpu.frameskip.allow;
9fe27e25 201}
202
01ff3105 203static void flush_cmd_buffer(void);
204
6e9bdaef 205static noinline void get_gpu_info(uint32_t data)
206{
01ff3105 207 if (unlikely(gpu.cmd_len > 0))
208 flush_cmd_buffer();
6e9bdaef 209 switch (data & 0x0f) {
210 case 0x02:
211 case 0x03:
212 case 0x04:
6e9bdaef 213 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
214 break;
08b33377 215 case 0x05:
216 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 217 break;
218 case 0x07:
219 gpu.gp0 = 2;
220 break;
221 default:
08b33377 222 // gpu.gp0 unchanged
6e9bdaef 223 break;
224 }
225}
226
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

/*
 * VRAM_ALIGN: alignment of the usable VRAM pointer.
 * At least 16 bytes are needed by gpu_unai's pixel-skipping
 * renderer/downscaler used in high res modes.
 */
#ifdef GCW_ZERO
// On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
// fills. (Will change this value if it ever gets large page support)
#define VRAM_ALIGN 8192
#else
#define VRAM_ALIGN 16
#endif

/* Allocation size: double, for overdraw guard + at least 1 page before. */
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

/* Pointer as received from mmap/calloc; this is the one to deallocate. */
static uint16_t *vram_ptr_orig = NULL;

/* Use the frontend's mmap callback by default on Linux, calloc elsewhere. */
#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
9ee0fd5b 254static int map_vram(void)
255{
e34ef5ac 256#if GPULIB_USE_MMAP
5bd33f52 257 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
e34ef5ac 258#else
5bd33f52 259 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
e34ef5ac 260#endif
261 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
262 // 4kb guard in front
12367ad0 263 gpu.vram += (4096 / 2);
e34ef5ac 264 // Align
265 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 266 return 0;
267 }
268 else {
269 fprintf(stderr, "could not map vram, expect crashes\n");
270 return -1;
271 }
272}
273
6e9bdaef 274long GPUinit(void)
275{
9394ada5 276 int ret;
277 ret = vout_init();
278 ret |= renderer_init();
279
3b7b0065 280 memset(&gpu.state, 0, sizeof(gpu.state));
281 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
282 gpu.zero = 0;
3ece2f0c 283 gpu.state.frame_count = &gpu.zero;
deb18d24 284 gpu.state.hcnt = &gpu.zero;
48f3d210 285 gpu.cmd_len = 0;
9394ada5 286 do_reset();
48f3d210 287
12367ad0 288 /*if (gpu.mmap != NULL) {
9ee0fd5b 289 if (map_vram() != 0)
290 ret = -1;
12367ad0 291 }*/
6e9bdaef 292 return ret;
293}
294
295long GPUshutdown(void)
296{
9ee0fd5b 297 long ret;
298
e929dec5 299 renderer_finish();
9ee0fd5b 300 ret = vout_finish();
12367ad0 301
302 if (vram_ptr_orig != NULL) {
e34ef5ac 303#if GPULIB_USE_MMAP
12367ad0 304 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
305#else
306 free(vram_ptr_orig);
307#endif
9ee0fd5b 308 }
12367ad0 309 vram_ptr_orig = gpu.vram = NULL;
9ee0fd5b 310
311 return ret;
6e9bdaef 312}
313
1ab64c54
GI
314void GPUwriteStatus(uint32_t data)
315{
1ab64c54 316 uint32_t cmd = data >> 24;
9a864a8f 317 int src_x, src_y;
1ab64c54 318
fc84f618 319 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 320 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 321 return;
8dd855cd 322 gpu.regs[cmd] = data;
fc84f618 323 }
324
325 gpu.state.fb_dirty = 1;
8dd855cd 326
327 switch (cmd) {
1ab64c54 328 case 0x00:
6e9bdaef 329 do_reset();
1ab64c54 330 break;
48f3d210 331 case 0x01:
332 do_cmd_reset();
333 break;
1ab64c54 334 case 0x03:
5bbe183f 335 if (data & 1) {
61124a6d 336 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 337 gpu.state.dims_changed = 1; // for hud clearing
338 }
61124a6d
PC
339 else
340 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
341 break;
342 case 0x04:
61124a6d
PC
343 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
344 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
345 break;
346 case 0x05:
9a864a8f 347 src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
348 if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
349 gpu.screen.src_x = src_x;
350 gpu.screen.src_y = src_y;
351 renderer_notify_scanout_change(src_x, src_y);
352 if (gpu.frameskip.set) {
353 decide_frameskip_allow(gpu.ex_regs[3]);
354 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
355 decide_frameskip();
356 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
357 }
9fe27e25 358 }
fb4c6fba 359 }
1ab64c54 360 break;
8dd855cd 361 case 0x06:
362 gpu.screen.x1 = data & 0xfff;
363 gpu.screen.x2 = (data >> 12) & 0xfff;
364 update_width();
365 break;
1ab64c54
GI
366 case 0x07:
367 gpu.screen.y1 = data & 0x3ff;
368 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 369 update_height();
1ab64c54
GI
370 break;
371 case 0x08:
61124a6d 372 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 373 update_width();
374 update_height();
e929dec5 375 renderer_notify_res_change();
1ab64c54 376 break;
deb18d24 377 default:
378 if ((cmd & 0xf0) == 0x10)
379 get_gpu_info(data);
6e9bdaef 380 break;
1ab64c54 381 }
7890a708 382
383#ifdef GPUwriteStatus_ext
384 GPUwriteStatus_ext(data);
385#endif
1ab64c54
GI
386}
387
/*
 * Number of parameter words that follow each command word, indexed by the
 * command byte.  Callers add 1 for the command word itself.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 90 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* a0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* b0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* c0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* d0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
407
d30279e2
GI
/*
 * Address of the 16bpp pixel at (x, y); vram rows are 1024 pixels wide.
 * The expansion is fully parenthesized so the result can be indexed or
 * combined with other operators without precedence surprises.
 */
#define VRAM_MEM_XY(x, y) (&gpu.vram[(y) * 1024 + (x)])
409
/*
 * Copy l 16-bit pixels from src to dst, OR-ing msb into every pixel.
 * Does nothing when l is not positive.
 */
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int remaining = l;

  while (remaining-- > 0)
    *dst++ = *src++ | msb;
}
416
/*
 * Move l pixels between the caller's buffer and vram row y, starting at
 * column x.  is_read copies vram -> mem; otherwise mem -> vram, with msb
 * OR-ed into each written pixel when it is non-zero.
 */
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *row = VRAM_MEM_XY(x, y);

  if (unlikely(is_read)) {
    memcpy(mem, row, l * 2);
    return;
  }
  if (unlikely(msb)) {
    cpy_msb(row, mem, l, msb);
    return;
  }
  memcpy(row, mem, l * 2);
}
428
429static int do_vram_io(uint32_t *data, int count, int is_read)
430{
431 int count_initial = count;
36da9c13 432 uint16_t msb = gpu.ex_regs[6] << 15;
d30279e2
GI
433 uint16_t *sdata = (uint16_t *)data;
434 int x = gpu.dma.x, y = gpu.dma.y;
435 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 436 int o = gpu.dma.offset;
d30279e2
GI
437 int l;
438 count *= 2; // operate in 16bpp pixels
439
c765eb86
JW
440 renderer_sync();
441
d30279e2
GI
442 if (gpu.dma.offset) {
443 l = w - gpu.dma.offset;
ddd56f6e 444 if (count < l)
d30279e2 445 l = count;
ddd56f6e 446
36da9c13 447 do_vram_line(x + o, y, sdata, l, is_read, msb);
ddd56f6e 448
449 if (o + l < w)
450 o += l;
451 else {
452 o = 0;
453 y++;
454 h--;
455 }
d30279e2
GI
456 sdata += l;
457 count -= l;
d30279e2
GI
458 }
459
460 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
461 y &= 511;
36da9c13 462 do_vram_line(x, y, sdata, w, is_read, msb);
d30279e2
GI
463 }
464
05740673 465 if (h > 0) {
466 if (count > 0) {
467 y &= 511;
36da9c13 468 do_vram_line(x, y, sdata, count, is_read, msb);
05740673 469 o = count;
470 count = 0;
471 }
d30279e2 472 }
05740673 473 else
474 finish_vram_transfer(is_read);
d30279e2
GI
475 gpu.dma.y = y;
476 gpu.dma.h = h;
ddd56f6e 477 gpu.dma.offset = o;
d30279e2 478
6e9bdaef 479 return count_initial - count / 2;
d30279e2
GI
480}
481
482static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
483{
ddd56f6e 484 if (gpu.dma.h)
485 log_anomaly("start_vram_transfer while old unfinished\n");
486
5440b88e 487 gpu.dma.x = pos_word & 0x3ff;
488 gpu.dma.y = (pos_word >> 16) & 0x1ff;
48f3d210 489 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
490 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
d30279e2 491 gpu.dma.offset = 0;
05740673 492 gpu.dma.is_read = is_read;
493 gpu.dma_start = gpu.dma;
d30279e2 494
9e146206 495 renderer_flush_queues();
496 if (is_read) {
61124a6d 497 gpu.status |= PSX_GPU_STATUS_IMG;
9e146206 498 // XXX: wrong for width 1
495d603c 499 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
5440b88e 500 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
9e146206 501 }
d30279e2 502
6e9bdaef 503 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
504 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
1328fa32 505 if (gpu.gpu_state_change)
506 gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
d30279e2
GI
507}
508
05740673 509static void finish_vram_transfer(int is_read)
510{
511 if (is_read)
61124a6d 512 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 513 else {
514 gpu.state.fb_dirty = 1;
05740673 515 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 516 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 517 }
1328fa32 518 if (gpu.gpu_state_change)
519 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
05740673 520}
521
f99193c2 522static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
36da9c13 523{
524 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
525 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
526 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
527 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
528 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
529 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
530 uint16_t msb = gpu.ex_regs[6] << 15;
531 uint16_t lbuf[128];
532 uint32_t x, y;
533
f99193c2 534 *cpu_cycles += gput_copy(w, h);
36da9c13 535 if (sx == dx && sy == dy && msb == 0)
536 return;
537
538 renderer_flush_queues();
539
540 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
541 {
542 for (y = 0; y < h; y++)
543 {
544 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
545 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
546 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
547 {
548 uint32_t x1, w1 = w - x;
549 if (w1 > ARRAY_SIZE(lbuf))
550 w1 = ARRAY_SIZE(lbuf);
551 for (x1 = 0; x1 < w1; x1++)
552 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
553 for (x1 = 0; x1 < w1; x1++)
554 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
555 }
556 }
557 }
558 else
559 {
560 uint32_t sy1 = sy, dy1 = dy;
561 for (y = 0; y < h; y++, sy1++, dy1++)
562 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
563 }
564
565 renderer_update_caches(dx, dy, w, h, 0);
566}
567
b243416b 568static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
569{
f99193c2 570 int cmd = 0, pos = 0, len, dummy = 0, v;
b243416b 571 int skip = 1;
572
fbb4bfff 573 gpu.frameskip.pending_fill[0] = 0;
574
b243416b 575 while (pos < count && skip) {
576 uint32_t *list = data + pos;
db215a72 577 cmd = LE32TOH(list[0]) >> 24;
b243416b 578 len = 1 + cmd_lengths[cmd];
579
97e07db9 580 switch (cmd) {
581 case 0x02:
db215a72 582 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 583 // clearing something large, don't skip
d02ab9fc 584 do_cmd_list(list, 3, &dummy, &dummy, &dummy);
97e07db9 585 else
586 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
587 break;
588 case 0x24 ... 0x27:
589 case 0x2c ... 0x2f:
590 case 0x34 ... 0x37:
591 case 0x3c ... 0x3f:
592 gpu.ex_regs[1] &= ~0x1ff;
db215a72 593 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 594 break;
595 case 0x48 ... 0x4F:
596 for (v = 3; pos + v < count; v++)
597 {
db215a72 598 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 599 break;
600 }
601 len += v - 3;
602 break;
603 case 0x58 ... 0x5F:
604 for (v = 4; pos + v < count; v += 2)
605 {
db215a72 606 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 607 break;
608 }
609 len += v - 4;
610 break;
611 default:
612 if (cmd == 0xe3)
db215a72 613 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 614 if ((cmd & 0xf8) == 0xe0)
db215a72 615 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 616 break;
b243416b 617 }
b243416b 618
619 if (pos + len > count) {
620 cmd = -1;
621 break; // incomplete cmd
622 }
36da9c13 623 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 624 break; // image i/o
97e07db9 625
b243416b 626 pos += len;
627 }
628
629 renderer_sync_ecmds(gpu.ex_regs);
630 *last_cmd = cmd;
631 return pos;
632}
633
d02ab9fc 634static noinline int do_cmd_buffer(uint32_t *data, int count,
635 int *cycles_sum, int *cycles_last)
d30279e2 636{
b243416b 637 int cmd, pos;
638 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 639 int vram_dirty = 0;
d30279e2 640
d30279e2 641 // process buffer
b243416b 642 for (pos = 0; pos < count; )
d30279e2 643 {
b243416b 644 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
645 vram_dirty = 1;
d30279e2 646 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 647 if (pos == count)
648 break;
d30279e2
GI
649 }
650
db215a72 651 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 652 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
653 if (unlikely((pos+2) >= count)) {
654 // incomplete vram write/read cmd, can't consume yet
655 cmd = -1;
656 break;
657 }
658
d30279e2 659 // consume vram write/read cmd
db215a72 660 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 661 pos += 3;
662 continue;
d30279e2 663 }
36da9c13 664 else if ((cmd & 0xe0) == 0x80) {
665 if (unlikely((pos+3) >= count)) {
666 cmd = -1; // incomplete cmd, can't consume yet
667 break;
668 }
025b6fde 669 renderer_sync();
d02ab9fc 670 *cycles_sum += *cycles_last;
671 *cycles_last = 0;
672 do_vram_copy(data + pos + 1, cycles_last);
b30fba56 673 vram_dirty = 1;
36da9c13 674 pos += 4;
675 continue;
676 }
c296224f 677 else if (cmd == 0x1f) {
678 log_anomaly("irq1?\n");
679 pos++;
680 continue;
681 }
b243416b 682
1e07f71d 683 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 684 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 685 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
686 else {
d02ab9fc 687 pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
b243416b 688 vram_dirty = 1;
689 }
690
691 if (cmd == -1)
692 // incomplete cmd
ddd56f6e 693 break;
d30279e2 694 }
ddd56f6e 695
61124a6d
PC
696 gpu.status &= ~0x1fff;
697 gpu.status |= gpu.ex_regs[1] & 0x7ff;
698 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 699
fc84f618 700 gpu.state.fb_dirty |= vram_dirty;
701
b243416b 702 if (old_e3 != gpu.ex_regs[3])
703 decide_frameskip_allow(gpu.ex_regs[3]);
704
ddd56f6e 705 return count - pos;
d30279e2
GI
706}
707
1328fa32 708static noinline void flush_cmd_buffer(void)
d30279e2 709{
f99193c2 710 int dummy = 0, left;
d02ab9fc 711 left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
d30279e2
GI
712 if (left > 0)
713 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
1328fa32 714 if (left != gpu.cmd_len) {
715 if (!gpu.dma.h && gpu.gpu_state_change)
716 gpu.gpu_state_change(PGS_PRIMITIVE_START);
717 gpu.cmd_len = left;
718 }
1ab64c54
GI
719}
720
721void GPUwriteDataMem(uint32_t *mem, int count)
722{
f99193c2 723 int dummy = 0, left;
d30279e2 724
56f08d83 725 log_io("gpu_dma_write %p %d\n", mem, count);
726
d30279e2
GI
727 if (unlikely(gpu.cmd_len > 0))
728 flush_cmd_buffer();
56f08d83 729
d02ab9fc 730 left = do_cmd_buffer(mem, count, &dummy, &dummy);
d30279e2 731 if (left)
56f08d83 732 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
733}
734
d30279e2 735void GPUwriteData(uint32_t data)
1ab64c54 736{
56f08d83 737 log_io("gpu_write %08x\n", data);
db215a72 738 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
739 if (gpu.cmd_len >= CMD_BUFFER_LEN)
740 flush_cmd_buffer();
1ab64c54
GI
741}
742
d02ab9fc 743long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
744 uint32_t *progress_addr, int32_t *cycles_last_cmd)
1ab64c54 745{
2048ae31 746 uint32_t addr, *list, ld_addr;
747 int len, left, count, ld_count = 32;
d02ab9fc 748 int cpu_cycles_sum = 0;
749 int cpu_cycles_last = 0;
d30279e2 750
8f5f2dd5 751 preload(rambase + (start_addr & 0x1fffff) / 4);
752
d30279e2
GI
753 if (unlikely(gpu.cmd_len > 0))
754 flush_cmd_buffer();
755
56f08d83 756 log_io("gpu_dma_chain\n");
2048ae31 757 addr = ld_addr = start_addr & 0xffffff;
09159d99 758 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 759 {
ddd56f6e 760 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
761 len = LE32TOH(list[0]) >> 24;
762 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 763 preload(rambase + (addr & 0x1fffff) / 4);
764
d02ab9fc 765 cpu_cycles_sum += 10;
1c72b1c2 766 if (len > 0)
d02ab9fc 767 cpu_cycles_sum += 5 + len;
deb18d24 768
d02ab9fc 769 log_io(".chain %08lx #%d+%d %u+%u\n",
770 (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
a4e249a1 771 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 772 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
773 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
774 gpu.cmd_len = 0;
775 }
a4e249a1 776 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
777 gpu.cmd_len += len;
778 flush_cmd_buffer();
779 continue;
780 }
ddd56f6e 781
56f08d83 782 if (len) {
d02ab9fc 783 left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
a4e249a1 784 if (left) {
785 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
786 gpu.cmd_len = left;
787 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
788 }
56f08d83 789 }
ddd56f6e 790
fae38d7a 791 if (progress_addr) {
792 *progress_addr = addr;
793 break;
794 }
2048ae31 795 if (addr == ld_addr) {
796 log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
797 break;
09159d99 798 }
2048ae31 799 if (count == ld_count) {
800 ld_addr = addr;
801 ld_count *= 2;
09159d99 802 }
d30279e2 803 }
09159d99 804
d02ab9fc 805 //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
3ece2f0c 806 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 807 gpu.state.last_list.hcnt = *gpu.state.hcnt;
d02ab9fc 808 gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
deb18d24 809 gpu.state.last_list.addr = start_addr;
810
d02ab9fc 811 *cycles_last_cmd = cpu_cycles_last;
812 return cpu_cycles_sum;
1ab64c54
GI
813}
814
d30279e2
GI
815void GPUreadDataMem(uint32_t *mem, int count)
816{
56f08d83 817 log_io("gpu_dma_read %p %d\n", mem, count);
818
d30279e2
GI
819 if (unlikely(gpu.cmd_len > 0))
820 flush_cmd_buffer();
56f08d83 821
d30279e2
GI
822 if (gpu.dma.h)
823 do_vram_io(mem, count, 1);
824}
825
826uint32_t GPUreadData(void)
827{
9e146206 828 uint32_t ret;
56f08d83 829
830 if (unlikely(gpu.cmd_len > 0))
831 flush_cmd_buffer();
832
9e146206 833 ret = gpu.gp0;
495d603c
PC
834 if (gpu.dma.h) {
835 ret = HTOLE32(ret);
9e146206 836 do_vram_io(&ret, 1, 1);
495d603c
PC
837 ret = LE32TOH(ret);
838 }
56f08d83 839
9e146206 840 log_io("gpu_read %08x\n", ret);
841 return ret;
d30279e2
GI
842}
843
844uint32_t GPUreadStatus(void)
845{
ddd56f6e 846 uint32_t ret;
56f08d83 847
d30279e2
GI
848 if (unlikely(gpu.cmd_len > 0))
849 flush_cmd_buffer();
850
61124a6d 851 ret = gpu.status;
ddd56f6e 852 log_io("gpu_read_status %08x\n", ret);
853 return ret;
d30279e2
GI
854}
855
/* Savestate block exchanged with the emulator core via GPUfreeze(). */
struct GPUFreeze
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[1024*1024*2];
};
1ab64c54 863
096ec49b 864long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 865{
fc84f618 866 int i;
867
1ab64c54
GI
868 switch (type) {
869 case 1: // save
d30279e2
GI
870 if (gpu.cmd_len > 0)
871 flush_cmd_buffer();
c765eb86
JW
872
873 renderer_sync();
9ee0fd5b 874 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 875 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 876 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 877 freeze->ulStatus = gpu.status;
1ab64c54
GI
878 break;
879 case 0: // load
c765eb86 880 renderer_sync();
9ee0fd5b 881 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 882 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 883 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 884 gpu.status = freeze->ulStatus;
3d47ef17 885 gpu.cmd_len = 0;
fc84f618 886 for (i = 8; i > 0; i--) {
887 gpu.regs[i] ^= 1; // avoid reg change detection
888 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
889 }
5b745e5b 890 renderer_sync_ecmds(gpu.ex_regs);
9a864a8f 891 renderer_update_caches(0, 0, 1024, 512, 0);
1ab64c54
GI
892 break;
893 }
894
895 return 1;
896}
897
5440b88e 898void GPUupdateLace(void)
899{
900 if (gpu.cmd_len > 0)
901 flush_cmd_buffer();
902 renderer_flush_queues();
903
7a20a6d0 904#ifndef RAW_FB_DISPLAY
61124a6d 905 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 906 if (!gpu.state.blanked) {
907 vout_blank();
908 gpu.state.blanked = 1;
909 gpu.state.fb_dirty = 1;
910 }
911 return;
912 }
913
c765eb86
JW
914 renderer_notify_update_lace(0);
915
aafcb4dd 916 if (!gpu.state.fb_dirty)
5440b88e 917 return;
7a20a6d0 918#endif
5440b88e 919
920 if (gpu.frameskip.set) {
921 if (!gpu.frameskip.frame_ready) {
922 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
923 return;
924 gpu.frameskip.active = 0;
925 }
926 gpu.frameskip.frame_ready = 0;
927 }
928
929 vout_update();
3b7b0065 930 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
931 renderer_update_caches(0, 0, 1024, 512, 1);
932 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 933 gpu.state.fb_dirty = 0;
aafcb4dd 934 gpu.state.blanked = 0;
c765eb86 935 renderer_notify_update_lace(1);
5440b88e 936}
937
72e5023f 938void GPUvBlank(int is_vblank, int lcf)
939{
5440b88e 940 int interlace = gpu.state.allow_interlace
61124a6d
PC
941 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
942 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 943 // interlace doesn't look nice on progressive displays,
944 // so we have this "auto" mode here for games that don't read vram
945 if (gpu.state.allow_interlace == 2
946 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
947 {
948 interlace = 0;
949 }
950 if (interlace || interlace != gpu.state.old_interlace) {
951 gpu.state.old_interlace = interlace;
952
953 if (gpu.cmd_len > 0)
954 flush_cmd_buffer();
955 renderer_flush_queues();
956 renderer_set_interlace(interlace, !lcf);
957 }
958}
959
80bc1426 960void GPUgetScreenInfo(int *y, int *base_hres)
961{
962 *y = gpu.screen.y;
963 *base_hres = gpu.screen.vres;
964 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
965 *base_hres >>= 1;
966}
967
5440b88e 968void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
969{
970 gpu.frameskip.set = cbs->frameskip;
971 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 972 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 973 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 974 gpu.frameskip.active = 0;
975 gpu.frameskip.frame_ready = 1;
976 gpu.state.hcnt = cbs->gpu_hcnt;
977 gpu.state.frame_count = cbs->gpu_frame_count;
978 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 979 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
b3ff74ba 980 gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
5bbe183f 981 if (gpu.state.screen_centering_type != cbs->screen_centering_type
982 || gpu.state.screen_centering_x != cbs->screen_centering_x
983 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
984 gpu.state.screen_centering_type = cbs->screen_centering_type;
985 gpu.state.screen_centering_x = cbs->screen_centering_x;
986 gpu.state.screen_centering_y = cbs->screen_centering_y;
987 update_width();
988 update_height();
989 }
5440b88e 990
9ee0fd5b 991 gpu.mmap = cbs->mmap;
992 gpu.munmap = cbs->munmap;
1328fa32 993 gpu.gpu_state_change = cbs->gpu_state_change;
9ee0fd5b 994
995 // delayed vram mmap
996 if (gpu.vram == NULL)
997 map_vram();
998
5440b88e 999 if (cbs->pl_vout_set_raw_vram)
1000 cbs->pl_vout_set_raw_vram(gpu.vram);
1001 renderer_set_config(cbs);
1002 vout_set_config(cbs);
72e5023f 1003}
1004
1ab64c54 1005// vim:shiftwidth=2:expandtab