libretro: drop the Frame Duping option
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdlib.h> /* for calloc */

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

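// Convert the GP1(0x06) horizontal display range and the dot clock
// selection from the status register into gpu.screen.x/w/hres, applying
// the configured screen centering mode.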
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

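// Move data for an active CPU<->VRAM transfer over the rectangle set up by
// start_vram_transfer(), wrapping y at 512 lines. Returns the number of
// 32-bit words consumed from 'data'.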
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

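// Latch the target rectangle of a GP0(0xa0) VRAM write or GP0(0xc0) VRAM
// read; for reads, the first word is made available right away through
// GPUREAD (gpu.gp0).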
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

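// Dispatch a buffer of GP0 words: route data to an active VRAM transfer,
// start new transfers/copies, and hand drawing commands to the renderer
// (or to do_cmd_list_skip() while a frame is being skipped).
// Returns the number of words left unprocessed (incomplete command).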
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

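// Walk a DMA linked list starting at start_addr, feeding each packet to
// do_cmd_buffer(). Long lists get loop-detection markers (bit 23) that are
// removed again before returning; the return value is a rough cycle cost.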
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

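// Per-frame presentation: flush pending commands, handle display blanking,
// apply frameskip accounting and push the frame to the video-out layer.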
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab