gpulib: add a "borderless" option to restore old behavior
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* for calloc */
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
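
/* Example for the width computation above (typical NTSC 320-wide
 * timings, an assumption rather than values from this file): x1=0x260
 * (608) and x2=0xc60 (3168) with status bits 16-18 selecting hres=320
 * give hdiv=8, so sw=(3168-608)/8=320 -> (320+2)&~3=320 and
 * x=(608-608)/8=0, i.e. a perfectly centered 320-pixel scanout. */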

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
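
/* Note on C_BORDERLESS, the option this commit adds: it forces the
 * vertical start back to 0 here, while in update_width() it takes the
 * default auto-centering path - this appears to be the old behavior
 * the commit title refers to. */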

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
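
/* The selectors above implement the GP1(0x10) "get GPU info" command;
 * per the nocash PSX specs (background information, not from this
 * file): 0x02 = texture window, 0x03/0x04 = draw area top-left /
 * bottom-right, 0x05 = draw offset, 0x07 = GPU version, reported
 * here as 2. */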

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
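
/* Resulting layout from map_vram() above: a 4 KiB guard precedes the
 * aligned 1024x512 16bpp VRAM, whose allocation is doubled as an
 * overdraw guard; vram_ptr_orig keeps the raw pointer so GPUshutdown()
 * can munmap()/free() it. */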

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // GP1(0x08) display mode: bits 0-5 -> status bits 17-22, bit 6 -> status bit 16
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
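
/* Number of parameter words that follow each GP0 command word, so a
 * complete packet occupies 1 + cmd_lengths[cmd] words.  The polyline
 * commands (0x48-0x5f) are listed with their minimum size and get
 * extended at parse time until the 0x5xxx5xxx terminator word (see
 * do_cmd_list_skip() below). */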

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
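
/* msb in the helpers above is GP0(0xe6) bit 0 ("force mask bit")
 * shifted into the 0x8000 position: reads ignore it, writes OR it
 * into every stored pixel. */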

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
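
/* The slow path above bounces each row through lbuf[] so that copies
 * which overlap forward in x, wrap past column 1023, or need the mask
 * bit OR'd in still read all source pixels before writing any; all
 * other copies take the straight per-row memcpy() path. */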

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
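
/* While a frame is being skipped, only state that outlives it is kept:
 * fills larger than the screen are executed anyway, smaller ones are
 * parked in pending_fill for decide_frameskip(), and texpage/GP0(0xex)
 * settings are mirrored into ex_regs so renderer state stays coherent. */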

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
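
/* The status update at the end of do_cmd_buffer() mirrors GP0(0xe1)
 * texpage bits into GPUSTAT bits 0-10 and the GP0(0xe6) mask settings
 * into bits 11-12, the way the real GPU reflects them. */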

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

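/* Walk the DMA linked list in PSX RAM.  Each node is one header word
 * followed by its payload: header bits 24-31 hold the number of GP0
 * command words that follow, bits 0-23 the address of the next node;
 * a set bit 23 ends the walk (the loop-detection markers below rely
 * on exactly that). */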
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should always be 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
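
/* Sketch of how a frontend might drive GPUfreeze() (hypothetical
 * caller code, not part of this plugin's sources):
 *
 *   struct GPUFreeze *f = malloc(sizeof(*f));
 *   f->ulFreezeVersion = 1;
 *   GPUfreeze(1, f);  // save: snapshot VRAM, control regs, status
 *   ...
 *   GPUfreeze(0, f);  // load: restore and re-sync the renderer
 *   free(f);
 */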

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres; // note: reports the vertical resolution base; the parameter name appears to be historical
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab