gpu: improve timings of clipped sprites
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
8f8ade9c 21#ifndef ARRAY_SIZE
1ab64c54 22#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f8ade9c 23#endif
// Compiler hint helpers.
//   unlikely(x) - evaluates to x, telling GCC-compatible compilers that the
//                 condition is expected to be false
//   preload     - cache prefetch hint, a no-op elsewhere
//   noinline    - keeps a function out of line, empty elsewhere
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// the fallback still has to yield the condition, otherwise
// "if (unlikely(c))" expands to "if ()" and does not compile
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
f99193c2 39static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
05740673 40static void finish_vram_transfer(int is_read);
48f3d210 41
42static noinline void do_cmd_reset(void)
43{
f99193c2 44 int dummy = 0;
c765eb86 45 renderer_sync();
48f3d210 46 if (unlikely(gpu.cmd_len > 0))
f99193c2 47 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
48f3d210 48 gpu.cmd_len = 0;
05740673 49
50 if (unlikely(gpu.dma.h > 0))
51 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 52 gpu.dma.h = 0;
53}
54
6e9bdaef 55static noinline void do_reset(void)
1ab64c54 56{
7841712d 57 unsigned int i;
5b568098 58
48f3d210 59 do_cmd_reset();
60
6e9bdaef 61 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 62 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
63 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 64 gpu.status = 0x14802000;
6e9bdaef 65 gpu.gp0 = 0;
fc84f618 66 gpu.regs[3] = 1;
6e9bdaef 67 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 68 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 69 gpu.screen.x = gpu.screen.y = 0;
01ff3105 70 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 71 renderer_notify_res_change();
1ab64c54
GI
72}
73
8dd855cd 74static noinline void update_width(void)
75{
5bbe183f 76 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
77 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
78 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
79 int hres = hres_all[(gpu.status >> 16) & 7];
80 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 81 int sw = gpu.screen.x2 - gpu.screen.x1;
b3ff74ba 82 int type = gpu.state.screen_centering_type;
5bbe183f 83 int x = 0, x_auto;
b3ff74ba 84 if (type == C_AUTO)
85 type = gpu.state.screen_centering_type_default;
5bbe183f 86 if (sw <= 0)
87 /* nothing displayed? */;
88 else {
89 int s = pal ? 656 : 608; // or 600? pal is just a guess
90 x = (gpu.screen.x1 - s) / hdiv;
91 x = (x + 1) & ~1; // blitter limitation
92 sw /= hdiv;
93 sw = (sw + 2) & ~3; // according to nocash
b3ff74ba 94 switch (type) {
8f8ade9c 95 case C_INGAME:
5bbe183f 96 break;
8f8ade9c 97 case C_MANUAL:
5bbe183f 98 x = gpu.state.screen_centering_x;
99 break;
100 default:
101 // correct if slightly miscentered
102 x_auto = (hres - sw) / 2 & ~3;
103 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
104 x = x_auto;
105 }
106 if (x + sw > hres)
107 sw = hres - x;
108 // .x range check is done in vout_update()
109 }
110 // reduce the unpleasant right border that a few games have
111 if (gpu.state.screen_centering_type == 0
112 && x <= 4 && hres - (x + sw) >= 4)
113 hres -= 4;
114 gpu.screen.x = x;
115 gpu.screen.w = sw;
116 gpu.screen.hres = hres;
117 gpu.state.dims_changed = 1;
118 //printf("xx %d %d -> %2d, %d / %d\n",
119 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
8dd855cd 120}
121
122static noinline void update_height(void)
123{
5bbe183f 124 int pal = gpu.status & PSX_GPU_STATUS_PAL;
125 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
126 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
8dd855cd 127 int sh = gpu.screen.y2 - gpu.screen.y1;
5bbe183f 128 int center_tol = 16;
129 int vres = 240;
130
131 if (pal && (sh > 240 || gpu.screen.vres == 256))
132 vres = 256;
133 if (dheight)
134 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
135 if (sh <= 0)
136 /* nothing displayed? */;
137 else {
138 switch (gpu.state.screen_centering_type) {
8f8ade9c 139 case C_INGAME:
140 break;
141 case C_BORDERLESS:
142 y = 0;
5bbe183f 143 break;
8f8ade9c 144 case C_MANUAL:
5bbe183f 145 y = gpu.state.screen_centering_y;
146 break;
147 default:
148 // correct if slightly miscentered
149 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
150 y = 0;
151 }
152 if (y + sh > vres)
153 sh = vres - y;
154 }
155 gpu.screen.y = y;
8dd855cd 156 gpu.screen.h = sh;
5bbe183f 157 gpu.screen.vres = vres;
158 gpu.state.dims_changed = 1;
159 //printf("yy %d %d -> %d, %d / %d\n",
160 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
8dd855cd 161}
162
fc84f618 163static noinline void decide_frameskip(void)
164{
5eaa13f1
A
165 *gpu.frameskip.dirty = 1;
166
9fe27e25 167 if (gpu.frameskip.active)
168 gpu.frameskip.cnt++;
169 else {
170 gpu.frameskip.cnt = 0;
171 gpu.frameskip.frame_ready = 1;
172 }
fc84f618 173
5eaa13f1
A
174 if (*gpu.frameskip.force)
175 gpu.frameskip.active = 1;
176 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 177 gpu.frameskip.active = 1;
178 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 179 gpu.frameskip.active = 1;
180 else
181 gpu.frameskip.active = 0;
fbb4bfff 182
183 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
f99193c2 184 int dummy = 0;
185 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
fbb4bfff 186 gpu.frameskip.pending_fill[0] = 0;
187 }
fc84f618 188}
189
b243416b 190static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 191{
192 // no frameskip if it decides to draw to display area,
193 // but not for interlace since it'll most likely always do that
194 uint32_t x = cmd_e3 & 0x3ff;
195 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 196 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 197 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
198 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 199 return gpu.frameskip.allow;
9fe27e25 200}
201
01ff3105 202static void flush_cmd_buffer(void);
203
6e9bdaef 204static noinline void get_gpu_info(uint32_t data)
205{
01ff3105 206 if (unlikely(gpu.cmd_len > 0))
207 flush_cmd_buffer();
6e9bdaef 208 switch (data & 0x0f) {
209 case 0x02:
210 case 0x03:
211 case 0x04:
6e9bdaef 212 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
213 break;
08b33377 214 case 0x05:
215 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 216 break;
217 case 0x07:
218 gpu.gp0 = 2;
219 break;
220 default:
08b33377 221 // gpu.gp0 unchanged
6e9bdaef 222 break;
223 }
224}
225
5bd33f52 226#ifndef max
227#define max(a, b) (((a) > (b)) ? (a) : (b))
228#endif
12367ad0 229
230// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
231// renderer/downscaler it uses in high res modes:
232#ifdef GCW_ZERO
233 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
234 // fills. (Will change this value if it ever gets large page support)
235 #define VRAM_ALIGN 8192
236#else
237 #define VRAM_ALIGN 16
238#endif
239
5bd33f52 240// double, for overdraw guard + at least 1 page before
241#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
242
12367ad0 243// vram ptr received from mmap/malloc/alloc (will deallocate using this)
244static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 245
e34ef5ac 246#ifndef GPULIB_USE_MMAP
247# ifdef __linux__
248# define GPULIB_USE_MMAP 1
249# else
250# define GPULIB_USE_MMAP 0
251# endif
252#endif
9ee0fd5b 253static int map_vram(void)
254{
e34ef5ac 255#if GPULIB_USE_MMAP
5bd33f52 256 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
e34ef5ac 257#else
5bd33f52 258 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
e34ef5ac 259#endif
260 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
261 // 4kb guard in front
12367ad0 262 gpu.vram += (4096 / 2);
e34ef5ac 263 // Align
264 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 265 return 0;
266 }
267 else {
268 fprintf(stderr, "could not map vram, expect crashes\n");
269 return -1;
270 }
271}
272
6e9bdaef 273long GPUinit(void)
274{
9394ada5 275 int ret;
276 ret = vout_init();
277 ret |= renderer_init();
278
3b7b0065 279 memset(&gpu.state, 0, sizeof(gpu.state));
280 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
281 gpu.zero = 0;
3ece2f0c 282 gpu.state.frame_count = &gpu.zero;
deb18d24 283 gpu.state.hcnt = &gpu.zero;
48f3d210 284 gpu.cmd_len = 0;
9394ada5 285 do_reset();
48f3d210 286
12367ad0 287 /*if (gpu.mmap != NULL) {
9ee0fd5b 288 if (map_vram() != 0)
289 ret = -1;
12367ad0 290 }*/
6e9bdaef 291 return ret;
292}
293
294long GPUshutdown(void)
295{
9ee0fd5b 296 long ret;
297
e929dec5 298 renderer_finish();
9ee0fd5b 299 ret = vout_finish();
12367ad0 300
301 if (vram_ptr_orig != NULL) {
e34ef5ac 302#if GPULIB_USE_MMAP
12367ad0 303 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
304#else
305 free(vram_ptr_orig);
306#endif
9ee0fd5b 307 }
12367ad0 308 vram_ptr_orig = gpu.vram = NULL;
9ee0fd5b 309
310 return ret;
6e9bdaef 311}
312
1ab64c54
GI
313void GPUwriteStatus(uint32_t data)
314{
1ab64c54 315 uint32_t cmd = data >> 24;
9a864a8f 316 int src_x, src_y;
1ab64c54 317
fc84f618 318 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 319 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 320 return;
8dd855cd 321 gpu.regs[cmd] = data;
fc84f618 322 }
323
324 gpu.state.fb_dirty = 1;
8dd855cd 325
326 switch (cmd) {
1ab64c54 327 case 0x00:
6e9bdaef 328 do_reset();
1ab64c54 329 break;
48f3d210 330 case 0x01:
331 do_cmd_reset();
332 break;
1ab64c54 333 case 0x03:
5bbe183f 334 if (data & 1) {
61124a6d 335 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 336 gpu.state.dims_changed = 1; // for hud clearing
337 }
61124a6d
PC
338 else
339 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
340 break;
341 case 0x04:
61124a6d
PC
342 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
343 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
344 break;
345 case 0x05:
9a864a8f 346 src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
347 if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
348 gpu.screen.src_x = src_x;
349 gpu.screen.src_y = src_y;
350 renderer_notify_scanout_change(src_x, src_y);
351 if (gpu.frameskip.set) {
352 decide_frameskip_allow(gpu.ex_regs[3]);
353 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
354 decide_frameskip();
355 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
356 }
9fe27e25 357 }
fb4c6fba 358 }
1ab64c54 359 break;
8dd855cd 360 case 0x06:
361 gpu.screen.x1 = data & 0xfff;
362 gpu.screen.x2 = (data >> 12) & 0xfff;
363 update_width();
364 break;
1ab64c54
GI
365 case 0x07:
366 gpu.screen.y1 = data & 0x3ff;
367 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 368 update_height();
1ab64c54
GI
369 break;
370 case 0x08:
61124a6d 371 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 372 update_width();
373 update_height();
e929dec5 374 renderer_notify_res_change();
1ab64c54 375 break;
deb18d24 376 default:
377 if ((cmd & 0xf0) == 0x10)
378 get_gpu_info(data);
6e9bdaef 379 break;
1ab64c54 380 }
7890a708 381
382#ifdef GPUwriteStatus_ext
383 GPUwriteStatus_ext(data);
384#endif
1ab64c54
GI
385}
386
// Number of 32-bit parameter words that follow the command word, indexed by
// the command byte (top 8 bits of the command word). do_cmd_list_skip() uses
// 1 + cmd_lengths[cmd] as the total command length and extends it for the
// 0x48..0x4f / 0x58..0x5f commands by scanning for a terminator word.
// The trailing "// 20", "// 40", ... comments give the command byte of the
// first entry in that row.
56f08d83 387const unsigned char cmd_lengths[256] =
1ab64c54 388{
d30279e2
GI
389 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
392 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
652c6b8b 393 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
394 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
395 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
d30279e2 396 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
36da9c13 397 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
398 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
399 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
400 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
401 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
402 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
d30279e2
GI
403 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
405};
406
d30279e2
GI
// Address of the 16bpp vram pixel at column x, row y (rows are 1024 pixels
// wide). The expansion is parenthesized so that the result can be indexed or
// otherwise combined with postfix operators.
#define VRAM_MEM_XY(x, y) (&gpu.vram[(y) * 1024 + (x)])
408
// Copies l 16-bit pixels from src to dst, OR-ing msb into every one of them.
// Nothing is copied when l is zero or negative.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int remaining;

  for (remaining = l; remaining > 0; remaining--)
    *dst++ = *src++ | msb;
}
415
// Transfers l pixels between mem and the vram row segment starting at (x, y).
//   is_read != 0: vram -> mem
//   is_read == 0: mem -> vram, with msb OR-ed into each pixel when it is set
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *row = VRAM_MEM_XY(x, y);

  if (unlikely(is_read)) {
    memcpy(mem, row, l * 2);
    return;
  }

  if (unlikely(msb)) {
    cpy_msb(row, mem, l, msb);
    return;
  }

  memcpy(row, mem, l * 2);
}
427
428static int do_vram_io(uint32_t *data, int count, int is_read)
429{
430 int count_initial = count;
36da9c13 431 uint16_t msb = gpu.ex_regs[6] << 15;
d30279e2
GI
432 uint16_t *sdata = (uint16_t *)data;
433 int x = gpu.dma.x, y = gpu.dma.y;
434 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 435 int o = gpu.dma.offset;
d30279e2
GI
436 int l;
437 count *= 2; // operate in 16bpp pixels
438
c765eb86
JW
439 renderer_sync();
440
d30279e2
GI
441 if (gpu.dma.offset) {
442 l = w - gpu.dma.offset;
ddd56f6e 443 if (count < l)
d30279e2 444 l = count;
ddd56f6e 445
36da9c13 446 do_vram_line(x + o, y, sdata, l, is_read, msb);
ddd56f6e 447
448 if (o + l < w)
449 o += l;
450 else {
451 o = 0;
452 y++;
453 h--;
454 }
d30279e2
GI
455 sdata += l;
456 count -= l;
d30279e2
GI
457 }
458
459 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
460 y &= 511;
36da9c13 461 do_vram_line(x, y, sdata, w, is_read, msb);
d30279e2
GI
462 }
463
05740673 464 if (h > 0) {
465 if (count > 0) {
466 y &= 511;
36da9c13 467 do_vram_line(x, y, sdata, count, is_read, msb);
05740673 468 o = count;
469 count = 0;
470 }
d30279e2 471 }
05740673 472 else
473 finish_vram_transfer(is_read);
d30279e2
GI
474 gpu.dma.y = y;
475 gpu.dma.h = h;
ddd56f6e 476 gpu.dma.offset = o;
d30279e2 477
6e9bdaef 478 return count_initial - count / 2;
d30279e2
GI
479}
480
481static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
482{
ddd56f6e 483 if (gpu.dma.h)
484 log_anomaly("start_vram_transfer while old unfinished\n");
485
5440b88e 486 gpu.dma.x = pos_word & 0x3ff;
487 gpu.dma.y = (pos_word >> 16) & 0x1ff;
48f3d210 488 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
489 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
d30279e2 490 gpu.dma.offset = 0;
05740673 491 gpu.dma.is_read = is_read;
492 gpu.dma_start = gpu.dma;
d30279e2 493
9e146206 494 renderer_flush_queues();
495 if (is_read) {
61124a6d 496 gpu.status |= PSX_GPU_STATUS_IMG;
9e146206 497 // XXX: wrong for width 1
495d603c 498 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
5440b88e 499 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
9e146206 500 }
d30279e2 501
6e9bdaef 502 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
503 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
1328fa32 504 if (gpu.gpu_state_change)
505 gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
d30279e2
GI
506}
507
05740673 508static void finish_vram_transfer(int is_read)
509{
510 if (is_read)
61124a6d 511 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 512 else {
513 gpu.state.fb_dirty = 1;
05740673 514 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 515 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 516 }
1328fa32 517 if (gpu.gpu_state_change)
518 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
05740673 519}
520
f99193c2 521static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
36da9c13 522{
523 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
524 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
525 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
526 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
527 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
528 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
529 uint16_t msb = gpu.ex_regs[6] << 15;
530 uint16_t lbuf[128];
531 uint32_t x, y;
532
f99193c2 533 *cpu_cycles += gput_copy(w, h);
36da9c13 534 if (sx == dx && sy == dy && msb == 0)
535 return;
536
537 renderer_flush_queues();
538
539 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
540 {
541 for (y = 0; y < h; y++)
542 {
543 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
544 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
545 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
546 {
547 uint32_t x1, w1 = w - x;
548 if (w1 > ARRAY_SIZE(lbuf))
549 w1 = ARRAY_SIZE(lbuf);
550 for (x1 = 0; x1 < w1; x1++)
551 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
552 for (x1 = 0; x1 < w1; x1++)
553 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
554 }
555 }
556 }
557 else
558 {
559 uint32_t sy1 = sy, dy1 = dy;
560 for (y = 0; y < h; y++, sy1++, dy1++)
561 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
562 }
563
564 renderer_update_caches(dx, dy, w, h, 0);
565}
566
b243416b 567static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
568{
f99193c2 569 int cmd = 0, pos = 0, len, dummy = 0, v;
b243416b 570 int skip = 1;
571
fbb4bfff 572 gpu.frameskip.pending_fill[0] = 0;
573
b243416b 574 while (pos < count && skip) {
575 uint32_t *list = data + pos;
db215a72 576 cmd = LE32TOH(list[0]) >> 24;
b243416b 577 len = 1 + cmd_lengths[cmd];
578
97e07db9 579 switch (cmd) {
580 case 0x02:
db215a72 581 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 582 // clearing something large, don't skip
f99193c2 583 do_cmd_list(list, 3, &dummy, &dummy);
97e07db9 584 else
585 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
586 break;
587 case 0x24 ... 0x27:
588 case 0x2c ... 0x2f:
589 case 0x34 ... 0x37:
590 case 0x3c ... 0x3f:
591 gpu.ex_regs[1] &= ~0x1ff;
db215a72 592 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 593 break;
594 case 0x48 ... 0x4F:
595 for (v = 3; pos + v < count; v++)
596 {
db215a72 597 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 598 break;
599 }
600 len += v - 3;
601 break;
602 case 0x58 ... 0x5F:
603 for (v = 4; pos + v < count; v += 2)
604 {
db215a72 605 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 606 break;
607 }
608 len += v - 4;
609 break;
610 default:
611 if (cmd == 0xe3)
db215a72 612 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 613 if ((cmd & 0xf8) == 0xe0)
db215a72 614 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 615 break;
b243416b 616 }
b243416b 617
618 if (pos + len > count) {
619 cmd = -1;
620 break; // incomplete cmd
621 }
36da9c13 622 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 623 break; // image i/o
97e07db9 624
b243416b 625 pos += len;
626 }
627
628 renderer_sync_ecmds(gpu.ex_regs);
629 *last_cmd = cmd;
630 return pos;
631}
632
f99193c2 633static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
d30279e2 634{
b243416b 635 int cmd, pos;
636 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 637 int vram_dirty = 0;
d30279e2 638
d30279e2 639 // process buffer
b243416b 640 for (pos = 0; pos < count; )
d30279e2 641 {
b243416b 642 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
643 vram_dirty = 1;
d30279e2 644 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 645 if (pos == count)
646 break;
d30279e2
GI
647 }
648
db215a72 649 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 650 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
651 if (unlikely((pos+2) >= count)) {
652 // incomplete vram write/read cmd, can't consume yet
653 cmd = -1;
654 break;
655 }
656
d30279e2 657 // consume vram write/read cmd
db215a72 658 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 659 pos += 3;
660 continue;
d30279e2 661 }
36da9c13 662 else if ((cmd & 0xe0) == 0x80) {
663 if (unlikely((pos+3) >= count)) {
664 cmd = -1; // incomplete cmd, can't consume yet
665 break;
666 }
f99193c2 667 do_vram_copy(data + pos + 1, cpu_cycles);
b30fba56 668 vram_dirty = 1;
36da9c13 669 pos += 4;
670 continue;
671 }
c296224f 672 else if (cmd == 0x1f) {
673 log_anomaly("irq1?\n");
674 pos++;
675 continue;
676 }
b243416b 677
1e07f71d 678 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 679 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 680 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
681 else {
f99193c2 682 pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
b243416b 683 vram_dirty = 1;
684 }
685
686 if (cmd == -1)
687 // incomplete cmd
ddd56f6e 688 break;
d30279e2 689 }
ddd56f6e 690
61124a6d
PC
691 gpu.status &= ~0x1fff;
692 gpu.status |= gpu.ex_regs[1] & 0x7ff;
693 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 694
fc84f618 695 gpu.state.fb_dirty |= vram_dirty;
696
b243416b 697 if (old_e3 != gpu.ex_regs[3])
698 decide_frameskip_allow(gpu.ex_regs[3]);
699
ddd56f6e 700 return count - pos;
d30279e2
GI
701}
702
1328fa32 703static noinline void flush_cmd_buffer(void)
d30279e2 704{
f99193c2 705 int dummy = 0, left;
706 left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
d30279e2
GI
707 if (left > 0)
708 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
1328fa32 709 if (left != gpu.cmd_len) {
710 if (!gpu.dma.h && gpu.gpu_state_change)
711 gpu.gpu_state_change(PGS_PRIMITIVE_START);
712 gpu.cmd_len = left;
713 }
1ab64c54
GI
714}
715
716void GPUwriteDataMem(uint32_t *mem, int count)
717{
f99193c2 718 int dummy = 0, left;
d30279e2 719
56f08d83 720 log_io("gpu_dma_write %p %d\n", mem, count);
721
d30279e2
GI
722 if (unlikely(gpu.cmd_len > 0))
723 flush_cmd_buffer();
56f08d83 724
f99193c2 725 left = do_cmd_buffer(mem, count, &dummy);
d30279e2 726 if (left)
56f08d83 727 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
728}
729
d30279e2 730void GPUwriteData(uint32_t data)
1ab64c54 731{
56f08d83 732 log_io("gpu_write %08x\n", data);
db215a72 733 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
734 if (gpu.cmd_len >= CMD_BUFFER_LEN)
735 flush_cmd_buffer();
1ab64c54
GI
736}
737
fae38d7a 738long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
1ab64c54 739{
09159d99 740 uint32_t addr, *list, ld_addr = 0;
ddd56f6e 741 int len, left, count;
f99193c2 742 int cpu_cycles = 0;
d30279e2 743
8f5f2dd5 744 preload(rambase + (start_addr & 0x1fffff) / 4);
745
d30279e2
GI
746 if (unlikely(gpu.cmd_len > 0))
747 flush_cmd_buffer();
748
56f08d83 749 log_io("gpu_dma_chain\n");
ddd56f6e 750 addr = start_addr & 0xffffff;
09159d99 751 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 752 {
ddd56f6e 753 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
754 len = LE32TOH(list[0]) >> 24;
755 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 756 preload(rambase + (addr & 0x1fffff) / 4);
757
1c72b1c2 758 cpu_cycles += 10;
759 if (len > 0)
760 cpu_cycles += 5 + len;
deb18d24 761
c296224f 762 log_io(".chain %08lx #%d+%d %u\n",
763 (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles);
a4e249a1 764 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 765 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
766 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
767 gpu.cmd_len = 0;
768 }
a4e249a1 769 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
770 gpu.cmd_len += len;
771 flush_cmd_buffer();
772 continue;
773 }
ddd56f6e 774
56f08d83 775 if (len) {
f99193c2 776 left = do_cmd_buffer(list + 1, len, &cpu_cycles);
a4e249a1 777 if (left) {
778 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
779 gpu.cmd_len = left;
780 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
781 }
56f08d83 782 }
ddd56f6e 783
fae38d7a 784 if (progress_addr) {
785 *progress_addr = addr;
786 break;
787 }
09159d99 788 #define LD_THRESHOLD (8*1024)
789 if (count >= LD_THRESHOLD) {
790 if (count == LD_THRESHOLD) {
791 ld_addr = addr;
792 continue;
793 }
794
795 // loop detection marker
796 // (bit23 set causes DMA error on real machine, so
797 // unlikely to be ever set by the game)
db215a72 798 list[0] |= HTOLE32(0x800000);
09159d99 799 }
ddd56f6e 800 }
801
09159d99 802 if (ld_addr != 0) {
803 // remove loop detection markers
804 count -= LD_THRESHOLD + 2;
805 addr = ld_addr & 0x1fffff;
806 while (count-- > 0) {
807 list = rambase + addr / 4;
db215a72
PC
808 addr = LE32TOH(list[0]) & 0x1fffff;
809 list[0] &= HTOLE32(~0x800000);
09159d99 810 }
d30279e2 811 }
09159d99 812
3ece2f0c 813 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 814 gpu.state.last_list.hcnt = *gpu.state.hcnt;
1c72b1c2 815 gpu.state.last_list.cycles = cpu_cycles;
deb18d24 816 gpu.state.last_list.addr = start_addr;
817
1c72b1c2 818 return cpu_cycles;
1ab64c54
GI
819}
820
d30279e2
GI
821void GPUreadDataMem(uint32_t *mem, int count)
822{
56f08d83 823 log_io("gpu_dma_read %p %d\n", mem, count);
824
d30279e2
GI
825 if (unlikely(gpu.cmd_len > 0))
826 flush_cmd_buffer();
56f08d83 827
d30279e2
GI
828 if (gpu.dma.h)
829 do_vram_io(mem, count, 1);
830}
831
832uint32_t GPUreadData(void)
833{
9e146206 834 uint32_t ret;
56f08d83 835
836 if (unlikely(gpu.cmd_len > 0))
837 flush_cmd_buffer();
838
9e146206 839 ret = gpu.gp0;
495d603c
PC
840 if (gpu.dma.h) {
841 ret = HTOLE32(ret);
9e146206 842 do_vram_io(&ret, 1, 1);
495d603c
PC
843 ret = LE32TOH(ret);
844 }
56f08d83 845
9e146206 846 log_io("gpu_read %08x\n", ret);
847 return ret;
d30279e2
GI
848}
849
850uint32_t GPUreadStatus(void)
851{
ddd56f6e 852 uint32_t ret;
56f08d83 853
d30279e2
GI
854 if (unlikely(gpu.cmd_len > 0))
855 flush_cmd_buffer();
856
61124a6d 857 ret = gpu.status;
ddd56f6e 858 log_io("gpu_read_status %08x\n", ret);
859 return ret;
d30279e2
GI
860}
861
// Savestate image passed to GPUfreeze(). GPUfreeze() keeps gpu.regs at the
// start of ulControl and gpu.ex_regs at ulControl + 0xe0, and copies only the
// first 1024 * 512 * 2 bytes of psxVRam.
096ec49b 862struct GPUFreeze
1ab64c54
GI
863{
864 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
865 uint32_t ulStatus; // current gpu status
866 uint32_t ulControl[256]; // latest control register values
867 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
096ec49b 868};
1ab64c54 869
096ec49b 870long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 871{
fc84f618 872 int i;
873
1ab64c54
GI
874 switch (type) {
875 case 1: // save
d30279e2
GI
876 if (gpu.cmd_len > 0)
877 flush_cmd_buffer();
c765eb86
JW
878
879 renderer_sync();
9ee0fd5b 880 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 881 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 882 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 883 freeze->ulStatus = gpu.status;
1ab64c54
GI
884 break;
885 case 0: // load
c765eb86 886 renderer_sync();
9ee0fd5b 887 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 888 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 889 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 890 gpu.status = freeze->ulStatus;
3d47ef17 891 gpu.cmd_len = 0;
fc84f618 892 for (i = 8; i > 0; i--) {
893 gpu.regs[i] ^= 1; // avoid reg change detection
894 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
895 }
5b745e5b 896 renderer_sync_ecmds(gpu.ex_regs);
9a864a8f 897 renderer_update_caches(0, 0, 1024, 512, 0);
1ab64c54
GI
898 break;
899 }
900
901 return 1;
902}
903
5440b88e 904void GPUupdateLace(void)
905{
906 if (gpu.cmd_len > 0)
907 flush_cmd_buffer();
908 renderer_flush_queues();
909
7a20a6d0 910#ifndef RAW_FB_DISPLAY
61124a6d 911 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 912 if (!gpu.state.blanked) {
913 vout_blank();
914 gpu.state.blanked = 1;
915 gpu.state.fb_dirty = 1;
916 }
917 return;
918 }
919
c765eb86
JW
920 renderer_notify_update_lace(0);
921
aafcb4dd 922 if (!gpu.state.fb_dirty)
5440b88e 923 return;
7a20a6d0 924#endif
5440b88e 925
926 if (gpu.frameskip.set) {
927 if (!gpu.frameskip.frame_ready) {
928 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
929 return;
930 gpu.frameskip.active = 0;
931 }
932 gpu.frameskip.frame_ready = 0;
933 }
934
935 vout_update();
3b7b0065 936 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
937 renderer_update_caches(0, 0, 1024, 512, 1);
938 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 939 gpu.state.fb_dirty = 0;
aafcb4dd 940 gpu.state.blanked = 0;
c765eb86 941 renderer_notify_update_lace(1);
5440b88e 942}
943
72e5023f 944void GPUvBlank(int is_vblank, int lcf)
945{
5440b88e 946 int interlace = gpu.state.allow_interlace
61124a6d
PC
947 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
948 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 949 // interlace doesn't look nice on progressive displays,
950 // so we have this "auto" mode here for games that don't read vram
951 if (gpu.state.allow_interlace == 2
952 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
953 {
954 interlace = 0;
955 }
956 if (interlace || interlace != gpu.state.old_interlace) {
957 gpu.state.old_interlace = interlace;
958
959 if (gpu.cmd_len > 0)
960 flush_cmd_buffer();
961 renderer_flush_queues();
962 renderer_set_interlace(interlace, !lcf);
963 }
964}
965
80bc1426 966void GPUgetScreenInfo(int *y, int *base_hres)
967{
968 *y = gpu.screen.y;
969 *base_hres = gpu.screen.vres;
970 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
971 *base_hres >>= 1;
972}
973
5440b88e 974void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
975{
976 gpu.frameskip.set = cbs->frameskip;
977 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 978 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 979 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 980 gpu.frameskip.active = 0;
981 gpu.frameskip.frame_ready = 1;
982 gpu.state.hcnt = cbs->gpu_hcnt;
983 gpu.state.frame_count = cbs->gpu_frame_count;
984 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 985 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
b3ff74ba 986 gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
5bbe183f 987 if (gpu.state.screen_centering_type != cbs->screen_centering_type
988 || gpu.state.screen_centering_x != cbs->screen_centering_x
989 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
990 gpu.state.screen_centering_type = cbs->screen_centering_type;
991 gpu.state.screen_centering_x = cbs->screen_centering_x;
992 gpu.state.screen_centering_y = cbs->screen_centering_y;
993 update_width();
994 update_height();
995 }
5440b88e 996
9ee0fd5b 997 gpu.mmap = cbs->mmap;
998 gpu.munmap = cbs->munmap;
1328fa32 999 gpu.gpu_state_change = cbs->gpu_state_change;
9ee0fd5b 1000
1001 // delayed vram mmap
1002 if (gpu.vram == NULL)
1003 map_vram();
1004
5440b88e 1005 if (cbs->pl_vout_set_raw_vram)
1006 cbs->pl_vout_set_raw_vram(gpu.vram);
1007 renderer_set_config(cbs);
1008 vout_set_config(cbs);
72e5023f 1009}
1010
1ab64c54 1011// vim:shiftwidth=2:expandtab