libretro: improve retro_memory_map
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
8f8ade9c 21#ifndef ARRAY_SIZE
1ab64c54 22#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f8ade9c 23#endif
8f5f2dd5 24#ifdef __GNUC__
d30279e2 25#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 26#define preload __builtin_prefetch
8dd855cd 27#define noinline __attribute__((noinline))
8f5f2dd5 28#else
29#define unlikely(x)
30#define preload(...)
31#define noinline
8f5f2dd5 32#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
43static noinline void do_cmd_reset(void)
44{
f99193c2 45 int dummy = 0;
c765eb86 46 renderer_sync();
48f3d210 47 if (unlikely(gpu.cmd_len > 0))
d02ab9fc 48 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
48f3d210 49 gpu.cmd_len = 0;
05740673 50
51 if (unlikely(gpu.dma.h > 0))
52 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 53 gpu.dma.h = 0;
54}
55
6e9bdaef 56static noinline void do_reset(void)
1ab64c54 57{
7841712d 58 unsigned int i;
5b568098 59
48f3d210 60 do_cmd_reset();
61
6e9bdaef 62 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 63 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
64 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 65 gpu.status = 0x14802000;
6e9bdaef 66 gpu.gp0 = 0;
fc84f618 67 gpu.regs[3] = 1;
6e9bdaef 68 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 69 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 70 gpu.screen.x = gpu.screen.y = 0;
01ff3105 71 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 72 renderer_notify_res_change();
1ab64c54
GI
73}
74
8dd855cd 75static noinline void update_width(void)
76{
5bbe183f 77 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
78 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
79 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
80 int hres = hres_all[(gpu.status >> 16) & 7];
81 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 82 int sw = gpu.screen.x2 - gpu.screen.x1;
b3ff74ba 83 int type = gpu.state.screen_centering_type;
5bbe183f 84 int x = 0, x_auto;
b3ff74ba 85 if (type == C_AUTO)
86 type = gpu.state.screen_centering_type_default;
5bbe183f 87 if (sw <= 0)
88 /* nothing displayed? */;
89 else {
90 int s = pal ? 656 : 608; // or 600? pal is just a guess
91 x = (gpu.screen.x1 - s) / hdiv;
92 x = (x + 1) & ~1; // blitter limitation
93 sw /= hdiv;
94 sw = (sw + 2) & ~3; // according to nocash
b27f55be 95
96 if (gpu.state.show_overscan == 2) // widescreen hack
97 sw = (sw + 63) & ~63;
98 if (gpu.state.show_overscan && sw >= hres)
99 x = 0, hres = sw;
b3ff74ba 100 switch (type) {
8f8ade9c 101 case C_INGAME:
5bbe183f 102 break;
8f8ade9c 103 case C_MANUAL:
5bbe183f 104 x = gpu.state.screen_centering_x;
105 break;
106 default:
107 // correct if slightly miscentered
108 x_auto = (hres - sw) / 2 & ~3;
109 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
110 x = x_auto;
111 }
112 if (x + sw > hres)
113 sw = hres - x;
114 // .x range check is done in vout_update()
115 }
116 // reduce the unpleasant right border that a few games have
117 if (gpu.state.screen_centering_type == 0
118 && x <= 4 && hres - (x + sw) >= 4)
119 hres -= 4;
120 gpu.screen.x = x;
121 gpu.screen.w = sw;
122 gpu.screen.hres = hres;
123 gpu.state.dims_changed = 1;
b27f55be 124 //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
125 // gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
8dd855cd 126}
127
// Recompute the vertical display position/size (gpu.screen.y/h/vres)
// from the GP1(07h) display range and the mode bits in gpu.status.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height mode: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
168
fc84f618 169static noinline void decide_frameskip(void)
170{
5eaa13f1
A
171 *gpu.frameskip.dirty = 1;
172
9fe27e25 173 if (gpu.frameskip.active)
174 gpu.frameskip.cnt++;
175 else {
176 gpu.frameskip.cnt = 0;
177 gpu.frameskip.frame_ready = 1;
178 }
fc84f618 179
5eaa13f1
A
180 if (*gpu.frameskip.force)
181 gpu.frameskip.active = 1;
182 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 183 gpu.frameskip.active = 1;
184 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 185 gpu.frameskip.active = 1;
186 else
187 gpu.frameskip.active = 0;
fbb4bfff 188
189 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
f99193c2 190 int dummy = 0;
d02ab9fc 191 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
fbb4bfff 192 gpu.frameskip.pending_fill[0] = 0;
193 }
fc84f618 194}
195
b243416b 196static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 197{
198 // no frameskip if it decides to draw to display area,
199 // but not for interlace since it'll most likely always do that
200 uint32_t x = cmd_e3 & 0x3ff;
201 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 202 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 203 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
204 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 205 return gpu.frameskip.allow;
9fe27e25 206}
207
01ff3105 208static void flush_cmd_buffer(void);
209
6e9bdaef 210static noinline void get_gpu_info(uint32_t data)
211{
01ff3105 212 if (unlikely(gpu.cmd_len > 0))
213 flush_cmd_buffer();
6e9bdaef 214 switch (data & 0x0f) {
215 case 0x02:
216 case 0x03:
217 case 0x04:
6e9bdaef 218 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
219 break;
08b33377 220 case 0x05:
221 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 222 break;
223 case 0x07:
224 gpu.gp0 = 2;
225 break;
226 default:
08b33377 227 // gpu.gp0 unchanged
6e9bdaef 228 break;
229 }
230}
231
5bd33f52 232#ifndef max
233#define max(a, b) (((a) > (b)) ? (a) : (b))
234#endif
12367ad0 235
236// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
237// renderer/downscaler it uses in high res modes:
238#ifdef GCW_ZERO
239 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
240 // fills. (Will change this value if it ever gets large page support)
241 #define VRAM_ALIGN 8192
242#else
243 #define VRAM_ALIGN 16
244#endif
245
5bd33f52 246// double, for overdraw guard + at least 1 page before
247#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
248
12367ad0 249// vram ptr received from mmap/malloc/alloc (will deallocate using this)
250static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 251
e34ef5ac 252#ifndef GPULIB_USE_MMAP
253# ifdef __linux__
254# define GPULIB_USE_MMAP 1
255# else
256# define GPULIB_USE_MMAP 0
257# endif
258#endif
9ee0fd5b 259static int map_vram(void)
260{
e34ef5ac 261#if GPULIB_USE_MMAP
5bd33f52 262 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
e34ef5ac 263#else
5bd33f52 264 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
e34ef5ac 265#endif
266 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
267 // 4kb guard in front
12367ad0 268 gpu.vram += (4096 / 2);
e34ef5ac 269 // Align
270 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 271 return 0;
272 }
273 else {
274 fprintf(stderr, "could not map vram, expect crashes\n");
275 return -1;
276 }
277}
278
6e9bdaef 279long GPUinit(void)
280{
9394ada5 281 int ret;
282 ret = vout_init();
283 ret |= renderer_init();
284
3b7b0065 285 memset(&gpu.state, 0, sizeof(gpu.state));
286 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
287 gpu.zero = 0;
3ece2f0c 288 gpu.state.frame_count = &gpu.zero;
deb18d24 289 gpu.state.hcnt = &gpu.zero;
48f3d210 290 gpu.cmd_len = 0;
9394ada5 291 do_reset();
48f3d210 292
12367ad0 293 /*if (gpu.mmap != NULL) {
9ee0fd5b 294 if (map_vram() != 0)
295 ret = -1;
12367ad0 296 }*/
6e9bdaef 297 return ret;
298}
299
// Plugin teardown: shut down renderer/vout backends and release the
// VRAM buffer through the same mechanism that allocated it.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  // free via the original (unaligned) pointer, not gpu.vram
  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
318
1ab64c54
GI
319void GPUwriteStatus(uint32_t data)
320{
1ab64c54 321 uint32_t cmd = data >> 24;
9a864a8f 322 int src_x, src_y;
1ab64c54 323
fc84f618 324 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 325 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 326 return;
8dd855cd 327 gpu.regs[cmd] = data;
fc84f618 328 }
329
330 gpu.state.fb_dirty = 1;
8dd855cd 331
332 switch (cmd) {
1ab64c54 333 case 0x00:
6e9bdaef 334 do_reset();
1ab64c54 335 break;
48f3d210 336 case 0x01:
337 do_cmd_reset();
338 break;
1ab64c54 339 case 0x03:
5bbe183f 340 if (data & 1) {
61124a6d 341 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 342 gpu.state.dims_changed = 1; // for hud clearing
343 }
61124a6d
PC
344 else
345 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
346 break;
347 case 0x04:
61124a6d
PC
348 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
349 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
350 break;
351 case 0x05:
9a864a8f 352 src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
353 if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
354 gpu.screen.src_x = src_x;
355 gpu.screen.src_y = src_y;
356 renderer_notify_scanout_change(src_x, src_y);
357 if (gpu.frameskip.set) {
358 decide_frameskip_allow(gpu.ex_regs[3]);
359 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
360 decide_frameskip();
361 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
362 }
9fe27e25 363 }
fb4c6fba 364 }
1ab64c54 365 break;
8dd855cd 366 case 0x06:
367 gpu.screen.x1 = data & 0xfff;
368 gpu.screen.x2 = (data >> 12) & 0xfff;
369 update_width();
370 break;
1ab64c54
GI
371 case 0x07:
372 gpu.screen.y1 = data & 0x3ff;
373 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 374 update_height();
1ab64c54
GI
375 break;
376 case 0x08:
61124a6d 377 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 378 update_width();
379 update_height();
e929dec5 380 renderer_notify_res_change();
1ab64c54 381 break;
deb18d24 382 default:
383 if ((cmd & 0xf0) == 0x10)
384 get_gpu_info(data);
6e9bdaef 385 break;
1ab64c54 386 }
7890a708 387
388#ifdef GPUwriteStatus_ext
389 GPUwriteStatus_ext(data);
390#endif
1ab64c54
GI
391}
392
56f08d83 393const unsigned char cmd_lengths[256] =
1ab64c54 394{
d30279e2
GI
395 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
398 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
652c6b8b 399 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
400 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
401 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
d30279e2 402 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
36da9c13 403 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
404 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
405 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
406 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
407 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
408 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
d30279e2
GI
409 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
411};
412
d30279e2
GI
413#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
414
3382c20f 415// this isn't very useful so should be rare
416static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
36da9c13 417{
418 int i;
3382c20f 419 if (r6 == 1) {
420 for (i = 0; i < l; i++)
421 dst[i] = src[i] | 0x8000;
422 }
423 else {
424 uint16_t msb = r6 << 15;
425 for (i = 0; i < l; i++) {
426 uint16_t mask = (int16_t)dst[i] >> 15;
427 dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask);
428 }
429 }
36da9c13 430}
431
// Transfer one span of l pixels between VRAM at (x, y) and mem.
// r6 is the mask setting (gpu.ex_regs[6] & 3); nonzero takes the
// slow masked path on writes, reads always bypass masking.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint32_t r6)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(r6))
    cpy_mask(vram, mem, l, r6);
  else
    memcpy(vram, mem, l * 2);
}
443
// Stream count 32-bit words into/out of the active VRAM transfer
// rectangle (gpu.dma), resuming at gpu.dma.offset within the current
// row. Completes the transfer when the last row is consumed.
// Returns the number of words actually used.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint32_t r6 = gpu.ex_regs[6] & 3;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish the partially-transferred row from the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, r6);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // VRAM wraps vertically
    do_vram_line(x, y, sdata, w, is_read, r6);
  }

  if (h > 0) {
    // transfer not finished; remember progress within the current row
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, r6);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
496
// Begin a VRAM image transfer (GP0 A0h write / C0h read): latch the
// destination rectangle into gpu.dma (and a copy in gpu.dma_start for
// cache updates at completion).
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size 0 wraps to the maximum (1024x512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // pre-latch the first word for GPUreadData()
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
523
// Complete (or abort) the active VRAM transfer: clear the read flag,
// or for writes, mark the framebuffer dirty and invalidate the
// renderer's caches over the written rectangle.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
536
f99193c2 537static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
36da9c13 538{
539 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
540 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
541 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
542 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
543 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
544 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
545 uint16_t msb = gpu.ex_regs[6] << 15;
546 uint16_t lbuf[128];
547 uint32_t x, y;
548
f99193c2 549 *cpu_cycles += gput_copy(w, h);
36da9c13 550 if (sx == dx && sy == dy && msb == 0)
551 return;
552
553 renderer_flush_queues();
554
555 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
556 {
557 for (y = 0; y < h; y++)
558 {
559 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
560 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
561 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
562 {
563 uint32_t x1, w1 = w - x;
564 if (w1 > ARRAY_SIZE(lbuf))
565 w1 = ARRAY_SIZE(lbuf);
566 for (x1 = 0; x1 < w1; x1++)
567 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
568 for (x1 = 0; x1 < w1; x1++)
569 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
570 }
571 }
572 }
573 else
574 {
575 uint32_t sy1 = sy, dy1 = dy;
576 for (y = 0; y < h; y++, sy1++, dy1++)
577 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
578 }
579
580 renderer_update_caches(dx, dy, w, h, 0);
581}
582
b243416b 583static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
584{
f99193c2 585 int cmd = 0, pos = 0, len, dummy = 0, v;
b243416b 586 int skip = 1;
587
fbb4bfff 588 gpu.frameskip.pending_fill[0] = 0;
589
b243416b 590 while (pos < count && skip) {
591 uint32_t *list = data + pos;
db215a72 592 cmd = LE32TOH(list[0]) >> 24;
b243416b 593 len = 1 + cmd_lengths[cmd];
bbb7cdec 594 if (pos + len > count) {
595 cmd = -1;
596 break; // incomplete cmd
597 }
b243416b 598
97e07db9 599 switch (cmd) {
600 case 0x02:
db215a72 601 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 602 // clearing something large, don't skip
d02ab9fc 603 do_cmd_list(list, 3, &dummy, &dummy, &dummy);
97e07db9 604 else
605 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
606 break;
607 case 0x24 ... 0x27:
608 case 0x2c ... 0x2f:
609 case 0x34 ... 0x37:
610 case 0x3c ... 0x3f:
611 gpu.ex_regs[1] &= ~0x1ff;
db215a72 612 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 613 break;
614 case 0x48 ... 0x4F:
615 for (v = 3; pos + v < count; v++)
616 {
db215a72 617 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 618 break;
619 }
620 len += v - 3;
621 break;
622 case 0x58 ... 0x5F:
623 for (v = 4; pos + v < count; v += 2)
624 {
db215a72 625 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 626 break;
627 }
628 len += v - 4;
629 break;
630 default:
631 if (cmd == 0xe3)
db215a72 632 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 633 if ((cmd & 0xf8) == 0xe0)
db215a72 634 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 635 break;
b243416b 636 }
36da9c13 637 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 638 break; // image i/o
97e07db9 639
b243416b 640 pos += len;
641 }
642
643 renderer_sync_ecmds(gpu.ex_regs);
644 *last_cmd = cmd;
645 return pos;
646}
647
d02ab9fc 648static noinline int do_cmd_buffer(uint32_t *data, int count,
649 int *cycles_sum, int *cycles_last)
d30279e2 650{
b243416b 651 int cmd, pos;
652 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 653 int vram_dirty = 0;
d30279e2 654
d30279e2 655 // process buffer
b243416b 656 for (pos = 0; pos < count; )
d30279e2 657 {
b243416b 658 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
659 vram_dirty = 1;
d30279e2 660 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 661 if (pos == count)
662 break;
d30279e2
GI
663 }
664
db215a72 665 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 666 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
667 if (unlikely((pos+2) >= count)) {
668 // incomplete vram write/read cmd, can't consume yet
669 cmd = -1;
670 break;
671 }
672
d30279e2 673 // consume vram write/read cmd
db215a72 674 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 675 pos += 3;
676 continue;
d30279e2 677 }
36da9c13 678 else if ((cmd & 0xe0) == 0x80) {
679 if (unlikely((pos+3) >= count)) {
680 cmd = -1; // incomplete cmd, can't consume yet
681 break;
682 }
025b6fde 683 renderer_sync();
d02ab9fc 684 *cycles_sum += *cycles_last;
685 *cycles_last = 0;
686 do_vram_copy(data + pos + 1, cycles_last);
b30fba56 687 vram_dirty = 1;
36da9c13 688 pos += 4;
689 continue;
690 }
c296224f 691 else if (cmd == 0x1f) {
692 log_anomaly("irq1?\n");
693 pos++;
694 continue;
695 }
b243416b 696
1e07f71d 697 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 698 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 699 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
700 else {
d02ab9fc 701 pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
b243416b 702 vram_dirty = 1;
703 }
704
705 if (cmd == -1)
706 // incomplete cmd
ddd56f6e 707 break;
d30279e2 708 }
ddd56f6e 709
61124a6d
PC
710 gpu.status &= ~0x1fff;
711 gpu.status |= gpu.ex_regs[1] & 0x7ff;
712 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 713
fc84f618 714 gpu.state.fb_dirty |= vram_dirty;
715
b243416b 716 if (old_e3 != gpu.ex_regs[3])
717 decide_frameskip_allow(gpu.ex_regs[3]);
718
ddd56f6e 719 return count - pos;
d30279e2
GI
720}
721
1328fa32 722static noinline void flush_cmd_buffer(void)
d30279e2 723{
f99193c2 724 int dummy = 0, left;
d02ab9fc 725 left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
d30279e2
GI
726 if (left > 0)
727 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
1328fa32 728 if (left != gpu.cmd_len) {
729 if (!gpu.dma.h && gpu.gpu_state_change)
730 gpu.gpu_state_change(PGS_PRIMITIVE_START);
731 gpu.cmd_len = left;
732 }
1ab64c54
GI
733}
734
// Block GP0 write (DMA-style): flush any buffered words first, then
// feed the whole block; leftover words are discarded (logged).
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
748
d30279e2 749void GPUwriteData(uint32_t data)
1ab64c54 750{
56f08d83 751 log_io("gpu_write %08x\n", data);
db215a72 752 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
753 if (gpu.cmd_len >= CMD_BUFFER_LEN)
754 flush_cmd_buffer();
1ab64c54
GI
755}
756
d02ab9fc 757long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
758 uint32_t *progress_addr, int32_t *cycles_last_cmd)
1ab64c54 759{
2048ae31 760 uint32_t addr, *list, ld_addr;
761 int len, left, count, ld_count = 32;
d02ab9fc 762 int cpu_cycles_sum = 0;
763 int cpu_cycles_last = 0;
d30279e2 764
8f5f2dd5 765 preload(rambase + (start_addr & 0x1fffff) / 4);
766
d30279e2
GI
767 if (unlikely(gpu.cmd_len > 0))
768 flush_cmd_buffer();
769
56f08d83 770 log_io("gpu_dma_chain\n");
2048ae31 771 addr = ld_addr = start_addr & 0xffffff;
09159d99 772 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 773 {
ddd56f6e 774 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
775 len = LE32TOH(list[0]) >> 24;
776 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 777 preload(rambase + (addr & 0x1fffff) / 4);
778
d02ab9fc 779 cpu_cycles_sum += 10;
1c72b1c2 780 if (len > 0)
d02ab9fc 781 cpu_cycles_sum += 5 + len;
deb18d24 782
d02ab9fc 783 log_io(".chain %08lx #%d+%d %u+%u\n",
784 (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
a4e249a1 785 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 786 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
787 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
788 gpu.cmd_len = 0;
789 }
a4e249a1 790 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
791 gpu.cmd_len += len;
792 flush_cmd_buffer();
793 continue;
794 }
ddd56f6e 795
56f08d83 796 if (len) {
d02ab9fc 797 left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
a4e249a1 798 if (left) {
799 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
800 gpu.cmd_len = left;
801 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
802 }
56f08d83 803 }
ddd56f6e 804
fae38d7a 805 if (progress_addr) {
806 *progress_addr = addr;
807 break;
808 }
2048ae31 809 if (addr == ld_addr) {
810 log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
811 break;
09159d99 812 }
2048ae31 813 if (count == ld_count) {
814 ld_addr = addr;
815 ld_count *= 2;
09159d99 816 }
d30279e2 817 }
09159d99 818
d02ab9fc 819 //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
3ece2f0c 820 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 821 gpu.state.last_list.hcnt = *gpu.state.hcnt;
d02ab9fc 822 gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
deb18d24 823 gpu.state.last_list.addr = start_addr;
824
d02ab9fc 825 *cycles_last_cmd = cpu_cycles_last;
826 return cpu_cycles_sum;
1ab64c54
GI
827}
828
d30279e2
GI
// Block GP0 read (DMA-style): only does anything while a VRAM read
// transfer is active (gpu.dma.h nonzero).
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
839
// Single-word GP0 read: returns the latched gpu.gp0 value, advancing
// the VRAM read transfer by one word when one is active.
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data in memory
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
857
// GPUSTAT read: flush pending commands first so the status bits that
// mirror e1/e6 state are up to date.
uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}
869
096ec49b 870struct GPUFreeze
1ab64c54
GI
871{
872 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
873 uint32_t ulStatus; // current gpu status
874 uint32_t ulControl[256]; // latest control register values
875 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
096ec49b 876};
1ab64c54 877
096ec49b 878long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 879{
fc84f618 880 int i;
881
1ab64c54
GI
882 switch (type) {
883 case 1: // save
d30279e2
GI
884 if (gpu.cmd_len > 0)
885 flush_cmd_buffer();
c765eb86
JW
886
887 renderer_sync();
9ee0fd5b 888 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 889 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 890 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 891 freeze->ulStatus = gpu.status;
1ab64c54
GI
892 break;
893 case 0: // load
c765eb86 894 renderer_sync();
9ee0fd5b 895 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 896 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 897 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 898 gpu.status = freeze->ulStatus;
3d47ef17 899 gpu.cmd_len = 0;
fc84f618 900 for (i = 8; i > 0; i--) {
901 gpu.regs[i] ^= 1; // avoid reg change detection
902 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
903 }
5b745e5b 904 renderer_sync_ecmds(gpu.ex_regs);
9a864a8f 905 renderer_update_caches(0, 0, 1024, 512, 0);
1ab64c54
GI
906 break;
907 }
908
909 return 1;
910}
911
5440b88e 912void GPUupdateLace(void)
913{
914 if (gpu.cmd_len > 0)
915 flush_cmd_buffer();
916 renderer_flush_queues();
917
7a20a6d0 918#ifndef RAW_FB_DISPLAY
61124a6d 919 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 920 if (!gpu.state.blanked) {
921 vout_blank();
922 gpu.state.blanked = 1;
923 gpu.state.fb_dirty = 1;
924 }
925 return;
926 }
927
c765eb86
JW
928 renderer_notify_update_lace(0);
929
aafcb4dd 930 if (!gpu.state.fb_dirty)
5440b88e 931 return;
7a20a6d0 932#endif
5440b88e 933
934 if (gpu.frameskip.set) {
935 if (!gpu.frameskip.frame_ready) {
936 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
937 return;
938 gpu.frameskip.active = 0;
939 }
940 gpu.frameskip.frame_ready = 0;
941 }
942
943 vout_update();
3b7b0065 944 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
945 renderer_update_caches(0, 0, 1024, 512, 1);
946 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 947 gpu.state.fb_dirty = 0;
aafcb4dd 948 gpu.state.blanked = 0;
c765eb86 949 renderer_notify_update_lace(1);
5440b88e 950}
951
72e5023f 952void GPUvBlank(int is_vblank, int lcf)
953{
5440b88e 954 int interlace = gpu.state.allow_interlace
61124a6d
PC
955 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
956 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 957 // interlace doesn't look nice on progressive displays,
958 // so we have this "auto" mode here for games that don't read vram
959 if (gpu.state.allow_interlace == 2
960 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
961 {
962 interlace = 0;
963 }
964 if (interlace || interlace != gpu.state.old_interlace) {
965 gpu.state.old_interlace = interlace;
966
967 if (gpu.cmd_len > 0)
968 flush_cmd_buffer();
969 renderer_flush_queues();
970 renderer_set_interlace(interlace, !lcf);
971 }
972}
973
80bc1426 974void GPUgetScreenInfo(int *y, int *base_hres)
975{
976 *y = gpu.screen.y;
977 *base_hres = gpu.screen.vres;
978 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
979 *base_hres >>= 1;
980}
981
5440b88e 982void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
983{
984 gpu.frameskip.set = cbs->frameskip;
985 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 986 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 987 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 988 gpu.frameskip.active = 0;
989 gpu.frameskip.frame_ready = 1;
19a79138
PC
990 gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
991 gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
5440b88e 992 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 993 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
b3ff74ba 994 gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
5bbe183f 995 if (gpu.state.screen_centering_type != cbs->screen_centering_type
996 || gpu.state.screen_centering_x != cbs->screen_centering_x
b27f55be 997 || gpu.state.screen_centering_y != cbs->screen_centering_y
998 || gpu.state.show_overscan != cbs->show_overscan) {
5bbe183f 999 gpu.state.screen_centering_type = cbs->screen_centering_type;
1000 gpu.state.screen_centering_x = cbs->screen_centering_x;
1001 gpu.state.screen_centering_y = cbs->screen_centering_y;
b27f55be 1002 gpu.state.show_overscan = cbs->show_overscan;
5bbe183f 1003 update_width();
1004 update_height();
1005 }
5440b88e 1006
9ee0fd5b 1007 gpu.mmap = cbs->mmap;
1008 gpu.munmap = cbs->munmap;
1328fa32 1009 gpu.gpu_state_change = cbs->gpu_state_change;
9ee0fd5b 1010
1011 // delayed vram mmap
1012 if (gpu.vram == NULL)
1013 map_vram();
1014
5440b88e 1015 if (cbs->pl_vout_set_raw_vram)
1016 cbs->pl_vout_set_raw_vram(gpu.vram);
1017 renderer_set_config(cbs);
1018 vout_set_config(cbs);
72e5023f 1019}
1020
1ab64c54 1021// vim:shiftwidth=2:expandtab