add a libcrypt warning
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1328fa32 17#include "../../libpcsxcore/gpu.h" // meh
1ab64c54
GI
18
19#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Compiler-hint macros: branch prediction, cache prefetch, inline control. */
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// fix: the fallback must expand to the condition itself, not to nothing —
// an empty expansion turns `if (unlikely(c))` into `if ()` and breaks
// the build on non-GNU compilers
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 29
deb18d24 30//#define log_io gpu_log
56f08d83 31#define log_io(...)
56f08d83 32
9ee0fd5b 33struct psx_gpu gpu;
1ab64c54 34
48f3d210 35static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 36static void finish_vram_transfer(int is_read);
48f3d210 37
38static noinline void do_cmd_reset(void)
39{
c765eb86
JW
40 renderer_sync();
41
48f3d210 42 if (unlikely(gpu.cmd_len > 0))
43 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
48f3d210 44 gpu.cmd_len = 0;
05740673 45
46 if (unlikely(gpu.dma.h > 0))
47 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 48 gpu.dma.h = 0;
49}
50
6e9bdaef 51static noinline void do_reset(void)
1ab64c54 52{
7841712d 53 unsigned int i;
5b568098 54
48f3d210 55 do_cmd_reset();
56
6e9bdaef 57 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 58 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
59 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 60 gpu.status = 0x14802000;
6e9bdaef 61 gpu.gp0 = 0;
fc84f618 62 gpu.regs[3] = 1;
6e9bdaef 63 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 64 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 65 gpu.screen.x = gpu.screen.y = 0;
01ff3105 66 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 67 renderer_notify_res_change();
1ab64c54
GI
68}
69
8dd855cd 70static noinline void update_width(void)
71{
5bbe183f 72 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
73 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
74 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
75 int hres = hres_all[(gpu.status >> 16) & 7];
76 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 77 int sw = gpu.screen.x2 - gpu.screen.x1;
5bbe183f 78 int x = 0, x_auto;
79 if (sw <= 0)
80 /* nothing displayed? */;
81 else {
82 int s = pal ? 656 : 608; // or 600? pal is just a guess
83 x = (gpu.screen.x1 - s) / hdiv;
84 x = (x + 1) & ~1; // blitter limitation
85 sw /= hdiv;
86 sw = (sw + 2) & ~3; // according to nocash
87 switch (gpu.state.screen_centering_type) {
88 case 1:
89 break;
90 case 2:
91 x = gpu.state.screen_centering_x;
92 break;
93 default:
94 // correct if slightly miscentered
95 x_auto = (hres - sw) / 2 & ~3;
96 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
97 x = x_auto;
98 }
99 if (x + sw > hres)
100 sw = hres - x;
101 // .x range check is done in vout_update()
102 }
103 // reduce the unpleasant right border that a few games have
104 if (gpu.state.screen_centering_type == 0
105 && x <= 4 && hres - (x + sw) >= 4)
106 hres -= 4;
107 gpu.screen.x = x;
108 gpu.screen.w = sw;
109 gpu.screen.hres = hres;
110 gpu.state.dims_changed = 1;
111 //printf("xx %d %d -> %2d, %d / %d\n",
112 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
8dd855cd 113}
114
115static noinline void update_height(void)
116{
5bbe183f 117 int pal = gpu.status & PSX_GPU_STATUS_PAL;
118 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
119 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
8dd855cd 120 int sh = gpu.screen.y2 - gpu.screen.y1;
5bbe183f 121 int center_tol = 16;
122 int vres = 240;
123
124 if (pal && (sh > 240 || gpu.screen.vres == 256))
125 vres = 256;
126 if (dheight)
127 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
128 if (sh <= 0)
129 /* nothing displayed? */;
130 else {
131 switch (gpu.state.screen_centering_type) {
132 case 1:
133 break;
134 case 2:
135 y = gpu.state.screen_centering_y;
136 break;
137 default:
138 // correct if slightly miscentered
139 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
140 y = 0;
141 }
142 if (y + sh > vres)
143 sh = vres - y;
144 }
145 gpu.screen.y = y;
8dd855cd 146 gpu.screen.h = sh;
5bbe183f 147 gpu.screen.vres = vres;
148 gpu.state.dims_changed = 1;
149 //printf("yy %d %d -> %d, %d / %d\n",
150 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
8dd855cd 151}
152
fc84f618 153static noinline void decide_frameskip(void)
154{
5eaa13f1
A
155 *gpu.frameskip.dirty = 1;
156
9fe27e25 157 if (gpu.frameskip.active)
158 gpu.frameskip.cnt++;
159 else {
160 gpu.frameskip.cnt = 0;
161 gpu.frameskip.frame_ready = 1;
162 }
fc84f618 163
5eaa13f1
A
164 if (*gpu.frameskip.force)
165 gpu.frameskip.active = 1;
166 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 167 gpu.frameskip.active = 1;
168 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 169 gpu.frameskip.active = 1;
170 else
171 gpu.frameskip.active = 0;
fbb4bfff 172
173 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
174 int dummy;
175 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
176 gpu.frameskip.pending_fill[0] = 0;
177 }
fc84f618 178}
179
b243416b 180static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 181{
182 // no frameskip if it decides to draw to display area,
183 // but not for interlace since it'll most likely always do that
184 uint32_t x = cmd_e3 & 0x3ff;
185 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 186 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 187 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
188 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 189 return gpu.frameskip.allow;
9fe27e25 190}
191
01ff3105 192static void flush_cmd_buffer(void);
193
6e9bdaef 194static noinline void get_gpu_info(uint32_t data)
195{
01ff3105 196 if (unlikely(gpu.cmd_len > 0))
197 flush_cmd_buffer();
6e9bdaef 198 switch (data & 0x0f) {
199 case 0x02:
200 case 0x03:
201 case 0x04:
6e9bdaef 202 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
203 break;
08b33377 204 case 0x05:
205 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 206 break;
207 case 0x07:
208 gpu.gp0 = 2;
209 break;
210 default:
08b33377 211 // gpu.gp0 unchanged
6e9bdaef 212 break;
213 }
214}
215
9ee0fd5b 216// double, for overdraw guard
12367ad0 217#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
218
219// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
220// renderer/downscaler it uses in high res modes:
221#ifdef GCW_ZERO
222 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
223 // fills. (Will change this value if it ever gets large page support)
224 #define VRAM_ALIGN 8192
225#else
226 #define VRAM_ALIGN 16
227#endif
228
229// vram ptr received from mmap/malloc/alloc (will deallocate using this)
230static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 231
e34ef5ac 232#ifndef GPULIB_USE_MMAP
233# ifdef __linux__
234# define GPULIB_USE_MMAP 1
235# else
236# define GPULIB_USE_MMAP 0
237# endif
238#endif
9ee0fd5b 239static int map_vram(void)
240{
e34ef5ac 241#if GPULIB_USE_MMAP
12367ad0 242 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
e34ef5ac 243#else
244 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
245#endif
246 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
247 // 4kb guard in front
12367ad0 248 gpu.vram += (4096 / 2);
e34ef5ac 249 // Align
250 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 251 return 0;
252 }
253 else {
254 fprintf(stderr, "could not map vram, expect crashes\n");
255 return -1;
256 }
257}
258
6e9bdaef 259long GPUinit(void)
260{
9394ada5 261 int ret;
262 ret = vout_init();
263 ret |= renderer_init();
264
3b7b0065 265 memset(&gpu.state, 0, sizeof(gpu.state));
266 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
267 gpu.zero = 0;
3ece2f0c 268 gpu.state.frame_count = &gpu.zero;
deb18d24 269 gpu.state.hcnt = &gpu.zero;
48f3d210 270 gpu.cmd_len = 0;
9394ada5 271 do_reset();
48f3d210 272
12367ad0 273 /*if (gpu.mmap != NULL) {
9ee0fd5b 274 if (map_vram() != 0)
275 ret = -1;
12367ad0 276 }*/
6e9bdaef 277 return ret;
278}
279
280long GPUshutdown(void)
281{
9ee0fd5b 282 long ret;
283
e929dec5 284 renderer_finish();
9ee0fd5b 285 ret = vout_finish();
12367ad0 286
287 if (vram_ptr_orig != NULL) {
e34ef5ac 288#if GPULIB_USE_MMAP
12367ad0 289 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
290#else
291 free(vram_ptr_orig);
292#endif
9ee0fd5b 293 }
12367ad0 294 vram_ptr_orig = gpu.vram = NULL;
9ee0fd5b 295
296 return ret;
6e9bdaef 297}
298
1ab64c54
GI
299void GPUwriteStatus(uint32_t data)
300{
1ab64c54
GI
301 uint32_t cmd = data >> 24;
302
fc84f618 303 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 304 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 305 return;
8dd855cd 306 gpu.regs[cmd] = data;
fc84f618 307 }
308
309 gpu.state.fb_dirty = 1;
8dd855cd 310
311 switch (cmd) {
1ab64c54 312 case 0x00:
6e9bdaef 313 do_reset();
1ab64c54 314 break;
48f3d210 315 case 0x01:
316 do_cmd_reset();
317 break;
1ab64c54 318 case 0x03:
5bbe183f 319 if (data & 1) {
61124a6d 320 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 321 gpu.state.dims_changed = 1; // for hud clearing
322 }
61124a6d
PC
323 else
324 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
325 break;
326 case 0x04:
61124a6d
PC
327 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
328 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
329 break;
330 case 0x05:
5bbe183f 331 gpu.screen.src_x = data & 0x3ff;
332 gpu.screen.src_y = (data >> 10) & 0x1ff;
3b7b0065 333 renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
9fe27e25 334 if (gpu.frameskip.set) {
335 decide_frameskip_allow(gpu.ex_regs[3]);
336 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
337 decide_frameskip();
338 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
339 }
fb4c6fba 340 }
1ab64c54 341 break;
8dd855cd 342 case 0x06:
343 gpu.screen.x1 = data & 0xfff;
344 gpu.screen.x2 = (data >> 12) & 0xfff;
345 update_width();
346 break;
1ab64c54
GI
347 case 0x07:
348 gpu.screen.y1 = data & 0x3ff;
349 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 350 update_height();
1ab64c54
GI
351 break;
352 case 0x08:
61124a6d 353 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 354 update_width();
355 update_height();
e929dec5 356 renderer_notify_res_change();
1ab64c54 357 break;
deb18d24 358 default:
359 if ((cmd & 0xf0) == 0x10)
360 get_gpu_info(data);
6e9bdaef 361 break;
1ab64c54 362 }
7890a708 363
364#ifdef GPUwriteStatus_ext
365 GPUwriteStatus_ext(data);
366#endif
1ab64c54
GI
367}
368
// Extra argument-word count for every GP0 command byte (length = 1 + entry).
// Variable-length commands (polylines 0x48-0x5f, vram i/o 0xa0-0xdf) are
// handled specially by the parsers and only carry their fixed header here.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
388
d30279e2
GI
389#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
390
// Copy l pixels while OR-ing the given mask bit (bit 15 / "msb") into each.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
397
// Transfer one span of l pixels between vram at (x,y) and mem.
// is_read: vram -> mem; otherwise mem -> vram, OR-ing in msb when set.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
409
410static int do_vram_io(uint32_t *data, int count, int is_read)
411{
412 int count_initial = count;
36da9c13 413 uint16_t msb = gpu.ex_regs[6] << 15;
d30279e2
GI
414 uint16_t *sdata = (uint16_t *)data;
415 int x = gpu.dma.x, y = gpu.dma.y;
416 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 417 int o = gpu.dma.offset;
d30279e2
GI
418 int l;
419 count *= 2; // operate in 16bpp pixels
420
c765eb86
JW
421 renderer_sync();
422
d30279e2
GI
423 if (gpu.dma.offset) {
424 l = w - gpu.dma.offset;
ddd56f6e 425 if (count < l)
d30279e2 426 l = count;
ddd56f6e 427
36da9c13 428 do_vram_line(x + o, y, sdata, l, is_read, msb);
ddd56f6e 429
430 if (o + l < w)
431 o += l;
432 else {
433 o = 0;
434 y++;
435 h--;
436 }
d30279e2
GI
437 sdata += l;
438 count -= l;
d30279e2
GI
439 }
440
441 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
442 y &= 511;
36da9c13 443 do_vram_line(x, y, sdata, w, is_read, msb);
d30279e2
GI
444 }
445
05740673 446 if (h > 0) {
447 if (count > 0) {
448 y &= 511;
36da9c13 449 do_vram_line(x, y, sdata, count, is_read, msb);
05740673 450 o = count;
451 count = 0;
452 }
d30279e2 453 }
05740673 454 else
455 finish_vram_transfer(is_read);
d30279e2
GI
456 gpu.dma.y = y;
457 gpu.dma.h = h;
ddd56f6e 458 gpu.dma.offset = o;
d30279e2 459
6e9bdaef 460 return count_initial - count / 2;
d30279e2
GI
461}
462
463static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
464{
ddd56f6e 465 if (gpu.dma.h)
466 log_anomaly("start_vram_transfer while old unfinished\n");
467
5440b88e 468 gpu.dma.x = pos_word & 0x3ff;
469 gpu.dma.y = (pos_word >> 16) & 0x1ff;
48f3d210 470 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
471 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
d30279e2 472 gpu.dma.offset = 0;
05740673 473 gpu.dma.is_read = is_read;
474 gpu.dma_start = gpu.dma;
d30279e2 475
9e146206 476 renderer_flush_queues();
477 if (is_read) {
61124a6d 478 gpu.status |= PSX_GPU_STATUS_IMG;
9e146206 479 // XXX: wrong for width 1
495d603c 480 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
5440b88e 481 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
9e146206 482 }
d30279e2 483
6e9bdaef 484 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
485 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
1328fa32 486 if (gpu.gpu_state_change)
487 gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
d30279e2
GI
488}
489
05740673 490static void finish_vram_transfer(int is_read)
491{
492 if (is_read)
61124a6d 493 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 494 else {
495 gpu.state.fb_dirty = 1;
05740673 496 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 497 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 498 }
1328fa32 499 if (gpu.gpu_state_change)
500 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
05740673 501}
502
36da9c13 503static void do_vram_copy(const uint32_t *params)
504{
505 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
506 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
507 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
508 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
509 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
510 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
511 uint16_t msb = gpu.ex_regs[6] << 15;
512 uint16_t lbuf[128];
513 uint32_t x, y;
514
515 if (sx == dx && sy == dy && msb == 0)
516 return;
517
518 renderer_flush_queues();
519
520 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
521 {
522 for (y = 0; y < h; y++)
523 {
524 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
525 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
526 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
527 {
528 uint32_t x1, w1 = w - x;
529 if (w1 > ARRAY_SIZE(lbuf))
530 w1 = ARRAY_SIZE(lbuf);
531 for (x1 = 0; x1 < w1; x1++)
532 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
533 for (x1 = 0; x1 < w1; x1++)
534 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
535 }
536 }
537 }
538 else
539 {
540 uint32_t sy1 = sy, dy1 = dy;
541 for (y = 0; y < h; y++, sy1++, dy1++)
542 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
543 }
544
545 renderer_update_caches(dx, dy, w, h, 0);
546}
547
b243416b 548static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
549{
97e07db9 550 int cmd = 0, pos = 0, len, dummy, v;
b243416b 551 int skip = 1;
552
fbb4bfff 553 gpu.frameskip.pending_fill[0] = 0;
554
b243416b 555 while (pos < count && skip) {
556 uint32_t *list = data + pos;
db215a72 557 cmd = LE32TOH(list[0]) >> 24;
b243416b 558 len = 1 + cmd_lengths[cmd];
559
97e07db9 560 switch (cmd) {
561 case 0x02:
db215a72 562 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 563 // clearing something large, don't skip
564 do_cmd_list(list, 3, &dummy);
565 else
566 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
567 break;
568 case 0x24 ... 0x27:
569 case 0x2c ... 0x2f:
570 case 0x34 ... 0x37:
571 case 0x3c ... 0x3f:
572 gpu.ex_regs[1] &= ~0x1ff;
db215a72 573 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 574 break;
575 case 0x48 ... 0x4F:
576 for (v = 3; pos + v < count; v++)
577 {
db215a72 578 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 579 break;
580 }
581 len += v - 3;
582 break;
583 case 0x58 ... 0x5F:
584 for (v = 4; pos + v < count; v += 2)
585 {
db215a72 586 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 587 break;
588 }
589 len += v - 4;
590 break;
591 default:
592 if (cmd == 0xe3)
db215a72 593 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 594 if ((cmd & 0xf8) == 0xe0)
db215a72 595 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 596 break;
b243416b 597 }
b243416b 598
599 if (pos + len > count) {
600 cmd = -1;
601 break; // incomplete cmd
602 }
36da9c13 603 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 604 break; // image i/o
97e07db9 605
b243416b 606 pos += len;
607 }
608
609 renderer_sync_ecmds(gpu.ex_regs);
610 *last_cmd = cmd;
611 return pos;
612}
613
48f3d210 614static noinline int do_cmd_buffer(uint32_t *data, int count)
d30279e2 615{
b243416b 616 int cmd, pos;
617 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 618 int vram_dirty = 0;
d30279e2 619
d30279e2 620 // process buffer
b243416b 621 for (pos = 0; pos < count; )
d30279e2 622 {
b243416b 623 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
624 vram_dirty = 1;
d30279e2 625 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 626 if (pos == count)
627 break;
d30279e2
GI
628 }
629
db215a72 630 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 631 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
632 if (unlikely((pos+2) >= count)) {
633 // incomplete vram write/read cmd, can't consume yet
634 cmd = -1;
635 break;
636 }
637
d30279e2 638 // consume vram write/read cmd
db215a72 639 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 640 pos += 3;
641 continue;
d30279e2 642 }
36da9c13 643 else if ((cmd & 0xe0) == 0x80) {
644 if (unlikely((pos+3) >= count)) {
645 cmd = -1; // incomplete cmd, can't consume yet
646 break;
647 }
648 do_vram_copy(data + pos + 1);
b30fba56 649 vram_dirty = 1;
36da9c13 650 pos += 4;
651 continue;
652 }
b243416b 653
1e07f71d 654 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 655 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 656 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
657 else {
658 pos += do_cmd_list(data + pos, count - pos, &cmd);
659 vram_dirty = 1;
660 }
661
662 if (cmd == -1)
663 // incomplete cmd
ddd56f6e 664 break;
d30279e2 665 }
ddd56f6e 666
61124a6d
PC
667 gpu.status &= ~0x1fff;
668 gpu.status |= gpu.ex_regs[1] & 0x7ff;
669 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 670
fc84f618 671 gpu.state.fb_dirty |= vram_dirty;
672
b243416b 673 if (old_e3 != gpu.ex_regs[3])
674 decide_frameskip_allow(gpu.ex_regs[3]);
675
ddd56f6e 676 return count - pos;
d30279e2
GI
677}
678
1328fa32 679static noinline void flush_cmd_buffer(void)
d30279e2 680{
48f3d210 681 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
d30279e2
GI
682 if (left > 0)
683 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
1328fa32 684 if (left != gpu.cmd_len) {
685 if (!gpu.dma.h && gpu.gpu_state_change)
686 gpu.gpu_state_change(PGS_PRIMITIVE_START);
687 gpu.cmd_len = left;
688 }
1ab64c54
GI
689}
690
691void GPUwriteDataMem(uint32_t *mem, int count)
692{
d30279e2
GI
693 int left;
694
56f08d83 695 log_io("gpu_dma_write %p %d\n", mem, count);
696
d30279e2
GI
697 if (unlikely(gpu.cmd_len > 0))
698 flush_cmd_buffer();
56f08d83 699
48f3d210 700 left = do_cmd_buffer(mem, count);
d30279e2 701 if (left)
56f08d83 702 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
703}
704
d30279e2 705void GPUwriteData(uint32_t data)
1ab64c54 706{
56f08d83 707 log_io("gpu_write %08x\n", data);
db215a72 708 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
709 if (gpu.cmd_len >= CMD_BUFFER_LEN)
710 flush_cmd_buffer();
1ab64c54
GI
711}
712
fae38d7a 713long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
1ab64c54 714{
09159d99 715 uint32_t addr, *list, ld_addr = 0;
ddd56f6e 716 int len, left, count;
1c72b1c2 717 long cpu_cycles = 0;
d30279e2 718
8f5f2dd5 719 preload(rambase + (start_addr & 0x1fffff) / 4);
720
d30279e2
GI
721 if (unlikely(gpu.cmd_len > 0))
722 flush_cmd_buffer();
723
56f08d83 724 log_io("gpu_dma_chain\n");
ddd56f6e 725 addr = start_addr & 0xffffff;
09159d99 726 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 727 {
ddd56f6e 728 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
729 len = LE32TOH(list[0]) >> 24;
730 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 731 preload(rambase + (addr & 0x1fffff) / 4);
732
1c72b1c2 733 cpu_cycles += 10;
734 if (len > 0)
735 cpu_cycles += 5 + len;
deb18d24 736
a4e249a1 737 log_io(".chain %08lx #%d+%d\n",
738 (long)(list - rambase) * 4, len, gpu.cmd_len);
739 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 740 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
741 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
742 gpu.cmd_len = 0;
743 }
a4e249a1 744 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
745 gpu.cmd_len += len;
746 flush_cmd_buffer();
747 continue;
748 }
ddd56f6e 749
56f08d83 750 if (len) {
48f3d210 751 left = do_cmd_buffer(list + 1, len);
a4e249a1 752 if (left) {
753 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
754 gpu.cmd_len = left;
755 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
756 }
56f08d83 757 }
ddd56f6e 758
fae38d7a 759 if (progress_addr) {
760 *progress_addr = addr;
761 break;
762 }
09159d99 763 #define LD_THRESHOLD (8*1024)
764 if (count >= LD_THRESHOLD) {
765 if (count == LD_THRESHOLD) {
766 ld_addr = addr;
767 continue;
768 }
769
770 // loop detection marker
771 // (bit23 set causes DMA error on real machine, so
772 // unlikely to be ever set by the game)
db215a72 773 list[0] |= HTOLE32(0x800000);
09159d99 774 }
ddd56f6e 775 }
776
09159d99 777 if (ld_addr != 0) {
778 // remove loop detection markers
779 count -= LD_THRESHOLD + 2;
780 addr = ld_addr & 0x1fffff;
781 while (count-- > 0) {
782 list = rambase + addr / 4;
db215a72
PC
783 addr = LE32TOH(list[0]) & 0x1fffff;
784 list[0] &= HTOLE32(~0x800000);
09159d99 785 }
d30279e2 786 }
09159d99 787
3ece2f0c 788 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 789 gpu.state.last_list.hcnt = *gpu.state.hcnt;
1c72b1c2 790 gpu.state.last_list.cycles = cpu_cycles;
deb18d24 791 gpu.state.last_list.addr = start_addr;
792
1c72b1c2 793 return cpu_cycles;
1ab64c54
GI
794}
795
d30279e2
GI
796void GPUreadDataMem(uint32_t *mem, int count)
797{
56f08d83 798 log_io("gpu_dma_read %p %d\n", mem, count);
799
d30279e2
GI
800 if (unlikely(gpu.cmd_len > 0))
801 flush_cmd_buffer();
56f08d83 802
d30279e2
GI
803 if (gpu.dma.h)
804 do_vram_io(mem, count, 1);
805}
806
807uint32_t GPUreadData(void)
808{
9e146206 809 uint32_t ret;
56f08d83 810
811 if (unlikely(gpu.cmd_len > 0))
812 flush_cmd_buffer();
813
9e146206 814 ret = gpu.gp0;
495d603c
PC
815 if (gpu.dma.h) {
816 ret = HTOLE32(ret);
9e146206 817 do_vram_io(&ret, 1, 1);
495d603c
PC
818 ret = LE32TOH(ret);
819 }
56f08d83 820
9e146206 821 log_io("gpu_read %08x\n", ret);
822 return ret;
d30279e2
GI
823}
824
825uint32_t GPUreadStatus(void)
826{
ddd56f6e 827 uint32_t ret;
56f08d83 828
d30279e2
GI
829 if (unlikely(gpu.cmd_len > 0))
830 flush_cmd_buffer();
831
61124a6d 832 ret = gpu.status;
ddd56f6e 833 log_io("gpu_read_status %08x\n", ret);
834 return ret;
d30279e2
GI
835}
836
// Savestate layout shared with the PCSX core — member order/sizes are ABI,
// do not change them.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 844
096ec49b 845long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 846{
fc84f618 847 int i;
848
1ab64c54
GI
849 switch (type) {
850 case 1: // save
d30279e2
GI
851 if (gpu.cmd_len > 0)
852 flush_cmd_buffer();
c765eb86
JW
853
854 renderer_sync();
9ee0fd5b 855 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 856 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 857 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 858 freeze->ulStatus = gpu.status;
1ab64c54
GI
859 break;
860 case 0: // load
c765eb86 861 renderer_sync();
9ee0fd5b 862 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 863 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 864 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 865 gpu.status = freeze->ulStatus;
3d47ef17 866 gpu.cmd_len = 0;
fc84f618 867 for (i = 8; i > 0; i--) {
868 gpu.regs[i] ^= 1; // avoid reg change detection
869 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
870 }
5b745e5b 871 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 872 renderer_update_caches(0, 0, 1024, 512, 1);
1ab64c54
GI
873 break;
874 }
875
876 return 1;
877}
878
5440b88e 879void GPUupdateLace(void)
880{
881 if (gpu.cmd_len > 0)
882 flush_cmd_buffer();
883 renderer_flush_queues();
884
7a20a6d0 885#ifndef RAW_FB_DISPLAY
61124a6d 886 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 887 if (!gpu.state.blanked) {
888 vout_blank();
889 gpu.state.blanked = 1;
890 gpu.state.fb_dirty = 1;
891 }
892 return;
893 }
894
c765eb86
JW
895 renderer_notify_update_lace(0);
896
aafcb4dd 897 if (!gpu.state.fb_dirty)
5440b88e 898 return;
7a20a6d0 899#endif
5440b88e 900
901 if (gpu.frameskip.set) {
902 if (!gpu.frameskip.frame_ready) {
903 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
904 return;
905 gpu.frameskip.active = 0;
906 }
907 gpu.frameskip.frame_ready = 0;
908 }
909
910 vout_update();
3b7b0065 911 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
912 renderer_update_caches(0, 0, 1024, 512, 1);
913 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 914 gpu.state.fb_dirty = 0;
aafcb4dd 915 gpu.state.blanked = 0;
c765eb86 916 renderer_notify_update_lace(1);
5440b88e 917}
918
72e5023f 919void GPUvBlank(int is_vblank, int lcf)
920{
5440b88e 921 int interlace = gpu.state.allow_interlace
61124a6d
PC
922 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
923 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 924 // interlace doesn't look nice on progressive displays,
925 // so we have this "auto" mode here for games that don't read vram
926 if (gpu.state.allow_interlace == 2
927 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
928 {
929 interlace = 0;
930 }
931 if (interlace || interlace != gpu.state.old_interlace) {
932 gpu.state.old_interlace = interlace;
933
934 if (gpu.cmd_len > 0)
935 flush_cmd_buffer();
936 renderer_flush_queues();
937 renderer_set_interlace(interlace, !lcf);
938 }
939}
940
80bc1426 941void GPUgetScreenInfo(int *y, int *base_hres)
942{
943 *y = gpu.screen.y;
944 *base_hres = gpu.screen.vres;
945 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
946 *base_hres >>= 1;
947}
948
5440b88e 949#include "../../frontend/plugin_lib.h"
950
951void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
952{
953 gpu.frameskip.set = cbs->frameskip;
954 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 955 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 956 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 957 gpu.frameskip.active = 0;
958 gpu.frameskip.frame_ready = 1;
959 gpu.state.hcnt = cbs->gpu_hcnt;
960 gpu.state.frame_count = cbs->gpu_frame_count;
961 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 962 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
5bbe183f 963 if (gpu.state.screen_centering_type != cbs->screen_centering_type
964 || gpu.state.screen_centering_x != cbs->screen_centering_x
965 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
966 gpu.state.screen_centering_type = cbs->screen_centering_type;
967 gpu.state.screen_centering_x = cbs->screen_centering_x;
968 gpu.state.screen_centering_y = cbs->screen_centering_y;
969 update_width();
970 update_height();
971 }
5440b88e 972
9ee0fd5b 973 gpu.mmap = cbs->mmap;
974 gpu.munmap = cbs->munmap;
1328fa32 975 gpu.gpu_state_change = cbs->gpu_state_change;
9ee0fd5b 976
977 // delayed vram mmap
978 if (gpu.vram == NULL)
979 map_vram();
980
5440b88e 981 if (cbs->pl_vout_set_raw_vram)
982 cbs->pl_vout_set_raw_vram(gpu.vram);
983 renderer_set_config(cbs);
984 vout_set_config(cbs);
72e5023f 985}
986
1ab64c54 987// vim:shiftwidth=2:expandtab