/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* for calloc */
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

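// Derive the displayed width from the current display mode and the GP1(0x06)
// horizontal display range. hres_all[]/hdivs[] are indexed by status bits
// 16-18: the standard 256/320/512/640 widths plus the 368-pixel mode, with
// the matching dotclock divider used to convert the x1/x2 range into pixels.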
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
      case C_INGAME:
        break;
      case C_MANUAL:
        x = gpu.state.screen_centering_x;
        break;
      default:
        // correct if slightly miscentered
        x_auto = (hres - sw) / 2 & ~3;
        if ((uint32_t)x_auto <= 8u && abs(x) < 24)
          x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
      case C_INGAME:
        break;
      case C_BORDERLESS:
        y = 0;
        break;
      case C_MANUAL:
        y = gpu.state.screen_centering_y;
        break;
      default:
        // correct if slightly miscentered
        if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
          y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

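// Called on display-address flips while frameskip is enabled: keeps skipping
// while fewer than frameskip.set consecutive frames have been skipped, and
// honors the frontend's "force" and "advice" flags. A fill (GP0 0x02) that
// was buffered while skipping is replayed once skipping stops, so the next
// drawn frame starts from the correct background.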
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

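// GP1(0x10) "get GPU info": returns the requested internal register through
// GPUREAD. Requests 2-5 map to the 0xe2-0xe5 state (texture window, drawing
// area corners, drawing offset); 7 returns the GPU version, 2 here, which is
// presumably what retail consoles report.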
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

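// VRAM is a 1024x512 grid of 16-bit units (1 MB); see VRAM_MEM_XY below.
// The buffer is allocated at twice that size as an overdraw guard, plus a
// 4 KB guard area placed in front of it.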
// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in its high-res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Will change this value if it ever gets large
  // page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN - 1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN - 1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

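// Number of parameter words following each GP0 command word (the command
// word itself adds one more). Variable-length commands (the 0x48-0x5f
// polylines) only carry their minimum size here and are extended while
// parsing; zero entries are commands with no parameters or unused opcodes.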
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

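// Feed words into a CPU->VRAM or VRAM->CPU transfer. gpu.dma holds the
// transfer rectangle and the offset reached within the current row; rows
// wrap at the 512-line VRAM height, and the "set mask bit" setting from
// GP0(0xe6) is OR'd into written pixels via cpy_msb().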
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

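// GP0(0x80) VRAM-to-VRAM copy. The fast path is a per-line memcpy; the slow
// path goes through a small line buffer when the rectangles overlap in a way
// memcpy can't handle, when the copy wraps horizontally, or when the mask
// bit has to be set on written pixels.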
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

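// List parsing used while a frame is being skipped: drawing commands are not
// executed, but state that must stay consistent is still tracked (texture
// page bits from textured polys, the 0xe1-0xe6 settings, and fills, which
// are either run immediately or buffered in frameskip.pending_fill).
// Polylines are terminated by a word matching the 0x5xxx5xxx pattern.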
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

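// Main GP0 word consumer used by GPUwriteData*/GPUdmaChain: routes words to
// an active VRAM transfer, starts new transfers and VRAM copies, and hands
// everything else to the renderer's do_cmd_list (or the skip parser above).
// Returns how many words could not be consumed yet (incomplete command).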
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

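// Walk a GPU DMA linked list: each node's header word holds the next node's
// address in the low 24 bits and the payload word count in the top 8 bits,
// with bit 23 of the address terminating the walk. Nodes past LD_THRESHOLD
// get bit 23 temporarily set as loop-detection markers, which are removed
// again below. Returns an approximate cycle cost used for timing.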
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should always be 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

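// Savestate save/load. On load, every control register is written back
// through GPUwriteStatus() with its stored value pre-toggled, so the
// "register unchanged" early-out in GPUwriteStatus doesn't suppress the
// replay.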
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

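// Presentation entry point, called by the core once per vsync: handles
// display blanking, the frameskip frame_ready handshake, and pushes the
// frame out through vout_update() when something in VRAM has changed.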
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab