psxdma: Fix endian issue in gpuInterrupt()
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1328fa32 17#include "../../libpcsxcore/gpu.h" // meh
1ab64c54
GI
18
19#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 20#ifdef __GNUC__
d30279e2 21#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 22#define preload __builtin_prefetch
8dd855cd 23#define noinline __attribute__((noinline))
8f5f2dd5 24#else
25#define unlikely(x)
26#define preload(...)
27#define noinline
8f5f2dd5 28#endif
1ab64c54 29
deb18d24 30//#define log_io gpu_log
56f08d83 31#define log_io(...)
56f08d83 32
9ee0fd5b 33struct psx_gpu gpu;
1ab64c54 34
48f3d210 35static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 36static void finish_vram_transfer(int is_read);
48f3d210 37
// GP1(01h)-style reset of command state: any buffered GP0 data and any
// in-flight VRAM transfer are flushed (processed, not discarded) before
// the bookkeeping is cleared.
static noinline void do_cmd_reset(void)
{
  // wait for the (possibly threaded) renderer before touching shared state
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
50
// GP1(00h): full GPU reset - command state, registers, status word and
// default screen geometry, followed by renderer notification.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // reset the shadow e1..e7 rendering attribute commands to empty payloads
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // initial status word after reset
  gpu.gp0 = 0;
  gpu.regs[3] = 1;          // display blanked
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
69
// Recompute horizontal display position/size from GP1(06h)/GP1(08h) state,
// applying the configured centering policy and a per-mode divider.
static noinline void update_width(void)
{
  // mode 1/3/5/7 are the in-between "368" stretch modes
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // use raw register values, no correction
      break;
    case 2:
      // manual user override
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
114
// Recompute vertical display position/size from GP1(07h)/GP1(08h) state,
// with PAL offset and interlace (double-height) handling.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlace doubles everything vertically
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // use raw register values, no correction
      break;
    case 2:
      // manual user override
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
152
// Decide whether upcoming frames should be skipped and update the skip
// counters; called on display-address flips when frameskip is enabled.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // priority: forced skip > frontend advice > fixed 1-in-N pattern
  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill that was deferred while skipping must still be executed
  // once skipping stops, or the frame would show stale contents
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
179
b243416b 180static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 181{
182 // no frameskip if it decides to draw to display area,
183 // but not for interlace since it'll most likely always do that
184 uint32_t x = cmd_e3 & 0x3ff;
185 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 186 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 187 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
188 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 189 return gpu.frameskip.allow;
9fe27e25 190}
191
01ff3105 192static void flush_cmd_buffer(void);
193
6e9bdaef 194static noinline void get_gpu_info(uint32_t data)
195{
01ff3105 196 if (unlikely(gpu.cmd_len > 0))
197 flush_cmd_buffer();
6e9bdaef 198 switch (data & 0x0f) {
199 case 0x02:
200 case 0x03:
201 case 0x04:
6e9bdaef 202 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
203 break;
08b33377 204 case 0x05:
205 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 206 break;
207 case 0x07:
208 gpu.gp0 = 2;
209 break;
210 default:
08b33377 211 // gpu.gp0 unchanged
6e9bdaef 212 break;
213 }
214}
215
9ee0fd5b 216// double, for overdraw guard
12367ad0 217#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
218
219// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
220// renderer/downscaler it uses in high res modes:
221#ifdef GCW_ZERO
222 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
223 // fills. (Will change this value if it ever gets large page support)
224 #define VRAM_ALIGN 8192
225#else
226 #define VRAM_ALIGN 16
227#endif
228
229// vram ptr received from mmap/malloc/alloc (will deallocate using this)
230static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 231
e34ef5ac 232#ifndef GPULIB_USE_MMAP
233# ifdef __linux__
234# define GPULIB_USE_MMAP 1
235# else
236# define GPULIB_USE_MMAP 0
237# endif
238#endif
// Allocate (mmap or calloc) the VRAM buffer with a 4kb guard area in
// front and VRAM_ALIGN alignment. Returns 0 on success, -1 on failure.
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  // mmap may signal failure with MAP_FAILED (-1), calloc with NULL
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // round up to VRAM_ALIGN
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
258
// Plugin entry point: initialize video out, renderer and internal state.
// Returns 0 on success (nonzero if a sub-init failed).
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point the counters at a harmless dummy until the frontend provides real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // VRAM mapping is deferred to GPUrearmedCallbacks(), when gpu.mmap is known
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
279
// Plugin exit point: tear down renderer/video out and release the VRAM
// buffer through the same mechanism (mmap/calloc) that allocated it.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
298
1ab64c54
GI
// GP1 control-port write: display control commands and GPU info queries.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip no-change writes; regs 0, 1 and 5 always take effect
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00: // reset GPU
    do_reset();
    break;
  case 0x01: // reset command buffer
    do_cmd_reset();
    break;
  case 0x03: // display enable/blank
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04: // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05: // display start address (scanout origin in VRAM)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // only re-decide once per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06: // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07: // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08: // display mode
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10) // GPU info query
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
372
// Number of parameter words following each GP0 command word (the command
// word itself is not counted). Polyline commands (0x48..0x5f) are
// open-ended; their real length is found by scanning for the 0x5xxx5xxx
// terminator in do_cmd_list_skip().
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
392
d30279e2
GI
393#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
394
// Copy l halfwords from src to dst, OR-ing each with msb
// (used to force the mask/msb bit on written pixels).
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
401
// Transfer one span of l halfwords between VRAM at (x, y) and mem.
// is_read: VRAM -> mem; otherwise mem -> VRAM, with the msb (mask bit)
// OR-ed in when nonzero.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);

  if (unlikely(is_read)) {
    memcpy(mem, vram, l * 2);
    return;
  }
  if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
413
// Feed `count` words of CPU data into (or out of) the active VRAM
// transfer: finish a partially transferred line first, then whole lines,
// then a partial tail whose offset is remembered for the next call.
// Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit from GP0(E6h)
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    // complete the line left unfinished by the previous call
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // full lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      // partial line remains; stash the offset for the next call
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
466
// Begin a CPU<->VRAM transfer from a GP0(A0h/C0h) command.
// pos_word packs x/y, size_word packs w/h; a size field of 0 means the
// maximum (1024 or 512) per hardware behavior.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma; // keep the original rect for cache invalidation

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
493
// Complete the current VRAM transfer: clear the "image ready" status bit
// for reads, or mark the framebuffer dirty and invalidate renderer caches
// over the written rectangle for writes.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
506
// GP0(80h): VRAM->VRAM rectangle copy with horizontal/vertical wrap and
// mask-bit handling. params are the three LE parameter words.
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  // copy onto itself with no mask bit to set is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // slow path: forward overlap, horizontal wrap, or mask bit - go through
  // a bounce buffer with per-pixel x wrapping
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: one memcpy per row
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
551
// Parse a GP0 command list while frameskip is active: keep shadow state
// (texpage, e-regs) in sync, defer small fills, execute large ones, and
// stop when skipping becomes disallowed or an image-transfer command is
// reached. Returns words consumed; *last_cmd gets the last command seen
// (-1 when the list was cut short by an incomplete command).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          // small fill: defer until skipping stops
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27: // textured polygons: track texpage changes
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F: // polyline: scan for the 0x5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F: // shaded polyline: vertices come in pairs
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3) // drawing area change may end the skip window
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
617
// Central GP0 dispatcher: routes words into an active VRAM write, starts
// image transfers and VRAM copies, and hands drawing commands either to
// the renderer or to the skip parser. Returns unprocessed word count
// (nonzero when the buffer ends mid-command).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) { // vram->vram copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the shadow e-reg state into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
682
// Process everything buffered in gpu.cmd_buffer; any incomplete tail is
// moved to the front for the next write, and the core is notified when
// drawing progress was made.
static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
694
695void GPUwriteDataMem(uint32_t *mem, int count)
696{
d30279e2
GI
697 int left;
698
56f08d83 699 log_io("gpu_dma_write %p %d\n", mem, count);
700
d30279e2
GI
701 if (unlikely(gpu.cmd_len > 0))
702 flush_cmd_buffer();
56f08d83 703
48f3d210 704 left = do_cmd_buffer(mem, count);
d30279e2 705 if (left)
56f08d83 706 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
707}
708
d30279e2 709void GPUwriteData(uint32_t data)
1ab64c54 710{
56f08d83 711 log_io("gpu_write %08x\n", data);
db215a72 712 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
713 if (gpu.cmd_len >= CMD_BUFFER_LEN)
714 flush_cmd_buffer();
1ab64c54
GI
715}
716
// Walk a GPU DMA linked list starting at start_addr, executing each
// packet's payload. Looped lists are survived via temporary bit-23
// markers that are removed afterwards. Returns an approximate cycle
// cost; if progress_addr is non-NULL, processes one packet and reports
// the next address there instead of walking the whole list.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;       // payload words in this packet
    addr = LE32TOH(list[0]) & 0xffffff; // next packet address
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover from an earlier call: append this packet and flush together
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // stash the incomplete tail for the next call
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
799
d30279e2
GI
800void GPUreadDataMem(uint32_t *mem, int count)
801{
56f08d83 802 log_io("gpu_dma_read %p %d\n", mem, count);
803
d30279e2
GI
804 if (unlikely(gpu.cmd_len > 0))
805 flush_cmd_buffer();
56f08d83 806
d30279e2
GI
807 if (gpu.dma.h)
808 do_vram_io(mem, count, 1);
809}
810
// Single-word read from the GPU data port (GPUREAD). During a VRAM->CPU
// transfer this advances the transfer; otherwise the latched gp0 value
// is returned.
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io operates on little-endian data in place
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
828
829uint32_t GPUreadStatus(void)
830{
ddd56f6e 831 uint32_t ret;
56f08d83 832
d30279e2
GI
833 if (unlikely(gpu.cmd_len > 0))
834 flush_cmd_buffer();
835
61124a6d 836 ret = gpu.status;
ddd56f6e 837 log_io("gpu_read_status %08x\n", ret);
838 return ret;
d30279e2
GI
839}
840
// Savestate container shared with the emulator core; the layout is part
// of the savestate/plugin ABI - do not reorder or resize fields.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 848
// Save (type 1) or restore (type 0) the full GPU state for savestates.
// Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // e-regs are stored past the control regs, at their 0xe0 command slots
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control registers to rebuild derived screen state
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
882
// Per-vsync update: flush pending work, then present a frame unless the
// display is blanked, the framebuffer is clean, or frameskip says no.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank the output once, then stay quiet until unblanked
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // cap consecutive skipped frames so the picture can't stall
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // full cache refresh when enhancement was just toggled on
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
922
// vblank notification: decide the interlace rendering mode for the next
// field and push it to the renderer when it changes.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
944
// Report the display y offset and base vertical resolution to the
// frontend (halved when double-height/interlace is active).
// NOTE(review): despite the "hres" in the parameter name, this is filled
// from gpu.screen.vres (vertical resolution) - confirm against the
// frontend caller that vertical is indeed what's expected here.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
952
5440b88e 953#include "../../frontend/plugin_lib.h"
954
// Accept frontend callbacks and configuration; re-applies screen
// centering when it changed and performs the deferred VRAM mapping once
// the mmap callback is available.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap (see GPUinit)
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
990
1ab64c54 991// vim:shiftwidth=2:expandtab