spu: adjust fmod to match nocash description
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1ab64c54
GI
17
18#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 19#ifdef __GNUC__
d30279e2 20#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 21#define preload __builtin_prefetch
8dd855cd 22#define noinline __attribute__((noinline))
8f5f2dd5 23#else
24#define unlikely(x)
25#define preload(...)
26#define noinline
8f5f2dd5 27#endif
1ab64c54 28
deb18d24 29//#define log_io gpu_log
56f08d83 30#define log_io(...)
56f08d83 31
9ee0fd5b 32struct psx_gpu gpu;
1ab64c54 33
48f3d210 34static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 35static void finish_vram_transfer(int is_read);
48f3d210 36
// GP1(0x01) command reset: drain any buffered draw commands and close out
// an in-flight VRAM transfer, leaving the command state clean.
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  // run whatever was queued before clearing the buffer, so partially
  // submitted state commands still take effect
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // an unfinished VRAM DMA still needs its cache/status cleanup
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
49
// GP1(0x00) full GPU reset: registers, status word, and screen geometry
// return to power-on defaults.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // e1..e6 shadow regs reset to "command byte only, zero payload"
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;  // display blanked after reset
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
68
// Recompute horizontal display width/offset from the GP1(0x06) x1/x2 range
// and the resolution bits in the status word.
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };  // gpu clocks per pixel
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // display range in gpu clocks
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;         // convert clocks -> pixels
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // use the values exactly as programmed
      break;
    case 2:
      // manual user override
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
113
// Recompute vertical display height/offset from the GP1(0x07) y1/y2 range
// and the PAL/double-height bits in the status word.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double height: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // use the programmed values as-is
      break;
    case 2:
      // manual user override
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
151
// Called once per flip to decide whether the next frame should be skipped,
// based on the forced/advice flags from the frontend and the configured
// skip count.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill that was deferred while skipping must run now that we render again
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
178
b243416b 179static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 180{
181 // no frameskip if it decides to draw to display area,
182 // but not for interlace since it'll most likely always do that
183 uint32_t x = cmd_e3 & 0x3ff;
184 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 185 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 186 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
187 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 188 return gpu.frameskip.allow;
9fe27e25 189}
190
01ff3105 191static void flush_cmd_buffer(void);
192
6e9bdaef 193static noinline void get_gpu_info(uint32_t data)
194{
01ff3105 195 if (unlikely(gpu.cmd_len > 0))
196 flush_cmd_buffer();
6e9bdaef 197 switch (data & 0x0f) {
198 case 0x02:
199 case 0x03:
200 case 0x04:
6e9bdaef 201 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
202 break;
08b33377 203 case 0x05:
204 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 205 break;
206 case 0x07:
207 gpu.gp0 = 2;
208 break;
209 default:
08b33377 210 // gpu.gp0 unchanged
6e9bdaef 211 break;
212 }
213}
214
9ee0fd5b 215// double, for overdraw guard
12367ad0 216#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
217
218// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
219// renderer/downscaler it uses in high res modes:
220#ifdef GCW_ZERO
221 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
222 // fills. (Will change this value if it ever gets large page support)
223 #define VRAM_ALIGN 8192
224#else
225 #define VRAM_ALIGN 16
226#endif
227
228// vram ptr received from mmap/malloc/alloc (will deallocate using this)
229static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 230
e34ef5ac 231#ifndef GPULIB_USE_MMAP
232# ifdef __linux__
233# define GPULIB_USE_MMAP 1
234# else
235# define GPULIB_USE_MMAP 0
236# endif
237#endif
// Allocate the emulated VRAM (plus guard and alignment slack) via the
// frontend-provided mmap or plain calloc.  Returns 0 on success, -1 on
// failure (gpu.vram left unusable).
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  // (void*)-1 is MAP_FAILED from mmap-style allocators
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
257
// Plugin entry point: initialize video-out and renderer backends and reset
// all GPU state.  Returns 0 on success, nonzero on backend failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // counters point at a harmless zero until the frontend supplies real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // VRAM mapping is deferred to GPUrearmedCallbacks() when the frontend's
  // mmap callback becomes available
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
278
// Plugin exit point: tear down backends and release VRAM via whichever
// allocator map_vram() used.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
297
1ab64c54
GI
// GP1 control port write: dispatch on the command byte (bits 24-31).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip redundant writes, except for reset/cmd-reset/display-start
    // which must always take effect
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:  // full reset
    do_reset();
    break;
  case 0x01:  // command buffer reset
    do_cmd_reset();
    break;
  case 0x03:  // display enable/blank
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:  // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:  // display start (scanout origin in VRAM)
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // treat the first display-start write per frame as the flip
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:  // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:  // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:  // display mode: update status bits 16-22 and re-derive dims
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
367
// Number of parameter words following each GP0 command word (command word
// itself not counted).  Variable-length commands — polylines (0x48+) and
// VRAM transfers (0xa0/0xc0 data phase) — are special-cased by the parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
387
d30279e2
GI
388#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
389
// Copy l halfwords from src to dst, OR-ing the given mask (used for the
// e6 "set mask bit" flag, bit 15) into every value written.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
396
// Transfer one horizontal span of l pixels at (x,y): VRAM -> mem for reads,
// mem -> VRAM for writes, OR-ing the mask bit in when e6 requests it.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
408
// Feed `count` words into (or out of) the active VRAM transfer rectangle.
// The transfer can span multiple calls; gpu.dma.{y,h,offset} record the
// resume point.  Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // e6 mask-set bit, applied on writes
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish a partially transferred row from the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;         // row still incomplete
    else {
      o = 0;          // row done, advance to the next one
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // VRAM wraps vertically at 512 lines
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // transfer not finished; stash partial-row progress in `o`
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
461
// Begin a GP0 0xa0 (write) / 0xc0 (read) VRAM transfer: latch the clipped
// rectangle into gpu.dma and, for reads, prime gp0 with the first word.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // 0 encodes full size (1024x512) per hardware behavior
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;  // kept for finish_vram_transfer()'s cache update

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
486
// Close out a VRAM transfer: clear the "image ready" status bit for reads,
// or invalidate the renderer's caches over the written rectangle for writes.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
}
495
// GP0 0x80 VRAM-to-VRAM copy.  params: [0]=src pos, [1]=dst pos, [2]=size.
// Takes the slow buffered path when rows overlap, wrap at x=1024, or the
// e6 mask bit must be OR-ed in; otherwise copies rows with memcpy.
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;  // 0 means max size
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  // copying onto itself with no mask change is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    // careful path: stage through lbuf in chunks so overlapping/wrapping
    // rows read all source pixels before any are overwritten
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: non-overlapping rows, no horizontal wrap, no mask bit
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
540
// Parse a command list while frameskip is active: draw commands are
// discarded, but state (e-regs, texture page) is still tracked so the next
// rendered frame is correct.  Returns words consumed; *last_cmd gets the
// final command byte or -1 if the list ended mid-command.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        // fill rectangle: defer small fills, execute large ones
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        // textured polys update the texpage bits of e1 as a side effect
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        // variable-length polyline: scan for the 0x5555,0x5555 terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        // shaded polyline: two words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          // draw-area change may make skipping unsafe
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
606
// Main GP0 command dispatcher: routes data words to an active VRAM
// transfer, starts new transfers/copies, and hands drawing commands to the
// renderer (or the skip parser).  Returns how many words were NOT consumed.
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the live e1/e6 state into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
670
// Run the buffered command words; any incomplete trailing command is moved
// to the front of the buffer to await the rest of its data.
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}
678
// Bulk GP0 write (DMA block mode): process `count` words directly.
// Words that cannot be consumed here are dropped (and logged).
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  // preserve ordering with any single-word writes buffered earlier
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
692
// Single-word GP0 write: buffer it (stored little-endian, like DMA data)
// and flush once the buffer fills up.
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
700
// Walk a linked-list DMA chain in emulated RAM, executing each packet's
// command words.  Long chains get loop-detection markers (bit 23) so a
// cyclic list terminates.  Returns an approximate cpu cycle cost.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit 23 set = end-of-chain marker (or our loop-detection marker)
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;      // words in this packet
    addr = LE32TOH(list[0]) & 0xffffff; // next packet address
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover words from the previous packet: append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // buffer the tail of an incomplete command for the next packet
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      // caller wants incremental processing: stop after one packet
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
783
d30279e2
GI
// Bulk GP0 read (DMA): pull `count` words out of an active VRAM read
// transfer.  No-op if no read transfer is in progress.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
794
// Single-word GP0 read: returns the next VRAM-read word when a transfer is
// active, otherwise the latched gp0 value (e.g. from get_gpu_info).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data in place
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
812
// GP1 status read: flush pending commands first so the status bits
// (texpage, mask, IMG, ...) reflect everything written so far.
uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}
824
// Savestate block layout; shared with other PCSX-family GPU plugins, so it
// must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 832
// Savestate entry point: type 1 saves GPU state into *freeze, type 0
// restores from it.  Always returns 1 (success).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // e-regs are stored at offset 0xe0, mirroring their command bytes
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 1..8 to rebuild derived state (screen dims etc.)
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}
866
// Per-frame presentation hook: flush pending work, honor blanking and
// frameskip, then push the frame to the video output.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // show black once per blanking period, then skip until unblanked
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // don't let skipping starve output for more than ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // when the enhancement (2x) renderer is toggled on, its buffers are stale
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
906
// VBlank notification from the core: decide whether interlaced rendering
// should be active and tell the renderer which field (lcf) comes next.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
928
// Report display y offset and base (non-doubled) vertical resolution.
// NOTE(review): despite the parameter name "base_hres", this returns
// gpu.screen.vres halved in double-height mode — presumably the frontend
// wants the progressive-equivalent line count; confirm against callers.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
936
5440b88e 937#include "../../frontend/plugin_lib.h"
938
// Frontend configuration hook: latch frameskip settings, counter pointers,
// centering options, and memory-mapping callbacks from the frontend.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;  // cast drops const
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // re-derive screen geometry only when a centering option changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
973
1ab64c54 974// vim:shiftwidth=2:expandtab