setup spu r8 handlers
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1ab64c54
GI
17
18#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 19#ifdef __GNUC__
d30279e2 20#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 21#define preload __builtin_prefetch
8dd855cd 22#define noinline __attribute__((noinline))
8f5f2dd5 23#else
24#define unlikely(x)
25#define preload(...)
26#define noinline
8f5f2dd5 27#endif
1ab64c54 28
deb18d24 29//#define log_io gpu_log
56f08d83 30#define log_io(...)
56f08d83 31
9ee0fd5b 32struct psx_gpu gpu;
1ab64c54 33
48f3d210 34static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 35static void finish_vram_transfer(int is_read);
48f3d210 36
37static noinline void do_cmd_reset(void)
38{
c765eb86
JW
39 renderer_sync();
40
48f3d210 41 if (unlikely(gpu.cmd_len > 0))
42 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
48f3d210 43 gpu.cmd_len = 0;
05740673 44
45 if (unlikely(gpu.dma.h > 0))
46 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 47 gpu.dma.h = 0;
48}
49
6e9bdaef 50static noinline void do_reset(void)
1ab64c54 51{
7841712d 52 unsigned int i;
5b568098 53
48f3d210 54 do_cmd_reset();
55
6e9bdaef 56 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 57 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
58 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 59 gpu.status = 0x14802000;
6e9bdaef 60 gpu.gp0 = 0;
fc84f618 61 gpu.regs[3] = 1;
6e9bdaef 62 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 63 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 64 gpu.screen.x = gpu.screen.y = 0;
01ff3105 65 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 66 renderer_notify_res_change();
1ab64c54
GI
67}
68
8dd855cd 69static noinline void update_width(void)
70{
5bbe183f 71 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
72 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
73 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
74 int hres = hres_all[(gpu.status >> 16) & 7];
75 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 76 int sw = gpu.screen.x2 - gpu.screen.x1;
5bbe183f 77 int x = 0, x_auto;
78 if (sw <= 0)
79 /* nothing displayed? */;
80 else {
81 int s = pal ? 656 : 608; // or 600? pal is just a guess
82 x = (gpu.screen.x1 - s) / hdiv;
83 x = (x + 1) & ~1; // blitter limitation
84 sw /= hdiv;
85 sw = (sw + 2) & ~3; // according to nocash
86 switch (gpu.state.screen_centering_type) {
87 case 1:
88 break;
89 case 2:
90 x = gpu.state.screen_centering_x;
91 break;
92 default:
93 // correct if slightly miscentered
94 x_auto = (hres - sw) / 2 & ~3;
95 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
96 x = x_auto;
97 }
98 if (x + sw > hres)
99 sw = hres - x;
100 // .x range check is done in vout_update()
101 }
102 // reduce the unpleasant right border that a few games have
103 if (gpu.state.screen_centering_type == 0
104 && x <= 4 && hres - (x + sw) >= 4)
105 hres -= 4;
106 gpu.screen.x = x;
107 gpu.screen.w = sw;
108 gpu.screen.hres = hres;
109 gpu.state.dims_changed = 1;
110 //printf("xx %d %d -> %2d, %d / %d\n",
111 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
8dd855cd 112}
113
114static noinline void update_height(void)
115{
5bbe183f 116 int pal = gpu.status & PSX_GPU_STATUS_PAL;
117 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
118 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
8dd855cd 119 int sh = gpu.screen.y2 - gpu.screen.y1;
5bbe183f 120 int center_tol = 16;
121 int vres = 240;
122
123 if (pal && (sh > 240 || gpu.screen.vres == 256))
124 vres = 256;
125 if (dheight)
126 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
127 if (sh <= 0)
128 /* nothing displayed? */;
129 else {
130 switch (gpu.state.screen_centering_type) {
131 case 1:
132 break;
133 case 2:
134 y = gpu.state.screen_centering_y;
135 break;
136 default:
137 // correct if slightly miscentered
138 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
139 y = 0;
140 }
141 if (y + sh > vres)
142 sh = vres - y;
143 }
144 gpu.screen.y = y;
8dd855cd 145 gpu.screen.h = sh;
5bbe183f 146 gpu.screen.vres = vres;
147 gpu.state.dims_changed = 1;
148 //printf("yy %d %d -> %d, %d / %d\n",
149 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
8dd855cd 150}
151
fc84f618 152static noinline void decide_frameskip(void)
153{
5eaa13f1
A
154 *gpu.frameskip.dirty = 1;
155
9fe27e25 156 if (gpu.frameskip.active)
157 gpu.frameskip.cnt++;
158 else {
159 gpu.frameskip.cnt = 0;
160 gpu.frameskip.frame_ready = 1;
161 }
fc84f618 162
5eaa13f1
A
163 if (*gpu.frameskip.force)
164 gpu.frameskip.active = 1;
165 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 166 gpu.frameskip.active = 1;
167 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 168 gpu.frameskip.active = 1;
169 else
170 gpu.frameskip.active = 0;
fbb4bfff 171
172 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
173 int dummy;
174 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
175 gpu.frameskip.pending_fill[0] = 0;
176 }
fc84f618 177}
178
b243416b 179static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 180{
181 // no frameskip if it decides to draw to display area,
182 // but not for interlace since it'll most likely always do that
183 uint32_t x = cmd_e3 & 0x3ff;
184 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 185 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 186 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
187 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 188 return gpu.frameskip.allow;
9fe27e25 189}
190
01ff3105 191static void flush_cmd_buffer(void);
192
6e9bdaef 193static noinline void get_gpu_info(uint32_t data)
194{
01ff3105 195 if (unlikely(gpu.cmd_len > 0))
196 flush_cmd_buffer();
6e9bdaef 197 switch (data & 0x0f) {
198 case 0x02:
199 case 0x03:
200 case 0x04:
6e9bdaef 201 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
202 break;
08b33377 203 case 0x05:
204 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 205 break;
206 case 0x07:
207 gpu.gp0 = 2;
208 break;
209 default:
08b33377 210 // gpu.gp0 unchanged
6e9bdaef 211 break;
212 }
213}
214
9ee0fd5b 215// double, for overdraw guard
12367ad0 216#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
217
218// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
219// renderer/downscaler it uses in high res modes:
220#ifdef GCW_ZERO
221 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
222 // fills. (Will change this value if it ever gets large page support)
223 #define VRAM_ALIGN 8192
224#else
225 #define VRAM_ALIGN 16
226#endif
227
228// vram ptr received from mmap/malloc/alloc (will deallocate using this)
229static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 230
e34ef5ac 231#ifndef GPULIB_USE_MMAP
232# ifdef __linux__
233# define GPULIB_USE_MMAP 1
234# else
235# define GPULIB_USE_MMAP 0
236# endif
237#endif
9ee0fd5b 238static int map_vram(void)
239{
e34ef5ac 240#if GPULIB_USE_MMAP
12367ad0 241 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
e34ef5ac 242#else
243 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
244#endif
245 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
246 // 4kb guard in front
12367ad0 247 gpu.vram += (4096 / 2);
e34ef5ac 248 // Align
249 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 250 return 0;
251 }
252 else {
253 fprintf(stderr, "could not map vram, expect crashes\n");
254 return -1;
255 }
256}
257
6e9bdaef 258long GPUinit(void)
259{
9394ada5 260 int ret;
261 ret = vout_init();
262 ret |= renderer_init();
263
3b7b0065 264 memset(&gpu.state, 0, sizeof(gpu.state));
265 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
266 gpu.zero = 0;
3ece2f0c 267 gpu.state.frame_count = &gpu.zero;
deb18d24 268 gpu.state.hcnt = &gpu.zero;
48f3d210 269 gpu.cmd_len = 0;
9394ada5 270 do_reset();
48f3d210 271
12367ad0 272 /*if (gpu.mmap != NULL) {
9ee0fd5b 273 if (map_vram() != 0)
274 ret = -1;
12367ad0 275 }*/
6e9bdaef 276 return ret;
277}
278
279long GPUshutdown(void)
280{
9ee0fd5b 281 long ret;
282
e929dec5 283 renderer_finish();
9ee0fd5b 284 ret = vout_finish();
12367ad0 285
286 if (vram_ptr_orig != NULL) {
e34ef5ac 287#if GPULIB_USE_MMAP
12367ad0 288 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
289#else
290 free(vram_ptr_orig);
291#endif
9ee0fd5b 292 }
12367ad0 293 vram_ptr_orig = gpu.vram = NULL;
9ee0fd5b 294
295 return ret;
6e9bdaef 296}
297
1ab64c54
GI
298void GPUwriteStatus(uint32_t data)
299{
1ab64c54
GI
300 uint32_t cmd = data >> 24;
301
fc84f618 302 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 303 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 304 return;
8dd855cd 305 gpu.regs[cmd] = data;
fc84f618 306 }
307
308 gpu.state.fb_dirty = 1;
8dd855cd 309
310 switch (cmd) {
1ab64c54 311 case 0x00:
6e9bdaef 312 do_reset();
1ab64c54 313 break;
48f3d210 314 case 0x01:
315 do_cmd_reset();
316 break;
1ab64c54 317 case 0x03:
5bbe183f 318 if (data & 1) {
61124a6d 319 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 320 gpu.state.dims_changed = 1; // for hud clearing
321 }
61124a6d
PC
322 else
323 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
324 break;
325 case 0x04:
61124a6d
PC
326 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
327 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
328 break;
329 case 0x05:
5bbe183f 330 gpu.screen.src_x = data & 0x3ff;
331 gpu.screen.src_y = (data >> 10) & 0x1ff;
3b7b0065 332 renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
9fe27e25 333 if (gpu.frameskip.set) {
334 decide_frameskip_allow(gpu.ex_regs[3]);
335 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
336 decide_frameskip();
337 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
338 }
fb4c6fba 339 }
1ab64c54 340 break;
8dd855cd 341 case 0x06:
342 gpu.screen.x1 = data & 0xfff;
343 gpu.screen.x2 = (data >> 12) & 0xfff;
344 update_width();
345 break;
1ab64c54
GI
346 case 0x07:
347 gpu.screen.y1 = data & 0x3ff;
348 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 349 update_height();
1ab64c54
GI
350 break;
351 case 0x08:
61124a6d 352 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 353 update_width();
354 update_height();
e929dec5 355 renderer_notify_res_change();
1ab64c54 356 break;
deb18d24 357 default:
358 if ((cmd & 0xf0) == 0x10)
359 get_gpu_info(data);
6e9bdaef 360 break;
1ab64c54 361 }
7890a708 362
363#ifdef GPUwriteStatus_ext
364 GPUwriteStatus_ext(data);
365#endif
1ab64c54
GI
366}
367
// Number of parameter words following each GP0 command byte
// (total packet length is 1 + cmd_lengths[cmd]).
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20 polygons
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11, // 30
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40 lines
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 50
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60 rects
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, // 70
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80 vram-to-vram
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 90
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0 cpu-to-vram
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // b0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0 vram-to-cpu
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0 env
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // f0
};
387
d30279e2
GI
// address of the 16bpp pixel at (x, y) in the 1024-pixel-wide VRAM
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// copy l halfwords from src to dst, OR-ing the mask (msb) bit into each
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
396
// Transfer one span of l halfwords between VRAM at (x, y) and mem.
// is_read: VRAM -> mem; otherwise mem -> VRAM, with optional mask bit.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);

  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);  // slow path: set mask bit per pixel
  else
    memcpy(vram, mem, l * 2);
}
408
409static int do_vram_io(uint32_t *data, int count, int is_read)
410{
411 int count_initial = count;
36da9c13 412 uint16_t msb = gpu.ex_regs[6] << 15;
d30279e2
GI
413 uint16_t *sdata = (uint16_t *)data;
414 int x = gpu.dma.x, y = gpu.dma.y;
415 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 416 int o = gpu.dma.offset;
d30279e2
GI
417 int l;
418 count *= 2; // operate in 16bpp pixels
419
c765eb86
JW
420 renderer_sync();
421
d30279e2
GI
422 if (gpu.dma.offset) {
423 l = w - gpu.dma.offset;
ddd56f6e 424 if (count < l)
d30279e2 425 l = count;
ddd56f6e 426
36da9c13 427 do_vram_line(x + o, y, sdata, l, is_read, msb);
ddd56f6e 428
429 if (o + l < w)
430 o += l;
431 else {
432 o = 0;
433 y++;
434 h--;
435 }
d30279e2
GI
436 sdata += l;
437 count -= l;
d30279e2
GI
438 }
439
440 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
441 y &= 511;
36da9c13 442 do_vram_line(x, y, sdata, w, is_read, msb);
d30279e2
GI
443 }
444
05740673 445 if (h > 0) {
446 if (count > 0) {
447 y &= 511;
36da9c13 448 do_vram_line(x, y, sdata, count, is_read, msb);
05740673 449 o = count;
450 count = 0;
451 }
d30279e2 452 }
05740673 453 else
454 finish_vram_transfer(is_read);
d30279e2
GI
455 gpu.dma.y = y;
456 gpu.dma.h = h;
ddd56f6e 457 gpu.dma.offset = o;
d30279e2 458
6e9bdaef 459 return count_initial - count / 2;
d30279e2
GI
460}
461
462static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
463{
ddd56f6e 464 if (gpu.dma.h)
465 log_anomaly("start_vram_transfer while old unfinished\n");
466
5440b88e 467 gpu.dma.x = pos_word & 0x3ff;
468 gpu.dma.y = (pos_word >> 16) & 0x1ff;
48f3d210 469 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
470 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
d30279e2 471 gpu.dma.offset = 0;
05740673 472 gpu.dma.is_read = is_read;
473 gpu.dma_start = gpu.dma;
d30279e2 474
9e146206 475 renderer_flush_queues();
476 if (is_read) {
61124a6d 477 gpu.status |= PSX_GPU_STATUS_IMG;
9e146206 478 // XXX: wrong for width 1
495d603c 479 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
5440b88e 480 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
9e146206 481 }
d30279e2 482
6e9bdaef 483 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
484 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
485}
486
05740673 487static void finish_vram_transfer(int is_read)
488{
489 if (is_read)
61124a6d 490 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 491 else {
492 gpu.state.fb_dirty = 1;
05740673 493 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 494 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 495 }
05740673 496}
497
36da9c13 498static void do_vram_copy(const uint32_t *params)
499{
500 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
501 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
502 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
503 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
504 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
505 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
506 uint16_t msb = gpu.ex_regs[6] << 15;
507 uint16_t lbuf[128];
508 uint32_t x, y;
509
510 if (sx == dx && sy == dy && msb == 0)
511 return;
512
513 renderer_flush_queues();
514
515 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
516 {
517 for (y = 0; y < h; y++)
518 {
519 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
520 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
521 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
522 {
523 uint32_t x1, w1 = w - x;
524 if (w1 > ARRAY_SIZE(lbuf))
525 w1 = ARRAY_SIZE(lbuf);
526 for (x1 = 0; x1 < w1; x1++)
527 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
528 for (x1 = 0; x1 < w1; x1++)
529 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
530 }
531 }
532 }
533 else
534 {
535 uint32_t sy1 = sy, dy1 = dy;
536 for (y = 0; y < h; y++, sy1++, dy1++)
537 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
538 }
539
540 renderer_update_caches(dx, dy, w, h, 0);
541}
542
b243416b 543static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
544{
97e07db9 545 int cmd = 0, pos = 0, len, dummy, v;
b243416b 546 int skip = 1;
547
fbb4bfff 548 gpu.frameskip.pending_fill[0] = 0;
549
b243416b 550 while (pos < count && skip) {
551 uint32_t *list = data + pos;
db215a72 552 cmd = LE32TOH(list[0]) >> 24;
b243416b 553 len = 1 + cmd_lengths[cmd];
554
97e07db9 555 switch (cmd) {
556 case 0x02:
db215a72 557 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 558 // clearing something large, don't skip
559 do_cmd_list(list, 3, &dummy);
560 else
561 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
562 break;
563 case 0x24 ... 0x27:
564 case 0x2c ... 0x2f:
565 case 0x34 ... 0x37:
566 case 0x3c ... 0x3f:
567 gpu.ex_regs[1] &= ~0x1ff;
db215a72 568 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 569 break;
570 case 0x48 ... 0x4F:
571 for (v = 3; pos + v < count; v++)
572 {
db215a72 573 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 574 break;
575 }
576 len += v - 3;
577 break;
578 case 0x58 ... 0x5F:
579 for (v = 4; pos + v < count; v += 2)
580 {
db215a72 581 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 582 break;
583 }
584 len += v - 4;
585 break;
586 default:
587 if (cmd == 0xe3)
db215a72 588 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 589 if ((cmd & 0xf8) == 0xe0)
db215a72 590 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 591 break;
b243416b 592 }
b243416b 593
594 if (pos + len > count) {
595 cmd = -1;
596 break; // incomplete cmd
597 }
36da9c13 598 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 599 break; // image i/o
97e07db9 600
b243416b 601 pos += len;
602 }
603
604 renderer_sync_ecmds(gpu.ex_regs);
605 *last_cmd = cmd;
606 return pos;
607}
608
48f3d210 609static noinline int do_cmd_buffer(uint32_t *data, int count)
d30279e2 610{
b243416b 611 int cmd, pos;
612 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 613 int vram_dirty = 0;
d30279e2 614
d30279e2 615 // process buffer
b243416b 616 for (pos = 0; pos < count; )
d30279e2 617 {
b243416b 618 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
619 vram_dirty = 1;
d30279e2 620 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 621 if (pos == count)
622 break;
d30279e2
GI
623 }
624
db215a72 625 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 626 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
627 if (unlikely((pos+2) >= count)) {
628 // incomplete vram write/read cmd, can't consume yet
629 cmd = -1;
630 break;
631 }
632
d30279e2 633 // consume vram write/read cmd
db215a72 634 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 635 pos += 3;
636 continue;
d30279e2 637 }
36da9c13 638 else if ((cmd & 0xe0) == 0x80) {
639 if (unlikely((pos+3) >= count)) {
640 cmd = -1; // incomplete cmd, can't consume yet
641 break;
642 }
643 do_vram_copy(data + pos + 1);
b30fba56 644 vram_dirty = 1;
36da9c13 645 pos += 4;
646 continue;
647 }
b243416b 648
1e07f71d 649 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 650 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 651 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
652 else {
653 pos += do_cmd_list(data + pos, count - pos, &cmd);
654 vram_dirty = 1;
655 }
656
657 if (cmd == -1)
658 // incomplete cmd
ddd56f6e 659 break;
d30279e2 660 }
ddd56f6e 661
61124a6d
PC
662 gpu.status &= ~0x1fff;
663 gpu.status |= gpu.ex_regs[1] & 0x7ff;
664 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 665
fc84f618 666 gpu.state.fb_dirty |= vram_dirty;
667
b243416b 668 if (old_e3 != gpu.ex_regs[3])
669 decide_frameskip_allow(gpu.ex_regs[3]);
670
ddd56f6e 671 return count - pos;
d30279e2
GI
672}
673
5440b88e 674static void flush_cmd_buffer(void)
d30279e2 675{
48f3d210 676 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
d30279e2
GI
677 if (left > 0)
678 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
679 gpu.cmd_len = left;
1ab64c54
GI
680}
681
682void GPUwriteDataMem(uint32_t *mem, int count)
683{
d30279e2
GI
684 int left;
685
56f08d83 686 log_io("gpu_dma_write %p %d\n", mem, count);
687
d30279e2
GI
688 if (unlikely(gpu.cmd_len > 0))
689 flush_cmd_buffer();
56f08d83 690
48f3d210 691 left = do_cmd_buffer(mem, count);
d30279e2 692 if (left)
56f08d83 693 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
694}
695
d30279e2 696void GPUwriteData(uint32_t data)
1ab64c54 697{
56f08d83 698 log_io("gpu_write %08x\n", data);
db215a72 699 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
700 if (gpu.cmd_len >= CMD_BUFFER_LEN)
701 flush_cmd_buffer();
1ab64c54
GI
702}
703
fae38d7a 704long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
1ab64c54 705{
09159d99 706 uint32_t addr, *list, ld_addr = 0;
ddd56f6e 707 int len, left, count;
1c72b1c2 708 long cpu_cycles = 0;
d30279e2 709
8f5f2dd5 710 preload(rambase + (start_addr & 0x1fffff) / 4);
711
d30279e2
GI
712 if (unlikely(gpu.cmd_len > 0))
713 flush_cmd_buffer();
714
56f08d83 715 log_io("gpu_dma_chain\n");
ddd56f6e 716 addr = start_addr & 0xffffff;
09159d99 717 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 718 {
ddd56f6e 719 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
720 len = LE32TOH(list[0]) >> 24;
721 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 722 preload(rambase + (addr & 0x1fffff) / 4);
723
1c72b1c2 724 cpu_cycles += 10;
725 if (len > 0)
726 cpu_cycles += 5 + len;
deb18d24 727
a4e249a1 728 log_io(".chain %08lx #%d+%d\n",
729 (long)(list - rambase) * 4, len, gpu.cmd_len);
730 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 731 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
732 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
733 gpu.cmd_len = 0;
734 }
a4e249a1 735 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
736 gpu.cmd_len += len;
737 flush_cmd_buffer();
738 continue;
739 }
ddd56f6e 740
56f08d83 741 if (len) {
48f3d210 742 left = do_cmd_buffer(list + 1, len);
a4e249a1 743 if (left) {
744 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
745 gpu.cmd_len = left;
746 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
747 }
56f08d83 748 }
ddd56f6e 749
fae38d7a 750 if (progress_addr) {
751 *progress_addr = addr;
752 break;
753 }
09159d99 754 #define LD_THRESHOLD (8*1024)
755 if (count >= LD_THRESHOLD) {
756 if (count == LD_THRESHOLD) {
757 ld_addr = addr;
758 continue;
759 }
760
761 // loop detection marker
762 // (bit23 set causes DMA error on real machine, so
763 // unlikely to be ever set by the game)
db215a72 764 list[0] |= HTOLE32(0x800000);
09159d99 765 }
ddd56f6e 766 }
767
09159d99 768 if (ld_addr != 0) {
769 // remove loop detection markers
770 count -= LD_THRESHOLD + 2;
771 addr = ld_addr & 0x1fffff;
772 while (count-- > 0) {
773 list = rambase + addr / 4;
db215a72
PC
774 addr = LE32TOH(list[0]) & 0x1fffff;
775 list[0] &= HTOLE32(~0x800000);
09159d99 776 }
d30279e2 777 }
09159d99 778
3ece2f0c 779 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 780 gpu.state.last_list.hcnt = *gpu.state.hcnt;
1c72b1c2 781 gpu.state.last_list.cycles = cpu_cycles;
deb18d24 782 gpu.state.last_list.addr = start_addr;
783
1c72b1c2 784 return cpu_cycles;
1ab64c54
GI
785}
786
d30279e2
GI
787void GPUreadDataMem(uint32_t *mem, int count)
788{
56f08d83 789 log_io("gpu_dma_read %p %d\n", mem, count);
790
d30279e2
GI
791 if (unlikely(gpu.cmd_len > 0))
792 flush_cmd_buffer();
56f08d83 793
d30279e2
GI
794 if (gpu.dma.h)
795 do_vram_io(mem, count, 1);
796}
797
798uint32_t GPUreadData(void)
799{
9e146206 800 uint32_t ret;
56f08d83 801
802 if (unlikely(gpu.cmd_len > 0))
803 flush_cmd_buffer();
804
9e146206 805 ret = gpu.gp0;
495d603c
PC
806 if (gpu.dma.h) {
807 ret = HTOLE32(ret);
9e146206 808 do_vram_io(&ret, 1, 1);
495d603c
PC
809 ret = LE32TOH(ret);
810 }
56f08d83 811
9e146206 812 log_io("gpu_read %08x\n", ret);
813 return ret;
d30279e2
GI
814}
815
816uint32_t GPUreadStatus(void)
817{
ddd56f6e 818 uint32_t ret;
56f08d83 819
d30279e2
GI
820 if (unlikely(gpu.cmd_len > 0))
821 flush_cmd_buffer();
822
61124a6d 823 ret = gpu.status;
ddd56f6e 824 log_io("gpu_read_status %08x\n", ret);
825 return ret;
d30279e2
GI
826}
827
// Savestate layout shared with the emulator core; field order and sizes
// are part of the savestate format and must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 835
096ec49b 836long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 837{
fc84f618 838 int i;
839
1ab64c54
GI
840 switch (type) {
841 case 1: // save
d30279e2
GI
842 if (gpu.cmd_len > 0)
843 flush_cmd_buffer();
c765eb86
JW
844
845 renderer_sync();
9ee0fd5b 846 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 847 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 848 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 849 freeze->ulStatus = gpu.status;
1ab64c54
GI
850 break;
851 case 0: // load
c765eb86 852 renderer_sync();
9ee0fd5b 853 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 854 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 855 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 856 gpu.status = freeze->ulStatus;
3d47ef17 857 gpu.cmd_len = 0;
fc84f618 858 for (i = 8; i > 0; i--) {
859 gpu.regs[i] ^= 1; // avoid reg change detection
860 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
861 }
5b745e5b 862 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 863 renderer_update_caches(0, 0, 1024, 512, 1);
1ab64c54
GI
864 break;
865 }
866
867 return 1;
868}
869
5440b88e 870void GPUupdateLace(void)
871{
872 if (gpu.cmd_len > 0)
873 flush_cmd_buffer();
874 renderer_flush_queues();
875
7a20a6d0 876#ifndef RAW_FB_DISPLAY
61124a6d 877 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 878 if (!gpu.state.blanked) {
879 vout_blank();
880 gpu.state.blanked = 1;
881 gpu.state.fb_dirty = 1;
882 }
883 return;
884 }
885
c765eb86
JW
886 renderer_notify_update_lace(0);
887
aafcb4dd 888 if (!gpu.state.fb_dirty)
5440b88e 889 return;
7a20a6d0 890#endif
5440b88e 891
892 if (gpu.frameskip.set) {
893 if (!gpu.frameskip.frame_ready) {
894 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
895 return;
896 gpu.frameskip.active = 0;
897 }
898 gpu.frameskip.frame_ready = 0;
899 }
900
901 vout_update();
3b7b0065 902 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
903 renderer_update_caches(0, 0, 1024, 512, 1);
904 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 905 gpu.state.fb_dirty = 0;
aafcb4dd 906 gpu.state.blanked = 0;
c765eb86 907 renderer_notify_update_lace(1);
5440b88e 908}
909
72e5023f 910void GPUvBlank(int is_vblank, int lcf)
911{
5440b88e 912 int interlace = gpu.state.allow_interlace
61124a6d
PC
913 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
914 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 915 // interlace doesn't look nice on progressive displays,
916 // so we have this "auto" mode here for games that don't read vram
917 if (gpu.state.allow_interlace == 2
918 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
919 {
920 interlace = 0;
921 }
922 if (interlace || interlace != gpu.state.old_interlace) {
923 gpu.state.old_interlace = interlace;
924
925 if (gpu.cmd_len > 0)
926 flush_cmd_buffer();
927 renderer_flush_queues();
928 renderer_set_interlace(interlace, !lcf);
929 }
930}
931
80bc1426 932void GPUgetScreenInfo(int *y, int *base_hres)
933{
934 *y = gpu.screen.y;
935 *base_hres = gpu.screen.vres;
936 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
937 *base_hres >>= 1;
938}
939
5440b88e 940#include "../../frontend/plugin_lib.h"
941
942void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
943{
944 gpu.frameskip.set = cbs->frameskip;
945 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 946 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 947 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 948 gpu.frameskip.active = 0;
949 gpu.frameskip.frame_ready = 1;
950 gpu.state.hcnt = cbs->gpu_hcnt;
951 gpu.state.frame_count = cbs->gpu_frame_count;
952 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 953 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
5bbe183f 954 if (gpu.state.screen_centering_type != cbs->screen_centering_type
955 || gpu.state.screen_centering_x != cbs->screen_centering_x
956 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
957 gpu.state.screen_centering_type = cbs->screen_centering_type;
958 gpu.state.screen_centering_x = cbs->screen_centering_x;
959 gpu.state.screen_centering_y = cbs->screen_centering_y;
960 update_width();
961 update_height();
962 }
5440b88e 963
9ee0fd5b 964 gpu.mmap = cbs->mmap;
965 gpu.munmap = cbs->munmap;
966
967 // delayed vram mmap
968 if (gpu.vram == NULL)
969 map_vram();
970
5440b88e 971 if (cbs->pl_vout_set_raw_vram)
972 cbs->pl_vout_set_raw_vram(gpu.vram);
973 renderer_set_config(cbs);
974 vout_set_config(cbs);
72e5023f 975}
976
1ab64c54 977// vim:shiftwidth=2:expandtab