gpu: improve timings of clipped sprites
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) GraÅžvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
90ac6fed 15#include "gpu_timing.h"
abf09485 16#include "../../libpcsxcore/gpu.h" // meh
44e76f8a 17#include "../../frontend/plugin_lib.h"
1ab64c54 18
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// Fix: unlikely() must still evaluate to its argument on non-GNU
// compilers; an empty expansion would turn `if (unlikely(e))` into
// the syntax error `if ()`.
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 31
deb18d24 32//#define log_io gpu_log
56f08d83 33#define log_io(...)
56f08d83 34
9ee0fd5b 35struct psx_gpu gpu;
1ab64c54 36
90ac6fed 37static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
05740673 38static void finish_vram_transfer(int is_read);
48f3d210 39
// GP1(01h): reset the command buffer. Drains any buffered commands first
// so they are not silently lost, then aborts an in-flight VRAM transfer.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  // an unfinished DMA image transfer must be completed (caches updated,
  // state-change callback fired) before its row counter is cleared
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
51
// GP1(00h): full GPU reset. Restores registers, status and screen
// geometry to power-on defaults and informs the renderer.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs mirror the e0h-e7h draw-environment commands; seed each
  // with its own command byte in the top bits
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // power-on GPUSTAT value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
70
// Recompute horizontal screen geometry (x offset, visible width, output
// hres) from the display range registers and the centering preference.
static noinline void update_width(void)
{
  // hres/divider per GPUSTAT bits 16-18 (368 is the "width2" mode)
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // display range in GPU clocks
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
118
// Recompute vertical screen geometry (y offset, visible height, output
// vres) from the display range registers and the centering preference.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlace doubles everything vertically
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
159
// Decide whether the next frame should be skipped, based on the fixed
// skip setting or the frontend's dynamic advice. Called once per flip.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // frontend advice (auto-skip) takes priority over the fixed setting
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill that was deferred while skipping must be executed now that
  // rendering resumes, or the frame would show stale background
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
182
b243416b 183static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 184{
185 // no frameskip if it decides to draw to display area,
186 // but not for interlace since it'll most likely always do that
187 uint32_t x = cmd_e3 & 0x3ff;
188 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
f23b103c 189 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
308c6e67 190 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
191 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 192 return gpu.frameskip.allow;
9fe27e25 193}
194
5fe1a2b1 195static void flush_cmd_buffer(void);
196
// GP1(10h): read internal GPU register into GP0 (draw area/offset etc.).
static noinline void get_gpu_info(uint32_t data)
{
  // buffered commands may still modify ex_regs; flush first
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:  // texture window
  case 0x03:  // draw area top-left
  case 0x04:  // draw area bottom-right
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:  // draw offset (22 significant bits)
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:  // GPU version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
218
9ee0fd5b 219// double, for overdraw guard
220#define VRAM_SIZE (1024 * 512 * 2 * 2)
221
222static int map_vram(void)
223{
224 gpu.vram = gpu.mmap(VRAM_SIZE);
225 if (gpu.vram != NULL) {
226 gpu.vram += 4096 / 2;
227 return 0;
228 }
229 else {
230 fprintf(stderr, "could not map vram, expect crashes\n");
231 return -1;
232 }
233}
234
// Plugin entry: initialize video output, renderer and GPU state.
// Returns 0 on success, nonzero on failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point counters at a harmless zero until the frontend registers real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // mmap callback may not be set yet; GPUrearmedCallbacks() retries later
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
255
// Plugin entry: tear down renderer, video output and VRAM mapping.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    // undo the guard-area offset applied in map_vram() before unmapping
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
270
1ab64c54
GI
// Plugin entry: GP1 control register write. The command is in the top
// byte; most commands update cached regs and screen geometry.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip no-op writes, except reset/cmd-reset (0,1) and display
    // address (5) which have side effects even when unchanged
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:  // reset GPU
    do_reset();
    break;
  case 0x01:  // reset command buffer
    do_cmd_reset();
    break;
  case 0x03:  // display enable/blank
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:  // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:  // display start address (scanout origin in VRAM)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // treat a scanout change as a frame flip, at most once per frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:  // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:  // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:  // display mode (res, PAL/NTSC, interlace, color depth)
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)  // GP1(10h): info query
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
344
// Number of additional parameter words for each GP0 command byte
// (total command length is 1 + cmd_lengths[cmd]). Zero entries are
// commands with no parameters or variable-length/unhandled ones.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
364
d30279e2
GI
365#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
366
// Copy l halfwords from src to dst, ORing the mask bit (msb) into each
// pixel (used to force the semi-transparency/mask bit during writes).
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
373
// Transfer one horizontal VRAM span of l pixels at (x, y).
// is_read: VRAM -> mem; otherwise mem -> VRAM, with the mask bit ORed
// in via cpy_msb() when msb is nonzero.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
                                int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read)) {
    memcpy(mem, vram, l * 2);
    return;
  }
  if (unlikely(msb)) {
    cpy_msb(vram, mem, l, msb);
    return;
  }
  memcpy(vram, mem, l * 2);
}
385
// Feed `count` words of DMA data into (or out of) the active VRAM
// transfer rectangle, resuming from gpu.dma state and saving progress
// back for the next call. Returns the number of words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // mask-set bit from e6 command
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;  // position within the current row
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish a row that was left partially transferred last time
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // transfer not finished; stash any partial row for next call
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
436
// Begin a GP0 a0h/c0h VRAM image write/read: decode the destination
// rectangle into gpu.dma and prime GP0 for reads.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size of 0 means the maximum (1024/512), per hardware behavior
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;  // keep original rect for finish_vram_transfer()

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
463
// Complete a VRAM transfer: clear the read flag, or (for writes) mark
// the framebuffer dirty and invalidate renderer caches over the rect.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    // use the saved start rect; gpu.dma has been consumed by now
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
476
// GP0(80h): VRAM-to-VRAM rectangle copy. Accounts cycles, then copies
// with wrap-around on both axes; uses a line buffer when the regions
// could overlap horizontally or the mask bit must be set.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  // copying onto itself with no mask change is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // slow path: forward-overlapping rows, x wrap-around, or mask bit set
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        // stage through lbuf so an overlapping dst can't clobber src
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: rows don't overlap forward, straight memcpy per row
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
522
// Walk the command list without rendering (frameskip active), but still
// track state that affects later frames: fills, texture pages, e-regs.
// Stops when skipping becomes disallowed or an image i/o cmd is hit.
// Returns words consumed; *last_cmd is the last cmd seen (-1 if cut off).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:  // fill rectangle
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy);
      else
        // small fill: defer until skipping ends (see decide_frameskip)
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:  // textured polygons: keep texpage reg in sync
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:  // poly-line: scan for 5xxx5xxx terminator
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:  // shaded poly-line: 2 words per vertex
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)  // draw area change may disallow skipping
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
588
// Core GP0 command processor: dispatch a buffer of words to the active
// VRAM transfer, image i/o setup, VRAM copy, renderer, or skip path.
// Accumulates timing into *cpu_cycles. Returns words NOT consumed
// (nonzero when the buffer ends mid-command).
static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {  // vram-to-vram copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror renderer-side regs into the live GPUSTAT bits
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
658
// Drain the staging command buffer through do_cmd_buffer(), keeping any
// trailing incomplete command for the next write.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    // shift the unconsumed tail to the front of the buffer
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // some commands were consumed; notify frontend of drawing activity
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
671
// Plugin entry: block write of GP0 data (DMA block mode). Any leftover
// words (incomplete command) are dropped with a log, since the caller
// provides no buffer to carry them over.
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
685
d30279e2 686void GPUwriteData(uint32_t data)
1ab64c54 687{
56f08d83 688 log_io("gpu_write %08x\n", data);
89df80c6 689 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
690 if (gpu.cmd_len >= CMD_BUFFER_LEN)
691 flush_cmd_buffer();
1ab64c54
GI
692}
693
// Plugin entry: walk a linked-list DMA chain starting at start_addr.
// Each node is a header word (len<<24 | next_addr) followed by len
// command words. Detects list loops by temporarily marking visited
// nodes with bit 23. Returns emulated cpu cycles spent.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit23 = terminator
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d %u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles);
    if (unlikely(gpu.cmd_len > 0)) {
      // previous node left an incomplete command; append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        // stash the incomplete tail for the next node / write
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      // caller wants incremental processing: stop after one node
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
776
d30279e2
GI
// Plugin entry: block read of VRAM transfer data (DMA read mode).
// Only acts when a c0h read transfer is active (gpu.dma.h != 0).
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
787
// Plugin entry: single-word GPUREAD. Returns the next VRAM transfer
// word when a read is active, otherwise the last GP1 info result (gp0).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io operates on little-endian data, convert around it
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
805
806uint32_t GPUreadStatus(void)
807{
ddd56f6e 808 uint32_t ret;
56f08d83 809
d30279e2
GI
810 if (unlikely(gpu.cmd_len > 0))
811 flush_cmd_buffer();
812
f23b103c 813 ret = gpu.status;
ddd56f6e 814 log_io("gpu_read_status %08x\n", ret);
815 return ret;
d30279e2
GI
816}
817
// Savestate image exchanged with the main emulator via GPUfreeze().
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];  // current VRam image (full 2 MB for ZN)
};
1ab64c54 825
// Plugin entry: save (type==1) or load (type==0) GPU state for
// savestates. Always returns 1 (success).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stored in the e0h+ slots of the control array
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 8..1 so derived state (screen dims etc.)
    // is rebuilt; xor defeats the no-change shortcut in GPUwriteStatus
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
856
// Plugin entry: called once per emulated vsync. Presents the frame via
// vout_update() unless blanked, unchanged, or skipped this frame.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // safety valve: force a frame out if none was ready for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement toggled on: renderer caches cover different data now
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
893
// Plugin entry: vblank notification; decides whether interlaced
// rendering should be used and passes the current field to the renderer.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
915
// Plugin entry: report display y offset and base vertical resolution
// (halved when double-height/interlace is active).
// NOTE(review): despite the name, *base_hres is filled from the
// vertical resolution (gpu.screen.vres) — confirm intent with callers.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
923
// Plugin entry: receive frontend configuration and callbacks; applies
// frameskip/centering settings and performs the deferred VRAM mmap.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute geometry only when a centering setting actually changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap (mmap callback wasn't available at GPUinit time)
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
958
1ab64c54 959// vim:shiftwidth=2:expandtab