gpu: start doing some basic gpu timing
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) GraÅžvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
90ac6fed 15#include "gpu_timing.h"
abf09485 16#include "../../libpcsxcore/gpu.h" // meh
44e76f8a 17#include "../../frontend/plugin_lib.h"
1ab64c54 18
44e76f8a 19#ifndef ARRAY_SIZE
1ab64c54 20#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
44e76f8a 21#endif
8f5f2dd5 22#ifdef __GNUC__
d30279e2 23#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 24#define preload __builtin_prefetch
8dd855cd 25#define noinline __attribute__((noinline))
8f5f2dd5 26#else
27#define unlikely(x)
28#define preload(...)
29#define noinline
8f5f2dd5 30#endif
1ab64c54 31
deb18d24 32//#define log_io gpu_log
56f08d83 33#define log_io(...)
56f08d83 34
9ee0fd5b 35struct psx_gpu gpu;
1ab64c54 36
90ac6fed 37static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
05740673 38static void finish_vram_transfer(int is_read);
48f3d210 39
// Abort the command stream (GP1 cmd 0x01): drain anything already
// buffered, then drop the buffer and any in-flight vram transfer.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  // process what was buffered first so ex_regs/renderer stay consistent
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  // an interrupted vram transfer must still notify the renderer
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
51
// Full GPU reset (GP1 cmd 0x00): clear control regs, restore the
// power-on status word and the default 256x240 screen setup.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs shadow the e1..e6 rendering attributes; reset each to
  // its own command id (0xeN << 24) with a zero payload
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000; // power-on status value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
70
// Recompute horizontal display geometry (gpu.screen.x/w/hres) from the
// status word's video-mode bits and the x1/x2 display range registers,
// applying the configured screen-centering policy.
static noinline void update_width(void)
{
  // hres and dot-clock divider per status bits 16-18
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1; // display width in dotclocks
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
118
// Recompute vertical display geometry (gpu.screen.y/h/vres) from the
// status word and y1/y2 display range, with centering policy applied.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;  // display height in lines
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height mode doubles everything vertical
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
159
// Advance the frameskip state machine once per display flip: either
// mark the current frame ready for output or count it as skipped,
// then decide whether the next frame should be skipped.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // frontend advice (auto-frameskip) or a fixed skip ratio can
  // activate skipping; otherwise deactivate
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill command deferred while skipping must be executed once we
  // stop skipping, or stale pixels would remain on screen
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
182
// Decide whether skipping is allowed given the current draw area
// (e3 register). Returns and stores the decision in gpu.frameskip.allow.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
194
5fe1a2b1 195static void flush_cmd_buffer(void);
196
6e9bdaef 197static noinline void get_gpu_info(uint32_t data)
198{
5fe1a2b1 199 if (unlikely(gpu.cmd_len > 0))
200 flush_cmd_buffer();
6e9bdaef 201 switch (data & 0x0f) {
202 case 0x02:
203 case 0x03:
204 case 0x04:
6e9bdaef 205 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
206 break;
d04b8924 207 case 0x05:
208 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 209 break;
210 case 0x07:
211 gpu.gp0 = 2;
212 break;
213 default:
d04b8924 214 // gpu.gp0 unchanged
6e9bdaef 215 break;
216 }
217}
218
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

// Map the vram buffer through the frontend-provided allocator.
// Returns 0 on success, -1 on failure (emulation continues but will
// likely crash on vram access).
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // NOTE(review): pointer is advanced 4K into the mapping,
    // presumably so small negative offsets stay in bounds - confirm
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
234
// Plugin entry point: initialize vout/renderer backends and internal
// state. Returns 0 on success, nonzero if any backend failed.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point counters at a dummy until the frontend provides real ones
  // via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // gpu.mmap may not be set yet; mapping is retried in
  // GPUrearmedCallbacks() in that case
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
255
// Plugin exit point: tear down backends and unmap vram.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    // undo the offset applied in map_vram() before unmapping
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
270
// Handle a GP1 (control) register write. The high byte selects the
// command, the low 24 bits are its payload.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore repeated writes of the same value, except for commands
    // 0, 1 (resets) and 5 (display address, handled below)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00: // full reset
    do_reset();
    break;
  case 0x01: // command buffer reset
    do_cmd_reset();
    break;
  case 0x03: // display blanking on/off
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04: // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05: // display start address (scanout position in vram)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      // a scanout change is treated as a flip for frameskip purposes,
      // but only once per emulated frame
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06: // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07: // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08: // display mode: map payload into status bits 16-22
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10) // 0x10..0x1f: get GPU info
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
344
// Number of additional 32-bit words each GP0 command carries after the
// command word itself (total packet length is 1 + cmd_lengths[cmd]).
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
364
// address of the 16bpp pixel at (x, y); vram is 1024 halfwords per line
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
366
// Copy l halfwords from src to dst, ORing the given value into each
// (used to force the mask bit while writing pixels to vram).
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
373
// Transfer one span of l halfwords between vram at (x, y) and mem.
// Reads copy vram->mem; writes copy mem->vram, forcing the mask bit
// into every pixel when msb is set.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
385
// Stream up to 'count' 32-bit words of the vram transfer currently in
// progress (gpu.dma), line by line. Returns the number of input words
// consumed; transfer progress is saved back into gpu.dma.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit from the e6 reg
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset; // progress within the current line
  int l;
  count *= 2; // operate in 16bpp pixels

  // first finish any partially-transferred line from the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // vram wraps vertically
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // transfer incomplete; stash a trailing partial line if any
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
436
// Begin a GP0 vram write (0xa0) or read (0xc0) transfer; decodes the
// position/size words and primes gpu.dma for do_vram_io().
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size fields decode as 1..1024 x 1..512 (0 means maximum)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma; // kept for finish_vram_transfer()

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // pre-latch the first word so an immediate GPUREAD works
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
463
// Complete the current vram transfer: clear the "image" status bit for
// reads, invalidate renderer caches for writes, and notify the frontend.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    // use dma_start, not dma: dma's fields were consumed during the transfer
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
476
// GP0 0x80 vram-to-vram copy. params[0]=src pos, params[1]=dst pos,
// params[2]=size. Also accumulates the emulated cycle cost.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15; // forced mask bit, if enabled
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  // copy onto itself with no mask bit to set is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // slow path: overlapping spans, horizontal wrap, or mask bit needed -
  // stage each chunk through lbuf with per-pixel wrapping
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: straight memcpy per line, wrapping only vertically
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
522
// Walk a GP0 command list while frameskipping: rendering commands are
// discarded, but state-affecting ones (fills, texture page, e-regs) are
// still tracked so the GPU state stays consistent. Stops when skipping
// becomes disallowed or an image i/o command is hit. Returns words
// consumed; *last_cmd gets the last command seen (-1 if incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02: // fill rectangle
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy);
      else
        // small fill: defer until frameskip deactivates
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27: // textured polygons: keep texpage in e1
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F: // poly-lines: scan for the 0x5xxx5xxx terminator
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F: // shaded poly-lines: two words per vertex
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3) // draw area change may end the skip window
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
588
// Main GP0 command dispatcher: feeds an in-progress vram transfer,
// starts new transfers/copies, and hands rendering commands to the
// renderer (or the skip path while frameskipping). Returns the number
// of words NOT consumed (incomplete trailing command, if any).
static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      // vram write (0xa0) / read (0xc0): needs pos+size words
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      // vram-to-vram copy: needs 3 parameter words
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the e1/e6 shadow regs back into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // re-evaluate frameskip if the draw area (e3) changed
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
653
// Run buffered GP0 words through do_cmd_buffer(); any incomplete
// trailing command is moved to the front of the buffer for next time.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed - tell the frontend drawing started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
666
// Bulk GP0 write (DMA block transfer): process 'count' words directly,
// discarding any trailing incomplete command with a logged anomaly.
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  // keep command ordering: drain the single-word buffer first
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
680
// Single-word GP0 write: buffered until the buffer fills (commands can
// span many words), then flushed as a batch.
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
688
// Walk a GP0 DMA linked list starting at start_addr. Each node is a
// header word (next address in low 24 bits, payload length in the high
// byte) followed by payload words. Returns the emulated cycle cost.
// If progress_addr is non-NULL only one node is processed and the next
// address is reported back. Endless lists are handled by temporarily
// marking visited nodes with bit 23 after LD_THRESHOLD nodes.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    // leftover words from the previous node must be prepended so
    // commands split across nodes are reassembled
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        // buffer the incomplete tail for the next node
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
771
d30279e2
GI
// Bulk GPUREAD (DMA): stream 'count' words of the active vram read
// transfer into mem. No-op if no transfer is in progress.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
782
// Single-word GPUREAD: returns the next word of an active vram read
// transfer, or the value latched in gp0 (e.g. by get_gpu_info).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data; convert around it
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
800
801uint32_t GPUreadStatus(void)
802{
ddd56f6e 803 uint32_t ret;
56f08d83 804
d30279e2
GI
805 if (unlikely(gpu.cmd_len > 0))
806 flush_cmd_buffer();
807
f23b103c 808 ret = gpu.status;
ddd56f6e 809 log_io("gpu_read_status %08x\n", ret);
810 return ret;
d30279e2
GI
811}
812
// Savestate image exchanged with the core; the layout is a fixed ABI
// shared with other GPU plugins, so it must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 820
// Save (type 1) or restore (type 0) the complete GPU state for
// savestates. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stored in the 0xe0.. slots of the control array
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 8..1 to rebuild derived screen state; the
    // xor defeats the same-value write filter in GPUwriteStatus()
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
851
// Per-vsync hook: present a frame if the framebuffer changed, honoring
// blanking and the frameskip state.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank the output once, then do nothing until unblanked
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // force a frame out if nothing was flipped for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement toggling invalidates everything once
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
888
// Vblank hook: work out whether interlaced rendering should be used and
// pass the field (lcf) to the renderer when the mode changes.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
910
// Report the current display y offset and base resolution to the frontend.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  // NOTE(review): despite the parameter name, this returns the
  // *vertical* resolution (vres), halved in double-height mode --
  // confirm against callers whether vres or hres is intended here
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
918
// Accept frontend configuration/callbacks: frameskip settings, counters,
// memory mapping hooks, centering options. Also performs the delayed
// vram mapping if GPUinit() ran before the mmap hook was available.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute screen geometry only when centering settings changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
953
1ab64c54 954// vim:shiftwidth=2:expandtab