gpulib: maybe better loop detection
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
90ac6fed 15#include "gpu_timing.h"
abf09485 16#include "../../libpcsxcore/gpu.h" // meh
44e76f8a 17#include "../../frontend/plugin_lib.h"
1ab64c54 18
44e76f8a 19#ifndef ARRAY_SIZE
1ab64c54 20#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
44e76f8a 21#endif
8f5f2dd5 22#ifdef __GNUC__
d30279e2 23#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 24#define preload __builtin_prefetch
8dd855cd 25#define noinline __attribute__((noinline))
8f5f2dd5 26#else
27#define unlikely(x)
28#define preload(...)
29#define noinline
8f5f2dd5 30#endif
1ab64c54 31
deb18d24 32//#define log_io gpu_log
56f08d83 33#define log_io(...)
56f08d83 34
9ee0fd5b 35struct psx_gpu gpu;
1ab64c54 36
8412166f 37static noinline int do_cmd_buffer(uint32_t *data, int count,
38 int *cycles_sum, int *cycles_last);
05740673 39static void finish_vram_transfer(int is_read);
48f3d210 40
// GP1(0x01)-style command reset: drain any buffered GP0 words through
// do_cmd_buffer (cycle counts are discarded into a dummy), then force-end
// a partially-done VRAM transfer so gpu.dma state can't be left dangling.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
52
6e9bdaef 53static noinline void do_reset(void)
1ab64c54 54{
7841712d 55 unsigned int i;
48f3d210 56
57 do_cmd_reset();
58
6e9bdaef 59 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 60 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
61 gpu.ex_regs[i] = (0xe0 + i) << 24;
f23b103c 62 gpu.status = 0x14802000;
6e9bdaef 63 gpu.gp0 = 0;
fc84f618 64 gpu.regs[3] = 1;
6e9bdaef 65 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 66 gpu.screen.vres = gpu.screen.h = 240;
308c6e67 67 gpu.screen.x = gpu.screen.y = 0;
5fe1a2b1 68 renderer_sync_ecmds(gpu.ex_regs);
0b4038f8 69 renderer_notify_res_change();
1ab64c54
GI
70}
71
// Recompute gpu.screen.x/w/hres from the GP1(0x06) horizontal display range
// and the status dotclock bits, applying the configured screen-centering
// policy. Called whenever x1/x2 or the display mode change.
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  // (0 here presumably means C_AUTO -- TODO confirm against plugin_lib.h)
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
119
// Recompute gpu.screen.y/h/vres from the GP1(0x07) vertical display range,
// PAL/NTSC mode and the double-height (interlace) bit, applying the
// configured centering policy. Mirrors update_width() for the y axis.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlace doubles lines, so double everything derived from them
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
160
// Decide whether the upcoming frame should be skipped. Called once per
// displayed frame (on GP1(0x05) display-address flips). Runs up to
// frameskip.set consecutive skips, or follows the frontend's *advice
// pointer; when a frame will actually be drawn, replay any fill command
// that was deferred while skipping (see do_cmd_list_skip case 0x02).
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
183
// Given a GP0(0xe3) draw-area-top-left word, decide if skipping is safe:
// skipping is allowed when the draw area lies outside the currently
// scanned-out region (or always under interlace). Returns and stores
// the decision in gpu.frameskip.allow.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // unsigned compare makes x < src_x wrap and fail too (single-range check)
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
195
5fe1a2b1 196static void flush_cmd_buffer(void);
197
// GP1(0x10) "get GPU info": publish the requested internal value in gpu.gp0
// for a following GPUreadData. Buffered commands are flushed first so the
// shadow ex_regs are current.
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02: // texture window (e2)
  case 0x03: // draw area top-left (e3)
  case 0x04: // draw area bottom-right (e4)
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05: // draw offset (e5), 22 significant bits
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07: // GPU version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
219
9ee0fd5b 220// double, for overdraw guard
221#define VRAM_SIZE (1024 * 512 * 2 * 2)
222
223static int map_vram(void)
224{
225 gpu.vram = gpu.mmap(VRAM_SIZE);
226 if (gpu.vram != NULL) {
227 gpu.vram += 4096 / 2;
228 return 0;
229 }
230 else {
231 fprintf(stderr, "could not map vram, expect crashes\n");
232 return -1;
233 }
234}
235
// Plugin entry point: initialize video-out and renderer backends, clear
// plugin-side state, point the frame/hcnt counters at a harmless zero until
// the frontend provides real ones (GPUrearmedCallbacks), and reset the GPU.
// VRAM mapping is attempted only if the mmap callback is already set;
// otherwise it is deferred to GPUrearmedCallbacks. Returns 0 on success.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
256
// Plugin teardown: finish renderer and video-out, then unmap VRAM,
// first undoing the guard-page offset map_vram() applied.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2; // undo map_vram()'s guard offset before unmapping
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
271
1ab64c54
GI
// GP1 control-port write. The command byte is in bits 24-31. Writes that
// repeat the current register value are ignored for most commands (not for
// reset/display-address, which have side effects worth re-running).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return; // unchanged, skip the work
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00: // full reset
    do_reset();
    break;
  case 0x01: // command buffer reset
    do_cmd_reset();
    break;
  case 0x03: // display enable/blank
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04: // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05: // display start address (scanout origin in VRAM)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // treat a flip as a new frame, at most once per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06: // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07: // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08: // display mode
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10) // 0x10-0x1f: GPU info query
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
345
// Number of parameter words each GP0 command carries beyond the command
// word itself. Entries for 0x48-0x5f (poly-lines) are minimums only --
// those commands are open-ended and scanned for their 0x5xxx5xxx
// terminator word in do_cmd_list_skip().
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
365
d30279e2
GI
366#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
367
// Copy l halfwords from src to dst, ORing the given mask bit pattern
// (e.g. 0x8000 when the GP0(e6) mask-set bit is active) into each one.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  for (; src < end; src++, dst++)
    *dst = *src | msb;
}
374
// Transfer one span of l pixels between VRAM at (x, y) and host memory.
// is_read: VRAM -> mem; otherwise mem -> VRAM, going through cpy_msb when
// the mask-set bit (msb) must be forced on every written pixel.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
386
// Feed `count` words of an in-progress VRAM read/write (gpu.dma) transfer.
// The transfer is resumable: gpu.dma.{y,h,offset} track how far the
// rectangle has been filled across calls. Returns the number of 32-bit
// words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // force-mask bit from GP0(e6)
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish the partially-transferred row left over from the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // VRAM is 512 lines, wrap
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // transfer not finished; stash a partial row offset if data remains
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
437
// Begin a GP0(0xa0/0xc0) VRAM write/read. Decodes position and size words
// (sizes use the hardware's ((v - 1) & mask) + 1 encoding so 0 means max),
// snapshots the initial rectangle in gpu.dma_start for cache updates, and
// for reads pre-loads the first word into gpu.gp0.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
464
// Wrap up a VRAM transfer: reads clear the "image ready" status bit;
// writes invalidate the renderer's caches over the rectangle recorded at
// transfer start and mark the framebuffer dirty.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
477
// GP0(0x80) VRAM-to-VRAM rectangle copy. Takes the slow buffered path when
// rows may overlap left-to-right, wrap past x=1024, or must have the mask
// bit forced; otherwise copies row-by-row with memcpy. Coordinates wrap in
// hardware fashion (x mod 1024, y mod 512). Adds the copy cost to *cpu_cycles.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128]; // bounce buffer for the overlapping/wrapping case
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return; // copy onto itself with no mask change: nothing to do

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        // read the whole chunk before writing so overlap can't corrupt it
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
523
// Walk a GP0 command list while frameskipping: parse commands without
// drawing, but still track state that matters for later frames (texture
// page from textured polys, e-register shadows) and defer/execute fills.
// Stops as soon as skipping is no longer allowed or an image-i/o command
// appears. Returns words consumed; *last_cmd is the last command seen
// (-1 when the list ends mid-command).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          // small fill: defer until a frame is actually drawn
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27: // textured polys: keep texpage shadow current
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F: // poly-line: scan for 0x5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F: // shaded poly-line: vertex pairs, same terminator
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3) // new draw area may end the skip window
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
589
// Core GP0 dispatcher: consume up to `count` words of command data, routing
// them to VRAM i/o, VRAM copy, the renderer's do_cmd_list, or the skipping
// parser. Accumulates emulated GPU cycles into *cycles_sum/*cycles_last.
// Returns the number of unconsumed words (a trailing incomplete command).
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active VRAM write eats the data stream first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) { // vram-to-vram copy, 4 words
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the e1 texpage and e6 mask bits into the status register
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3]) // draw area changed, re-evaluate skipping
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
662
// Run whatever is queued in gpu.cmd_buffer through do_cmd_buffer, keeping
// any trailing incomplete command at the start of the buffer for next time.
// Notifies the frontend when actual command processing started (and no
// VRAM transfer is pending).
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
675
// Block write to the GP0 data port (non-chained DMA path). Any previously
// buffered words are flushed first; words that still can't be consumed are
// dropped with a logged anomaly (unlike the chained path, there is no
// caller to hand them back to).
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
689
// Single-word write to the GP0 data port: queue the word (stored
// little-endian, as command parsing expects) and flush once the buffer
// fills up.
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
697
// Walk a linked-list DMA chain starting at start_addr. Each node is a
// header word (len << 24 | next_addr) followed by len GP0 words; the chain
// ends when the address has bit 23 set (0xffffff terminator). Uses
// Brent-style cycle detection (ld_addr is re-anchored at doubling
// intervals of ld_count) to bail out of looped chains. Returns the summed
// cycle cost; the cost of the last (possibly still pending) command is
// returned separately via *cycles_last_cmd. When progress_addr is
// non-NULL, processes only one node and reports the next address.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
  uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover words from the previous node: append and retry as a batch
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      // revisited the anchor: the chain loops, stop here
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      // move the anchor forward, doubling the window (Brent's algorithm)
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
769
d30279e2
GI
// Block read from the GP0 data port: feeds an active VRAM-read transfer
// into mem. Does nothing (beyond a flush) if no transfer is in progress.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
780
// Single-word read from the GP0 data port. While a VRAM read is active,
// pulls the next word of the transfer (do_vram_io works on little-endian
// data, hence the byte-order round trip); otherwise returns the latched
// gpu.gp0 value (e.g. a get_gpu_info result).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
798
799uint32_t GPUreadStatus(void)
800{
ddd56f6e 801 uint32_t ret;
56f08d83 802
d30279e2
GI
803 if (unlikely(gpu.cmd_len > 0))
804 flush_cmd_buffer();
805
f23b103c 806 ret = gpu.status;
ddd56f6e 807 log_io("gpu_read_status %08x\n", ret);
808 return ret;
d30279e2
GI
809}
810
// Savestate snapshot passed across the plugin boundary by GPUfreeze().
// NOTE(review): layout appears to follow the classic PSEmu-style GPU
// plugin freeze format -- field order/sizes must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 818
// Save (type 1) or load (type 0) GPU state. ex_regs are stashed in the
// upper part of ulControl (index 0xe0+). On load, GP1 registers 1..8 are
// replayed through GPUwriteStatus with the stored value's low bit toggled
// first so the "unchanged register" early-out doesn't swallow the replay.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
849
// Per-vsync display update: flush pending work, handle blanking (blank the
// output once and mark fb dirty so the frame reappears when unblanked),
// skip the update entirely if nothing changed or the frame is being
// skipped, then present via vout_update(). Re-enables frameskip escape
// after 9 frames without a flip so a stalled game still refreshes.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // renderer caches must be rebuilt when enhancement just toggled on/off
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
886
// VBlank notification from the core: decide whether interlaced rendering
// should be in effect and push the (possibly changed) setting plus the
// current field (lcf) to the renderer.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
908
ab88daca 909void GPUgetScreenInfo(int *y, int *base_hres)
910{
911 *y = gpu.screen.y;
912 *base_hres = gpu.screen.vres;
913 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
914 *base_hres >>= 1;
915}
916
// Accept/refresh the frontend's callback table: frameskip settings,
// frame/hcnt counter pointers, interlace and enhancement options, and
// centering (recomputing screen dims only when centering actually
// changed). Also the point where VRAM mapping callbacks arrive, so a
// mapping deferred at GPUinit() time is retried here.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
951
1ab64c54 952// vim:shiftwidth=2:expandtab