libretro: improve retro_memory_map
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
90ac6fed 15#include "gpu_timing.h"
abf09485 16#include "../../libpcsxcore/gpu.h" // meh
44e76f8a 17#include "../../frontend/plugin_lib.h"
1ab64c54 18
44e76f8a 19#ifndef ARRAY_SIZE
1ab64c54 20#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
44e76f8a 21#endif
8f5f2dd5 22#ifdef __GNUC__
d30279e2 23#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 24#define preload __builtin_prefetch
8dd855cd 25#define noinline __attribute__((noinline))
8f5f2dd5 26#else
27#define unlikely(x)
28#define preload(...)
29#define noinline
8f5f2dd5 30#endif
1ab64c54 31
deb18d24 32//#define log_io gpu_log
56f08d83 33#define log_io(...)
56f08d83 34
9ee0fd5b 35struct psx_gpu gpu;
1ab64c54 36
8412166f 37static noinline int do_cmd_buffer(uint32_t *data, int count,
38 int *cycles_sum, int *cycles_last);
05740673 39static void finish_vram_transfer(int is_read);
48f3d210 40
// Flush any commands still sitting in the GP0 buffer and abort an
// unfinished VRAM transfer; used by the reset control commands.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  // if a VRAM read/write was in progress, close it out cleanly
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
52
// Full GPU reset (GP1 command 0x00): clear registers, restore the
// power-on status word and default screen dimensions, and tell the
// renderer about the new ex_regs / resolution.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // seed ex_regs so each slot holds its own command byte (0xe0..0xe7)
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000; // power-on status value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
71
// Recompute horizontal display parameters (gpu.screen.x/w/hres) from the
// status word's video-mode bits and the x1/x2 display range registers,
// applying the configured centering mode and overscan options.
static noinline void update_width(void)
{
  // hres and dot-clock divider per status bits 16-18
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1; // display width in dot clocks
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash

    if (gpu.state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu.state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
  //  gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
}
124
// Recompute vertical display parameters (gpu.screen.y/h/vres) from the
// y1/y2 display range registers, PAL/NTSC and double-height status bits,
// applying the configured centering mode.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;  // displayed line count
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
165
// Decide whether the next frame should be skipped, based on the frontend's
// advice and the configured skip interval; also replays a deferred fill
// command once skipping stops.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // frontend advice takes priority; otherwise skip until the configured
  // number of consecutive skips is reached
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (cmd 0x02) postponed while skipping must run now that we draw again
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
188
// Decide whether skipping is allowed given the e3 (drawing area top-left)
// command word; returns the new gpu.frameskip.allow value.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
200
5fe1a2b1 201static void flush_cmd_buffer(void);
202
// GP1 0x10-0x1f "get GPU info": place the requested internal value in
// gpu.gp0 (read back via GPUreadData).
static noinline void get_gpu_info(uint32_t data)
{
  // buffered commands may still modify ex_regs, so drain them first
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02: // texture window
  case 0x03: // draw area top-left
  case 0x04: // draw area bottom-right
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05: // draw offset
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07: // GPU version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
224
9ee0fd5b 225// double, for overdraw guard
226#define VRAM_SIZE (1024 * 512 * 2 * 2)
227
// Allocate the VRAM buffer via the frontend-provided mmap callback.
// Returns 0 on success, -1 on failure (emulation continues but will
// likely crash on VRAM access).
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    // offset the pointer so small negative coordinates don't underflow
    // the mapping (guard area before the visible VRAM)
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
240
// Plugin entry point: initialize video out, renderer and GPU state.
// Returns 0 on success, negative on any failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point counters at a dummy until the frontend provides real ones
  // via GPUrearmedCallbacks
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // gpu.mmap may not be set yet; mapping is retried in GPUrearmedCallbacks
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
261
// Plugin exit point: tear down renderer/vout and release VRAM.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    // undo the guard-area offset applied in map_vram() before unmapping
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
276
1ab64c54
GI
// GP1 control port write: top byte selects the command, low 24 bits are
// the payload. Updates display/reset/DMA state and notifies the renderer
// where needed.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip redundant writes, except for reset (0/1) and display address (5)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00: // full reset
      do_reset();
      break;
    case 0x01: // command buffer reset
      do_cmd_reset();
      break;
    case 0x03: // display enable/blank
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04: // DMA direction
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05: // display start address (scanout position in VRAM)
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          // scanout change usually means a flip: re-evaluate skipping
          // once per emulated frame
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06: // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07: // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08: // display mode: move mode bits into status word
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10) // 0x10-0x1f: GPU info query
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
350
// Number of additional parameter words for each GP0 command byte
// (the command word itself is not counted). Variable-length commands
// (poly-lines 0x48-0x5f, VRAM transfers) are handled specially by the
// command parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
370
d30279e2
GI
371#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
372
// this isn't very useful so should be rare
// Copy l pixels honoring the mask bits in r6 (GP0 e6): bit0 = set the
// mask (msb) on written pixels, bit1 = don't overwrite masked pixels.
static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
{
  uint16_t *end = dst + l;

  if (r6 == 1) {
    // set-mask only: just force the msb on everything written
    while (dst < end)
      *dst++ = *src++ | 0x8000;
  }
  else {
    uint16_t msb = r6 << 15; // nonzero only when bit0 of r6 is set
    for (; dst < end; dst++, src++) {
      // all-ones when the destination pixel is masked, else all-zeros
      uint16_t keep = (int16_t)*dst >> 15;
      *dst = (*dst & keep) | ((*src | msb) & ~keep);
    }
  }
}
389
// Transfer one horizontal span of l pixels between VRAM at (x, y) and the
// caller's buffer mem; direction selected by is_read, mask handling by r6.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint32_t r6)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2); // l pixels == l*2 bytes
  else if (unlikely(r6))
    cpy_mask(vram, mem, l, r6); // slow path: mask bits active
  else
    memcpy(vram, mem, l * 2);
}
401
// Feed `count` words of CPU data into (or out of) the VRAM transfer
// rectangle described by gpu.dma, wrapping lines as needed. Returns the
// number of 32-bit words actually consumed/produced.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint32_t r6 = gpu.ex_regs[6] & 3; // mask set/check bits
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset; // pixel offset within the current line
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish a partially transferred line from a previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, r6);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511; // VRAM wraps vertically
    do_vram_line(x, y, sdata, w, is_read, r6);
  }

  if (h > 0) {
    // transfer not complete; stash the partial line state
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, r6);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
452
// Begin a CPU<->VRAM transfer (GP0 0xa0-0xdf): decode the destination
// rectangle from the two parameter words and prime gpu.dma/gpu.dma_start.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size fields: 0 encodes the maximum (1024 / 512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
479
// Close out a completed (or aborted) VRAM transfer: clear the IMG status
// flag for reads, or invalidate renderer caches for writes.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
492
// VRAM-to-VRAM rectangle copy (GP0 0x80-0x9f). params holds the three
// parameter words: src xy, dst xy, size. Adds the copy cost to *cpu_cycles.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1; // 0 means max
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit applied to dst pixels
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return; // no-op copy

  renderer_flush_queues();

  // slow path: overlapping ranges, horizontal wrap, or mask bit to apply;
  // stage each chunk through lbuf so overlap is handled correctly
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: straight row-by-row memcpy
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
538
// Parse (but mostly don't execute) a command list while frameskip is
// active, tracking state that must stay correct (ex_regs, fills, texture
// page). Stops when skipping becomes disallowed or image i/o is reached.
// Returns words consumed; *last_cmd gets the last command byte (-1 if
// the final command was incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
      case 0x02: // fill rectangle
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          // small fill: defer until skipping ends (see decide_frameskip)
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27: // textured polys: keep texpage in ex_regs[1] current
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F: // poly-line: scan for the 0x5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F: // shaded poly-line: terminator every 2 words
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3) // drawing area change may end the skip
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0) // e0-e7 must be tracked even while skipping
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
603
// Main GP0 command dispatcher: walk `count` words of command data,
// routing VRAM transfers, VRAM copies and drawing lists to their
// handlers. Accumulates emulated GPU cycle costs into *cycles_sum /
// *cycles_last. Returns the number of words NOT consumed (incomplete
// trailing command).
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active VRAM write consumes data words directly
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) { // VRAM-to-VRAM copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) { // interrupt request - not implemented
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the live texpage/mask bits from ex_regs into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
676
// Run whatever is queued in gpu.cmd_buffer; any incomplete trailing
// command is moved to the front of the buffer for the next call.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed: notify the frontend a primitive started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
689
// Block write to the GP0 data port (DMA-style): process `count` words
// directly from mem. Leftover words are discarded (logged as anomaly).
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  // preserve ordering with previously buffered single-word writes
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
703
// Single-word write to the GP0 data port: buffer it, flushing when the
// buffer fills up.
void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data); // stored little-endian
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}
711
// Execute a linked-list DMA chain starting at start_addr in PSX RAM.
// Each node: header word (next-addr low 24 bits, payload length in top 8)
// followed by `len` command words. Uses Brent's cycle detection
// (ld_addr/ld_count) to bail out of looping lists. Returns accumulated
// GPU cycles; the cost of the last (possibly still executing) command is
// returned separately via *cycles_last_cmd. If progress_addr is non-NULL,
// only one node is processed and the next address is reported there.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++) // 0x800000 = end marker
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10; // per-node overhead
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover from a previous incomplete command: append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        // save the incomplete tail for the next call
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    // loop detection: remember a reference node, doubling the interval
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
783
d30279e2
GI
// Block read from the GP0 data port (DMA-style): fill mem with up to
// `count` words from an active VRAM read transfer.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h) // only meaningful while a VRAM read is in progress
    do_vram_io(mem, count, 1);
}
794
// Single-word read from the GP0 data port: next VRAM-read word if a
// transfer is active, otherwise the latched GPU info value (gpu.gp0).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data, convert around the call
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
812
// Read the GP1 status register; pending commands are flushed first so
// the status bits reflect everything written so far.
uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}
824
// Savestate layout shared with the PSEmu Pro plugin interface; must not
// be changed or existing savestates break.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 832
// Savestate entry point: type 1 = save state into *freeze,
// type 0 = load state from *freeze. Returns 1 (success) always.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stashed in the unused upper part of ulControl
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control registers to rebuild derived state
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0); // whole VRAM changed
    break;
  }

  return 1;
}
863
// Per-vsync hook: present the frame if anything changed, handling
// blanking and the frameskip frame-ready gate.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank once per blanking period, then mark dirty so the frame is
    // redrawn when blanking ends
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // safety valve: force a frame out after 9 frames without a flip
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement toggled on: renderer caches were built at the other scale
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
900
// VBlank hook: keep the renderer's interlace state in sync with the
// current field (lcf). Note: is_vblank is unused in this implementation.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
922
// Report the current display y position and base resolution to the
// frontend.
// NOTE(review): *base_hres is filled from gpu.screen.vres (vertical
// resolution) despite the "hres" name, then halved for double-height
// mode — looks intentional for vertical scaling, but confirm against
// the callers in the frontend.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
930
// Accept updated frontend configuration/callbacks; called on startup and
// whenever the user changes relevant settings.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute display geometry only when a centering setting changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
967
1ab64c54 968// vim:shiftwidth=2:expandtab