gpulib: handle vram copy in gpulib
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
1ab64c54
GI
15
16#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 17#ifdef __GNUC__
d30279e2 18#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 19#define preload __builtin_prefetch
8dd855cd 20#define noinline __attribute__((noinline))
8f5f2dd5 21#else
22#define unlikely(x)
23#define preload(...)
24#define noinline
8f5f2dd5 25#endif
1ab64c54 26
deb18d24 27//#define log_io gpu_log
56f08d83 28#define log_io(...)
56f08d83 29
9ee0fd5b 30struct psx_gpu gpu;
1ab64c54 31
48f3d210 32static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 33static void finish_vram_transfer(int is_read);
48f3d210 34
// GP1(01h) "reset command buffer": flush any queued GP0 words and
// close out an in-flight VRAM transfer.
static noinline void do_cmd_reset(void)
{
  // execute what was already queued before dropping the buffer
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // an interrupted transfer still needs its completion side effects
  // (status bit / renderer cache update in finish_vram_transfer())
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
45
6e9bdaef 46static noinline void do_reset(void)
1ab64c54 47{
7841712d 48 unsigned int i;
48f3d210 49
50 do_cmd_reset();
51
6e9bdaef 52 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 53 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
54 gpu.ex_regs[i] = (0xe0 + i) << 24;
f23b103c 55 gpu.status = 0x14802000;
6e9bdaef 56 gpu.gp0 = 0;
fc84f618 57 gpu.regs[3] = 1;
6e9bdaef 58 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 59 gpu.screen.vres = gpu.screen.h = 240;
308c6e67 60 gpu.screen.x = gpu.screen.y = 0;
5fe1a2b1 61 renderer_sync_ecmds(gpu.ex_regs);
0b4038f8 62 renderer_notify_res_change();
1ab64c54
GI
63}
64
// Recompute horizontal display parameters (gpu.screen.x/w/hres) from
// the GP1(06h) display range and the video-mode bits in gpu.status.
static noinline void update_width(void)
{
  // horizontal resolution and dot-clock divider, indexed by
  // status bits 16-18
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // width in dot-clock units
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // keep x as computed from the hardware registers
      break;
    case 2:
      // user-forced offset
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
109
// Recompute vertical display parameters (gpu.screen.y/h/vres) from
// the GP1(07h) display range and the video-mode bits in gpu.status.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height mode: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      // user-forced offset
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
147
// Advance the frameskip state machine: decide whether the next frame
// is skipped, and replay a deferred fill command once drawing resumes.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // frontend advice can start a skip; otherwise skip at most
  // .set consecutive frames before forcing a drawn one
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (GP0 02h) postponed by do_cmd_list_skip() must be executed
  // now that a frame will actually be drawn
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
170
b243416b 171static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 172{
173 // no frameskip if it decides to draw to display area,
174 // but not for interlace since it'll most likely always do that
175 uint32_t x = cmd_e3 & 0x3ff;
176 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
f23b103c 177 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
308c6e67 178 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
179 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 180 return gpu.frameskip.allow;
9fe27e25 181}
182
5fe1a2b1 183static void flush_cmd_buffer(void);
184
6e9bdaef 185static noinline void get_gpu_info(uint32_t data)
186{
5fe1a2b1 187 if (unlikely(gpu.cmd_len > 0))
188 flush_cmd_buffer();
6e9bdaef 189 switch (data & 0x0f) {
190 case 0x02:
191 case 0x03:
192 case 0x04:
6e9bdaef 193 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
194 break;
d04b8924 195 case 0x05:
196 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 197 break;
198 case 0x07:
199 gpu.gp0 = 2;
200 break;
201 default:
d04b8924 202 // gpu.gp0 unchanged
6e9bdaef 203 break;
204 }
205}
206
9ee0fd5b 207// double, for overdraw guard
208#define VRAM_SIZE (1024 * 512 * 2 * 2)
209
210static int map_vram(void)
211{
212 gpu.vram = gpu.mmap(VRAM_SIZE);
213 if (gpu.vram != NULL) {
214 gpu.vram += 4096 / 2;
215 return 0;
216 }
217 else {
218 fprintf(stderr, "could not map vram, expect crashes\n");
219 return -1;
220 }
221}
222
// Plugin entry point: initialize video out, renderer and gpulib state.
// Returns 0 on success, nonzero on any sub-init failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point the counter pointers at a safe dummy until the emu
  // provides real ones via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // gpu.mmap may not be set yet; the mapping is retried later
  // in GPUrearmedCallbacks() ("delayed vram mmap")
  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
243
// Plugin teardown: stop the renderer/vout and release VRAM.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;  // undo the page offset applied in map_vram()
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
258
1ab64c54
GI
// GP1 (control) register write; dispatches on the command byte
// in bits 24-31 of 'data'.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  // drop redundant writes - but not for 0/1 (resets) or 5 (display
  // address), which have side effects even with an unchanged value
  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00: // reset GPU
    do_reset();
    break;
  case 0x01: // reset command buffer
    do_cmd_reset();
    break;
  case 0x03: // display enable/disable
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04: // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05: // start of display area in vram
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // a display-address change is treated as a frame flip
      // for frameskip pacing
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06: // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07: // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08: // display mode -> status bits 16-22
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10) // 10h..1Fh: get GPU info
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
328
// Number of parameter words following each GP0 command word
// (do_cmd_list_skip() uses 1 + cmd_lengths[cmd] as total length).
// Polylines (48h-5Fh) and VRAM transfers (A0h-DFh) are variable
// length and get special handling in the command processors.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
348
d30279e2
GI
349#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
350
// Copy 'l' halfwords from src to dst, ORing each with 'msb'
// (used to force the mask bit on pixels written to VRAM).
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  const uint16_t *end = src + l;
  while (src < end)
    *dst++ = *src++ | msb;
}
357
// Transfer one span of 'l' pixels between VRAM at (x, y) and 'mem'.
// is_read: VRAM -> mem; otherwise mem -> VRAM, ORing 'msb' into each
// pixel when it is nonzero (forced mask bit).
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);

  if (unlikely(is_read)) {
    memcpy(mem, vram, l * 2);
    return;
  }
  if (unlikely(msb)) {
    cpy_msb(vram, mem, l, msb);
    return;
  }
  memcpy(vram, mem, l * 2);
}
369
// Move up to 'count' 32bit words between 'data' and the active VRAM
// transfer rectangle (gpu.dma). Resumable: first finishes a line left
// incomplete by the previous call (gpu.dma.offset), then does whole
// lines, then a trailing partial line. Returns words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // E6h bit0 -> forced pixel bit15
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish the line the previous call left half-done
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // full lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // remember the partial trailing line for the next call
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
420
// Begin a CPU<->VRAM image transfer (GP0 A0h write / C0h read);
// pos_word and size_word are the two parameter words of the command.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size 0 encodes the maximum (1024x512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  // snapshot for finish_vram_transfer()/do_cmd_reset(), since gpu.dma
  // itself mutates as the transfer progresses
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // pre-latch the first word for GPUreadData()
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
445
05740673 446static void finish_vram_transfer(int is_read)
447{
448 if (is_read)
f23b103c 449 gpu.status &= ~PSX_GPU_STATUS_IMG;
05740673 450 else
451 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
0b4038f8 452 gpu.dma_start.w, gpu.dma_start.h, 0);
05740673 453}
454
// GP0(80h): VRAM-to-VRAM rectangle copy.
// Fast path: per-row memcpy. Slow path (forward-overlapping rows,
// horizontal wrap past x=1024, or forced mask bit): copy through a
// small line buffer with per-pixel x wrapping.
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w =  ((LE32TOH(params[2]) - 1) & 0x3FF) + 1; // 0 means max
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15; // E6h bit0 -> forced pixel bit15
  uint16_t lbuf[128];
  uint32_t x, y;

  // copying a rect onto itself without setting mask bits changes nothing
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        // buffer the chunk first so overlapping src/dst rows stay correct
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  // NOTE(review): the rect is passed unwrapped - dx+w can exceed 1024
  // on the slow path; confirm renderer_update_caches() tolerates that
  renderer_update_caches(dx, dy, w, h, 0);
}
499
// Parse a GP0 command list while frameskip is active: nothing is drawn,
// but state-affecting commands are still tracked (fills, texture page
// bits, e-reg shadows). Stops when skipping becomes disallowed or at
// an image i/o command. Returns words consumed; *last_cmd receives the
// last command byte, or -1 for an incomplete trailing command.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        // small fill: defer; decide_frameskip() replays it if a
        // frame ends up being drawn
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      // textured polygons: keep the e1 texture-page shadow updated
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      // variable-length polyline: scan for the 5xxx5xxxh terminator
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      // shaded polyline: words come in (color, vertex) pairs
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        // draw-area change can make skipping disallowed
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
565
// Core GP0 word processor: consumes words from 'data', feeding active
// VRAM transfers, starting new transfers/copies, and passing drawing
// commands to do_cmd_list() (or do_cmd_list_skip() while frameskipping).
// Returns the number of words NOT consumed - an incomplete trailing
// command that the caller must buffer and resubmit.
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      // an open CPU->VRAM transfer eats the incoming words first
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      // vram-to-vram copy: 4 words total
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the e1/e6 shadow registers into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
629
5440b88e 630static void flush_cmd_buffer(void)
d30279e2 631{
48f3d210 632 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
d30279e2
GI
633 if (left > 0)
634 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
635 gpu.cmd_len = left;
1ab64c54
GI
636}
637
// Block GP0 write (DMA): flush previously buffered words, then process
// 'count' words straight from 'mem'.
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    // DMA blocks are expected to contain only complete commands
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
651
d30279e2 652void GPUwriteData(uint32_t data)
1ab64c54 653{
56f08d83 654 log_io("gpu_write %08x\n", data);
89df80c6 655 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
656 if (gpu.cmd_len >= CMD_BUFFER_LEN)
657 flush_cmd_buffer();
1ab64c54
GI
658}
659
// Walk a GP0 DMA linked list starting at start_addr. Each node is a
// header word (next address in bits 0-23, payload length in bits 24-31)
// followed by 'len' command words. Returns an approximate cycle cost.
// When progress_addr is non-NULL, only one node is processed per call
// and the next address is reported back (sliced DMA).
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  // bit23 set terminates the list (see loop detection below)
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    // leftover words from the previous node/slice: append this node's
    // payload and flush them together
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // incomplete command at the end of the node: buffer the rest
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    // guard against games with endless/looping lists: after
    // LD_THRESHOLD nodes start marking visited ones so that a
    // revisit terminates the walk
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
742
d30279e2
GI
743void GPUreadDataMem(uint32_t *mem, int count)
744{
56f08d83 745 log_io("gpu_dma_read %p %d\n", mem, count);
746
d30279e2
GI
747 if (unlikely(gpu.cmd_len > 0))
748 flush_cmd_buffer();
56f08d83 749
d30279e2
GI
750 if (gpu.dma.h)
751 do_vram_io(mem, count, 1);
752}
753
// Single-word GP0 read: returns the next word of an active VRAM read
// transfer, otherwise the latched gpu.gp0 value (e.g. GPU info).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io() operates on little-endian data in place
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
771
772uint32_t GPUreadStatus(void)
773{
ddd56f6e 774 uint32_t ret;
56f08d83 775
d30279e2
GI
776 if (unlikely(gpu.cmd_len > 0))
777 flush_cmd_buffer();
778
f23b103c 779 ret = gpu.status;
ddd56f6e 780 log_io("gpu_read_status %08x\n", ret);
781 return ret;
d30279e2
GI
782}
783
// Savestate image exchanged with the emulator core via GPUfreeze().
// NOTE(review): layout presumably fixed by the plugin savestate ABI -
// do not reorder fields.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 791
// Savestate entry point: type 1 = save into 'freeze', 0 = load from it.
// Always returns 1 (success).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stashed in the unused tail of ulControl
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 8..1 to rebuild derived state; the xor
    // defeats the redundant-write shortcut in GPUwriteStatus()
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}
822
// Per-frame display update: flush pending work, handle blanking and
// frameskip pacing, and present the frame via vout_update().
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank once per blanking period, not every frame
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  // nothing changed since the last present
  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // force a flip if the game hasn't flipped in a while
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement just turned on: renderer must rescan all of vram
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
859
// Vblank notification from the emulator; decides whether the renderer
// should run in interlaced mode. (is_vblank is currently unused.)
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    // flush before switching field/mode so queued work renders
    // with the old settings
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
881
882#include "../../frontend/plugin_lib.h"
883
// Accept runtime configuration and callbacks from the frontend
// (called on startup and whenever settings change).
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // recompute screen geometry only when a centering setting changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap (GPUinit() may have run before mmap was set)
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
916
1ab64c54 917// vim:shiftwidth=2:expandtab