gpu: a bit better idle bit handling
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
308c6e67 12#include <stdlib.h>
1ab64c54 13#include <string.h>
56f08d83 14#include "gpu.h"
abf09485 15#include "../../libpcsxcore/gpu.h" // meh
1ab64c54
GI
16
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// fix: the previous empty expansion made `if (unlikely(cond))`
// collapse to `if ()` on non-GNU compilers; keep the condition.
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)
56f08d83 30
9ee0fd5b 31struct psx_gpu gpu;
1ab64c54 32
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

// GP1(0x01): drain any buffered GP0 command words, then abort a
// partially completed VRAM transfer (if one is in flight).
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
46
// GP1(0x00): full GPU reset — clear all control registers, reinitialize
// the ex-regs to their command bytes (0xe0..), restore the power-on
// status word and the default 256x240 display, then resync the renderer.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;   // power-on status value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;           // display disabled after reset
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
65
// Recompute horizontal display parameters from the status word and the
// GP1(0x06) x1/x2 range: pick the dot-clock divider and base width, then
// derive the on-screen x offset and visible width, applying the selected
// centering policy (0 = auto-correct, 1 = use raw values, 2 = manual).
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // width in dot-clock units
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // keep the position the game programmed, no correction
      break;
    case 2:
      // manual override from user configuration
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
110
// Recompute vertical display parameters from the GP1(0x07) y1/y2 range,
// the PAL/NTSC flag and the double-height (interlace) flag, using the
// same centering policy as update_width().
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlace doubles everything vertically
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // keep the position the game programmed, no correction
      break;
    case 2:
      // manual override from user configuration
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
148
// Advance the frameskip state machine (called once per display flip):
// decide whether the next frame will be skipped, and replay a deferred
// fill command once skipping ends.
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // external advice can start a skip; otherwise keep skipping until
  // .set consecutive frames have been skipped
  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill recorded by do_cmd_list_skip() must still be executed
  // when we stop skipping, or the screen would keep stale contents
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
171
b243416b 172static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 173{
174 // no frameskip if it decides to draw to display area,
175 // but not for interlace since it'll most likely always do that
176 uint32_t x = cmd_e3 & 0x3ff;
177 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
f23b103c 178 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
308c6e67 179 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
180 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 181 return gpu.frameskip.allow;
9fe27e25 182}
183
5fe1a2b1 184static void flush_cmd_buffer(void);
185
6e9bdaef 186static noinline void get_gpu_info(uint32_t data)
187{
5fe1a2b1 188 if (unlikely(gpu.cmd_len > 0))
189 flush_cmd_buffer();
6e9bdaef 190 switch (data & 0x0f) {
191 case 0x02:
192 case 0x03:
193 case 0x04:
6e9bdaef 194 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
195 break;
d04b8924 196 case 0x05:
197 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 198 break;
199 case 0x07:
200 gpu.gp0 = 2;
201 break;
202 default:
d04b8924 203 // gpu.gp0 unchanged
6e9bdaef 204 break;
205 }
206}
207
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

// Map the VRAM buffer via the frontend-supplied gpu.mmap callback.
// The pointer is advanced by 4096/2 halfwords — presumably a guard band
// for slight renderer under-addressing (TODO confirm against renderers).
// Returns 0 on success, -1 on failure.
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
223
// Plugin entry point: initialize video-out and renderer, reset emulated
// GPU state, and map VRAM if the frontend already provided gpu.mmap
// (otherwise mapping is deferred to GPUrearmedCallbacks()).
// Returns 0 on success, nonzero/-1 on failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point the counters at a harmless dummy until the frontend wires
  // up the real ones via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}
244
// Plugin exit point: tear down renderer and video-out, then unmap VRAM
// (undoing the guard offset applied in map_vram()).
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;  // restore the pointer map_vram() offset
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}
259
1ab64c54
GI
// GP1 (control) register write: dispatch on the command byte in bits
// 24-31 and update display/DMA/frameskip state accordingly.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // swallow repeated identical writes, except reset (0x00),
    // command-reset (0x01) and display address (0x05)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:  // full reset
      do_reset();
      break;
    case 0x01:  // command buffer reset
      do_cmd_reset();
      break;
    case 0x03:  // display enable / blanking
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:  // DMA direction
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:  // display start address (scanout position)
      gpu.screen.src_x = data & 0x3ff;
      gpu.screen.src_y = (data >> 10) & 0x1ff;
      renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // treat the scanout change as a flip, at most once per frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:  // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:  // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:  // display mode
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)  // 0x10-0x1f: info queries
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
329
// Parameter word count for each GP0 command (not counting the command
// word itself). Variable-length commands (0x48-0x5f poly-lines) and
// image transfers (0xa0-0xdf) are special-cased by the parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
349
d30279e2
GI
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// Copy l halfwords from src to dst, ORing the given mask-bit value
// into every copied pixel.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  while (l-- > 0)
    *dst++ = *src++ | msb;
}
358
// Transfer one span of l halfwords at VRAM row y starting at column x.
// is_read copies VRAM -> mem, otherwise mem -> VRAM; a nonzero msb is
// ORed into each written pixel (mask-bit forcing — presumably driven by
// ex_regs[6], see callers).
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
370
// Feed `count` 32-bit words into (or out of) the active VRAM transfer
// described by gpu.dma. Handles the partially-transferred first row
// (gpu.dma.offset), then whole rows, then a partial trailing row, and
// finishes the transfer when the rectangle is complete.
// Returns the number of input words actually consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // mask-bit setting value
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // complete the partially transferred row first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      // row finished, move to the next one
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // full rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // VRAM wraps vertically
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      // partial last row; remember progress for the next call
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
421
// Begin a VRAM write (GP0 0xa0) or read (0xc0) transfer described by the
// position and size parameter words; the pixel data then flows through
// do_vram_io(). For reads, the first word is latched into gp0 right away.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;      // 0 encodes max size
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;  // keep the original rect for cache updates

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
448
// Close out the active VRAM transfer: on reads clear the "image ready"
// status flag; on writes mark the framebuffer dirty and update renderer
// caches for the transferred rectangle. Notifies the frontend either way.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
461
// GP0 0x80: VRAM-to-VRAM rectangle copy. Takes the slow buffered path
// when the rows overlap, wrap past column 1024, or the mask bit must be
// forced; otherwise copies each row with a single memcpy.
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;   // 0 encodes max
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;  // copying onto itself with no mask change: no-op

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    // slow path: stage chunks in lbuf so an overlapping or wrapping
    // source row is fully read before the destination is written
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
506
// Scan a GP0 command list while a frame is being skipped: keep
// state-affecting commands in sync (fills, texture page, 0xe0-0xe7
// ex-regs) without drawing, and stop as soon as skipping becomes
// disallowed or image i/o is reached. Returns words consumed;
// *last_cmd gets the last command byte seen, or -1 on a truncated cmd.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          // defer the fill; decide_frameskip() replays it when
          // skipping ends
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:  // textured polygons: keep the texpage
      case 0x2c ... 0x2f:  // bits of ex_regs[1] in sync even though
      case 0x34 ... 0x37:  // nothing is drawn
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:  // poly-lines: variable length,
        for (v = 3; pos + v < count; v++)  // 0x5xxx5xxx terminates
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:  // shaded poly-lines: two words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)  // draw-area change may forbid further skipping
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
572
// Main GP0 word-stream dispatcher: routes data to an ongoing VRAM
// transfer, VRAM copy commands, the renderer command list, or the
// frameskip scanner, then refreshes the status word from the ex-regs.
// Returns the number of words NOT consumed (an incomplete trailing
// command that must be kept for the next call).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      // an active VRAM write consumes the stream first
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {  // 0x80-0x9f: vram-to-vram copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror texture page / mask settings into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])  // draw area changed
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
637
// Process everything queued in gpu.cmd_buffer; any incomplete trailing
// command is moved to the front of the buffer for the next call.
static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed: notify the frontend, unless a VRAM
    // transfer started (start_vram_transfer sends its own event)
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
649
650void GPUwriteDataMem(uint32_t *mem, int count)
651{
d30279e2
GI
652 int left;
653
56f08d83 654 log_io("gpu_dma_write %p %d\n", mem, count);
655
d30279e2
GI
656 if (unlikely(gpu.cmd_len > 0))
657 flush_cmd_buffer();
56f08d83 658
48f3d210 659 left = do_cmd_buffer(mem, count);
d30279e2 660 if (left)
56f08d83 661 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
662}
663
d30279e2 664void GPUwriteData(uint32_t data)
1ab64c54 665{
56f08d83 666 log_io("gpu_write %08x\n", data);
89df80c6 667 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
668 if (gpu.cmd_len >= CMD_BUFFER_LEN)
669 flush_cmd_buffer();
1ab64c54
GI
670}
671
// Walk a GPU DMA linked list starting at start_addr, feeding each node's
// payload to the command processor. After LD_THRESHOLD nodes, visited
// nodes are temporarily marked (bit 23) to detect list loops; the marks
// are removed afterwards. With a non-NULL progress_addr, only one node
// is processed and the next address is reported through it.
// Returns an approximate cycle cost of the chain.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit23 = terminator
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;    // payload words in this node
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover words from a previous call: append and route this
      // node through the command buffer instead
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // keep the incomplete tail for the next call
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;  // remember where marking begins
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
754
d30279e2
GI
755void GPUreadDataMem(uint32_t *mem, int count)
756{
56f08d83 757 log_io("gpu_dma_read %p %d\n", mem, count);
758
d30279e2
GI
759 if (unlikely(gpu.cmd_len > 0))
760 flush_cmd_buffer();
56f08d83 761
d30279e2
GI
762 if (gpu.dma.h)
763 do_vram_io(mem, count, 1);
764}
765
766uint32_t GPUreadData(void)
767{
9e146206 768 uint32_t ret;
56f08d83 769
770 if (unlikely(gpu.cmd_len > 0))
771 flush_cmd_buffer();
772
9e146206 773 ret = gpu.gp0;
ae097dfb
PC
774 if (gpu.dma.h) {
775 ret = HTOLE32(ret);
9e146206 776 do_vram_io(&ret, 1, 1);
ae097dfb
PC
777 ret = LE32TOH(ret);
778 }
56f08d83 779
9e146206 780 log_io("gpu_read %08x\n", ret);
781 return ret;
d30279e2
GI
782}
783
784uint32_t GPUreadStatus(void)
785{
ddd56f6e 786 uint32_t ret;
56f08d83 787
d30279e2
GI
788 if (unlikely(gpu.cmd_len > 0))
789 flush_cmd_buffer();
790
f23b103c 791 ret = gpu.status;
ddd56f6e 792 log_io("gpu_read_status %08x\n", ret);
793 return ret;
d30279e2
GI
794}
795
// Savestate layout shared with the PCSX GPU plugin interface.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 803
// Save (type 1) or load (type 0) full GPU state: VRAM, GP1 control
// registers, ex-regs (stored at ulControl[0xe0..]) and the status word.
// On load, control registers 1-8 are replayed through GPUwriteStatus()
// to rebuild all derived display state. Returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 1);
      break;
  }

  return 1;
}
834
// Per-frame display update: flush pending work, handle blanking and
// frameskip pacing, then present the frame via vout_update().
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank once and remember it, so repeated vblanks are cheap
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no complete frame yet; give up waiting after ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement just toggled on: renderer caches are stale
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}
871
// Vblank notification from the emulator core: decide whether interlaced
// rendering should be active and inform the renderer of field changes.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
893
// Report the current display y offset and the base vertical resolution
// (halved when double-height/interlace is active).
// NOTE(review): the out-param is named *base_hres but is filled from
// gpu.screen.vres (vertical resolution) — presumably intentional per the
// frontend's contract; confirm with the caller before renaming.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
901
#include "../../frontend/plugin_lib.h"

// Accept frontend configuration and callbacks: frameskip settings,
// frame/hsync counters, screen centering, memory-mapping hooks and
// state-change notifications. Performs the deferred VRAM mapping if
// GPUinit() ran before gpu.mmap was available.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // recompute display dimensions only if centering settings changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
937
1ab64c54 938// vim:shiftwidth=2:expandtab