Merge pull request #749 from pcercuei/lightrec-allow-mem-override
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1ab64c54
GI
17
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// the fallback must still evaluate to the condition itself; an empty
// expansion would turn `if (unlikely(cond))` into the invalid `if ()`
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 28
// debug logging: every message is prefixed with the current frame and
// hsync counters so events can be correlated with emulation time
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

// the single global GPU state instance shared with the renderer/vout backends
struct psx_gpu gpu;

// forward declarations for the command parser and transfer finalizer below
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
48f3d210 41
// GP1(0x01): flush and discard any pending command/transfer state.
static noinline void do_cmd_reset(void)
{
  // wait for the render thread before touching state it may be using
  renderer_sync();

  // drain whatever is buffered so nothing is half-parsed, then drop it
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // likewise terminate a vram transfer that is still in flight
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
54
6e9bdaef 55static noinline void do_reset(void)
1ab64c54 56{
7841712d 57 unsigned int i;
5b568098 58
48f3d210 59 do_cmd_reset();
60
6e9bdaef 61 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 62 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
63 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 64 gpu.status = 0x14802000;
6e9bdaef 65 gpu.gp0 = 0;
fc84f618 66 gpu.regs[3] = 1;
6e9bdaef 67 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 68 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 69 gpu.screen.x = gpu.screen.y = 0;
1ab64c54
GI
70}
71
// Recompute the displayed width/x-offset from the GP1 display range
// registers (x1/x2) and the hres bits of the status word.
static noinline void update_width(void)
{
  // tables indexed by status bits 16-18 (horizontal resolution select)
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // display range, in gpu clocks
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;       // convert range from gpu clocks to pixels
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // manual: keep computed x as-is
      break;
    case 2:
      // forced: use the user-provided offset
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
116
// Recompute the displayed height/y-offset from the GP1 display range
// registers (y1/y2) and the PAL/double-height status bits.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height: everything scales by two
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // manual: keep computed y as-is
      break;
    case 2:
      // forced: use the user-provided offset
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
154
// Update the frameskip state machine; called once per flip when
// frameskip is enabled. Order matters: the counter update must happen
// before the active/inactive decision below.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // priority: forced skip > frontend advice > configured skip ratio
  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (cmd 0x02) deferred while skipping must run now that we draw
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
181
b243416b 182static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 183{
184 // no frameskip if it decides to draw to display area,
185 // but not for interlace since it'll most likely always do that
186 uint32_t x = cmd_e3 & 0x3ff;
187 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 188 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 189 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
190 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 191 return gpu.frameskip.allow;
9fe27e25 192}
193
6e9bdaef 194static noinline void get_gpu_info(uint32_t data)
195{
196 switch (data & 0x0f) {
197 case 0x02:
198 case 0x03:
199 case 0x04:
6e9bdaef 200 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
201 break;
08b33377 202 case 0x05:
203 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 204 break;
205 case 0x07:
206 gpu.gp0 = 2;
207 break;
208 default:
08b33377 209 // gpu.gp0 unchanged
6e9bdaef 210 break;
211 }
212}
213
// double, for overdraw guard; plus 4kb for the guard area in front
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 229
12367ad0 230#ifdef GPULIB_USE_MMAP
9ee0fd5b 231static int map_vram(void)
232{
12367ad0 233 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
9ee0fd5b 234 if (gpu.vram != NULL) {
12367ad0 235 // 4kb guard in front
236 gpu.vram += (4096 / 2);
237 // Align
238 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 239 return 0;
240 }
241 else {
242 fprintf(stderr, "could not map vram, expect crashes\n");
243 return -1;
244 }
245}
12367ad0 246#else
247static int map_vram(void)
248{
249 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
250 if (gpu.vram != NULL) {
251 // 4kb guard in front
252 gpu.vram += (4096 / 2);
253 // Align
254 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
255 return 0;
256 } else {
257 fprintf(stderr, "could not allocate vram, expect crashes\n");
258 return -1;
259 }
260}
261
262static int allocate_vram(void)
263{
264 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
265 if (gpu.vram != NULL) {
266 // 4kb guard in front
267 gpu.vram += (4096 / 2);
268 // Align
269 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
270 return 0;
271 } else {
272 fprintf(stderr, "could not allocate vram, expect crashes\n");
273 return -1;
274 }
275}
276#endif
9ee0fd5b 277
// Plugin entry point: set up the video-out and renderer backends and
// put the emulated GPU into its post-reset state.
// Returns 0 on success (OR of backend init results).
long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  // without mmap support vram must exist before anything touches it
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount; // in psxcounters.cpp
  //extern uint32_t frame_counter; // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  // point the counters at a dummy zero until the frontend registers
  // the real ones via GPUrearmedCallbacks()
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
310
// Plugin entry point: tear down the backends and release VRAM using the
// same mechanism (munmap vs free) that allocated it.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  // clear both so a later init/map starts from a clean slate
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
329
// GP1 control-port write: dispatch on the command byte (top 8 bits).
// Register writes are deduplicated (except cmds 0/1/5, which always act).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip no-op rewrites; 0x00/0x01 always reset, 0x05 always flips
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00: // full reset
      do_reset();
      break;
    case 0x01: // command/transfer reset
      do_cmd_reset();
      break;
    case 0x03: // display enable/blank
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04: // DMA direction
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05: // display source area in vram (i.e. a "flip")
      gpu.screen.src_x = data & 0x3ff;
      gpu.screen.src_y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // only re-decide once per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06: // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07: // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08: // display mode (resolution/PAL/interlace bits)
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10) // 0x10..0x1f: info queries
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
398
// Number of additional 32-bit words each GP0 command carries, indexed by
// the command byte; the parser uses `1 + cmd_lengths[cmd]` as the total
// length. Variable-length poly-lines (0x48+/0x58+) are extended at parse
// time by scanning for the terminator.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
418
d30279e2
GI
419#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
420
421static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 422{
d30279e2
GI
423 uint16_t *vram = VRAM_MEM_XY(x, y);
424 if (is_read)
425 memcpy(mem, vram, l * 2);
426 else
427 memcpy(vram, mem, l * 2);
428}
429
// Stream words between host memory and the active vram transfer
// rectangle, resuming mid-line via gpu.dma.offset. Returns the number
// of 32-bit words consumed. Finishes the transfer when the rect is done.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish a line that a previous call left incomplete
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;       // still not at end of line
    else {
      o = 0;        // line completed, move to the next one
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // bulk: whole lines while data lasts
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;       // vram wraps vertically at 512 lines
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    // rect not finished; stash a partial line for the next call
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
481
// Begin a cpu<->vram image transfer (GP0 0xa0/0xc0 family). pos_word
// packs x|y<<16, size_word packs w|h<<16; zero sizes wrap to max per
// the masks below. dma_start keeps the original rect for cache updates.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
506
05740673 507static void finish_vram_transfer(int is_read)
508{
509 if (is_read)
61124a6d 510 gpu.status &= ~PSX_GPU_STATUS_IMG;
05740673 511 else
512 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
513 gpu.dma_start.w, gpu.dma_start.h);
514}
515
// Walk the command list without rendering (frameskip path), while still
// tracking state that must stay correct: fills (0x02) are deferred or
// executed, texture page bits from textured polys and 0xEx draw-env
// registers are mirrored into ex_regs. Stops when skipping becomes
// disallowed or at image i/o. Returns words consumed; *last_cmd gets
// the last command seen (-1 if it was incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          // small fill: defer until frameskip ends (see decide_frameskip)
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        // textured polys: keep the texpage bits of ex_regs[1] current
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        // variable-length poly-line: scan for the 0x5000,5000 terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        // shaded poly-line: two words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          // new draw area may overlap the display — re-evaluate skipping
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
581
// Main GP0 word-stream parser: routes data to an active vram transfer,
// starts new transfers, and hands drawing commands to the renderer (or
// the skip walker when frameskipping). Returns the number of words that
// could NOT be consumed (an incomplete trailing command).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the draw-env registers into the low status bits
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // draw area changed: re-evaluate whether skipping is still allowed
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
636
5440b88e 637static void flush_cmd_buffer(void)
d30279e2 638{
48f3d210 639 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
d30279e2
GI
640 if (left > 0)
641 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
642 gpu.cmd_len = left;
1ab64c54
GI
643}
644
645void GPUwriteDataMem(uint32_t *mem, int count)
646{
d30279e2
GI
647 int left;
648
56f08d83 649 log_io("gpu_dma_write %p %d\n", mem, count);
650
d30279e2
GI
651 if (unlikely(gpu.cmd_len > 0))
652 flush_cmd_buffer();
56f08d83 653
48f3d210 654 left = do_cmd_buffer(mem, count);
d30279e2 655 if (left)
56f08d83 656 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
657}
658
d30279e2 659void GPUwriteData(uint32_t data)
1ab64c54 660{
56f08d83 661 log_io("gpu_write %08x\n", data);
db215a72 662 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
663 if (gpu.cmd_len >= CMD_BUFFER_LEN)
664 flush_cmd_buffer();
1ab64c54
GI
665}
666
// Walk a linked DMA chain in PSX ram and feed each packet to the parser.
// Returns an approximate cpu cycle cost. Infinite chains are caught by
// temporarily tagging visited headers with bit23 (invalid on hardware),
// then untagging them afterwards. If progress_addr is given, only one
// packet is processed and the next address is reported instead.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit23 = end marker
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;       // payload words in this packet
    addr = LE32TOH(list[0]) & 0xffffff; // next packet
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      // leftover from a previous incomplete command: append and retry
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // keep the unconsumed tail for the next packet/write
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  // bookkeeping for frontend diagnostics/timing
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
745
d30279e2
GI
746void GPUreadDataMem(uint32_t *mem, int count)
747{
56f08d83 748 log_io("gpu_dma_read %p %d\n", mem, count);
749
d30279e2
GI
750 if (unlikely(gpu.cmd_len > 0))
751 flush_cmd_buffer();
56f08d83 752
d30279e2
GI
753 if (gpu.dma.h)
754 do_vram_io(mem, count, 1);
755}
756
// Single-word GPUREAD: returns the next transfer word when a vram read
// is active, otherwise the latched gp0 value. The HTOLE32/LE32TOH pair
// brackets do_vram_io() because vram data is little-endian in memory.
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
774
775uint32_t GPUreadStatus(void)
776{
ddd56f6e 777 uint32_t ret;
56f08d83 778
d30279e2
GI
779 if (unlikely(gpu.cmd_len > 0))
780 flush_cmd_buffer();
781
61124a6d 782 ret = gpu.status;
ddd56f6e 783 log_io("gpu_read_status %08x\n", ret);
784 return ret;
d30279e2
GI
785}
786
// Savestate image exchanged with the emu core; the layout is serialized
// as-is by GPUfreeze(), so it must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 794
// Savestate entry point: type 1 saves the GPU state into *freeze,
// type 0 restores from it. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay the control registers so derived state (screen geometry
    // etc.) is recomputed; the xor defeats the no-change early-out in
    // GPUwriteStatus()
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}
828
// Per-frame presentation hook: flush pending work, handle blanking, and
// push a new frame to the video backend unless frameskip suppresses it.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank once on the blanking edge, then do nothing until unblanked
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // give up on the skipped frame after ~9 frames without a flip
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
863
// Vblank hook: decide whether interlaced rendering should be used and
// inform the renderer of the current field (lcf) when it changes.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    // flush before switching so queued work renders in the old mode
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
885
886#include "../../frontend/plugin_lib.h"
887
// Frontend configuration hook: copy the rearmed callback/option set into
// gpu state and propagate relevant settings to the backends.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // only recompute screen geometry when a centering option changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
922
1ab64c54 923// vim:shiftwidth=2:expandtab