more timing hacks
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) GraÅžvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
12367ad0 13#include <stdlib.h> /* for calloc */
14
56f08d83 15#include "gpu.h"
1ab64c54
GI
16
17#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 18#ifdef __GNUC__
d30279e2 19#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 20#define preload __builtin_prefetch
8dd855cd 21#define noinline __attribute__((noinline))
8f5f2dd5 22#else
23#define unlikely(x)
24#define preload(...)
25#define noinline
8f5f2dd5 26#endif
1ab64c54 27
deb18d24 28#define gpu_log(fmt, ...) \
3ece2f0c 29 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 30
31//#define log_io gpu_log
56f08d83 32#define log_io(...)
9394ada5 33//#define log_anomaly gpu_log
34#define log_anomaly(...)
56f08d83 35
9ee0fd5b 36struct psx_gpu gpu;
1ab64c54 37
48f3d210 38static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 39static void finish_vram_transfer(int is_read);
48f3d210 40
// Abort the GP0 command stream (GP1(01h) "reset command buffer" and full
// reset). Any buffered words are processed first so state stays coherent,
// and an unfinished VRAM transfer is completed for the renderer's sake.
static noinline void do_cmd_reset(void)
{
  // make sure the renderer thread is not using the data we are about to drop
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // close out a half-done VRAM dma so renderer caches get updated
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
53
6e9bdaef 54static noinline void do_reset(void)
1ab64c54 55{
7841712d 56 unsigned int i;
5b568098 57
48f3d210 58 do_cmd_reset();
59
6e9bdaef 60 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 61 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
62 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 63 gpu.status = 0x14802000;
6e9bdaef 64 gpu.gp0 = 0;
fc84f618 65 gpu.regs[3] = 1;
6e9bdaef 66 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 67 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
68}
69
8dd855cd 70static noinline void update_width(void)
71{
72 int sw = gpu.screen.x2 - gpu.screen.x1;
73 if (sw <= 0 || sw >= 2560)
74 // full width
75 gpu.screen.w = gpu.screen.hres;
76 else
77 gpu.screen.w = sw * gpu.screen.hres / 2560;
78}
79
80static noinline void update_height(void)
81{
74df5906 82 // TODO: emulate this properly..
8dd855cd 83 int sh = gpu.screen.y2 - gpu.screen.y1;
61124a6d 84 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
8dd855cd 85 sh *= 2;
74df5906 86 if (sh <= 0 || sh > gpu.screen.vres)
8dd855cd 87 sh = gpu.screen.vres;
88
89 gpu.screen.h = sh;
90}
91
// Decide whether the next frame should be skipped, once per flip.
// Priority: external force flag > frontend advice > fixed 1-in-N setting.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  // fixed setting: skip until cnt reaches the configured run length
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (GP0 02h) deferred by do_cmd_list_skip must be replayed now
  // that we are rendering again
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
118
// Returns nonzero if skipping is allowed given the current draw area
// (from the e3 command word): skipping is unsafe when drawing targets the
// displayed region, except in interlace mode where that is the norm.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // unsigned compare doubles as a "left/above of display" check
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
130
6e9bdaef 131static noinline void get_gpu_info(uint32_t data)
132{
133 switch (data & 0x0f) {
134 case 0x02:
135 case 0x03:
136 case 0x04:
6e9bdaef 137 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
138 break;
08b33377 139 case 0x05:
140 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 141 break;
142 case 0x07:
143 gpu.gp0 = 2;
144 break;
145 default:
08b33377 146 // gpu.gp0 unchanged
6e9bdaef 147 break;
148 }
149}
150
9ee0fd5b 151// double, for overdraw guard
12367ad0 152#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
153
154// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
155// renderer/downscaler it uses in high res modes:
156#ifdef GCW_ZERO
157 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
158 // fills. (Will change this value if it ever gets large page support)
159 #define VRAM_ALIGN 8192
160#else
161 #define VRAM_ALIGN 16
162#endif
163
164// vram ptr received from mmap/malloc/alloc (will deallocate using this)
165static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 166
12367ad0 167#ifdef GPULIB_USE_MMAP
9ee0fd5b 168static int map_vram(void)
169{
12367ad0 170 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
9ee0fd5b 171 if (gpu.vram != NULL) {
12367ad0 172 // 4kb guard in front
173 gpu.vram += (4096 / 2);
174 // Align
175 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 176 return 0;
177 }
178 else {
179 fprintf(stderr, "could not map vram, expect crashes\n");
180 return -1;
181 }
182}
12367ad0 183#else
184static int map_vram(void)
185{
186 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
187 if (gpu.vram != NULL) {
188 // 4kb guard in front
189 gpu.vram += (4096 / 2);
190 // Align
191 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
192 return 0;
193 } else {
194 fprintf(stderr, "could not allocate vram, expect crashes\n");
195 return -1;
196 }
197}
198
// Allocate VRAM for the non-mmap build. This was a byte-for-byte copy of
// the calloc-based map_vram() above; delegate instead of duplicating the
// guard/alignment logic so future fixes only happen in one place.
static int allocate_vram(void)
{
  return map_vram();
}
213#endif
9ee0fd5b 214
// Plugin entry point: allocate VRAM (non-mmap builds only; mmap builds
// defer to GPUrearmedCallbacks), init video-out and renderer, reset state.
// Returns 0 on success, nonzero on init failure.
long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount; // in psxcounters.cpp
  //extern uint32_t frame_counter; // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  // point counters at a dummy zero until the frontend provides real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  // mmap-based vram mapping is delayed until callbacks are registered
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
247
// Plugin teardown: stop the renderer, close video-out, release VRAM via
// whichever mechanism allocated it. Returns vout_finish()'s result.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  // free the original (unadjusted) pointer, not the aligned gpu.vram
  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
266
1ab64c54
GI
// GP1 control-port write: dispatch on the command byte (data >> 24).
// Updates screen/display state and latches the raw value in gpu.regs[].
void GPUwriteStatus(uint32_t data)
{
  //senquack TODO: Would it be wise to add cmd buffer flush here, since
  // status settings can affect commands already in buffer?

  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // ignore redundant writes, except for reset/cmd-reset/display-start
    // which have side effects even when repeated
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:            // full reset
    do_reset();
    break;
  case 0x01:            // reset command buffer
    do_cmd_reset();
    break;
  case 0x03:            // display enable/blank
    if (data & 1)
      gpu.status |= PSX_GPU_STATUS_BLANKING;
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:            // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:            // display start address (a "flip")
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x1ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // only re-decide once per emulated frame
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:            // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:            // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:            // display mode
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status >> 19) & 3];
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)   // GP1(10h..1Fh): get GPU info
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
340
// Number of parameter words following each GP0 command word.
// (Total command length = 1 + cmd_lengths[cmd].)
// Variable-length commands (polylines 0x48+, image i/o 0xa0/0xc0) carry
// only their fixed header size here; the parsers handle the rest.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
360
d30279e2
GI
361#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
362
// Copy one span of `l` 16bpp pixels between VRAM at (x, y) and `mem`;
// direction is VRAM->mem when is_read is set, mem->VRAM otherwise.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  uint16_t *dst = is_read ? mem : vram;
  uint16_t *src = is_read ? vram : mem;
  memcpy(dst, src, l * 2);
}
371
// Stream `count` words between the CPU and the active VRAM transfer
// rectangle, resuming at gpu.dma.offset within the current row.
// Returns the number of 32-bit words consumed; updates gpu.dma so the
// transfer can continue across multiple calls.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish a partially transferred row from a previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;          // row still not complete
    else {
      o = 0;           // row done, move to the next one
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows; y wraps at the 512-line VRAM height
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    // transfer not finished; stash any partial trailing row
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
423
// Begin a GP0 VRAM image transfer (write 0xa0.. or read 0xc0..).
// Decodes position/size words (1..1024 x 1..512 after wraparound
// adjustment) and, for reads, pre-latches the first word into gp0.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size 0 means maximum (1024/512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;   // snapshot for finish_vram_transfer()

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
448
05740673 449static void finish_vram_transfer(int is_read)
450{
451 if (is_read)
61124a6d 452 gpu.status &= ~PSX_GPU_STATUS_IMG;
05740673 453 else
454 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
455 gpu.dma_start.w, gpu.dma_start.h);
456}
457
// Walk a GP0 command list while frameskip is active, executing only what
// must not be lost (fills, texture page state, e-register updates) and
// skipping actual drawing. Stops when skipping becomes disallowed, on an
// incomplete command (cmd = -1), or at image i/o. Returns words consumed
// and the last command via *last_cmd.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:   // fill rectangle
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          // small fill: defer until frameskip ends (see decide_frameskip)
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:   // textured polygons: track texpage from the
      case 0x2c ... 0x2f:   // in-command attribute word so ex_regs[1]
      case 0x34 ... 0x37:   // stays correct while skipping
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:   // polyline: scan for the 0x5000,5000 terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:   // shaded polyline: two words per vertex
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)    // draw area change may end the skip
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
523
// Main GP0 word processor: feeds an active VRAM transfer, starts new
// image i/o commands, and hands drawing commands to the renderer (or the
// skip parser while frameskipping). Returns the number of words NOT
// consumed (left for the caller to buffer).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror texpage (e1) and mask (e6) bits into the status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
578
// Process the accumulated command buffer; any incomplete trailing command
// is moved to the front and kept for the next call.
static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}
586
587void GPUwriteDataMem(uint32_t *mem, int count)
588{
d30279e2
GI
589 int left;
590
56f08d83 591 log_io("gpu_dma_write %p %d\n", mem, count);
592
d30279e2
GI
593 if (unlikely(gpu.cmd_len > 0))
594 flush_cmd_buffer();
56f08d83 595
48f3d210 596 left = do_cmd_buffer(mem, count);
d30279e2 597 if (left)
56f08d83 598 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
599}
600
d30279e2 601void GPUwriteData(uint32_t data)
1ab64c54 602{
56f08d83 603 log_io("gpu_write %08x\n", data);
db215a72 604 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
605 if (gpu.cmd_len >= CMD_BUFFER_LEN)
606 flush_cmd_buffer();
1ab64c54
GI
607}
608
// Walk a linked-list DMA chain in PSX RAM, executing each packet's GP0
// words. Returns an estimated cycle cost. Infinite chains are caught by
// temporarily tagging visited headers with bit 23 (which terminates the
// walk) and untagging them afterwards.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit23 = end marker
  {
    list = rambase + (addr & 0x1fffff) / 4;
    // header word: next address (low 24) + payload word count (high 8)
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    // a previous packet left an incomplete command buffered; append and retry
    if (unlikely(gpu.cmd_len > 0)) {
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // save the unconsumed tail for the next packet
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;   // remember where marking started
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
687
d30279e2
GI
688void GPUreadDataMem(uint32_t *mem, int count)
689{
56f08d83 690 log_io("gpu_dma_read %p %d\n", mem, count);
691
d30279e2
GI
692 if (unlikely(gpu.cmd_len > 0))
693 flush_cmd_buffer();
56f08d83 694
d30279e2
GI
695 if (gpu.dma.h)
696 do_vram_io(mem, count, 1);
697}
698
// Single GP0 read: returns the latched gp0 value, or the next word of an
// active VRAM->CPU transfer (converted to/from LE around do_vram_io,
// which works on raw VRAM bytes).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
716
717uint32_t GPUreadStatus(void)
718{
ddd56f6e 719 uint32_t ret;
56f08d83 720
d30279e2
GI
721 if (unlikely(gpu.cmd_len > 0))
722 flush_cmd_buffer();
723
61124a6d 724 ret = gpu.status;
ddd56f6e 725 log_io("gpu_read_status %08x\n", ret);
726 return ret;
d30279e2
GI
727}
728
// Savestate image exchanged with the emulator core (layout is fixed by
// the external freeze interface — do not reorder fields).
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 736
// Save (type 1) or load (type 0) GPU state. On load, control registers
// are replayed through GPUwriteStatus with a toggled-value trick so the
// redundant-write filter doesn't suppress them. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs are stashed past the control regs, at offset 0xe0
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    // whole VRAM was replaced, invalidate everything
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}
770
// Per-vsync output update: flush pending work, handle display blanking,
// apply frameskip gating, and present the frame if anything changed.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank the output once; mark fb dirty so unblanking repaints
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // give up skipping if no frame was produced for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
805
// vblank hook: decide whether interlaced rendering should be used and
// tell the renderer which field (lcf) comes next.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  // notify on every interlaced frame, and once when interlace turns off
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
827
828#include "../../frontend/plugin_lib.h"
829
// Receive frontend configuration/callbacks: frameskip knobs, shared
// counters, allocator hooks. Also performs the delayed VRAM mapping for
// mmap builds and re-pushes config to the renderer/video-out.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = &cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.useDithering = cbs->gpu_neon.allow_dithering;
  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap (GPUinit could not map before these hooks existed)
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
856
1ab64c54 857// vim:shiftwidth=2:expandtab