/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 *  See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h> /* for calloc */

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) /* must still expand to the condition on non-GCC builds */
#define preload(...)
#define noinline
#endif

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

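/* A worked example of the width computation below (numbers illustrative,
 * not from the original source): the display range regs x1/x2 count GPU
 * clock ticks, of which roughly 2560 cover a full scanline of video.
 * With hres = 320 and x2 - x1 = 2560 the full 320 pixels are shown; a
 * game programming x2 - x1 = 1280 would get w = 1280 * 320 / 2560 = 160. */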
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    sh *= 2;
  if (sh <= 0 || sh > gpu.screen.vres)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

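/* Frameskip decision, as the code below implements it: a skip can be
 * requested three ways - unconditionally via the *force flag, by
 * frontend advice when emulation falls behind, or as a fixed 1-of-N
 * pattern (frameskip.set). A fill (cmd 0x02) captured during a skipped
 * frame is replayed via pending_fill on the next drawn frame so the
 * screen clear is not lost. */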
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

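/* The unsigned compares below form a branchless range check: casting
 * (x - gpu.screen.x) to uint32_t makes coordinates left of the display
 * area wrap to huge values, so a single >= test rejects both
 * out-of-range sides at once. E.g. x = 5, screen.x = 100:
 * (uint32_t)(5 - 100) = 0xffffffa1, which fails just like x = 5000 would. */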
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

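/* GP1(0x10) "get GPU info": cases 2..5 return the saved 0xe2..0xe5
 * state (texture window, draw area, draw offset) from ex_regs, case 6
 * mirrors the draw offset, and case 7 reports GPU version 2; other
 * queries read back as 0 in this implementation. */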
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero platform (MIPS), align to 8192 bytes (1 TLB entry)
  // to reduce the number of TLB fills. (Change this value if the kernel
  // ever gets large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

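/* Sketch of the allocation layout set up below: the block is
 * over-allocated by VRAM_ALIGN-1 bytes, a 4 KB guard region is skipped
 * at the front (4096/2 uint16_t elements), and the pointer is rounded
 * up with the usual (p + (A-1)) & ~(A-1) trick - e.g. p = 0x1004,
 * A = 16 yields 0x1010. Freeing must always go through vram_ptr_orig,
 * never the adjusted gpu.vram pointer. */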
#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int allocate_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}

// map_vram() is used by the delayed-mapping path in GPUrearmedCallbacks();
// without mmap it is the same calloc-based allocation as above
static int map_vram(void)
{
  return allocate_vram();
}
#endif

long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;         // in psxcounters.cpp
  //extern uint32_t frame_counter;      // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  //senquack TODO: Would it be wise to add cmd buffer flush here, since
  // status settings can affect commands already in buffer?

  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1)
        gpu.status |= PSX_GPU_STATUS_BLANKING;
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status >> 19) & 3];
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

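/* cmd_lengths[c] holds the number of parameter words following command
 * word c, so a packet spans 1 + cmd_lengths[c] words. Variable-length
 * poly-lines (0x48..0x5f) only list their minimum size here and are
 * extended until the 0x5xxx5xxx terminator by the parsers; vram i/o
 * commands (0xa0/0xc0) carry xy/wh words and then stream image data. */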
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

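/* do_vram_io() moves FIFO words to/from vram in three phases: finish a
 * partially transferred row (dma.offset != 0), bulk-copy whole rows,
 * then begin a trailing partial row. count arrives in 32-bit words,
 * i.e. two 16bpp pixels each, hence the "count *= 2" below; y wraps at
 * 512 lines just like the real 1024x512 vram. */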
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

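/* The size decoding below follows the hardware quirk that a written
 * size of 0 means maximum: ((size - 1) & 0x3ff) + 1 maps 0 -> 1024
 * columns, and the same trick maps a 0 height to 512 rows, while
 * non-zero values pass through unchanged (e.g. 640 -> 640). */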
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

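/* Skip-mode parser: walks the command stream without drawing, but still
 * tracks everything with side effects beyond the frame - 0xe1..0xe6
 * state writes, texture page bits from textured primitives, and fills
 * that clear more than the visible area. It stops skipping as soon as
 * a 0xe3 draw-area write targets the displayed region. */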
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

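/* do_cmd_buffer() returns the number of words it could NOT consume;
 * callers either keep them buffered (flush_cmd_buffer) or log them as
 * discarded. The parsers report cmd == -1 for a packet that is only
 * partially present, which stops processing with the tail left
 * unconsumed for the next call. */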
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

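/* DMA linked-list format, as consumed below: each node starts with a
 * header word holding the payload length in the top 8 bits and the
 * address of the next node in the low 24; a next-address with bit23 set
 * (conventionally 0xffffff) ends the list. A hypothetical header
 * 0x03201234 means 3 payload words, next node at ram offset
 * 0x201234 & 0x1fffff. Because bit23 causes a DMA error on real
 * hardware, the code can safely borrow it as a visited-marker to catch
 * looping lists once LD_THRESHOLD nodes have been walked. */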
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

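/* Per-frame presentation hook: while blanking is set, the output is
 * cleared once and fb_dirty raised so the first unblanked frame
 * repaints. Under frameskip, a frame that is not ready is still forced
 * out once 9 or more frames have passed since the last flip, so the
 * display can never stall entirely. */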
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = &cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.useDithering = cbs->gpu_neon.allow_dithering;
  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
