/* plugins/gpulib/gpu.c */
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 *  See the COPYING file in the top-level directory.
 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
1ab64c54
GI
17
18#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f5f2dd5 19#ifdef __GNUC__
d30279e2 20#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 21#define preload __builtin_prefetch
8dd855cd 22#define noinline __attribute__((noinline))
8f5f2dd5 23#else
24#define unlikely(x)
25#define preload(...)
26#define noinline
8f5f2dd5 27#endif
1ab64c54 28
deb18d24 29//#define log_io gpu_log
56f08d83 30#define log_io(...)
56f08d83 31
9ee0fd5b 32struct psx_gpu gpu;
1ab64c54 33
48f3d210 34static noinline int do_cmd_buffer(uint32_t *data, int count);
05740673 35static void finish_vram_transfer(int is_read);
48f3d210 36
// Abort command processing: drain any buffered command words and
// close out an in-flight VRAM transfer (GP1(0x01) and full-reset path).
static noinline void do_cmd_reset(void)
{
  // wait for the (possibly threaded) renderer before touching shared state
  renderer_sync();

  // flush whatever is still queued so state stays consistent
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  // terminate an unfinished CPU<->VRAM image transfer, if any
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
49
// GP1(0x00): full GPU reset - registers, status, and display geometry
// return to their power-on defaults.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // e0..e7 command echo registers: preload with their own command byte
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // power-on status value
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_notify_res_change();
}
67
// Recompute horizontal display size/position from the x1/x2 display
// range registers and the dot-clock bits in the status register,
// applying the configured screen-centering policy.
static noinline void update_width(void)
{
  // resolution and clock divider tables indexed by status bits 16-18
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      // keep the raw computed position
      break;
    case 2:
      // user-specified centering
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
112
// Recompute vertical display size/position from the y1/y2 display range
// registers, PAL flag, and the interlace double-height bit, applying
// the configured screen-centering policy.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      // keep the raw computed position
      break;
    case 2:
      // user-specified centering
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
150
// Decide whether the upcoming frame should be skipped, combining the
// forced-skip flag, frontend advice, and the configured skip pattern.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill deferred while skipping is executed now that this
  // frame will actually be rendered
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
177
// Update gpu.frameskip.allow from drawing-area command e3: skipping is
// only allowed when drawing lands outside the displayed region.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
189
6e9bdaef 190static noinline void get_gpu_info(uint32_t data)
191{
192 switch (data & 0x0f) {
193 case 0x02:
194 case 0x03:
195 case 0x04:
6e9bdaef 196 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
197 break;
08b33377 198 case 0x05:
199 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 200 break;
201 case 0x07:
202 gpu.gp0 = 2;
203 break;
204 default:
08b33377 205 // gpu.gp0 unchanged
6e9bdaef 206 break;
207 }
208}
209
9ee0fd5b 210// double, for overdraw guard
12367ad0 211#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
212
213// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
214// renderer/downscaler it uses in high res modes:
215#ifdef GCW_ZERO
216 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
217 // fills. (Will change this value if it ever gets large page support)
218 #define VRAM_ALIGN 8192
219#else
220 #define VRAM_ALIGN 16
221#endif
222
223// vram ptr received from mmap/malloc/alloc (will deallocate using this)
224static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 225
12367ad0 226#ifdef GPULIB_USE_MMAP
// Obtain VRAM through the frontend-provided mmap callback, placing a
// 4kb guard area in front and aligning as gpu_unai requires.
// Returns 0 on success, -1 on failure (gpu.vram left NULL).
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
12367ad0 242#else
243static int map_vram(void)
244{
245 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
246 if (gpu.vram != NULL) {
247 // 4kb guard in front
248 gpu.vram += (4096 / 2);
249 // Align
250 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
251 return 0;
252 } else {
253 fprintf(stderr, "could not allocate vram, expect crashes\n");
254 return -1;
255 }
256}
257
258static int allocate_vram(void)
259{
260 gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
261 if (gpu.vram != NULL) {
262 // 4kb guard in front
263 gpu.vram += (4096 / 2);
264 // Align
265 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
266 return 0;
267 } else {
268 fprintf(stderr, "could not allocate vram, expect crashes\n");
269 return -1;
270 }
271}
272#endif
9ee0fd5b 273
// Plugin entry point: allocate VRAM (non-mmap builds), initialize the
// video-out backend and renderer, and reset all GPU state.
// Returns 0 on success, non-zero if a sub-init failed.
long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount; // in psxcounters.cpp
  //extern uint32_t frame_counter; // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  // counters default to a dummy zero location until the frontend
  // provides real ones via GPUrearmedCallbacks()
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
308
// Tear down the renderer and video-out backend and release VRAM
// obtained by map_vram()/allocate_vram(). Returns vout_finish()'s result.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  // free via the original (unaligned, pre-guard) pointer
  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
327
1ab64c54
GI
// GP1 control-port write: dispatch on the command byte (data >> 24).
// Handles reset, display enable/position/range/mode, and info queries.
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip redundant writes, except for cmds 0, 1 and 5 which have
    // side effects even when repeated
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    // full reset
    do_reset();
    break;
  case 0x01:
    // command/FIFO reset
    do_cmd_reset();
    break;
  case 0x03:
    // display blanking on/off
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    // display start address (scanout origin in VRAM)
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // treat this as a "flip" - at most one skip decision per frame
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // display mode: resolution / PAL / interlace bits
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
397
// Number of extra argument words following each GP0 command word
// (total command length is 1 + cmd_lengths[cmd]). The variable-length
// polyline commands (0x48.., 0x58..) are extended at runtime by the
// command scanners until the terminator word is found.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
417
d30279e2
GI
418#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
419
// Copy one scanline segment of l 16-bit pixels between VRAM at (x, y)
// and the client buffer; direction is chosen by is_read.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  uint16_t *dst = is_read ? mem : vram;
  uint16_t *src = is_read ? vram : mem;
  memcpy(dst, src, l * 2);
}
428
// Move up to 'count' 32-bit words between the client buffer and the
// active VRAM transfer rectangle, resuming a partially-transferred
// line if needed. Returns the number of words actually consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish the line left incomplete by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // transfer whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    // partial trailing line: remember the offset for the next call
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
480
// Begin a CPU<->VRAM image transfer. pos_word/size_word are the GP0
// command's position and size arguments; is_read selects VRAM->CPU.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // size of 0 wraps to the maximum (1024 x 512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  // keep the initial rect for finish_vram_transfer()'s cache update
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
505
05740673 506static void finish_vram_transfer(int is_read)
507{
508 if (is_read)
61124a6d 509 gpu.status &= ~PSX_GPU_STATUS_IMG;
05740673 510 else
511 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 512 gpu.dma_start.w, gpu.dma_start.h, 0);
05740673 513}
514
// Scan a command list while frameskip is active: keep state-affecting
// commands up to date without drawing. Stops when a command must be
// drawn (skip becomes 0) or at image i/o. Returns words consumed;
// *last_cmd receives the last command seen (-1 if it was incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        // small fill: defer until a frame is actually drawn
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      // textured primitives carry texpage bits; mirror them into e1
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      // variable-length polyline: scan for the terminator word
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      // shaded polyline: vertices come in (color, coord) pairs
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
580
// Core GP0 dispatcher: consume up to 'count' words from 'data', routing
// them to VRAM i/o, the frameskip scanner, or the renderer. Returns the
// number of words NOT consumed (an incomplete trailing command).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // feed an active CPU->VRAM transfer first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror e1/e6 register bits into the live status register
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
635
5440b88e 636static void flush_cmd_buffer(void)
d30279e2 637{
48f3d210 638 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
d30279e2
GI
639 if (left > 0)
640 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
641 gpu.cmd_len = left;
1ab64c54
GI
642}
643
644void GPUwriteDataMem(uint32_t *mem, int count)
645{
d30279e2
GI
646 int left;
647
56f08d83 648 log_io("gpu_dma_write %p %d\n", mem, count);
649
d30279e2
GI
650 if (unlikely(gpu.cmd_len > 0))
651 flush_cmd_buffer();
56f08d83 652
48f3d210 653 left = do_cmd_buffer(mem, count);
d30279e2 654 if (left)
56f08d83 655 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
656}
657
d30279e2 658void GPUwriteData(uint32_t data)
1ab64c54 659{
56f08d83 660 log_io("gpu_write %08x\n", data);
db215a72 661 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
662 if (gpu.cmd_len >= CMD_BUFFER_LEN)
663 flush_cmd_buffer();
1ab64c54
GI
664}
665
// Walk a linked DMA command chain in PSX RAM, executing each packet.
// Detects infinite chains by marking visited headers with bit 23 (and
// unmarking them afterwards). If progress_addr is given, processes one
// packet and reports the next address. Returns emulated cpu cycles.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    // header word: payload length in the top byte, next address below
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    // leftover words from a previous packet: append and flush together
    if (unlikely(gpu.cmd_len > 0)) {
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        // buffer the incomplete tail for the next packet
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      // unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
744
d30279e2
GI
745void GPUreadDataMem(uint32_t *mem, int count)
746{
56f08d83 747 log_io("gpu_dma_read %p %d\n", mem, count);
748
d30279e2
GI
749 if (unlikely(gpu.cmd_len > 0))
750 flush_cmd_buffer();
56f08d83 751
d30279e2
GI
752 if (gpu.dma.h)
753 do_vram_io(mem, count, 1);
754}
755
756uint32_t GPUreadData(void)
757{
9e146206 758 uint32_t ret;
56f08d83 759
760 if (unlikely(gpu.cmd_len > 0))
761 flush_cmd_buffer();
762
9e146206 763 ret = gpu.gp0;
495d603c
PC
764 if (gpu.dma.h) {
765 ret = HTOLE32(ret);
9e146206 766 do_vram_io(&ret, 1, 1);
495d603c
PC
767 ret = LE32TOH(ret);
768 }
56f08d83 769
9e146206 770 log_io("gpu_read %08x\n", ret);
771 return ret;
d30279e2
GI
772}
773
774uint32_t GPUreadStatus(void)
775{
ddd56f6e 776 uint32_t ret;
56f08d83 777
d30279e2
GI
778 if (unlikely(gpu.cmd_len > 0))
779 flush_cmd_buffer();
780
61124a6d 781 ret = gpu.status;
ddd56f6e 782 log_io("gpu_read_status %08x\n", ret);
783 return ret;
d30279e2
GI
784}
785
// Savestate image exchanged with the emulator core via GPUfreeze().
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 793
// Save (type 1) or load (type 0) full GPU state: VRAM image, control
// registers, echo registers and status. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // e0..e7 echo registers are stored at slot 0xe0 of ulControl
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 8..1 to rebuild derived screen state
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}
827
// Per-frame display update: flush pending work, handle blanking and
// frameskip bookkeeping, then present the frame through vout_update().
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank exactly once per blanking period
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // stop skipping if nothing was presented for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement just turned on: renderer caches are stale
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
865
// VBlank notification from the core: decide whether interlaced
// rendering should be enabled and pass field changes to the renderer.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
887
888#include "../../frontend/plugin_lib.h"
889
// Accept updated frontend configuration and callbacks (frameskip
// settings, counters, screen centering, vram mmap hooks) from the
// rearmed core, and perform the delayed VRAM mapping if needed.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  // recompute display geometry only when centering config changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
924
1ab64c54 925// vim:shiftwidth=2:expandtab