gpu: rework dma vs busy timing
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
8f8ade9c 21#ifndef ARRAY_SIZE
1ab64c54 22#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f8ade9c 23#endif
8f5f2dd5 24#ifdef __GNUC__
d30279e2 25#define unlikely(x) __builtin_expect((x), 0)
8f5f2dd5 26#define preload __builtin_prefetch
8dd855cd 27#define noinline __attribute__((noinline))
8f5f2dd5 28#else
29#define unlikely(x)
30#define preload(...)
31#define noinline
8f5f2dd5 32#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
// GP1(0x01): clear the command FIFO and abort any in-flight VRAM transfer.
// Buffered words are executed first (not dropped) so parser state stays sane.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();  // wait for the async renderer before touching shared state
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  // abort a half-done vram read/write, notifying the renderer/core
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
55
// GP1(0x00): full GPU reset - registers, status, screen mode, e0-e7 state.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs hold the last e0..e7 command words; reset payload to zero
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;  // power-on status: display disabled, idle
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
74
// Recompute displayed width/x position from the GP1(0x06) horizontal display
// range and the hres bits of the status register, applying the configured
// screen centering policy. Sets gpu.screen.{x,w,hres} and dims_changed.
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;  // display range in gpu clocks
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
122
// Recompute displayed height/y position from the GP1(0x07) vertical display
// range, with PAL/double-height handling and the centering policy.
// Sets gpu.screen.{y,h,vres} and dims_changed.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height mode: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
163
// Decide whether the next frame should be skipped. Priority: forced skip,
// frontend advice, then the configured 1-in-N pattern. Flushes a deferred
// fill command if we just stopped skipping so the screen isn't left stale.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    // execute the fill that was withheld while skipping
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
190
b243416b 191static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 192{
193 // no frameskip if it decides to draw to display area,
194 // but not for interlace since it'll most likely always do that
195 uint32_t x = cmd_e3 & 0x3ff;
196 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 197 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 198 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
199 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 200 return gpu.frameskip.allow;
9fe27e25 201}
202
01ff3105 203static void flush_cmd_buffer(void);
204
// GP1(0x10..0x1f): latch "GPU info" into the GP0 read port (gpu.gp0).
static noinline void get_gpu_info(uint32_t data)
{
  // apply buffered commands first so ex_regs are current
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:  // texture window
  case 0x03:  // draw area top-left
  case 0x04:  // draw area bottom-right
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:  // draw offset
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:  // GPU type/version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
226
5bd33f52 227#ifndef max
228#define max(a, b) (((a) > (b)) ? (a) : (b))
229#endif
12367ad0 230
231// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
232// renderer/downscaler it uses in high res modes:
233#ifdef GCW_ZERO
234 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
235 // fills. (Will change this value if it ever gets large page support)
236 #define VRAM_ALIGN 8192
237#else
238 #define VRAM_ALIGN 16
239#endif
240
5bd33f52 241// double, for overdraw guard + at least 1 page before
242#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
243
12367ad0 244// vram ptr received from mmap/malloc/alloc (will deallocate using this)
245static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 246
e34ef5ac 247#ifndef GPULIB_USE_MMAP
248# ifdef __linux__
249# define GPULIB_USE_MMAP 1
250# else
251# define GPULIB_USE_MMAP 0
252# endif
253#endif
// Allocate/map the emulated VRAM (VRAM_SIZE includes overdraw guard space).
// The returned pointer is kept in vram_ptr_orig for freeing; gpu.vram is
// advanced past a 4kb guard area and rounded up to VRAM_ALIGN.
// Returns 0 on success, -1 on failure (gpu.vram stays unusable).
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  // mmap-style callbacks may return (void *)-1 on failure, check both
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
273
// Plugin entry point: initialize video out, renderer and gpulib state.
// Returns 0 on success, non-zero on failure (vout/renderer codes OR-ed).
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // point external counters at a dummy until the frontend provides real ones
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // vram mapping is deferred to GPUrearmedCallbacks() when callbacks arrive
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
294
// Plugin entry point: tear down renderer/vout and release VRAM.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  // free using the original (unaligned) pointer, not the adjusted gpu.vram
  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
313
1ab64c54
GI
// GP1 (control) register write: dispatch on the command byte (data >> 24).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip no-op rewrites, except for commands with side effects (reset,
    // cmd-reset, display address which is handled below)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:  // reset GPU
    do_reset();
    break;
  case 0x01:  // reset command buffer
    do_cmd_reset();
    break;
  case 0x03:  // display enable/disable
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:  // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:  // display start address (scanout position in vram)
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // a scanout change is treated as a "flip": at most one
        // frameskip decision per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:  // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:  // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:  // display mode (resolution / PAL / interlace bits)
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)  // 0x10..0x1f: GPU info query
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
387
// Number of extra parameter words following each GP0 command word;
// total packet length = 1 + cmd_lengths[cmd]. Variable-length commands
// (poly-/line-strips 0x48+, vram i/o 0xa0+) are special-cased by the parsers.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
407
d30279e2
GI
408#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
409
36da9c13 410static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
411{
412 int i;
413 for (i = 0; i < l; i++)
414 dst[i] = src[i] | msb;
415}
416
// Transfer one (partial) VRAM scanline at (x, y), l pixels long.
// is_read: vram -> mem; otherwise mem -> vram, OR-ing in the mask bit
// when msb is nonzero (GP0(0xe6) mask-set mode).
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
428
// Advance the active VRAM transfer by up to count 32-bit words.
// Handles resuming a partially-transferred line (gpu.dma.offset), then whole
// lines, then a trailing partial line; completes the transfer when the last
// line is done. Returns how many words were consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;  // mask-set bit from GP0(0xe6)
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    // finish the line left incomplete by the previous call
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      // trailing partial line; remember progress in dma.offset
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
481
// Begin a GP0 0xa0/0xc0 VRAM write/read: decode position/size words,
// record transfer state in gpu.dma (+ a copy in gpu.dma_start for later
// cache invalidation), and pre-latch the first word for reads.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  // 0 encodes the maximum size (1024x512)
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
508
05740673 509static void finish_vram_transfer(int is_read)
510{
511 if (is_read)
61124a6d 512 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 513 else {
514 gpu.state.fb_dirty = 1;
05740673 515 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 516 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 517 }
1328fa32 518 if (gpu.gpu_state_change)
519 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
05740673 520}
521
// GP0(0x80): vram-to-vram rectangle copy with wrap-around semantics.
// Uses a fast per-line memcpy when regions can't overlap and no mask bit is
// set; otherwise goes through a bounce buffer with per-pixel wrapping.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;   // 0 means max size
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;  // mask-set bit
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;  // copy onto itself with no mask change is a no-op

  renderer_flush_queues();

  // slow path: forward overlap, horizontal wrap, or mask bit involved
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        // stage through lbuf so an overlapping dst can't clobber src
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
567
// Parse a GP0 command list WITHOUT rendering (frameskip path). Still tracks
// state that must stay correct while skipping: e0-e7 registers, texture page
// bits, and deferred screen fills. Stops as soon as skipping becomes
// disallowed or an image i/o command is hit. Returns words consumed; *last_cmd
// is the last command seen, or -1 for an incomplete trailing command.
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          // small fill: defer until skipping stops (see decide_frameskip)
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:  // textured polygons: keep texpage bits current
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:  // poly-lines: scan for the 0x5xxx5xxx terminator
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:  // gouraud poly-lines (2 words per vertex)
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)  // draw offset may end the skip window
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
633
// Central GP0 dispatcher: consume up to count words from data, routing them
// to VRAM i/o, vram copy, the renderer, or the frameskip parser. Accumulates
// emulated GPU cycles into *cycles_sum/*cycles_last. Returns the number of
// words NOT consumed (an incomplete trailing command stays buffered).
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active vram *write* swallows the data stream first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {  // vram-to-vram copy
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror texpage/mask state into the status register
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
706
// Execute whatever is queued in gpu.cmd_buffer; any incomplete trailing
// command is moved to the front of the buffer for the next call.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed: tell the core drawing has started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
719
// Block GP0 write (DMA-style): feed count words directly to the dispatcher.
// Unlike the chain path, leftovers can't be kept, so they are discarded.
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
733
d30279e2 734void GPUwriteData(uint32_t data)
1ab64c54 735{
56f08d83 736 log_io("gpu_write %08x\n", data);
db215a72 737 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
738 if (gpu.cmd_len >= CMD_BUFFER_LEN)
739 flush_cmd_buffer();
1ab64c54
GI
740}
741
// Walk a GP0 DMA linked list starting at start_addr, executing each packet.
// Detects list loops by temporarily setting bit23 in visited headers (and
// undoing it afterwards). If progress_addr is non-NULL, processes only one
// packet and reports the next address there. Returns accumulated GPU cycles;
// the cost of the last (possibly still "busy") command goes to
// *cycles_last_cmd.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
  uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)  // bit23 = end marker
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;       // payload words in this packet
    addr = LE32TOH(list[0]) & 0xffffff; // next packet
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      // a previous packet left an incomplete command; append and retry
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        // keep the incomplete tail for the next packet
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      // incremental mode: one packet at a time
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
828
d30279e2
GI
// Block GP0 read (DMA-style): fill mem with count words from an active
// VRAM read transfer; does nothing if no transfer is in progress.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
839
// Single-word GP0 read: returns the latched gp0 value, or the next word of
// an active VRAM read transfer (endian-converted both ways for do_vram_io).
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
857
858uint32_t GPUreadStatus(void)
859{
ddd56f6e 860 uint32_t ret;
56f08d83 861
d30279e2
GI
862 if (unlikely(gpu.cmd_len > 0))
863 flush_cmd_buffer();
864
61124a6d 865 ret = gpu.status;
ddd56f6e 866 log_io("gpu_read_status %08x\n", ret);
867 return ret;
d30279e2
GI
868}
869
// Savestate layout shared with the PSEmu Pro style frontend interface.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];    // current VRam image (full 2 MB for ZN)
};
1ab64c54 877
// Savestate entry point: type 1 = save into freeze, type 0 = load from it.
// Always returns 1 (success), matching the PSEmu Pro convention.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 1..8 so derived state (screen dims etc.) is rebuilt
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
911
// Per-frame (vblank) hook: present the frame via vout_update(), handling
// display blanking and the frameskip "frame_ready" handshake.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // display disabled: blank the output once, then do nothing
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;  // nothing changed since the last present
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // safety net: force a frame through if nothing was flipped for a while
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement just turned on: its side of vram needs a full refresh
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
951
// VBlank notification from the core: update interlace state for the renderer.
// lcf is the current line-counter field (odd/even).
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
973
// Report screen position/resolution to the frontend.
// NOTE(review): despite the parameter name, *base_hres is filled from
// gpu.screen.vres (vertical resolution, halved in double-height mode) --
// looks intentional given the DHEIGHT handling, but confirm against callers.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
981
// Accept frontend callbacks/configuration (frameskip knobs, counters, memory
// mapping hooks, centering). Also performs the deferred VRAM mapping once the
// mmap callback is available.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute screen dims only when centering settings actually changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
1018
1ab64c54 1019// vim:shiftwidth=2:expandtab