drc: try to unbreak 3ds
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// bug fix: the fallback used to expand to nothing, which turned
// `if (unlikely(cond))` into `if ()` on non-GNU compilers; it must
// expand to the condition itself
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
43static noinline void do_cmd_reset(void)
44{
f99193c2 45 int dummy = 0;
c765eb86 46 renderer_sync();
48f3d210 47 if (unlikely(gpu.cmd_len > 0))
d02ab9fc 48 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
48f3d210 49 gpu.cmd_len = 0;
05740673 50
51 if (unlikely(gpu.dma.h > 0))
52 finish_vram_transfer(gpu.dma_start.is_read);
48f3d210 53 gpu.dma.h = 0;
54}
55
6e9bdaef 56static noinline void do_reset(void)
1ab64c54 57{
7841712d 58 unsigned int i;
5b568098 59
48f3d210 60 do_cmd_reset();
61
6e9bdaef 62 memset(gpu.regs, 0, sizeof(gpu.regs));
48f3d210 63 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
64 gpu.ex_regs[i] = (0xe0 + i) << 24;
61124a6d 65 gpu.status = 0x14802000;
6e9bdaef 66 gpu.gp0 = 0;
fc84f618 67 gpu.regs[3] = 1;
6e9bdaef 68 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 69 gpu.screen.vres = gpu.screen.h = 240;
5bbe183f 70 gpu.screen.x = gpu.screen.y = 0;
01ff3105 71 renderer_sync_ecmds(gpu.ex_regs);
3b7b0065 72 renderer_notify_res_change();
1ab64c54
GI
73}
74
8dd855cd 75static noinline void update_width(void)
76{
5bbe183f 77 static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
78 static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
79 uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
80 int hres = hres_all[(gpu.status >> 16) & 7];
81 int pal = gpu.status & PSX_GPU_STATUS_PAL;
8dd855cd 82 int sw = gpu.screen.x2 - gpu.screen.x1;
b3ff74ba 83 int type = gpu.state.screen_centering_type;
5bbe183f 84 int x = 0, x_auto;
b3ff74ba 85 if (type == C_AUTO)
86 type = gpu.state.screen_centering_type_default;
5bbe183f 87 if (sw <= 0)
88 /* nothing displayed? */;
89 else {
90 int s = pal ? 656 : 608; // or 600? pal is just a guess
91 x = (gpu.screen.x1 - s) / hdiv;
92 x = (x + 1) & ~1; // blitter limitation
93 sw /= hdiv;
94 sw = (sw + 2) & ~3; // according to nocash
b3ff74ba 95 switch (type) {
8f8ade9c 96 case C_INGAME:
5bbe183f 97 break;
8f8ade9c 98 case C_MANUAL:
5bbe183f 99 x = gpu.state.screen_centering_x;
100 break;
101 default:
102 // correct if slightly miscentered
103 x_auto = (hres - sw) / 2 & ~3;
104 if ((uint32_t)x_auto <= 8u && abs(x) < 24)
105 x = x_auto;
106 }
107 if (x + sw > hres)
108 sw = hres - x;
109 // .x range check is done in vout_update()
110 }
111 // reduce the unpleasant right border that a few games have
112 if (gpu.state.screen_centering_type == 0
113 && x <= 4 && hres - (x + sw) >= 4)
114 hres -= 4;
115 gpu.screen.x = x;
116 gpu.screen.w = sw;
117 gpu.screen.hres = hres;
118 gpu.state.dims_changed = 1;
119 //printf("xx %d %d -> %2d, %d / %d\n",
120 // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
8dd855cd 121}
122
123static noinline void update_height(void)
124{
5bbe183f 125 int pal = gpu.status & PSX_GPU_STATUS_PAL;
126 int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
127 int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
8dd855cd 128 int sh = gpu.screen.y2 - gpu.screen.y1;
5bbe183f 129 int center_tol = 16;
130 int vres = 240;
131
132 if (pal && (sh > 240 || gpu.screen.vres == 256))
133 vres = 256;
134 if (dheight)
135 y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
136 if (sh <= 0)
137 /* nothing displayed? */;
138 else {
139 switch (gpu.state.screen_centering_type) {
8f8ade9c 140 case C_INGAME:
141 break;
142 case C_BORDERLESS:
143 y = 0;
5bbe183f 144 break;
8f8ade9c 145 case C_MANUAL:
5bbe183f 146 y = gpu.state.screen_centering_y;
147 break;
148 default:
149 // correct if slightly miscentered
150 if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
151 y = 0;
152 }
153 if (y + sh > vres)
154 sh = vres - y;
155 }
156 gpu.screen.y = y;
8dd855cd 157 gpu.screen.h = sh;
5bbe183f 158 gpu.screen.vres = vres;
159 gpu.state.dims_changed = 1;
160 //printf("yy %d %d -> %d, %d / %d\n",
161 // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
8dd855cd 162}
163
fc84f618 164static noinline void decide_frameskip(void)
165{
5eaa13f1
A
166 *gpu.frameskip.dirty = 1;
167
9fe27e25 168 if (gpu.frameskip.active)
169 gpu.frameskip.cnt++;
170 else {
171 gpu.frameskip.cnt = 0;
172 gpu.frameskip.frame_ready = 1;
173 }
fc84f618 174
5eaa13f1
A
175 if (*gpu.frameskip.force)
176 gpu.frameskip.active = 1;
177 else if (!gpu.frameskip.active && *gpu.frameskip.advice)
9fe27e25 178 gpu.frameskip.active = 1;
179 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 180 gpu.frameskip.active = 1;
181 else
182 gpu.frameskip.active = 0;
fbb4bfff 183
184 if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
f99193c2 185 int dummy = 0;
d02ab9fc 186 do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
fbb4bfff 187 gpu.frameskip.pending_fill[0] = 0;
188 }
fc84f618 189}
190
b243416b 191static noinline int decide_frameskip_allow(uint32_t cmd_e3)
9fe27e25 192{
193 // no frameskip if it decides to draw to display area,
194 // but not for interlace since it'll most likely always do that
195 uint32_t x = cmd_e3 & 0x3ff;
196 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
61124a6d 197 gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
5bbe183f 198 (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
199 (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
b243416b 200 return gpu.frameskip.allow;
9fe27e25 201}
202
01ff3105 203static void flush_cmd_buffer(void);
204
6e9bdaef 205static noinline void get_gpu_info(uint32_t data)
206{
01ff3105 207 if (unlikely(gpu.cmd_len > 0))
208 flush_cmd_buffer();
6e9bdaef 209 switch (data & 0x0f) {
210 case 0x02:
211 case 0x03:
212 case 0x04:
6e9bdaef 213 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
214 break;
08b33377 215 case 0x05:
216 gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
6e9bdaef 217 break;
218 case 0x07:
219 gpu.gp0 = 2;
220 break;
221 default:
08b33377 222 // gpu.gp0 unchanged
6e9bdaef 223 break;
224 }
225}
226
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// twice the 1024x512x16bpp framebuffer (overdraw guard) plus at least
// one page of slack in front
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// pointer as returned by mmap/calloc; kept so it can be freed/unmapped later
static uint16_t *vram_ptr_orig = NULL;
9ee0fd5b 246
e34ef5ac 247#ifndef GPULIB_USE_MMAP
248# ifdef __linux__
249# define GPULIB_USE_MMAP 1
250# else
251# define GPULIB_USE_MMAP 0
252# endif
253#endif
9ee0fd5b 254static int map_vram(void)
255{
e34ef5ac 256#if GPULIB_USE_MMAP
5bd33f52 257 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
e34ef5ac 258#else
5bd33f52 259 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
e34ef5ac 260#endif
261 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
262 // 4kb guard in front
12367ad0 263 gpu.vram += (4096 / 2);
e34ef5ac 264 // Align
265 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
9ee0fd5b 266 return 0;
267 }
268 else {
269 fprintf(stderr, "could not map vram, expect crashes\n");
270 return -1;
271 }
272}
273
6e9bdaef 274long GPUinit(void)
275{
9394ada5 276 int ret;
277 ret = vout_init();
278 ret |= renderer_init();
279
3b7b0065 280 memset(&gpu.state, 0, sizeof(gpu.state));
281 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
282 gpu.zero = 0;
3ece2f0c 283 gpu.state.frame_count = &gpu.zero;
deb18d24 284 gpu.state.hcnt = &gpu.zero;
48f3d210 285 gpu.cmd_len = 0;
9394ada5 286 do_reset();
48f3d210 287
12367ad0 288 /*if (gpu.mmap != NULL) {
9ee0fd5b 289 if (map_vram() != 0)
290 ret = -1;
12367ad0 291 }*/
6e9bdaef 292 return ret;
293}
294
295long GPUshutdown(void)
296{
9ee0fd5b 297 long ret;
298
e929dec5 299 renderer_finish();
9ee0fd5b 300 ret = vout_finish();
12367ad0 301
302 if (vram_ptr_orig != NULL) {
e34ef5ac 303#if GPULIB_USE_MMAP
12367ad0 304 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
305#else
306 free(vram_ptr_orig);
307#endif
9ee0fd5b 308 }
12367ad0 309 vram_ptr_orig = gpu.vram = NULL;
9ee0fd5b 310
311 return ret;
6e9bdaef 312}
313
1ab64c54
GI
314void GPUwriteStatus(uint32_t data)
315{
1ab64c54 316 uint32_t cmd = data >> 24;
9a864a8f 317 int src_x, src_y;
1ab64c54 318
fc84f618 319 if (cmd < ARRAY_SIZE(gpu.regs)) {
48f3d210 320 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 321 return;
8dd855cd 322 gpu.regs[cmd] = data;
fc84f618 323 }
324
325 gpu.state.fb_dirty = 1;
8dd855cd 326
327 switch (cmd) {
1ab64c54 328 case 0x00:
6e9bdaef 329 do_reset();
1ab64c54 330 break;
48f3d210 331 case 0x01:
332 do_cmd_reset();
333 break;
1ab64c54 334 case 0x03:
5bbe183f 335 if (data & 1) {
61124a6d 336 gpu.status |= PSX_GPU_STATUS_BLANKING;
5bbe183f 337 gpu.state.dims_changed = 1; // for hud clearing
338 }
61124a6d
PC
339 else
340 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
1ab64c54
GI
341 break;
342 case 0x04:
61124a6d
PC
343 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
344 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
1ab64c54
GI
345 break;
346 case 0x05:
9a864a8f 347 src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
348 if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
349 gpu.screen.src_x = src_x;
350 gpu.screen.src_y = src_y;
351 renderer_notify_scanout_change(src_x, src_y);
352 if (gpu.frameskip.set) {
353 decide_frameskip_allow(gpu.ex_regs[3]);
354 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
355 decide_frameskip();
356 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
357 }
9fe27e25 358 }
fb4c6fba 359 }
1ab64c54 360 break;
8dd855cd 361 case 0x06:
362 gpu.screen.x1 = data & 0xfff;
363 gpu.screen.x2 = (data >> 12) & 0xfff;
364 update_width();
365 break;
1ab64c54
GI
366 case 0x07:
367 gpu.screen.y1 = data & 0x3ff;
368 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 369 update_height();
1ab64c54
GI
370 break;
371 case 0x08:
61124a6d 372 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 373 update_width();
374 update_height();
e929dec5 375 renderer_notify_res_change();
1ab64c54 376 break;
deb18d24 377 default:
378 if ((cmd & 0xf0) == 0x10)
379 get_gpu_info(data);
6e9bdaef 380 break;
1ab64c54 381 }
7890a708 382
383#ifdef GPUwriteStatus_ext
384 GPUwriteStatus_ext(data);
385#endif
1ab64c54
GI
386}
387
// Number of extra argument words following each GP0 command word
// (0 entries are either argument-less or handled specially).
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00: misc
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20: polygons
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11, // 30: shaded polys
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40: lines
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 50: shaded lines
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60: rects
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, // 70
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80: vram-to-vram
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 90
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0: cpu-to-vram
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // b0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0: vram-to-cpu
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0: env setup
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // f0
};
407
d30279e2
GI
408#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
409
// Copy `l` 16bpp pixels from src to dst, OR-ing `msb` (mask bit) into each.
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int n;

  for (n = 0; n < l; n++)
    dst[n] = src[n] | msb;
}
416
// Transfer one span of `l` pixels at vram position (x, y):
// vram -> mem when is_read, otherwise mem -> vram (with optional msb OR).
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);

  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}
428
429static int do_vram_io(uint32_t *data, int count, int is_read)
430{
431 int count_initial = count;
36da9c13 432 uint16_t msb = gpu.ex_regs[6] << 15;
d30279e2
GI
433 uint16_t *sdata = (uint16_t *)data;
434 int x = gpu.dma.x, y = gpu.dma.y;
435 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 436 int o = gpu.dma.offset;
d30279e2
GI
437 int l;
438 count *= 2; // operate in 16bpp pixels
439
c765eb86
JW
440 renderer_sync();
441
d30279e2
GI
442 if (gpu.dma.offset) {
443 l = w - gpu.dma.offset;
ddd56f6e 444 if (count < l)
d30279e2 445 l = count;
ddd56f6e 446
36da9c13 447 do_vram_line(x + o, y, sdata, l, is_read, msb);
ddd56f6e 448
449 if (o + l < w)
450 o += l;
451 else {
452 o = 0;
453 y++;
454 h--;
455 }
d30279e2
GI
456 sdata += l;
457 count -= l;
d30279e2
GI
458 }
459
460 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
461 y &= 511;
36da9c13 462 do_vram_line(x, y, sdata, w, is_read, msb);
d30279e2
GI
463 }
464
05740673 465 if (h > 0) {
466 if (count > 0) {
467 y &= 511;
36da9c13 468 do_vram_line(x, y, sdata, count, is_read, msb);
05740673 469 o = count;
470 count = 0;
471 }
d30279e2 472 }
05740673 473 else
474 finish_vram_transfer(is_read);
d30279e2
GI
475 gpu.dma.y = y;
476 gpu.dma.h = h;
ddd56f6e 477 gpu.dma.offset = o;
d30279e2 478
6e9bdaef 479 return count_initial - count / 2;
d30279e2
GI
480}
481
482static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
483{
ddd56f6e 484 if (gpu.dma.h)
485 log_anomaly("start_vram_transfer while old unfinished\n");
486
5440b88e 487 gpu.dma.x = pos_word & 0x3ff;
488 gpu.dma.y = (pos_word >> 16) & 0x1ff;
48f3d210 489 gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
490 gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
d30279e2 491 gpu.dma.offset = 0;
05740673 492 gpu.dma.is_read = is_read;
493 gpu.dma_start = gpu.dma;
d30279e2 494
9e146206 495 renderer_flush_queues();
496 if (is_read) {
61124a6d 497 gpu.status |= PSX_GPU_STATUS_IMG;
9e146206 498 // XXX: wrong for width 1
495d603c 499 gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
5440b88e 500 gpu.state.last_vram_read_frame = *gpu.state.frame_count;
9e146206 501 }
d30279e2 502
6e9bdaef 503 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
504 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
1328fa32 505 if (gpu.gpu_state_change)
506 gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
d30279e2
GI
507}
508
05740673 509static void finish_vram_transfer(int is_read)
510{
511 if (is_read)
61124a6d 512 gpu.status &= ~PSX_GPU_STATUS_IMG;
b30fba56 513 else {
514 gpu.state.fb_dirty = 1;
05740673 515 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
3b7b0065 516 gpu.dma_start.w, gpu.dma_start.h, 0);
b30fba56 517 }
1328fa32 518 if (gpu.gpu_state_change)
519 gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
05740673 520}
521
f99193c2 522static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
36da9c13 523{
524 const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
525 const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
526 const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
527 const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
528 uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
529 uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
530 uint16_t msb = gpu.ex_regs[6] << 15;
531 uint16_t lbuf[128];
532 uint32_t x, y;
533
f99193c2 534 *cpu_cycles += gput_copy(w, h);
36da9c13 535 if (sx == dx && sy == dy && msb == 0)
536 return;
537
538 renderer_flush_queues();
539
540 if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
541 {
542 for (y = 0; y < h; y++)
543 {
544 const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
545 uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
546 for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
547 {
548 uint32_t x1, w1 = w - x;
549 if (w1 > ARRAY_SIZE(lbuf))
550 w1 = ARRAY_SIZE(lbuf);
551 for (x1 = 0; x1 < w1; x1++)
552 lbuf[x1] = src[(sx + x + x1) & 0x3ff];
553 for (x1 = 0; x1 < w1; x1++)
554 dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
555 }
556 }
557 }
558 else
559 {
560 uint32_t sy1 = sy, dy1 = dy;
561 for (y = 0; y < h; y++, sy1++, dy1++)
562 memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
563 }
564
565 renderer_update_caches(dx, dy, w, h, 0);
566}
567
b243416b 568static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
569{
f99193c2 570 int cmd = 0, pos = 0, len, dummy = 0, v;
b243416b 571 int skip = 1;
572
fbb4bfff 573 gpu.frameskip.pending_fill[0] = 0;
574
b243416b 575 while (pos < count && skip) {
576 uint32_t *list = data + pos;
db215a72 577 cmd = LE32TOH(list[0]) >> 24;
b243416b 578 len = 1 + cmd_lengths[cmd];
579
97e07db9 580 switch (cmd) {
581 case 0x02:
db215a72 582 if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
97e07db9 583 // clearing something large, don't skip
d02ab9fc 584 do_cmd_list(list, 3, &dummy, &dummy, &dummy);
97e07db9 585 else
586 memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
587 break;
588 case 0x24 ... 0x27:
589 case 0x2c ... 0x2f:
590 case 0x34 ... 0x37:
591 case 0x3c ... 0x3f:
592 gpu.ex_regs[1] &= ~0x1ff;
db215a72 593 gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
97e07db9 594 break;
595 case 0x48 ... 0x4F:
596 for (v = 3; pos + v < count; v++)
597 {
db215a72 598 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 599 break;
600 }
601 len += v - 3;
602 break;
603 case 0x58 ... 0x5F:
604 for (v = 4; pos + v < count; v += 2)
605 {
db215a72 606 if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
97e07db9 607 break;
608 }
609 len += v - 4;
610 break;
611 default:
612 if (cmd == 0xe3)
db215a72 613 skip = decide_frameskip_allow(LE32TOH(list[0]));
97e07db9 614 if ((cmd & 0xf8) == 0xe0)
db215a72 615 gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
97e07db9 616 break;
b243416b 617 }
b243416b 618
619 if (pos + len > count) {
620 cmd = -1;
621 break; // incomplete cmd
622 }
36da9c13 623 if (0x80 <= cmd && cmd <= 0xdf)
b243416b 624 break; // image i/o
97e07db9 625
b243416b 626 pos += len;
627 }
628
629 renderer_sync_ecmds(gpu.ex_regs);
630 *last_cmd = cmd;
631 return pos;
632}
633
d02ab9fc 634static noinline int do_cmd_buffer(uint32_t *data, int count,
635 int *cycles_sum, int *cycles_last)
d30279e2 636{
b243416b 637 int cmd, pos;
638 uint32_t old_e3 = gpu.ex_regs[3];
fc84f618 639 int vram_dirty = 0;
d30279e2 640
d30279e2 641 // process buffer
b243416b 642 for (pos = 0; pos < count; )
d30279e2 643 {
b243416b 644 if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
645 vram_dirty = 1;
d30279e2 646 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 647 if (pos == count)
648 break;
d30279e2
GI
649 }
650
db215a72 651 cmd = LE32TOH(data[pos]) >> 24;
97e07db9 652 if (0xa0 <= cmd && cmd <= 0xdf) {
79573c20
DS
653 if (unlikely((pos+2) >= count)) {
654 // incomplete vram write/read cmd, can't consume yet
655 cmd = -1;
656 break;
657 }
658
d30279e2 659 // consume vram write/read cmd
db215a72 660 start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
b243416b 661 pos += 3;
662 continue;
d30279e2 663 }
36da9c13 664 else if ((cmd & 0xe0) == 0x80) {
665 if (unlikely((pos+3) >= count)) {
666 cmd = -1; // incomplete cmd, can't consume yet
667 break;
668 }
025b6fde 669 renderer_sync();
d02ab9fc 670 *cycles_sum += *cycles_last;
671 *cycles_last = 0;
672 do_vram_copy(data + pos + 1, cycles_last);
b30fba56 673 vram_dirty = 1;
36da9c13 674 pos += 4;
675 continue;
676 }
c296224f 677 else if (cmd == 0x1f) {
678 log_anomaly("irq1?\n");
679 pos++;
680 continue;
681 }
b243416b 682
1e07f71d 683 // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
db215a72 684 if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
b243416b 685 pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
686 else {
d02ab9fc 687 pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
b243416b 688 vram_dirty = 1;
689 }
690
691 if (cmd == -1)
692 // incomplete cmd
ddd56f6e 693 break;
d30279e2 694 }
ddd56f6e 695
61124a6d
PC
696 gpu.status &= ~0x1fff;
697 gpu.status |= gpu.ex_regs[1] & 0x7ff;
698 gpu.status |= (gpu.ex_regs[6] & 3) << 11;
a3a9f519 699
fc84f618 700 gpu.state.fb_dirty |= vram_dirty;
701
b243416b 702 if (old_e3 != gpu.ex_regs[3])
703 decide_frameskip_allow(gpu.ex_regs[3]);
704
ddd56f6e 705 return count - pos;
d30279e2
GI
706}
707
1328fa32 708static noinline void flush_cmd_buffer(void)
d30279e2 709{
f99193c2 710 int dummy = 0, left;
d02ab9fc 711 left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
d30279e2
GI
712 if (left > 0)
713 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
1328fa32 714 if (left != gpu.cmd_len) {
715 if (!gpu.dma.h && gpu.gpu_state_change)
716 gpu.gpu_state_change(PGS_PRIMITIVE_START);
717 gpu.cmd_len = left;
718 }
1ab64c54
GI
719}
720
721void GPUwriteDataMem(uint32_t *mem, int count)
722{
f99193c2 723 int dummy = 0, left;
d30279e2 724
56f08d83 725 log_io("gpu_dma_write %p %d\n", mem, count);
726
d30279e2
GI
727 if (unlikely(gpu.cmd_len > 0))
728 flush_cmd_buffer();
56f08d83 729
d02ab9fc 730 left = do_cmd_buffer(mem, count, &dummy, &dummy);
d30279e2 731 if (left)
56f08d83 732 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
733}
734
d30279e2 735void GPUwriteData(uint32_t data)
1ab64c54 736{
56f08d83 737 log_io("gpu_write %08x\n", data);
db215a72 738 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
739 if (gpu.cmd_len >= CMD_BUFFER_LEN)
740 flush_cmd_buffer();
1ab64c54
GI
741}
742
d02ab9fc 743long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
744 uint32_t *progress_addr, int32_t *cycles_last_cmd)
1ab64c54 745{
09159d99 746 uint32_t addr, *list, ld_addr = 0;
ddd56f6e 747 int len, left, count;
d02ab9fc 748 int cpu_cycles_sum = 0;
749 int cpu_cycles_last = 0;
d30279e2 750
8f5f2dd5 751 preload(rambase + (start_addr & 0x1fffff) / 4);
752
d30279e2
GI
753 if (unlikely(gpu.cmd_len > 0))
754 flush_cmd_buffer();
755
56f08d83 756 log_io("gpu_dma_chain\n");
ddd56f6e 757 addr = start_addr & 0xffffff;
09159d99 758 for (count = 0; (addr & 0x800000) == 0; count++)
ddd56f6e 759 {
ddd56f6e 760 list = rambase + (addr & 0x1fffff) / 4;
db215a72
PC
761 len = LE32TOH(list[0]) >> 24;
762 addr = LE32TOH(list[0]) & 0xffffff;
8f5f2dd5 763 preload(rambase + (addr & 0x1fffff) / 4);
764
d02ab9fc 765 cpu_cycles_sum += 10;
1c72b1c2 766 if (len > 0)
d02ab9fc 767 cpu_cycles_sum += 5 + len;
deb18d24 768
d02ab9fc 769 log_io(".chain %08lx #%d+%d %u+%u\n",
770 (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
a4e249a1 771 if (unlikely(gpu.cmd_len > 0)) {
81ff42e1 772 if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
773 log_anomaly("cmd_buffer overflow, likely garbage commands\n");
774 gpu.cmd_len = 0;
775 }
a4e249a1 776 memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
777 gpu.cmd_len += len;
778 flush_cmd_buffer();
779 continue;
780 }
ddd56f6e 781
56f08d83 782 if (len) {
d02ab9fc 783 left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
a4e249a1 784 if (left) {
785 memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
786 gpu.cmd_len = left;
787 log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
788 }
56f08d83 789 }
ddd56f6e 790
fae38d7a 791 if (progress_addr) {
792 *progress_addr = addr;
793 break;
794 }
09159d99 795 #define LD_THRESHOLD (8*1024)
796 if (count >= LD_THRESHOLD) {
797 if (count == LD_THRESHOLD) {
798 ld_addr = addr;
799 continue;
800 }
801
802 // loop detection marker
803 // (bit23 set causes DMA error on real machine, so
804 // unlikely to be ever set by the game)
db215a72 805 list[0] |= HTOLE32(0x800000);
09159d99 806 }
ddd56f6e 807 }
808
09159d99 809 if (ld_addr != 0) {
810 // remove loop detection markers
811 count -= LD_THRESHOLD + 2;
812 addr = ld_addr & 0x1fffff;
813 while (count-- > 0) {
814 list = rambase + addr / 4;
db215a72
PC
815 addr = LE32TOH(list[0]) & 0x1fffff;
816 list[0] &= HTOLE32(~0x800000);
09159d99 817 }
d30279e2 818 }
09159d99 819
d02ab9fc 820 //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
3ece2f0c 821 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 822 gpu.state.last_list.hcnt = *gpu.state.hcnt;
d02ab9fc 823 gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
deb18d24 824 gpu.state.last_list.addr = start_addr;
825
d02ab9fc 826 *cycles_last_cmd = cpu_cycles_last;
827 return cpu_cycles_sum;
1ab64c54
GI
828}
829
d30279e2
GI
830void GPUreadDataMem(uint32_t *mem, int count)
831{
56f08d83 832 log_io("gpu_dma_read %p %d\n", mem, count);
833
d30279e2
GI
834 if (unlikely(gpu.cmd_len > 0))
835 flush_cmd_buffer();
56f08d83 836
d30279e2
GI
837 if (gpu.dma.h)
838 do_vram_io(mem, count, 1);
839}
840
841uint32_t GPUreadData(void)
842{
9e146206 843 uint32_t ret;
56f08d83 844
845 if (unlikely(gpu.cmd_len > 0))
846 flush_cmd_buffer();
847
9e146206 848 ret = gpu.gp0;
495d603c
PC
849 if (gpu.dma.h) {
850 ret = HTOLE32(ret);
9e146206 851 do_vram_io(&ret, 1, 1);
495d603c
PC
852 ret = LE32TOH(ret);
853 }
56f08d83 854
9e146206 855 log_io("gpu_read %08x\n", ret);
856 return ret;
d30279e2
GI
857}
858
859uint32_t GPUreadStatus(void)
860{
ddd56f6e 861 uint32_t ret;
56f08d83 862
d30279e2
GI
863 if (unlikely(gpu.cmd_len > 0))
864 flush_cmd_buffer();
865
61124a6d 866 ret = gpu.status;
ddd56f6e 867 log_io("gpu_read_status %08x\n", ret);
868 return ret;
d30279e2
GI
869}
870
// Savestate container exchanged with the emulator core.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 878
096ec49b 879long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 880{
fc84f618 881 int i;
882
1ab64c54
GI
883 switch (type) {
884 case 1: // save
d30279e2
GI
885 if (gpu.cmd_len > 0)
886 flush_cmd_buffer();
c765eb86
JW
887
888 renderer_sync();
9ee0fd5b 889 memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
1ab64c54 890 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 891 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
61124a6d 892 freeze->ulStatus = gpu.status;
1ab64c54
GI
893 break;
894 case 0: // load
c765eb86 895 renderer_sync();
9ee0fd5b 896 memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
1ab64c54 897 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 898 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
61124a6d 899 gpu.status = freeze->ulStatus;
3d47ef17 900 gpu.cmd_len = 0;
fc84f618 901 for (i = 8; i > 0; i--) {
902 gpu.regs[i] ^= 1; // avoid reg change detection
903 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
904 }
5b745e5b 905 renderer_sync_ecmds(gpu.ex_regs);
9a864a8f 906 renderer_update_caches(0, 0, 1024, 512, 0);
1ab64c54
GI
907 break;
908 }
909
910 return 1;
911}
912
5440b88e 913void GPUupdateLace(void)
914{
915 if (gpu.cmd_len > 0)
916 flush_cmd_buffer();
917 renderer_flush_queues();
918
7a20a6d0 919#ifndef RAW_FB_DISPLAY
61124a6d 920 if (gpu.status & PSX_GPU_STATUS_BLANKING) {
aafcb4dd 921 if (!gpu.state.blanked) {
922 vout_blank();
923 gpu.state.blanked = 1;
924 gpu.state.fb_dirty = 1;
925 }
926 return;
927 }
928
c765eb86
JW
929 renderer_notify_update_lace(0);
930
aafcb4dd 931 if (!gpu.state.fb_dirty)
5440b88e 932 return;
7a20a6d0 933#endif
5440b88e 934
935 if (gpu.frameskip.set) {
936 if (!gpu.frameskip.frame_ready) {
937 if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
938 return;
939 gpu.frameskip.active = 0;
940 }
941 gpu.frameskip.frame_ready = 0;
942 }
943
944 vout_update();
3b7b0065 945 if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
946 renderer_update_caches(0, 0, 1024, 512, 1);
947 gpu.state.enhancement_was_active = gpu.state.enhancement_active;
5440b88e 948 gpu.state.fb_dirty = 0;
aafcb4dd 949 gpu.state.blanked = 0;
c765eb86 950 renderer_notify_update_lace(1);
5440b88e 951}
952
72e5023f 953void GPUvBlank(int is_vblank, int lcf)
954{
5440b88e 955 int interlace = gpu.state.allow_interlace
61124a6d
PC
956 && (gpu.status & PSX_GPU_STATUS_INTERLACE)
957 && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
5440b88e 958 // interlace doesn't look nice on progressive displays,
959 // so we have this "auto" mode here for games that don't read vram
960 if (gpu.state.allow_interlace == 2
961 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
962 {
963 interlace = 0;
964 }
965 if (interlace || interlace != gpu.state.old_interlace) {
966 gpu.state.old_interlace = interlace;
967
968 if (gpu.cmd_len > 0)
969 flush_cmd_buffer();
970 renderer_flush_queues();
971 renderer_set_interlace(interlace, !lcf);
972 }
973}
974
80bc1426 975void GPUgetScreenInfo(int *y, int *base_hres)
976{
977 *y = gpu.screen.y;
978 *base_hres = gpu.screen.vres;
979 if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
980 *base_hres >>= 1;
981}
982
5440b88e 983void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
984{
985 gpu.frameskip.set = cbs->frameskip;
986 gpu.frameskip.advice = &cbs->fskip_advice;
5eaa13f1 987 gpu.frameskip.force = &cbs->fskip_force;
5bbe183f 988 gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
5440b88e 989 gpu.frameskip.active = 0;
990 gpu.frameskip.frame_ready = 1;
991 gpu.state.hcnt = cbs->gpu_hcnt;
992 gpu.state.frame_count = cbs->gpu_frame_count;
993 gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
0b02eb77 994 gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
b3ff74ba 995 gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
5bbe183f 996 if (gpu.state.screen_centering_type != cbs->screen_centering_type
997 || gpu.state.screen_centering_x != cbs->screen_centering_x
998 || gpu.state.screen_centering_y != cbs->screen_centering_y) {
999 gpu.state.screen_centering_type = cbs->screen_centering_type;
1000 gpu.state.screen_centering_x = cbs->screen_centering_x;
1001 gpu.state.screen_centering_y = cbs->screen_centering_y;
1002 update_width();
1003 update_height();
1004 }
5440b88e 1005
9ee0fd5b 1006 gpu.mmap = cbs->mmap;
1007 gpu.munmap = cbs->munmap;
1328fa32 1008 gpu.gpu_state_change = cbs->gpu_state_change;
9ee0fd5b 1009
1010 // delayed vram mmap
1011 if (gpu.vram == NULL)
1012 map_vram();
1013
5440b88e 1014 if (cbs->pl_vout_set_raw_vram)
1015 cbs->pl_vout_set_raw_vram(gpu.vram);
1016 renderer_set_config(cbs);
1017 vout_set_config(cbs);
72e5023f 1018}
1019
1ab64c54 1020// vim:shiftwidth=2:expandtab