gpu_neon: revive the old tests
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
CommitLineData
1ab64c54 1/*
05740673 2 * (C) GraÅžvydas "notaz" Ignotas, 2011-2012
1ab64c54
GI
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
5bbe183f 12#include <stdlib.h>
1ab64c54 13#include <string.h>
12367ad0 14#include <stdlib.h> /* for calloc */
15
56f08d83 16#include "gpu.h"
f99193c2 17#include "gpu_timing.h"
1328fa32 18#include "../../libpcsxcore/gpu.h" // meh
8f8ade9c 19#include "../../frontend/plugin_lib.h"
1ab64c54 20
8f8ade9c 21#ifndef ARRAY_SIZE
1ab64c54 22#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
8f8ade9c 23#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
// the fallback must still expand to the condition itself;
// an empty expansion would turn "if (unlikely(x))" into "if ()"
// and break the build on non-GNU compilers
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif
1ab64c54 33
deb18d24 34//#define log_io gpu_log
56f08d83 35#define log_io(...)
56f08d83 36
9ee0fd5b 37struct psx_gpu gpu;
1ab64c54 38
d02ab9fc 39static noinline int do_cmd_buffer(uint32_t *data, int count,
40 int *cycles_sum, int *cycles_last);
05740673 41static void finish_vram_transfer(int is_read);
48f3d210 42
// GP1(01h): reset the command buffer and abort any in-flight VRAM transfer.
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  // drain whatever was buffered before dropping it, so the renderer
  // doesn't see a half-consumed stream
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  // terminate an unfinished VRAM read/write so status/state are consistent
  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}
55
// GP1(00h): full GPU reset - registers, status and screen mode
// back to power-on defaults.
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  // ex_regs hold the last E1..E6 rendering-state commands; seed each
  // slot with its own command byte and zeroed parameters
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  // power-on display mode: 256x240 NTSC
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}
74
// Recompute horizontal output position/size from the display range
// registers (x1/x2), the hres bits in status, and the configured
// screen-centering mode. Results go to gpu.screen.{x,w,hres}.
static noinline void update_width(void)
{
  // index: status bits 16-18 select one of the standard hres modes
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    if (gpu.state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu.state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    // clamp so the visible window never extends past the mode's hres
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
  //  gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
}
127
// Recompute vertical output position/size from the display range
// registers (y1/y2), PAL/interlace status bits and the centering mode.
// Results go to gpu.screen.{y,h,vres}.
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    // interlaced double-height mode: everything scales by 2
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    // clamp so the visible window never extends past vres
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}
168
// Decide whether the next frame should be skipped, based on the
// forced/advised flags from the frontend and the configured skip count.
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  // priority: forced skip > frontend advice > fixed N-of-M pattern
  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  // a fill (cmd 02) deferred while skipping must be executed once we
  // start rendering again, or stale vram would show through
  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}
195
// Given a drawing-area command (E3), decide if skipping is permissible.
// Returns the new gpu.frameskip.allow value.
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // unsigned compare doubles as a "left of / above scanout" check
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
207
01ff3105 208static void flush_cmd_buffer(void);
209
// GP1(10h..1Fh): "get GPU info" - latch the requested value into gpu.gp0
// for a following GPUREAD.
static noinline void get_gpu_info(uint32_t data)
{
  // buffered commands may still modify ex_regs, flush first
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:  // texture window
  case 0x03:  // draw area top-left
  case 0x04:  // draw area bottom-right
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:  // draw offset (11-bit signed pair)
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:  // GPU version
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}
231
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

// default to mmap-based allocation on Linux, plain calloc elsewhere
#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
// Allocate the emulated VRAM (plus guard/alignment slack) and point
// gpu.vram at an aligned offset inside it. Returns 0 on success, -1 on
// failure (emulation continues but will likely crash).
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  // (void *)-1 covers MAP_FAILED from an mmap-backed allocator
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
278
// Plugin entry point: initialize video out, renderer and gpu state.
// Returns 0 on success, nonzero on any sub-init failure.
long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  // safe defaults until the frontend provides real counters
  // via GPUrearmedCallbacks
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  // vram mapping is deferred: gpu.mmap is supplied later by
  // GPUrearmedCallbacks, which calls map_vram() itself
  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}
299
// Plugin entry point: tear down renderer/vout and release VRAM.
long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  // free via the original (unaligned) pointer, not gpu.vram
  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}
318
1ab64c54
GI
// GP1 control-port write: dispatch on the command byte (data >> 24).
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip redundant writes, except for reset cmds (0,1) and
    // display-address (5) which have side effects handled below
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:  // full reset
    do_reset();
    break;
  case 0x01:  // reset command buffer
    do_cmd_reset();
    break;
  case 0x03:  // display enable/disable
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:  // DMA direction
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:  // display start address in vram
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // re-evaluate skipping at most once per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:  // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:  // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:  // display mode
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
392
// Number of additional parameter words for each GP0 command byte
// (total command length is 1 + cmd_lengths[cmd]).
// Poly-line cmds (0x48-0x5f) are variable length; the table holds their
// minimum, the terminator scan happens in do_cmd_list_skip().
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
412
d30279e2
GI
413#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
414
36da9c13 415static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
416{
417 int i;
418 for (i = 0; i < l; i++)
419 dst[i] = src[i] | msb;
420}
421
// Transfer one scanline's worth (l halfwords) between vram at (x, y)
// and the host buffer mem. Direction is chosen by is_read; on writes
// a nonzero msb forces the mask bit on every stored pixel.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *line = VRAM_MEM_XY(x, y);
  if (unlikely(is_read)) {
    memcpy(mem, line, l * 2);
    return;
  }
  if (unlikely(msb)) {
    cpy_msb(line, mem, l, msb);
    return;
  }
  memcpy(line, mem, l * 2);
}
433
// Stream `count` words between the host buffer and the active VRAM
// transfer rectangle (gpu.dma). Handles resuming a partially completed
// row (gpu.dma.offset) and finishes the transfer when the rectangle is
// exhausted. Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // mask-set bit from E6
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  // finish the row that was left half-done by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    // remaining partial row; remember the offset for the next call
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
486
// Begin a CPU<->VRAM image transfer (GP0 A0h/C0h). pos_word packs x/y,
// size_word packs w/h; both are decoded with hardware wrap semantics
// (0 means max+1).
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma; // keep initial rect for finish_vram_transfer()

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}
513
// Complete the current VRAM transfer: clear the "image ready" status
// bit for reads, or mark the framebuffer dirty and invalidate the
// renderer's caches over the written rectangle for writes.
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
526
// GP0(80h): VRAM-to-VRAM rectangle copy. params[0]=src xy, params[1]=dst xy,
// params[2]=size (hardware wrap semantics). Adds the copy cost to
// *cpu_cycles.
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  // copy onto itself with no mask bit to set is a no-op
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // slow path: overlapping ranges, horizontal wrap-around, or mask bit
  // to apply -- stage each chunk through lbuf with per-pixel x wrapping
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    // fast path: straight row-by-row memcpy (only y can wrap)
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
572
// Walk a command list while frameskip is active: execute only state
// commands (E1-E6) and large fills, defer small fills, and stop when a
// command would disable skipping or starts image i/o. Returns words
// consumed; *last_cmd gets the last command byte seen (-1 = incomplete).
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        // small fill: defer until skipping ends (see decide_frameskip)
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      // textured polys carry clut/texpage info that updates E1 state
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      // variable-length poly-line: scan for the 0x5xxx5xxx terminator
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      // shaded poly-line: vertex pairs, terminator every other word
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}
637
// Core GP0 command dispatcher. Consumes as much of data[0..count) as
// possible, routing words to VRAM i/o, VRAM copy, the renderer, or the
// frameskip walker. Returns the number of words NOT consumed (an
// incomplete trailing command stays buffered for the next call).
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active VRAM write transfer eats the stream first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      // vram-to-vram copy (4 words)
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror the E-register state into the live status word
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
711
// Process the staged command buffer; any incomplete trailing command is
// moved to the front and kept for the next flush.
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    // something was consumed -> a new primitive stream has started
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}
724
// Block DMA write to GP0: flush any staged words first, then feed the
// whole buffer; leftovers here are discarded (logged as an anomaly).
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
738
d30279e2 739void GPUwriteData(uint32_t data)
1ab64c54 740{
56f08d83 741 log_io("gpu_write %08x\n", data);
db215a72 742 gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
d30279e2
GI
743 if (gpu.cmd_len >= CMD_BUFFER_LEN)
744 flush_cmd_buffer();
1ab64c54
GI
745}
746
// Execute a linked-list DMA chain starting at start_addr in emulated RAM.
// Each node is a header word (next-address in low 24 bits, payload length
// in the top byte) followed by `len` GP0 words. Returns the accumulated
// cycle cost; *cycles_last_cmd gets the cost of the still-running last
// command. If progress_addr is non-NULL, only one node is processed and
// the next address is reported through it.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    // leftover words from a previous node must stay in order: append
    // this node's payload to the staging buffer and flush through it
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    // infinite-loop protection: checkpoint an address at doubling
    // intervals (Brent-style) and bail if we revisit it
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
818
d30279e2
GI
// Block DMA read from GPUREAD: only meaningful while a VRAM->CPU image
// transfer is active (gpu.dma.h != 0); otherwise the buffer is untouched.
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}
829
// Single-word GPUREAD: returns the next word of an active VRAM read,
// or the latched gp0 value (e.g. from get_gpu_info) otherwise.
uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    // do_vram_io works on little-endian data, convert around it
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}
847
848uint32_t GPUreadStatus(void)
849{
ddd56f6e 850 uint32_t ret;
56f08d83 851
d30279e2
GI
852 if (unlikely(gpu.cmd_len > 0))
853 flush_cmd_buffer();
854
61124a6d 855 ret = gpu.status;
ddd56f6e 856 log_io("gpu_read_status %08x\n", ret);
857 return ret;
d30279e2
GI
858}
859
// Savestate container for GPUfreeze(). NOTE(review): layout appears to
// follow the external GPU plugin freeze interface -- do not reorder or
// resize fields without checking the emulator side.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 867
// Save (type==1) or load (type==0) the full GPU state: VRAM, control
// registers, E-registers and status. Always returns 1.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // E-registers are stashed past the regular regs at offset 0xe0
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    // replay control regs 1..8 so derived state (screen dims etc.)
    // gets rebuilt; the xor defeats the redundant-write check
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}
901
// Per-vsync hook: flush pending work and present the frame (or blank
// the output / skip presentation depending on state).
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    // blank once, then skip output until the display is re-enabled
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // force a present anyway if nothing was flipped for ~9 frames
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  // enhancement just turned on: its caches are stale, rebuild fully
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}
941
// VBlank hook: maintain the renderer's interlace field state.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
963
// Report the current output y position and the base vertical resolution
// (halved in double-height/interlace mode).
// NOTE(review): despite the name, base_hres is derived from vres here --
// confirm against the frontend caller's expectations.
void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}
971
// Accept configuration and callback pointers from the frontend. Also
// performs the deferred VRAM mapping once gpu.mmap becomes available.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  // recompute output geometry only when a centering setting changed
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
1010
1ab64c54 1011// vim:shiftwidth=2:expandtab