pcsx_rearmed: plugins/gpulib/gpu.c, as of commit "add a thp-based huge page alloc fallback"
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

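// GP1(0x01) command reset: drain whatever is still queued in the command
// buffer and close out any unfinished VRAM transfer before clearing state.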
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

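// Recompute the visible width, x offset and horizontal resolution from the
// GP1(0x06) display range registers and the hres bits in the status word,
// honoring the configured screen centering mode.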
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

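// Recompute the visible height, y offset and vertical resolution from the
// GP1(0x07) display range registers and the PAL / double-height status bits.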
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

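// Runs when the display start address is flipped: decides whether the next
// frame is skipped and replays a deferred fill command once skipping stops.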
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

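// cmd_e3 is the current GP0(0xe3) drawing area top-left word.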
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

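// GP1(0x10..0x1f) "get GPU info": latch the requested value into gp0 so the
// following GPUreadData() returns it.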
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

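// Obtain the VRAM buffer (through the frontend mmap callback or plain
// calloc), skip the 4 kB guard area in front and align to VRAM_ALIGN.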
#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}

static int allocate_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}
#endif

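// Plugin entry point: allocate VRAM if needed, bring up the video-out and
// renderer backends and put the virtual GPU into its post-reset state.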
long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;         // in psxcounters.cpp
  //extern uint32_t frame_counter;      // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

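// GP1 register write: display control commands (reset, blanking, DMA mode,
// display start/range, video mode) and the 0x10+ info queries.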
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      gpu.screen.src_x = data & 0x3ff;
      gpu.screen.src_y = (data >> 10) & 0x1ff;
      renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

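// Number of extra argument words each GP0 command takes; variable-length
// commands (polylines, image transfers) are special-cased by the parsers.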
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

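// Move data words between the command stream and the VRAM transfer rectangle
// set up by start_vram_transfer(); returns the number of words consumed.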
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

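// GP0(0xa0/0xc0): latch the rectangle for a CPU->VRAM or VRAM->CPU image
// transfer and, for reads, preload the first word into gp0.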
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h, 0);
}

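// GP0(0x80): VRAM-to-VRAM rectangle copy; falls back to a line buffer when
// the regions overlap, wrap past column 1024, or the mask bit must be set.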
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w =  ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

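// Frameskip path: walk the command list without rendering, still tracking
// texture page / e* register state and deferring a pending fill; stops as
// soon as a command would draw to the displayed area or image i/o is hit.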
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

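// Central GP0 dispatcher: feeds data to an active VRAM transfer, starts
// image transfers and VRAM copies, and hands drawing commands to the
// renderer (or to the skip parser when frameskipping). Returns how many
// words of an incomplete command are left unconsumed.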
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

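// Walk a linked DMA packet list in PSX RAM, feeding each packet's words to
// the GP0 parser; marks visited headers to detect endless list loops and
// returns a rough cycle cost estimate.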
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

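// Savestate block exchanged with the core through GPUfreeze().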
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 1);
      break;
  }

  return 1;
}

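// Called once per emulated vblank: handles display blanking, frameskip
// pacing, and pushes the frame to the video-out layer when it changed.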
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

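// Per-field notification used to drive interlaced rendering; the "auto"
// mode turns interlace off for games that never read VRAM back.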
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

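// Pull current frontend settings (frameskip, centering, enhancement, memory
// callbacks) from the rearmed core and apply them; also performs the delayed
// VRAM mapping once the mmap callbacks are available.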
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab