frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"
#include "../../include/compiler_features.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static noinline void finish_vram_transfer(struct psx_gpu *gpu, int is_read);

static noinline void do_cmd_reset(struct psx_gpu *gpu)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu->cmd_len > 0))
    do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &dummy);
  gpu->cmd_len = 0;

  if (unlikely(gpu->dma.h > 0))
    finish_vram_transfer(gpu, gpu->dma_start.is_read);
  gpu->dma.h = 0;
}

static noinline void do_reset(struct psx_gpu *gpu)
{
  unsigned int i;

  do_cmd_reset(gpu);

  memset(gpu->regs, 0, sizeof(gpu->regs));
  for (i = 0; i < ARRAY_SIZE(gpu->ex_regs); i++)
    gpu->ex_regs[i] = (0xe0 + i) << 24;
  gpu->status = 0x14802000;
  gpu->gp0 = 0;
  gpu->regs[3] = 1;
  gpu->screen.hres = gpu->screen.w = 256;
  gpu->screen.vres = gpu->screen.h = 240;
  gpu->screen.x = gpu->screen.y = 0;
  renderer_sync_ecmds(gpu->ex_regs);
  renderer_notify_res_change();
}

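// Derive the displayed width from the GP1(0x08) mode bits: bit 16 of the
// status word selects the special 368-pixel mode (dot clock divider 7),
// otherwise bits 17-18 pick 256/320/512/640 (dividers 10/8/5/4; see
// hres_all/hdivs below). The horizontal display range registers are then
// converted from dot clocks to pixels using that divider.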
static noinline void update_width(struct psx_gpu *gpu)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu->status >> 16) & 7];
  int hres = hres_all[(gpu->status >> 16) & 7];
  int pal = gpu->status & PSX_GPU_STATUS_PAL;
  int sw = gpu->screen.x2 - gpu->screen.x1;
  int type = gpu->state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu->state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu->screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash

    if (gpu->state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu->state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu->state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu->state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu->screen.x = x;
  gpu->screen.w = sw;
  gpu->screen.hres = hres;
  gpu->state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu->screen.x1,
  //  gpu->screen.x2, gpu->screen.x2 - gpu->screen.x1, x, sw, hres);
}

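// Derive the displayed height from the vertical display range: y1 is
// rebased against the first visible line (16 for NTSC, 39 for PAL here),
// and everything is doubled when PSX_GPU_STATUS_DHEIGHT (interlaced
// 480-line mode) is set.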
static noinline void update_height(struct psx_gpu *gpu)
{
  int pal = gpu->status & PSX_GPU_STATUS_PAL;
  int dheight = gpu->status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu->screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu->screen.y2 - gpu->screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu->screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu->state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu->state.screen_centering_y;
      vres += gpu->state.screen_centering_h_adj;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu->screen.y = y;
  gpu->screen.h = sh;
  gpu->screen.vres = vres;
  gpu->state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu->screen.y1, gpu->screen.y2, y, sh, vres);
}

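// Frameskip decision, made when the display address flips: counts
// consecutively skipped frames and decides whether the next one is skipped,
// honoring the frontend's force/advice flags and the configured skip count.
// A fill deferred by do_cmd_list_skip() is executed once skipping stops.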
static noinline void decide_frameskip(struct psx_gpu *gpu)
{
  *gpu->frameskip.dirty = 1;

  if (gpu->frameskip.active)
    gpu->frameskip.cnt++;
  else {
    gpu->frameskip.cnt = 0;
    gpu->frameskip.frame_ready = 1;
  }

  if (*gpu->frameskip.force)
    gpu->frameskip.active = 1;
  else if (!gpu->frameskip.active && *gpu->frameskip.advice)
    gpu->frameskip.active = 1;
  else if (gpu->frameskip.set > 0 && gpu->frameskip.cnt < gpu->frameskip.set)
    gpu->frameskip.active = 1;
  else
    gpu->frameskip.active = 0;

  if (!gpu->frameskip.active && gpu->frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu->frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu->frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(struct psx_gpu *gpu)
{
  // no frameskip if the game draws to the display area,
  // but not when interlaced since that will most likely always be the case
  uint32_t cmd_e3 = gpu->ex_regs[3];
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu->frameskip.allow = (gpu->status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu->screen.src_x) >= (uint32_t)gpu->screen.w ||
    (uint32_t)(y - gpu->screen.src_y) >= (uint32_t)gpu->screen.h;
  return gpu->frameskip.allow;
}

static void flush_cmd_buffer(struct psx_gpu *gpu);

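// GP1(0x10) "get GPU info": returns the texture window (2), draw area
// top-left (3) / bottom-right (4) or draw offset (5) state, or a version
// code (7) in GP0; other subfunctions leave GP0 unchanged.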
static noinline void get_gpu_info(struct psx_gpu *gpu, uint32_t data)
{
  if (unlikely(gpu->cmd_len > 0))
    flush_cmd_buffer(gpu);
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu->gp0 = gpu->ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu->gp0 = gpu->ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu->gp0 = 2;
    break;
  default:
    // gpu->gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler that gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Will change this value if it ever gets large
  // page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 64
#endif

// double size, for an overdraw/overscan guard + at least 1 page in front
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# if defined(__linux__) || defined(_3DS) || defined(HAVE_LIBNX) || defined(VITA)
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
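// Map or allocate the VRAM backing store. Assumption: the frontend's mmap
// callback may report failure as either NULL or -1 (MAP_FAILED), hence the
// double check below; a 4kb guard region is kept in front of the returned
// pointer and the remainder is aligned to VRAM_ALIGN.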
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    gpu.vram = NULL;
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset(&gpu);

  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

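// GP1 command port. The command number is in bits 24-31; writes that repeat
// the currently latched value are ignored for all commands except 0x00
// (reset), 0x01 (command reset) and 0x05 (display address).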
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  uint32_t fb_dirty = 1;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  switch (cmd) {
  case 0x00:
    do_reset(&gpu);
    break;
  case 0x01:
    do_cmd_reset(&gpu);
    fb_dirty = 0;
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    fb_dirty = 0;
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(&gpu);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip(&gpu);
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width(&gpu);
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height(&gpu);
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width(&gpu);
    update_height(&gpu);
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(&gpu, data);
    fb_dirty = 0;
    break;
  }

  gpu.state.fb_dirty |= fb_dirty;

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

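// Number of parameter words following each GP0 command word. Polyline
// commands (0x48-0x5f) are open-ended; their real length is resolved later
// by scanning for the terminator word (both halfwords matching 0x5xxx).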
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(vram_, x, y) &vram_[(y) * 1024 + (x)]

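// GP0(0xe6) mask bit handling for CPU->VRAM writes: bit 0 of r6 forces the
// mask (msb) on every written pixel, bit 1 skips destination pixels that
// already have their mask bit set.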
// this isn't very useful, so it should be rare
static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
{
  int i;
  if (r6 == 1) {
    for (i = 0; i < l; i++)
      dst[i] = src[i] | 0x8000;
  }
  else {
    uint16_t msb = r6 << 15;
    for (i = 0; i < l; i++) {
      uint16_t mask = (int16_t)dst[i] >> 15;
      dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask);
    }
  }
}

static inline void do_vram_line(uint16_t *vram_, int x, int y,
    uint16_t *mem, int l, int is_read, uint32_t r6)
{
  uint16_t *vram = VRAM_MEM_XY(vram_, x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(r6))
    cpy_mask(vram, mem, l, r6);
  else
    memcpy(vram, mem, l * 2);
}

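// Stream data between the FIFO word stream and the VRAM rectangle set up by
// start_vram_transfer(); count is in 32-bit words, rows wrap at 512 lines.
// Returns the number of words consumed.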
static int do_vram_io(struct psx_gpu *gpu, uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint32_t r6 = gpu->ex_regs[6] & 3;
  uint16_t *sdata = (uint16_t *)data;
  uint16_t *vram = gpu->vram;
  int x = gpu->dma.x, y = gpu->dma.y;
  int w = gpu->dma.w, h = gpu->dma.h;
  int o = gpu->dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu->dma.offset) {
    l = w - gpu->dma.offset;
    if (count < l)
      l = count;

    do_vram_line(vram, x + o, y, sdata, l, is_read, r6);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(vram, x, y, sdata, w, is_read, r6);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(vram, x, y, sdata, count, is_read, r6);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(gpu, is_read);
  gpu->dma.y = y;
  gpu->dma.h = h;
  gpu->dma.offset = o;

  return count_initial - count / 2;
}

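// Begin a GP0(0xa0/0xc0) VRAM write/read. Position and size come from the
// two parameter words; the size is decoded as ((n - 1) & max) + 1, so 0
// means the full 1024 (width) or 512 (height).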
static noinline void start_vram_transfer(struct psx_gpu *gpu, uint32_t pos_word,
    uint32_t size_word, int is_read)
{
  if (gpu->dma.h)
    log_anomaly(gpu, "start_vram_transfer while old unfinished\n");

  gpu->dma.x = pos_word & 0x3ff;
  gpu->dma.y = (pos_word >> 16) & 0x1ff;
  gpu->dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu->dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu->dma.offset = 0;
  gpu->dma.is_read = is_read;
  gpu->dma_start = gpu->dma;

  renderer_flush_queues();
  if (is_read) {
    const uint16_t *mem = VRAM_MEM_XY(gpu->vram, gpu->dma.x, gpu->dma.y);
    gpu->status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu->gp0 = LE16TOH(mem[0]) | ((uint32_t)LE16TOH(mem[1]) << 16);
    gpu->state.last_vram_read_frame = *gpu->state.frame_count;
  }

  log_io(gpu, "start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu->dma.x, gpu->dma.y, gpu->dma.w, gpu->dma.h);
  if (gpu->gpu_state_change)
    gpu->gpu_state_change(PGS_VRAM_TRANSFER_START, 0);
}

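// On write completion, check whether the transfer touched the scanout area:
// the four differences below are all non-negative exactly when the DMA
// rectangle overlaps the screen rectangle, so OR-ing them and shifting the
// sign bit down gives a branchless overlap test.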
static void finish_vram_transfer(struct psx_gpu *gpu, int is_read)
{
  if (is_read)
    gpu->status &= ~PSX_GPU_STATUS_IMG;
  else {
    int32_t screen_r = gpu->screen.src_x + gpu->screen.hres;
    int32_t screen_b = gpu->screen.src_y + gpu->screen.vres;
    int32_t dma_r = gpu->dma_start.x + gpu->dma_start.w;
    int32_t dma_b = gpu->dma_start.y + gpu->dma_start.h;
    int32_t not_dirty;
    not_dirty  = screen_r - gpu->dma_start.x - 1;
    not_dirty |= screen_b - gpu->dma_start.y - 1;
    not_dirty |= dma_r - gpu->screen.src_x - 1;
    not_dirty |= dma_b - gpu->screen.src_y - 1;
    not_dirty >>= 31;
    log_io(gpu, "dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n",
      gpu->dma_start.x, gpu->dma_start.y, gpu->dma_start.w, gpu->dma_start.h,
      gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, !not_dirty);
    gpu->state.fb_dirty |= !not_dirty;
    renderer_update_caches(gpu->dma_start.x, gpu->dma_start.y,
        gpu->dma_start.w, gpu->dma_start.h, 0);
  }
  if (gpu->gpu_state_change)
    gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
}

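// GP0(0x80) VRAM-to-VRAM copy. The slow path below buffers each row chunk
// through lbuf so it stays correct for overlapping or horizontally wrapping
// rectangles and can apply the mask bit; otherwise rows are copied directly
// with memcpy().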
static void do_vram_copy(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu->ex_regs[6] << 15;
  uint16_t *vram = gpu->vram;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(vram, 0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(vram, 0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++) {
      memcpy(VRAM_MEM_XY(vram, dx, dy1 & 0x1ff),
          VRAM_MEM_XY(vram, sx, sy1 & 0x1ff), w * 2);
    }
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

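// Walk the command stream while a frame is being skipped: only fills that
// are too large to drop safely and 0xEx state commands take effect; stops
// at image I/O commands or as soon as skipping is no longer allowed.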
static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data,
    int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu->frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu->screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu->screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu->frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu->ex_regs[1] &= ~0x1ff;
      gpu->ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if ((cmd & 0xf8) == 0xe0) {
        gpu->ex_regs[cmd & 7] = LE32TOH(list[0]);
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(gpu);
      }
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu->ex_regs);
  *last_cmd = cmd;
  return pos;
}

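// Main GP0 dispatcher: feeds an ongoing VRAM transfer first, filters out
// redundant 0xEx state writes, consumes VRAM write/read/copy commands
// inline and hands everything else to the renderer via do_cmd_list().
// Returns the number of words that could not be consumed yet.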
static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu->ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu->dma.h && !gpu->dma_start.is_read) { // XXX: need to verify
      // vram_dirty = 1; // handled in finish_vram_transfer()
      pos += do_vram_io(gpu, data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    switch (cmd & 0xe0) {
    case 0xe0:
      if (cmd < 0xe8) {
        if (gpu->ex_regs[cmd & 7] == LE32TOH(data[pos])) {
          pos++;
          continue;
        }
      }
      break;
    case 0xc0:
    case 0xa0:
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(gpu, LE32TOH(data[pos + 1]),
          LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    case 0x80:
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(gpu, data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    case 0x00:
      if (cmd == 2)
        break;
      if (cmd == 0x1f)
        log_anomaly(gpu, "irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu->frameskip.active &&
        (gpu->frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) {
      pos += do_cmd_list_skip(gpu, data + pos, count - pos, &cmd);
    }
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu->status &= ~0x1fff;
  gpu->status |= gpu->ex_regs[1] & 0x7ff;
  gpu->status |= (gpu->ex_regs[6] & 3) << 11;

  gpu->state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu->ex_regs[3])
    decide_frameskip_allow(gpu);

  return count - pos;
}

static noinline void flush_cmd_buffer(struct psx_gpu *gpu)
{
  int cycles_last = 0;
  int dummy = 0, left;
  left = do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &cycles_last);
  if (left > 0)
    memmove(gpu->cmd_buffer, gpu->cmd_buffer + gpu->cmd_len - left, left * 4);
  if (left != gpu->cmd_len) {
    gpu->cmd_len = left;
    if (!gpu->dma.h && gpu->gpu_state_change)
      gpu->gpu_state_change(PGS_PRIMITIVE_START, cycles_last);
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io(&gpu, "gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer(&gpu);

  left = do_cmd_buffer(&gpu, mem, count, &dummy, &dummy);
  if (left)
    log_anomaly(&gpu, "GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io(&gpu, "gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer(&gpu);
}

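// Process a GP0 DMA linked list: each node's header word holds the payload
// length in bits 24-31 and the next address in bits 0-23; an address with
// bit 23 set (normally 0xffffff) terminates the list. The ld_addr/ld_count
// checkpointing below is effectively Brent's cycle detection, guarding
// against looped lists.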
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer(&gpu);

  log_io(&gpu, "gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(&gpu, ".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly(&gpu, "cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer(&gpu);
      continue;
    }

    if (len) {
      left = do_cmd_buffer(&gpu, list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly(&gpu, "GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      // hack for the bios boot logo race (must be neither too fast nor too slow)
      if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
        cpu_cycles_sum += 5;
      if (cpu_cycles_sum > 512)
        break;
    }
    if (addr == ld_addr) {
      log_anomaly(&gpu, "GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  if (progress_addr)
    *progress_addr = addr;
  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io(&gpu, "gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer(&gpu);

  if (gpu.dma.h)
    do_vram_io(&gpu, mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer(&gpu);

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&gpu, &ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io(&gpu, "gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer(&gpu);

  ret = gpu.status;
  log_io(&gpu, "gpu_read_status %08x\n", ret);
  return ret;
}

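// Savestate handling (type 1 = save, 0 = load). On load, control registers
// 2-8 are replayed through GPUwriteStatus() so all derived state (screen
// geometry etc.) is rebuilt rather than restored verbatim.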
long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer(&gpu);

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    //memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 1; i--)
      GPUwriteStatus((i << 24) | freeze->ulControl[i]);
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  int updated = 0;

  if (gpu.cmd_len > 0)
    flush_cmd_buffer(&gpu);
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  updated = vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  if (updated) {
    gpu.state.fb_dirty = 0;
    gpu.state.blanked = 0;
  }
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer(&gpu);
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

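// Called by the frontend to push current settings and callbacks. Display
// geometry is re-derived when any centering/overscan option changed, and
// the deferred VRAM mapping happens here, since (assumption) the mmap
// callback only becomes usable once the first callback set arrives.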
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.screen_centering_h_adj != cbs->screen_centering_h_adj
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.screen_centering_h_adj = cbs->screen_centering_h_adj;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width(&gpu);
    update_height(&gpu);
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab