plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // must still expand to the condition on non-GCC compilers
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

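// GP1(0x01) "reset command buffer": flush any buffered command words and
// wind down an in-flight VRAM transfer so the next command starts clean.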
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
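  // GPUSTAT reset value; per nocash this leaves the display disabled
  // (bit 23) with the command/DMA-ready bits (26, 28) set - bit 13 is
  // presumably the interlace field flag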
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
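  // index comes from GPUSTAT bits 16-18; hdiv is the video clock divider,
  // so nominal width is roughly 2560 / hdiv (2560/10 = 256, 2560/8 = 320,
  // 2560/5 = 512, 2560/4 = 640, ~368 for the divider-7 modes)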
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

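// Decide whether upcoming frames should be skipped: a forced skip or
// frontend advice always activates skipping, otherwise a positive .set
// allows at most .set consecutive skipped frames. A fill queued via
// pending_fill while skipping is replayed once rendering resumes.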
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

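// GP1 command dispatcher; the command number is in bits 24-31. Repeated
// writes of an unchanged value are dropped early, except for 0x00/0x01/0x05
// which always have side effects.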
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

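// Number of parameter words following each GP0 command word (total packet
// size is cmd_lengths[cmd] + 1). Variable-length polylines (0x48-0x5f) are
// terminated by a (word & 0xf000f000) == 0x50005000 marker and have their
// real length determined at parse time.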
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

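// Copy one line of pixels into or out of VRAM. "msb" carries the mask bit
// from GP0(0xe6) (bit 0 shifted to pixel bit 15); when set it is OR'ed into
// every written pixel, which forces the cpy_msb() path instead of memcpy().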
static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

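// Stream "count" words of an ongoing VRAM transfer, resuming at
// gpu.dma.offset within the current row. Rows wrap at the 512-line VRAM
// height (y &= 511). Returns how many of the input words were consumed.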
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

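// Begin a GP0 CPU->VRAM write (0xa0) or VRAM->CPU read (0xc0) transfer.
// Width and height are decoded nocash-style as ((n - 1) & max) + 1, so a
// size of 0 selects the full 1024x512 extent.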
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
}

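// GP0(0x80) VRAM-to-VRAM copy. The buffered path below is taken when the
// rectangles overlap horizontally, wrap around the 1024-pixel VRAM width,
// or the mask bit must be set; otherwise plain row memcpy() is enough.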
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

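// Parse (but do not render) a command stream while frameskip is active:
// fills are deferred via pending_fill, texture/e-register state is still
// tracked so later frames render correctly, and parsing stops once an
// 0xe3 command re-enables drawing to the displayed area.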
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

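// Main GP0 dispatcher: resumes any in-progress VRAM transfer, handles image
// transfers (0xa0-0xdf) and VRAM copies (0x80-0x9f) itself, and passes
// everything else to the renderer (or to the skip parser when frameskipping).
// Returns the number of words left unconsumed (incomplete trailing command).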
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

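// Walk a GP0 DMA linked list: each node's header word packs the next node's
// address in bits 0-23 and the payload word count in bits 24-31, with bit 23
// set marking the end of the chain. Since bit 23 would fault on real
// hardware, it is also borrowed below as an in-RAM loop detection marker.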
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
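    // replay control registers 1..8 to re-derive display state; the XOR
    // below temporarily flips the stored value so GPUwriteStatus()'s
    // no-change early-out cannot swallow the write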
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

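// Late-bound frontend configuration. The pointers taken from rearmed_cbs
// stay live, so the core can flip the frameskip advice/force/dirty flags
// without calling back in here.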
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab