psxdma: Fix endian issue in gpuInterrupt()
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* for calloc */
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

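// GP1(01h) "reset command buffer": execute whatever GP0 data is still
// queued, then drop it and terminate any unfinished VRAM transfer.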
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

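// Displayed width comes from the GP1(08h) mode bits: status bits 16-18
// pick the dotclock divider (hdivs) and the matching nominal hres; the
// GP1(06h) x1/x2 range is then scaled down by that divider.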
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

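// Same idea vertically: the GP1(07h) y1/y2 range is offset by the
// nominal display start (the 39/16 constants below are empirical) and
// everything doubles in double-height (480i) mode.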
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

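// Update frameskip state for the new frame from the configured skip
// pattern plus the frontend's force/advice flags; a screen fill held
// back during skipping is executed once skipping stops.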
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

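// GP1(10h) "get GPU info": bits 0-3 of the argument select which
// internal register value is latched into GP0 for the next read.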
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
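// Allocate VRAM with alignment slack, skip the 4 kB guard area at the
// front, then round the pointer up to VRAM_ALIGN.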
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

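// GP1 control write: the top byte selects the command. Redundant
// writes of the same value are filtered out, except for reset
// (00h/01h) and display start (05h), which always take effect.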
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

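// Extra parameter words that follow each GP0 command word, so a full
// fixed-size command is 1 + cmd_lengths[cmd] words. Variable-length
// commands (polylines, image transfer pixel data) get special
// handling in the parsers below.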
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

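// VRAM is a 1024x512 grid of 16-bit units; transfers wrap on these
// dimensions.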
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

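// Stream data words of an ongoing VRAM transfer. Three phases:
// finish a partially-done line, copy bulk whole lines, then start
// the next partial line if the words run out mid-row.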
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

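// Set up a GP0(A0h/C0h) image transfer: pos_word packs x/y, size_word
// packs w/h; zero sizes wrap to the 1024/512 maximums. For reads, the
// first 32 bits at the source are pre-latched into GP0 (see the XXX
// note below).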
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

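// GP0(80h) VRAM-to-VRAM copy. The fast path is a straight per-line
// memcpy; overlapping, wrapping or mask-bit copies go through a small
// line buffer so source pixels are read before they are overwritten.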
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

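// Walk the command list without drawing (frameskip): track state that
// must survive a skipped frame (screen fills, texture flags, e-regs)
// and stop early on image i/o or an incomplete command.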
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

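// Parse and execute a buffer of GP0 words; returns how many words
// remain unconsumed because the trailing command is incomplete.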
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

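// Process a GP0 linked-list DMA chain. Each node starts with a header
// word: bits 24-31 give the payload word count, bits 0-23 the address
// of the next node; an address with bit 23 set terminates the list.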
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

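// GPU->CPU reads: stream pixels out of an active VRAM read transfer,
// or return whatever was last latched into GP0.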
void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

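// Save/load state. On load, the control registers are replayed
// through GPUwriteStatus(); gpu.regs[i] is pre-toggled so the
// redundant-write filter there can't swallow the replayed value.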
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

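// Per-vsync presentation: flush pending work, track display blanking,
// then hand the frame to vout_update() unless frameskip holds it back.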
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab