/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

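// derive the displayed width from the GP1(0x08) display mode and the
// GP1(0x06) horizontal range: status bits 16-18 index hres_all[]/hdivs[],
// so dot clock dividers 10/8/5/4 give 256/320/512/640 pixels and the odd
// entries (divider 7) are the 368-pixel modes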
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

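// derive the displayed height from the GP1(0x07) vertical range; y1 is
// offset by the scanlines before the visible area (16 NTSC, 39 PAL here),
// and everything doubles when the double-height (480i) bit is set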
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

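// called once per flip (GP1(0x05) with a new frame counter value): decides
// whether the next frame gets rendered or skipped, honoring the forced,
// advised and fixed-interval (frameskip.set) modes; a fill left over from
// a skipped frame is replayed so the next rendered frame starts clean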
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

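// GP1(0x10) internal register reads: 0x02 returns the texture window,
// 0x03/0x04 the drawing area corners, 0x05 the drawing offset and 0x07
// the GPU version (2); other values leave gp0 unchanged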
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Change this if it ever gets large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
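
// reserve vram plus alignment slack in a single allocation; vram_ptr_orig
// keeps the raw pointer for GPUshutdown while gpu.vram is advanced past
// the 4kb front guard and rounded up to VRAM_ALIGN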
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN - 1));
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN - 1), 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

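// GP1 command port: the high byte selects the command; values are cached
// in gpu.regs[] so redundant writes can be dropped early, except for
// 0x00/0x01 (resets) and 0x05 (display start, drives frameskip decisions)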
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

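// number of parameter words following each GP0 command word (the command
// word itself not counted); variable-length commands - polylines
// (0x48-0x5f) and vram transfers (0xa0-0xdf) - are special-cased in the
// parsers below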
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

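// move pixels between the incoming buffer and vram for an ongoing
// GP0(0xa0)/GP0(0xc0) transfer: finish a partially transferred row first,
// then do whole rows, then stash any tail as the new row offset;
// rows wrap at y=512 like on the real thing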
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

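// latch a GP0(0xa0) write / GP0(0xc0) read rectangle; sizes decode with a
// wrap, so the ((v - 1) & mask) + 1 idiom maps a width of 0 to the full
// 1024 (and a height of 0 to 512), matching hardware behavior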
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

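// GP0(0x80) vram-to-vram copy; rows that overlap, wrap around x=1024 or
// need the mask bit set are staged through lbuf[], while the common
// non-overlapping case is a straight row-by-row memcpy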
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3ff;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1ff;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3ff;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1ff;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3ff) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1ff) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

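// parse a command list without drawing (frameskip): state words (0xe1-0xe5)
// and the texpage from textured polys are still tracked so the cached state
// stays consistent, and fills (0x02) larger than the screen are drawn anyway
// since they likely clear vram that outlives the skipped frame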
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

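// central GP0 dispatcher: feeds an active vram transfer, starts new
// transfers (0xa0/0xc0) and vram copies (0x80), and hands everything else
// to the renderer (or the skip parser while frameskipping); returns the
// number of words left unconsumed due to an incomplete trailing command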
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

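// walk a GPU DMA linked list: each node is one header word whose top byte
// is the payload length in words and whose low 24 bits point to the next
// node (e.g. 0x05xxxxxx = 5 command words follow, next node at xxxxxx);
// bit 23 set (usually 0xffffff) terminates the list, and the same bit is
// borrowed as a loop detection marker on very long lists, then removed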
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8 * 1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

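// save/restore the emulated GPU state; on load the GP1 registers are
// replayed through GPUwriteStatus() to rebuild derived state, with each
// reg xor'ed first so the "same value" early-out never triggers (the
// written value is xor'ed back, leaving the original)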
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.hres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab