gpu_neon: fix some missing ebuf updates
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* also for calloc */
#include <string.h>

#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
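  // (status >> 16) & 7 selects the horizontal mode: hres_all[] is its
  // nominal width and hdivs[] the dot clock divider for that mode,
  // e.g. mode 0 is 256 pixels wide (clock/10), mode 6 is 640 (clock/4)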
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)
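// (native vram is 1024x512 pixels at 16bpp = 1 MiB; doubled here, plus guard)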
227
228// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
229// renderer/downscaler it uses in high res modes:
230#ifdef GCW_ZERO
231 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
232 // fills. (Will change this value if it ever gets large page support)
233 #define VRAM_ALIGN 8192
234#else
235 #define VRAM_ALIGN 16
236#endif
237
238// vram ptr received from mmap/malloc/alloc (will deallocate using this)
239static uint16_t *vram_ptr_orig = NULL;
240
241#ifndef GPULIB_USE_MMAP
242# ifdef __linux__
243# define GPULIB_USE_MMAP 1
244# else
245# define GPULIB_USE_MMAP 0
246# endif
247#endif
248static int map_vram(void)
249{
250#if GPULIB_USE_MMAP
251 gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
252#else
253 gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
254#endif
255 if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
256 // 4kb guard in front
257 gpu.vram += (4096 / 2);
258 // Align
259 gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
260 return 0;
261 }
262 else {
263 fprintf(stderr, "could not map vram, expect crashes\n");
264 return -1;
265 }
266}
267
268long GPUinit(void)
269{
270 int ret;
271 ret = vout_init();
272 ret |= renderer_init();
273
274 memset(&gpu.state, 0, sizeof(gpu.state));
275 memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
276 gpu.zero = 0;
277 gpu.state.frame_count = &gpu.zero;
278 gpu.state.hcnt = &gpu.zero;
279 gpu.cmd_len = 0;
280 do_reset();
281
282 /*if (gpu.mmap != NULL) {
283 if (map_vram() != 0)
284 ret = -1;
285 }*/
286 return ret;
287}
288
289long GPUshutdown(void)
290{
291 long ret;
292
293 renderer_finish();
294 ret = vout_finish();
295
296 if (vram_ptr_orig != NULL) {
297#if GPULIB_USE_MMAP
298 gpu.munmap(vram_ptr_orig, VRAM_SIZE);
299#else
300 free(vram_ptr_orig);
301#endif
302 }
303 vram_ptr_orig = gpu.vram = NULL;
304
305 return ret;
306}
307
308void GPUwriteStatus(uint32_t data)
309{
310 uint32_t cmd = data >> 24;
311 int src_x, src_y;
312
313 if (cmd < ARRAY_SIZE(gpu.regs)) {
314 if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
315 return;
316 gpu.regs[cmd] = data;
317 }
318
319 gpu.state.fb_dirty = 1;
320
321 switch (cmd) {
322 case 0x00:
323 do_reset();
324 break;
325 case 0x01:
326 do_cmd_reset();
327 break;
328 case 0x03:
329 if (data & 1) {
330 gpu.status |= PSX_GPU_STATUS_BLANKING;
331 gpu.state.dims_changed = 1; // for hud clearing
332 }
333 else
334 gpu.status &= ~PSX_GPU_STATUS_BLANKING;
335 break;
336 case 0x04:
337 gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
338 gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
339 break;
340 case 0x05:
341 src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
342 if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
343 gpu.screen.src_x = src_x;
344 gpu.screen.src_y = src_y;
345 renderer_notify_scanout_change(src_x, src_y);
346 if (gpu.frameskip.set) {
347 decide_frameskip_allow(gpu.ex_regs[3]);
348 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
349 decide_frameskip();
350 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
351 }
352 }
353 }
354 break;
355 case 0x06:
356 gpu.screen.x1 = data & 0xfff;
357 gpu.screen.x2 = (data >> 12) & 0xfff;
358 update_width();
359 break;
360 case 0x07:
361 gpu.screen.y1 = data & 0x3ff;
362 gpu.screen.y2 = (data >> 10) & 0x3ff;
363 update_height();
364 break;
365 case 0x08:
366 gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
367 update_width();
368 update_height();
369 renderer_notify_res_change();
370 break;
371 default:
372 if ((cmd & 0xf0) == 0x10)
373 get_gpu_info(data);
374 break;
375 }
376
377#ifdef GPUwriteStatus_ext
378 GPUwriteStatus_ext(data);
379#endif
380}
381
382const unsigned char cmd_lengths[256] =
383{
384 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
387 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
388 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
389 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
390 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
391 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
392 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
393 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
394 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
395 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
396 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
397 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
399 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
400};
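
// each cmd_lengths[] entry is the number of parameter words that follow the
// command word itself, e.g. 0x28 (monochrome quad) is 1 command/color word
// plus 4 vertex words, so cmd_lengths[0x28] == 4 and the whole packet is 5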

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

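// handle the data phase of a vram write/read started by start_vram_transfer:
// 'data' holds 'count' FIFO words, each carrying two 16bpp pixels, and the
// return value is the number of words consumed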
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

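// GP0(80h) vram-to-vram blit; rows that overlap, wrap past x=1024 or need
// the mask-set bit go through a bounce buffer, the rest is a memcpy per line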
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

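// parse commands without drawing while a frame is being skipped; fills and
// e-register state are still tracked so nothing is lost. returns words
// consumed; *last_cmd is the last command seen, or -1 if it was incomplete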
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

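// dispatch a raw command stream: routes vram transfer data, vram copies and
// drawing lists to their handlers; returns the number of trailing words that
// couldn't be consumed yet (an incomplete packet the caller must keep)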
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

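// walk the GPU DMA linked list: each node begins with a header word holding
// the payload word count in bits 24-31 and the next node's address in bits
// 0-23; an address with bit 23 set (e.g. the 0xffffff terminator) ends it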
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.hres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab