gpulib: maybe better loop detection
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1; // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
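
/* Worked example of the math above (a sketch, assuming the commonly
 * documented NTSC power-on defaults x1 = 0x260, x2 = 0xc60): the status
 * hres bits selecting 320 give hdiv = 8, so
 *   x  = (608 - 608) / 8 = 0
 *   sw = (3168 - 608) / 8 = 320, then (320 + 2) & ~3 = 320
 * i.e. the full 320-pixel line is displayed with no left offset. */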

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if the game draws to the display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}
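
/* The single unsigned comparison per axis above covers both bounds: when
 * x < src_x, x - src_x wraps to a huge value, so
 *   (uint32_t)(x - src_x) >= (uint32_t)w
 * is equivalent to (x < src_x || x >= src_x + w). A minimal self-check of
 * the idiom, with made-up values (src_x = 10, w = 320):
 *   (uint32_t)(  5 - 10) >= 320u   // 1: left of the scanout area
 *   (uint32_t)( 15 - 10) >= 320u   // 0: inside it
 *   (uint32_t)(400 - 10) >= 320u   // 1: right of it
 */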

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler that gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Change this value if it ever gets large page
  // support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// doubled, for an overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
# define GPULIB_USE_MMAP 1
# else
# define GPULIB_USE_MMAP 0
# endif
#endif

static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
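
/* Sketch of the pointer math above, assuming VRAM_ALIGN == 16 and a raw
 * allocation at byte address 0x1004: the guard advances the uint16_t
 * pointer by 4096 / 2 elements to 0x2004, and the rounding step
 *   (0x2004 + 15) & ~15 == 0x2010
 * lands on the first 16-byte boundary past the guard. The extra
 * max(VRAM_ALIGN, 4096) bytes in VRAM_SIZE cover the guard page plus the
 * alignment slack (given the page-aligned pointers mmap returns). */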

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // display mode: data bits 0-5 go to status bits 17-22, bit 6 to bit 16
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  3,  3,  3,  3,  6,  6,  6,  6,  4,  4,  4,  4,  8,  8,  8,  8, // 20
  5,  5,  5,  5,  8,  8,  8,  8,  7,  7,  7,  7, 11, 11, 11, 11,
  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3, // 40
  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
  2,  2,  2,  2,  3,  3,  3,  3,  1,  1,  1,  1,  0,  0,  0,  0, // 60
  1,  1,  1,  1,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,  2,  2,
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, // 80
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, // a0
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, // c0
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // e0
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
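
/* Example of reading the table: a shaded four-point polygon (GP0 command
 * 0x38) carries a color word and a vertex word per point, with the first
 * color packed into the command word itself, so cmd_lengths[0x38] == 7
 * and the parsers below consume 1 + 7 = 8 words in total. */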

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// copy a line while forcing the given mask (msb) bit on
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // return the number of 32-bit words consumed (count is in pixels here)
  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
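
/* The buffered path above is taken when a plain row memcpy could trample
 * its own source: a forward overlap (sx < dx < sx + w), a copy that wraps
 * past column 1024, or one that must force the mask bit. For example
 * (made-up values), sx = 0, dx = 64, w = 128 overlaps forward, so each
 * row is staged through lbuf in 128-pixel chunks instead. */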

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w
          || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
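    /* Poly-lines (0x48-0x5f) have no fixed length: the two scans above
     * extend len until the terminator word, recognized by its masked
     * value 0x5000,5000 (0x55555555 as games send it), stepping one word
     * per extra vertex, or two when the line is gouraud-shaded. */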
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]),
          (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active
        && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  // return the number of words not yet consumed
  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
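    /* The loop detection below is a Brent-style power-of-two probe (cf.
     * the commit subject): ld_addr keeps a snapshot of the list address,
     * re-sampled whenever count reaches ld_count, which doubles from 32.
     * Once the walk enters a cycle, some snapshot lands inside it, so the
     * addr == ld_addr check fires within roughly twice the cycle length,
     * using O(1) memory instead of a visited set. */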
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab