2 * (C) Gražvydas "notaz" Ignotas, 2011-2012
4 * This work is licensed under the terms of any of these licenses
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
14 #include <stdlib.h> /* for calloc */
17 #include "gpu_timing.h"
18 #include "../../libpcsxcore/gpu.h" // meh
19 #include "../../frontend/plugin_lib.h"
20 #include "../../include/compiler_features.h"
23 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
26 //#define log_io gpu_log
31 static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
32 int *cycles_sum, int *cycles_last);
33 static noinline void finish_vram_transfer(struct psx_gpu *gpu, int is_read);
// GP1 command reset helper: drain any partially-buffered GP0 command data and
// close an in-flight VRAM transfer so no stale state survives the reset.
// (Some lines of this function are not visible in this chunk.)
35 static noinline void do_cmd_reset(struct psx_gpu *gpu)
39   if (unlikely(gpu->cmd_len > 0))
40     do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &dummy);
43   if (unlikely(gpu->dma.h > 0))
44     finish_vram_transfer(gpu, gpu->dma_start.is_read);
// Full GPU reset: zero the GP1 register shadow, seed the e1..e7 shadow regs
// with their command bytes, restore the power-on status word and default
// 256x240 display, then resync the renderer.
48 static noinline void do_reset(struct psx_gpu *gpu)
54   memset(gpu->regs, 0, sizeof(gpu->regs));
// ex_regs[i] holds the last 0xEi GP0 command; seed with the command byte only.
55   for (i = 0; i < sizeof(gpu->ex_regs) / sizeof(gpu->ex_regs[0]); i++)
56     gpu->ex_regs[i] = (0xe0 + i) << 24;
// Power-on GPUSTAT value (display disabled, ready bits set).
57   gpu->status = 0x14802000;
60   gpu->screen.hres = gpu->screen.w = 256;
61   gpu->screen.vres = gpu->screen.h = 240;
62   gpu->screen.x = gpu->screen.y = 0;
63   renderer_sync_ecmds(gpu->ex_regs);
64   renderer_notify_res_change();
// Recompute the horizontal display resolution and x offset from the display
// range (x1..x2) and the hres select bits in GPUSTAT[18:16], applying the
// configured centering mode. (Some lines are not visible in this chunk.)
67 static noinline void update_width(struct psx_gpu *gpu)
// hres and matching dot-clock divider, both indexed by GPUSTAT[18:16].
69   static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
70   static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
71   uint8_t hdiv = hdivs[(gpu->status >> 16) & 7];
72   int hres = hres_all[(gpu->status >> 16) & 7];
73   int pal = gpu->status & PSX_GPU_STATUS_PAL;
74   int sw = gpu->screen.x2 - gpu->screen.x1;   // display width in dot clocks
75   int type = gpu->state.screen_centering_type;
78     type = gpu->state.screen_centering_type_default;
80     /* nothing displayed? */;
// s = nominal leftmost visible x1 in dot clocks; converts x1 to pixels.
82     int s = pal ? 656 : 608; // or 600? pal is just a guess
83     x = (gpu->screen.x1 - s) / hdiv;
84     x = (x + 1) & ~1; // blitter limitation
86     sw = (sw + 2) & ~3; // according to nocash
88     if (gpu->state.show_overscan == 2) // widescreen hack
90     if (gpu->state.show_overscan && sw >= hres)
// manual centering: take x straight from user config
96     x = gpu->state.screen_centering_x;
99   // correct if slightly miscentered
100   x_auto = (hres - sw) / 2 & ~3;
101   if ((uint32_t)x_auto <= 8u && abs(x) < 24)
106   // .x range check is done in vout_update()
108   // reduce the unpleasant right border that a few games have
109   if (gpu->state.screen_centering_type == 0
110       && x <= 4 && hres - (x + sw) >= 4)
114   gpu->screen.hres = hres;
115   gpu->state.dims_changed = 1;
116   //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu->screen.x1,
117   //  gpu->screen.x2, gpu->screen.x2 - gpu->screen.x1, x, sw, hres);
// Recompute the vertical display resolution and y offset from the display
// range (y1..y2), doubling everything in interlaced/double-height PAL modes.
// (Some lines are not visible in this chunk.)
120 static noinline void update_height(struct psx_gpu *gpu)
122   int pal = gpu->status & PSX_GPU_STATUS_PAL;
123   int dheight = gpu->status & PSX_GPU_STATUS_DHEIGHT;
// subtract nominal top border (PAL offset tuned for Spyro, per comment)
124   int y = gpu->screen.y1 - (pal ? 39 : 16); // 39 for spyro
125   int sh = gpu->screen.y2 - gpu->screen.y1;
129   if (pal && (sh > 240 || gpu->screen.vres == 256))
// double-height mode: scale offset, height, vres and tolerance together
132     y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
134     /* nothing displayed? */;
136   switch (gpu->state.screen_centering_type) {
// manual centering from user config
143     y = gpu->state.screen_centering_y;
144   vres += gpu->state.screen_centering_h_adj;
147   // correct if slightly miscentered
148   if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
156   gpu->screen.vres = vres;
157   gpu->state.dims_changed = 1;
158   //printf("yy %d %d -> %d, %d / %d\n",
159   //       gpu->screen.y1, gpu->screen.y2, y, sh, vres);
// Decide whether the next frame is skipped, based on forced/advised skip and
// the configured skip interval; flushes a deferred fill when rendering resumes.
// (Some lines are not visible in this chunk.)
162 static noinline void decide_frameskip(struct psx_gpu *gpu)
164   *gpu->frameskip.dirty = 1;
166   if (gpu->frameskip.active)
167     gpu->frameskip.cnt++;
169     gpu->frameskip.cnt = 0;
170     gpu->frameskip.frame_ready = 1;
// priority: forced skip > frontend advice > "skip N of M" counter
173   if (*gpu->frameskip.force)
174     gpu->frameskip.active = 1;
175   else if (!gpu->frameskip.active && *gpu->frameskip.advice)
176     gpu->frameskip.active = 1;
177   else if (gpu->frameskip.set > 0 && gpu->frameskip.cnt < gpu->frameskip.set)
178     gpu->frameskip.active = 1;
180     gpu->frameskip.active = 0;
// a fill cmd deferred while skipping must run once we render again
182   if (!gpu->frameskip.active && gpu->frameskip.pending_fill[0] != 0) {
184     do_cmd_list(gpu->frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
185     gpu->frameskip.pending_fill[0] = 0;
// Decide whether skipping is allowed at all: disallow when the current
// drawing area (from e3) overlaps the displayed region, unless interlaced.
// Returns the new gpu->frameskip.allow value.
189 static noinline int decide_frameskip_allow(struct psx_gpu *gpu)
191   // no frameskip if it decides to draw to display area,
192   // but not for interlace since it'll most likely always do that
193   uint32_t cmd_e3 = gpu->ex_regs[3];
194   uint32_t x = cmd_e3 & 0x3ff;
195   uint32_t y = (cmd_e3 >> 10) & 0x3ff;
// unsigned compare doubles as "outside on either side" range check
196   gpu->frameskip.allow = (gpu->status & PSX_GPU_STATUS_INTERLACE) ||
197     (uint32_t)(x - gpu->screen.src_x) >= (uint32_t)gpu->screen.w ||
198     (uint32_t)(y - gpu->screen.src_y) >= (uint32_t)gpu->screen.h;
199   return gpu->frameskip.allow;
202 static void flush_cmd_buffer(struct psx_gpu *gpu);
// GP1(0x10) "get GPU info": place the requested internal value in gpu->gp0
// for readback. Buffered commands are flushed first so ex_regs are current.
// (Some switch cases are not visible in this chunk.)
204 static noinline void get_gpu_info(struct psx_gpu *gpu, uint32_t data)
206   if (unlikely(gpu->cmd_len > 0))
207     flush_cmd_buffer(gpu);
208   switch (data & 0x0f) {
// drawing area / offset registers
212       gpu->gp0 = gpu->ex_regs[data & 7] & 0xfffff;
// drawing offset (e5), sign-extended 11-bit pair
215       gpu->gp0 = gpu->ex_regs[5] & 0x3fffff;
221       // gpu->gp0 unchanged
// VRAM buffer sizing/alignment and the mmap-vs-calloc allocation selection.
// (Some preprocessor branches are not visible in this chunk.)
227 #define max(a, b) (((a) > (b)) ? (a) : (b))
230 // Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
231 // renderer/downscaler it uses in high res modes:
233 // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
234 // fills. (Will change this value if it ever gets large page support)
235 #define VRAM_ALIGN 8192
237 #define VRAM_ALIGN 64
240 // double, for overdraw/overscan guard + at least 1 page before
241 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
243 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
244 static uint16_t *vram_ptr_orig = NULL;
246 #ifndef GPULIB_USE_MMAP
247 # if defined(__linux__) || defined(_3DS) || defined(HAVE_LIBNX) || defined(VITA)
248 #  define GPULIB_USE_MMAP 1
250 #  define GPULIB_USE_MMAP 0
// Allocate the emulated VRAM (mmap or calloc depending on GPULIB_USE_MMAP),
// then offset past a 4 KB guard and round up to VRAM_ALIGN.
// (The #if/#else and return paths are not visible in this chunk.)
253 static int map_vram(void)
256   gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
258   gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
// mmap may return MAP_FAILED (-1), calloc may return NULL — check both
260   if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
261     // 4kb guard in front
262     gpu.vram += (4096 / 2);
264     gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
268   fprintf(stderr, "could not map vram, expect crashes\n");
// NOTE(review): body of the plugin init function (its header is not visible
// in this chunk) — initializes the renderer and default state pointers.
278   ret |= renderer_init();
280   memset(&gpu.state, 0, sizeof(gpu.state));
281   memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
// point counters at a harmless zero until the frontend provides real ones
283   gpu.state.frame_count = &gpu.zero;
284   gpu.state.hcnt = &gpu.zero;
// Release the VRAM buffer using the same mechanism that allocated it.
// (renderer_finish and the return are not visible in this chunk.)
291 long GPUshutdown(void)
298   if (vram_ptr_orig != NULL) {
300     gpu.munmap(vram_ptr_orig, VRAM_SIZE);
305   vram_ptr_orig = gpu.vram = NULL;
// GP1 register write dispatch: reset, display enable/blanking, DMA mode,
// display start/range, display mode, and the 0x1x info queries.
// (The switch cases' labels and some branches are not visible in this chunk.)
310 void GPUwriteStatus(uint32_t data)
312   uint32_t cmd = data >> 24;
313   uint32_t fb_dirty = 1;
// ignore redundant writes (except cmds 0,1,5 which always act)
316   if (cmd < ARRAY_SIZE(gpu.regs)) {
317     if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
319     gpu.regs[cmd] = data;
// display disable -> blanking
332       gpu.status |= PSX_GPU_STATUS_BLANKING;
333       gpu.state.dims_changed = 1; // for hud clearing
336       gpu.status &= ~PSX_GPU_STATUS_BLANKING;
// DMA direction (GP1 0x04)
339       gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
340       gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
// display start address (GP1 0x05): scanout origin in VRAM
344       src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
345       if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
346         gpu.screen.src_x = src_x;
347         gpu.screen.src_y = src_y;
348         renderer_notify_scanout_change(src_x, src_y);
349         if (gpu.frameskip.set) {
350           decide_frameskip_allow(&gpu);
// treat scanout change as a flip: at most one skip decision per frame
351           if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
352             decide_frameskip(&gpu);
353             gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
// horizontal display range (GP1 0x06)
359       gpu.screen.x1 = data & 0xfff;
360       gpu.screen.x2 = (data >> 12) & 0xfff;
// vertical display range (GP1 0x07)
364       gpu.screen.y1 = data & 0x3ff;
365       gpu.screen.y2 = (data >> 10) & 0x3ff;
// display mode (GP1 0x08): map mode bits into GPUSTAT[22:16]
369       gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
372       renderer_notify_res_change();
375       if ((cmd & 0xf0) == 0x10)
376         get_gpu_info(&gpu, data);
381   gpu.state.fb_dirty |= fb_dirty;
383 #ifdef GPUwriteStatus_ext
384   GPUwriteStatus_ext(data);
// Number of extra parameter words per GP0 command, indexed by command byte.
// Zero for env/misc commands; polyline (0x48+) entries count the fixed part
// only — their terminator is scanned for separately.
388 const unsigned char cmd_lengths[256] =
390   0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392   3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
393   5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
394   2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
395   3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
396   2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
397   1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
398   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
399   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
400   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
401   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
402   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
403   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
404   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
405   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
// Address of pixel (x,y) in a 1024-wide 16bpp VRAM buffer.
408 #define VRAM_MEM_XY(vram_, x, y) &vram_[(y) * 1024 + (x)]
410 // this isn't very useful so should be rare
// Copy l pixels applying e6 mask bits: bit0 (set-mask) forces msb on writes,
// bit1 (check-mask) skips destination pixels that already have the msb set.
411 static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
415     for (i = 0; i < l; i++)
416       dst[i] = src[i] | 0x8000;
// msb = set-mask bit replicated into bit15
419     uint16_t msb = r6 << 15;
420     for (i = 0; i < l; i++) {
// mask is all-ones when dst pixel is protected (its msb set)
421       uint16_t mask = (int16_t)dst[i] >> 15;
422       dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask);
// Transfer one scanline (l pixels) between VRAM and mem: read copies out,
// write copies in, honoring the e6 mask setting when present.
427 static inline void do_vram_line(uint16_t *vram_, int x, int y,
428     uint16_t *mem, int l, int is_read, uint32_t r6)
430   uint16_t *vram = VRAM_MEM_XY(vram_, x, y);
431   if (unlikely(is_read))
432     memcpy(mem, vram, l * 2);
433   else if (unlikely(r6))
434     cpy_mask(vram, mem, l, r6);
436     memcpy(vram, mem, l * 2);
// Stream `count` words to/from the active VRAM transfer rectangle, resuming a
// partial line at dma.offset, then whole lines, then a trailing partial line.
// Returns the number of words consumed. (Some lines elided in this chunk.)
439 static int do_vram_io(struct psx_gpu *gpu, uint32_t *data, int count, int is_read)
441   int count_initial = count;
442   uint32_t r6 = gpu->ex_regs[6] & 3;
443   uint16_t *sdata = (uint16_t *)data;
444   uint16_t *vram = gpu->vram;
445   int x = gpu->dma.x, y = gpu->dma.y;
446   int w = gpu->dma.w, h = gpu->dma.h;
447   int o = gpu->dma.offset;
449   count *= 2; // operate in 16bpp pixels
// finish a line that a previous call left incomplete
453   if (gpu->dma.offset) {
454     l = w - gpu->dma.offset;
458     do_vram_line(vram, x + o, y, sdata, l, is_read, r6);
// full lines while enough data remains
471   for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
473     do_vram_line(vram, x, y, sdata, w, is_read, r6);
// leftover partial line: remember offset for the next call
479     do_vram_line(vram, x, y, sdata, count, is_read, r6);
485     finish_vram_transfer(gpu, is_read);
// convert consumed pixels back to 32-bit words
490   return count_initial - count / 2;
// Begin a GP0 0xA0/0xC0 VRAM write/read: decode position/size (with hardware
// wrap semantics), latch the transfer state, and for reads prefetch the first
// word into gp0. (Some lines are not visible in this chunk.)
493 static noinline void start_vram_transfer(struct psx_gpu *gpu, uint32_t pos_word,
494     uint32_t size_word, int is_read)
497     log_anomaly(gpu, "start_vram_transfer while old unfinished\n");
499   gpu->dma.x = pos_word & 0x3ff;
500   gpu->dma.y = (pos_word >> 16) & 0x1ff;
// size 0 wraps to max (1024/512), matching hardware behavior
501   gpu->dma.w = ((size_word - 1) & 0x3ff) + 1;
502   gpu->dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
504   gpu->dma.is_read = is_read;
505   gpu->dma_start = gpu->dma;
// queued primitives must hit VRAM before the CPU reads it
507   renderer_flush_queues();
509     const uint16_t *mem = VRAM_MEM_XY(gpu->vram, gpu->dma.x, gpu->dma.y);
510     gpu->status |= PSX_GPU_STATUS_IMG;
511     // XXX: wrong for width 1
512     gpu->gp0 = LE16TOH(mem[0]) | ((uint32_t)LE16TOH(mem[1]) << 16);
513     gpu->state.last_vram_read_frame = *gpu->state.frame_count;
516   log_io(gpu, "start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
517     gpu->dma.x, gpu->dma.y, gpu->dma.w, gpu->dma.h);
518   if (gpu->gpu_state_change)
519     gpu->gpu_state_change(PGS_VRAM_TRANSFER_START, 0);
// End a VRAM transfer: clear the IMG status for reads; for writes decide
// whether the transferred rect overlaps the scanout area (fb_dirty), and
// invalidate renderer caches. (Some lines are not visible in this chunk.)
522 static void finish_vram_transfer(struct psx_gpu *gpu, int is_read)
525     gpu->status &= ~PSX_GPU_STATUS_IMG;
527     int32_t screen_r = gpu->screen.src_x + gpu->screen.hres;
528     int32_t screen_b = gpu->screen.src_y + gpu->screen.vres;
529     int32_t dma_r = gpu->dma_start.x + gpu->dma_start.w;
530     int32_t dma_b = gpu->dma_start.y + gpu->dma_start.h;
// rect-overlap test via sign bits: not_dirty < 0 iff the rects do not overlap
532     not_dirty = screen_r - gpu->dma_start.x - 1;
533     not_dirty |= screen_b - gpu->dma_start.y - 1;
534     not_dirty |= dma_r - gpu->screen.src_x - 1;
535     not_dirty |= dma_b - gpu->screen.src_y - 1;
537     log_io(gpu, "dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n",
538       gpu->dma_start.x, gpu->dma_start.y, gpu->dma_start.w, gpu->dma_start.h,
539       gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, !not_dirty);
540     gpu->state.fb_dirty |= !not_dirty;
541     renderer_update_caches(gpu->dma_start.x, gpu->dma_start.y,
542         gpu->dma_start.w, gpu->dma_start.h, 0);
544   if (gpu->gpu_state_change)
545     gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
// GP0 0x80 VRAM-to-VRAM copy. Fast path uses memcpy per line; slow path uses
// a small line buffer when the rects overlap horizontally, wrap at x=1024, or
// the e6 set-mask bit must be applied. (Some lines elided in this chunk.)
548 static void do_vram_copy(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles)
550   const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
551   const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
552   const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
553   const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
// size 0 wraps to max, same as VRAM transfers
554   uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
555   uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
556   uint16_t msb = gpu->ex_regs[6] << 15;
557   uint16_t *vram = gpu->vram;
561   *cpu_cycles += gput_copy(w, h);
// copy onto itself with no mask bit: nothing to do
562   if (sx == dx && sy == dy && msb == 0)
565   renderer_flush_queues();
567   if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
569     for (y = 0; y < h; y++)
571       const uint16_t *src = VRAM_MEM_XY(vram, 0, (sy + y) & 0x1ff);
572       uint16_t *dst = VRAM_MEM_XY(vram, 0, (dy + y) & 0x1ff);
// stage a chunk through lbuf so overlapping src/dst copy correctly
573       for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
575         uint32_t x1, w1 = w - x;
576         if (w1 > ARRAY_SIZE(lbuf))
577           w1 = ARRAY_SIZE(lbuf);
578         for (x1 = 0; x1 < w1; x1++)
579           lbuf[x1] = src[(sx + x + x1) & 0x3ff];
580         for (x1 = 0; x1 < w1; x1++)
581           dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
587     uint32_t sy1 = sy, dy1 = dy;
588     for (y = 0; y < h; y++, sy1++, dy1++) {
589       memcpy(VRAM_MEM_XY(vram, dx, dy1 & 0x1ff),
590              VRAM_MEM_XY(vram, sx, sy1 & 0x1ff), w * 2);
594   renderer_update_caches(dx, dy, w, h, 0);
// Consume GP0 commands while frameskip is active without rendering them:
// fills are deferred (or executed if large), texture pages and e-regs are
// still tracked, polylines are scanned to their terminator.
// Returns words consumed; *last_cmd gets the last command byte.
// (Some lines are not visible in this chunk.)
597 static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data,
598     int count, int *last_cmd)
600   int cmd = 0, pos = 0, len, dummy = 0, v;
603   gpu->frameskip.pending_fill[0] = 0;
605   while (pos < count && skip) {
606     uint32_t *list = data + pos;
607     cmd = LE32TOH(list[0]) >> 24;
608     len = 1 + cmd_lengths[cmd];
609     if (pos + len > count) {
611       break; // incomplete cmd
// fill rect (0x02): large fills may clear scanout, so run them
616         if ((LE32TOH(list[2]) & 0x3ff) > gpu->screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu->screen.h)
617           // clearing something large, don't skip
618           do_cmd_list(list, 3, &dummy, &dummy, &dummy);
620           memcpy(gpu->frameskip.pending_fill, list, 3 * 4);
// textured quad: keep the texpage part of e1 up to date while skipping
626         gpu->ex_regs[1] &= ~0x1ff;
627         gpu->ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
// polyline: advance to the 0x5xxx5xxx terminator word
630         for (v = 3; pos + v < count; v++)
632           if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// shaded polyline: terminator is on every other word
638         for (v = 4; pos + v < count; v += 2)
640           if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
// e-regs may change the drawing area and thus whether skipping stays allowed
646     if ((cmd & 0xf8) == 0xe0) {
647       gpu->ex_regs[cmd & 7] = LE32TOH(list[0]);
649         skip = decide_frameskip_allow(gpu);
// 0x80..0xdf are vram copy/write/read cmds — stop skipping, caller handles
653     if (0x80 <= cmd && cmd <= 0xdf)
659   renderer_sync_ecmds(gpu->ex_regs);
// Core GP0 dispatcher: routes buffered words to the active VRAM transfer,
// e-reg shadows, VRAM write/read/copy setup, or the renderer's command-list
// processor (or the skip path when frameskipping). Returns words left
// unconsumed. (Some lines are not visible in this chunk.)
664 static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
665     int *cycles_sum, int *cycles_last)
668   uint32_t old_e3 = gpu->ex_regs[3];
672   for (pos = 0; pos < count; )
// an active VRAM write eats the data stream first
674     if (gpu->dma.h && !gpu->dma_start.is_read) { // XXX: need to verify
675       // vram_dirty = 1; // handled in finish_vram_transfer()
676       pos += do_vram_io(gpu, data + pos, count - pos, 0);
681     cmd = LE32TOH(data[pos]) >> 24;
682     switch (cmd & 0xe0) {
// 0xEx env cmd identical to current shadow: nothing to do
685         if (gpu->ex_regs[cmd & 7] == LE32TOH(data[pos])) {
693         if (unlikely((pos+2) >= count)) {
694           // incomplete vram write/read cmd, can't consume yet
699         // consume vram write/read cmd
700         start_vram_transfer(gpu, LE32TOH(data[pos + 1]),
701             LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
705         if (unlikely((pos+3) >= count)) {
706           cmd = -1; // incomplete cmd, can't consume yet
710         *cycles_sum += *cycles_last;
712         do_vram_copy(gpu, data + pos + 1, cycles_last);
720         log_anomaly(gpu, "irq1?\n");
725         // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
726         if (gpu->frameskip.active &&
727             (gpu->frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) {
728           pos += do_cmd_list_skip(gpu, data + pos, count - pos, &cmd);
731           pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
// mirror e1/e6 shadow bits back into GPUSTAT
740   gpu->status &= ~0x1fff;
741   gpu->status |= gpu->ex_regs[1] & 0x7ff;
742   gpu->status |= (gpu->ex_regs[6] & 3) << 11;
744   gpu->state.fb_dirty |= vram_dirty;
// drawing area change (e3) can flip frameskip eligibility
746   if (old_e3 != gpu->ex_regs[3])
747     decide_frameskip_allow(gpu);
// Process the accumulated GP0 command buffer; any unconsumed tail (an
// incomplete command) is moved to the front for the next call.
752 static noinline void flush_cmd_buffer(struct psx_gpu *gpu)
756   left = do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &cycles_last);
758     memmove(gpu->cmd_buffer, gpu->cmd_buffer + gpu->cmd_len - left, left * 4);
759   if (left != gpu->cmd_len) {
761     if (!gpu->dma.h && gpu->gpu_state_change)
762       gpu->gpu_state_change(PGS_PRIMITIVE_START, cycles_last);
// Plugin API: bulk GP0 data write. Flushes any buffered commands first; words
// that cannot be consumed (incomplete trailing cmd) are discarded with a log.
766 void GPUwriteDataMem(uint32_t *mem, int count)
770   log_io(&gpu, "gpu_dma_write %p %d\n", mem, count);
772   if (unlikely(gpu.cmd_len > 0))
773     flush_cmd_buffer(&gpu);
775   left = do_cmd_buffer(&gpu, mem, count, &dummy, &dummy);
777     log_anomaly(&gpu, "GPUwriteDataMem: discarded %d/%d words\n", left, count);
// Plugin API: single-word GP0 write — buffered, flushed when the buffer fills.
780 void GPUwriteData(uint32_t data)
782   log_io(&gpu, "gpu_write %08x\n", data);
783   gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
784   if (gpu.cmd_len >= CMD_BUFFER_LEN)
785     flush_cmd_buffer(&gpu);
// Plugin API: walk a GP0 DMA linked list in RAM, feeding each packet to the
// command processor and accumulating a cycle estimate. Loop detection uses
// the two-pointer (Brent-style) ld_addr/ld_count scheme. Returns accumulated
// cycles. (Some lines are not visible in this chunk.)
788 long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
789   uint32_t *progress_addr, int32_t *cycles_last_cmd)
791   uint32_t addr, *list, ld_addr;
792   int len, left, count, ld_count = 32;
793   int cpu_cycles_sum = 0;
794   int cpu_cycles_last = 0;
796   preload(rambase + (start_addr & 0x1fffff) / 4);
798   if (unlikely(gpu.cmd_len > 0))
799     flush_cmd_buffer(&gpu);
801   log_io(&gpu, "gpu_dma_chain\n");
802   addr = ld_addr = start_addr & 0xffffff;
// bit 23 set is the end-of-list marker
803   for (count = 0; (addr & 0x800000) == 0; count++)
805     list = rambase + (addr & 0x1fffff) / 4;
806     len = LE32TOH(list[0]) >> 24;       // payload words in this packet
807     addr = LE32TOH(list[0]) & 0xffffff; // next packet address
808     preload(rambase + (addr & 0x1fffff) / 4);
// per-packet DMA cost estimate
810     cpu_cycles_sum += 10;
812       cpu_cycles_sum += 5 + len;
814     log_io(&gpu, ".chain %08lx #%d+%d %u+%u\n",
815       (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
// leftover words from a previous packet must be prepended
816     if (unlikely(gpu.cmd_len > 0)) {
817       if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
818         log_anomaly(&gpu, "cmd_buffer overflow, likely garbage commands\n");
821       memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
823       flush_cmd_buffer(&gpu);
828       left = do_cmd_buffer(&gpu, list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
830         memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
832         log_anomaly(&gpu, "GPUdmaChain: %d/%d words left\n", left, len);
837     // hack for bios boot logo race (must be not too fast or too slow)
838     if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
840       if (cpu_cycles_sum > 512)
// loop detection: compare against a trailing pointer, doubling its lag
843     if (addr == ld_addr) {
844       log_anomaly(&gpu, "GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
847     if (count == ld_count) {
853   //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
854   gpu.state.last_list.frame = *gpu.state.frame_count;
855   gpu.state.last_list.hcnt = *gpu.state.hcnt;
856   gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
857   gpu.state.last_list.addr = start_addr;
860     *progress_addr = addr;
861   *cycles_last_cmd = cpu_cycles_last;
862   return cpu_cycles_sum;
// Plugin API: bulk VRAM readback into mem via the active read transfer.
865 void GPUreadDataMem(uint32_t *mem, int count)
867   log_io(&gpu, "gpu_dma_read %p %d\n", mem, count);
869   if (unlikely(gpu.cmd_len > 0))
870     flush_cmd_buffer(&gpu);
873     do_vram_io(&gpu, mem, count, 1);
// Plugin API: single-word GPUREAD — returns the next VRAM-read word (or the
// latched gp0 value when no transfer is active; that path is elided here).
876 uint32_t GPUreadData(void)
880   if (unlikely(gpu.cmd_len > 0))
881     flush_cmd_buffer(&gpu);
886     do_vram_io(&gpu, &ret, 1, 1);
890   log_io(&gpu, "gpu_read %08x\n", ret);
// Plugin API: read GPUSTAT; buffered commands are flushed first so the
// status bits reflect all pending writes.
894 uint32_t GPUreadStatus(void)
898   if (unlikely(gpu.cmd_len > 0))
899     flush_cmd_buffer(&gpu);
902   log_io(&gpu, "gpu_read_status %08x\n", ret);
// Plugin API: save/load state. Save copies VRAM, GP1 regs and e-reg shadows
// into the freeze struct; load restores them and replays GP1 regs 8..2 so
// derived state (screen dims, status bits) is rebuilt.
// (The type switch and returns are not visible in this chunk.)
906 long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
913       flush_cmd_buffer(&gpu);
916     memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
917     memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
// e-reg shadows are stashed past the GP1 regs, at offset 0xe0
918     memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
919     freeze->ulStatus = gpu.status;
923     memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
924     //memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
925     memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
926     gpu.status = freeze->ulStatus;
// replay GP1 writes (descending so display mode lands before ranges)
928     for (i = 8; i > 1; i--)
929       GPUwriteStatus((i << 24) | freeze->ulControl[i]);
930     renderer_sync_ecmds(gpu.ex_regs);
931     renderer_update_caches(0, 0, 1024, 512, 0);
// Plugin API: per-vblank display update — flush pending work, handle
// blanking, apply frameskip gating, and push the frame to the video out.
// (Some lines are not visible in this chunk.)
938 void GPUupdateLace(void)
943     flush_cmd_buffer(&gpu);
944   renderer_flush_queues();
946 #ifndef RAW_FB_DISPLAY
947   if (gpu.status & PSX_GPU_STATUS_BLANKING) {
// display just went blank: show it blanked once, then stop updating
948     if (!gpu.state.blanked) {
950       gpu.state.blanked = 1;
951       gpu.state.fb_dirty = 1;
956   renderer_notify_update_lace(0);
958   if (!gpu.state.fb_dirty)
962   if (gpu.frameskip.set) {
963     if (!gpu.frameskip.frame_ready) {
// keep skipping only for a bounded number of frames, then force a flip
964       if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
966       gpu.frameskip.active = 0;
968     gpu.frameskip.frame_ready = 0;
971   updated = vout_update();
// enhancement just toggled on: renderer caches are stale, rebuild fully
972   if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
973     renderer_update_caches(0, 0, 1024, 512, 1);
974   gpu.state.enhancement_was_active = gpu.state.enhancement_active;
976     gpu.state.fb_dirty = 0;
977     gpu.state.blanked = 0;
979   renderer_notify_update_lace(1);
// Plugin API: vblank notification — decide whether interlaced rendering is
// in effect and tell the renderer the current field (lcf).
982 void GPUvBlank(int is_vblank, int lcf)
984   int interlace = gpu.state.allow_interlace
985     && (gpu.status & PSX_GPU_STATUS_INTERLACE)
986     && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
987   // interlace doesn't look nice on progressive displays,
988   // so we have this "auto" mode here for games that don't read vram
989   if (gpu.state.allow_interlace == 2
990       && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
994   if (interlace || interlace != gpu.state.old_interlace) {
995     gpu.state.old_interlace = interlace;
998       flush_cmd_buffer(&gpu);
999     renderer_flush_queues();
1000     renderer_set_interlace(interlace, !lcf);
// Plugin API: report screen position/resolution to the frontend.
// NOTE(review): *base_hres is assigned gpu.screen.vres here — the parameter
// name suggests horizontal resolution (upstream gpulib uses screen.hres);
// verify against the elided lines before relying on this.
1004 void GPUgetScreenInfo(int *y, int *base_hres)
1007   *base_hres = gpu.screen.vres;
1008   if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
// Plugin API: adopt frontend-provided callbacks and settings (frameskip
// sources, counters, centering/overscan options, allocators); re-derives the
// display geometry when centering settings changed, and performs the delayed
// VRAM mapping. (Some lines are not visible in this chunk.)
1012 void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
1014   gpu.frameskip.set = cbs->frameskip;
1015   gpu.frameskip.advice = &cbs->fskip_advice;
1016   gpu.frameskip.force = &cbs->fskip_force;
1017   gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
1018   gpu.frameskip.active = 0;
1019   gpu.frameskip.frame_ready = 1;
1020   gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
1021   gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
1022   gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
1023   gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
1024   gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
// only recompute geometry when a centering-related setting actually changed
1025   if (gpu.state.screen_centering_type != cbs->screen_centering_type
1026       || gpu.state.screen_centering_x != cbs->screen_centering_x
1027       || gpu.state.screen_centering_y != cbs->screen_centering_y
1028       || gpu.state.screen_centering_h_adj != cbs->screen_centering_h_adj
1029       || gpu.state.show_overscan != cbs->show_overscan) {
1030     gpu.state.screen_centering_type = cbs->screen_centering_type;
1031     gpu.state.screen_centering_x = cbs->screen_centering_x;
1032     gpu.state.screen_centering_y = cbs->screen_centering_y;
1033     gpu.state.screen_centering_h_adj = cbs->screen_centering_h_adj;
1034     gpu.state.show_overscan = cbs->show_overscan;
1036     update_height(&gpu);
1039   gpu.mmap = cbs->mmap;
1040   gpu.munmap = cbs->munmap;
1041   gpu.gpu_state_change = cbs->gpu_state_change;
1043   // delayed vram mmap
1044   if (gpu.vram == NULL)
1047   if (cbs->pl_vout_set_raw_vram)
1048     cbs->pl_vout_set_raw_vram(gpu.vram);
1049   renderer_set_config(cbs);
1050   vout_set_config(cbs);
1053 // vim:shiftwidth=2:expandtab