#include "gpu.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
+#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
+#else
+#define unlikely(x)
+#define preload(...)
+#define noinline
+#endif
#define gpu_log(fmt, ...) \
printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
memset(gpu.regs, 0, sizeof(gpu.regs));
for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
gpu.ex_regs[i] = (0xe0 + i) << 24;
- gpu.status.reg = 0x14802000;
+ gpu.status = 0x14802000;
gpu.gp0 = 0;
gpu.regs[3] = 1;
gpu.screen.hres = gpu.screen.w = 256;
static noinline void update_height(void)
{
+ // TODO: emulate this properly..
int sh = gpu.screen.y2 - gpu.screen.y1;
- if (gpu.status.dheight)
+ if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
sh *= 2;
- if (sh <= 0)
+ if (sh <= 0 || sh > gpu.screen.vres)
sh = gpu.screen.vres;
gpu.screen.h = sh;
// but not for interlace since it'll most likely always do that
uint32_t x = cmd_e3 & 0x3ff;
uint32_t y = (cmd_e3 >> 10) & 0x3ff;
- gpu.frameskip.allow = gpu.status.interlace ||
+ gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
(uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
(uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
return gpu.frameskip.allow;
do_cmd_reset();
break;
case 0x03:
- gpu.status.blanking = data & 1;
+ if (data & 1)
+ gpu.status |= PSX_GPU_STATUS_BLANKING;
+ else
+ gpu.status &= ~PSX_GPU_STATUS_BLANKING;
break;
case 0x04:
- gpu.status.dma = data & 3;
+ gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
+ gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
break;
case 0x05:
gpu.screen.x = data & 0x3ff;
update_height();
break;
case 0x08:
- gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
- gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
- gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
+ gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
+ gpu.screen.hres = hres[(gpu.status >> 16) & 7];
+ gpu.screen.vres = vres[(gpu.status >> 19) & 3];
update_width();
update_height();
renderer_notify_res_change();
renderer_flush_queues();
if (is_read) {
- gpu.status.img = 1;
+ gpu.status |= PSX_GPU_STATUS_IMG;
// XXX: wrong for width 1
memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
gpu.state.last_vram_read_frame = *gpu.state.frame_count;
static void finish_vram_transfer(int is_read)
{
if (is_read)
- gpu.status.img = 0;
+ gpu.status &= ~PSX_GPU_STATUS_IMG;
else
renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
gpu.dma_start.w, gpu.dma_start.h);
cmd = data[pos] >> 24;
if (0xa0 <= cmd && cmd <= 0xdf) {
+ if (unlikely((pos+2) >= count)) {
+ // incomplete vram write/read cmd, can't consume yet
+ cmd = -1;
+ break;
+ }
+
// consume vram write/read cmd
start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
pos += 3;
break;
}
- gpu.status.reg &= ~0x1fff;
- gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
- gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
+ gpu.status &= ~0x1fff;
+ gpu.status |= gpu.ex_regs[1] & 0x7ff;
+ gpu.status |= (gpu.ex_regs[6] & 3) << 11;
gpu.state.fb_dirty |= vram_dirty;
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
- uint32_t addr, *list;
- uint32_t *llist_entry = NULL;
+ uint32_t addr, *list, ld_addr = 0;
int len, left, count;
long cpu_cycles = 0;
+ preload(rambase + (start_addr & 0x1fffff) / 4);
+
if (unlikely(gpu.cmd_len > 0))
flush_cmd_buffer();
- // ff7 sends it's main list twice, detect this
- if (*gpu.state.frame_count == gpu.state.last_list.frame &&
- *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
- gpu.state.last_list.cycles > 2048)
- {
- llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
- *llist_entry |= 0x800000;
- }
-
log_io("gpu_dma_chain\n");
addr = start_addr & 0xffffff;
- for (count = 0; addr != 0xffffff; count++)
+ for (count = 0; (addr & 0x800000) == 0; count++)
{
list = rambase + (addr & 0x1fffff) / 4;
len = list[0] >> 24;
addr = list[0] & 0xffffff;
+ preload(rambase + (addr & 0x1fffff) / 4);
+
cpu_cycles += 10;
if (len > 0)
cpu_cycles += 5 + len;
log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
- // loop detection marker
- // (bit23 set causes DMA error on real machine, so
- // unlikely to be ever set by the game)
- list[0] |= 0x800000;
-
if (len) {
left = do_cmd_buffer(list + 1, len);
if (left)
log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
}
- if (addr & 0x800000)
- break;
+ #define LD_THRESHOLD (8*1024)
+ if (count >= LD_THRESHOLD) {
+ if (count == LD_THRESHOLD) {
+ ld_addr = addr;
+ continue;
+ }
+
+ // loop detection marker
+ // (bit23 set causes DMA error on real machine, so
+ // unlikely to be ever set by the game)
+ list[0] |= 0x800000;
+ }
}
- // remove loop detection markers
- addr = start_addr & 0x1fffff;
- while (count-- > 0) {
- list = rambase + addr / 4;
- addr = list[0] & 0x1fffff;
- list[0] &= ~0x800000;
+ if (ld_addr != 0) {
+ // remove loop detection markers
+ count -= LD_THRESHOLD + 2;
+ addr = ld_addr & 0x1fffff;
+ while (count-- > 0) {
+ list = rambase + addr / 4;
+ addr = list[0] & 0x1fffff;
+ list[0] &= ~0x800000;
+ }
}
- if (llist_entry)
- *llist_entry &= ~0x800000;
gpu.state.last_list.frame = *gpu.state.frame_count;
gpu.state.last_list.hcnt = *gpu.state.hcnt;
if (unlikely(gpu.cmd_len > 0))
flush_cmd_buffer();
- ret = gpu.status.reg;
+ ret = gpu.status;
log_io("gpu_read_status %08x\n", ret);
return ret;
}
memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
- freeze->ulStatus = gpu.status.reg;
+ freeze->ulStatus = gpu.status;
break;
case 0: // load
memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
- gpu.status.reg = freeze->ulStatus;
+ gpu.status = freeze->ulStatus;
gpu.cmd_len = 0;
for (i = 8; i > 0; i--) {
gpu.regs[i] ^= 1; // avoid reg change detection
flush_cmd_buffer();
renderer_flush_queues();
- if (gpu.status.blanking) {
+ if (gpu.status & PSX_GPU_STATUS_BLANKING) {
if (!gpu.state.blanked) {
vout_blank();
gpu.state.blanked = 1;
void GPUvBlank(int is_vblank, int lcf)
{
int interlace = gpu.state.allow_interlace
- && gpu.status.interlace && gpu.status.dheight;
+ && (gpu.status & PSX_GPU_STATUS_INTERLACE)
+ && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
// interlace doesn't look nice on progressive displays,
// so we have this "auto" mode here for games that don't read vram
if (gpu.state.allow_interlace == 2