From d02ab9fc4a1557c10c6b6d545c11df34c3daa173 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Dec 2023 22:14:54 +0200 Subject: [PATCH] gpu: rework dma vs busy timing maybe should implement actual fifo instead someday libretro/pcsx_rearmed#809 --- frontend/plugin.c | 2 +- libpcsxcore/plugins.h | 2 +- libpcsxcore/psxdma.c | 33 +++++--- plugins/dfxvideo/gpulib_if.c | 54 ++++++------- plugins/gpu-gles/gpulib_if.c | 3 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 96 ++++++++++++------------ plugins/gpu_neon/psx_gpu_if.c | 11 ++- plugins/gpu_unai/gpulib_if.cpp | 56 +++++++------- plugins/gpulib/gpu.c | 44 ++++++----- plugins/gpulib/gpu.h | 6 +- plugins/gpulib/gpu_timing.h | 6 +- 12 files changed, 175 insertions(+), 140 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 02354639..c400165f 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -122,7 +122,7 @@ extern void GPUwriteDataMem(uint32_t *, int); extern uint32_t GPUreadStatus(void); extern uint32_t GPUreadData(void); extern void GPUreadDataMem(uint32_t *, int); -extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *); +extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *, int32_t *); extern void GPUupdateLace(void); extern long GPUfreeze(uint32_t, void *); extern void GPUvBlank(int, int); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index d080baed..df8ed87d 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -58,7 +58,7 @@ typedef void (CALLBACK* GPUwriteDataMem)(uint32_t *, int); typedef uint32_t (CALLBACK* GPUreadStatus)(void); typedef uint32_t (CALLBACK* GPUreadData)(void); typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int); -typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *); +typedef long (CALLBACK* GPUdmaChain)(uint32_t *, uint32_t, uint32_t *, int32_t *); typedef void (CALLBACK* GPUupdateLace)(void); typedef void (CALLBACK* GPUmakeSnapshot)(void); typedef void (CALLBACK* GPUkeypressed)(int); diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 3ec42ede..25ee2f0d 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -90,6 +90,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU DMA_INTERRUPT(4); } +#if 0 // Taken from PEOPS SOFTGPU static inline boolean CheckForEndlessLoop(u32 laddr, u32 *lUsedAddr) { if (laddr == lUsedAddr[1]) return TRUE; @@ -130,11 +131,12 @@ static u32 gpuDmaChainSize(u32 addr) { return size; } +#endif void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU - u32 *ptr, madr_next, *madr_next_p, size; + u32 *ptr, madr_next, *madr_next_p; u32 words, words_left, words_max, words_copy; - int do_walking; + int cycles_sum, cycles_last_cmd = 0, do_walking; madr &= ~3; switch (chcr) { @@ -195,18 +197,19 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU do_walking = Config.hacks.gpu_slow_list_walking; madr_next_p = do_walking ? &madr_next : NULL; - size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, madr_next_p); - if ((int)size <= 0) - size = gpuDmaChainSize(madr); + cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, + madr_next_p, &cycles_last_cmd); HW_DMA2_MADR = SWAPu32(madr_next); // a hack for Judge Dredd which is annoyingly sensitive to timing if (Config.hacks.gpu_timing1024) - size = 1024; + cycles_sum = 1024; - psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16; - set_event(PSXINT_GPUDMA, size); + psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; + set_event(PSXINT_GPUDMA, cycles_sum); + //printf("%u dma2cf: %d,%d %08x\n", psxRegs.cycle, cycles_sum, + // cycles_last_cmd, HW_DMA2_MADR); return; default: @@ -221,11 +224,17 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { - u32 size, madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); - size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next); + u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); + int cycles_sum, cycles_last_cmd = 0; + cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, + &madr_next, &cycles_last_cmd); HW_DMA2_MADR = SWAPu32(madr_next); - psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64; - set_event(PSXINT_GPUDMA, size); + if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) > 0) + cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle; + psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; + set_event(PSXINT_GPUDMA, cycles_sum); + //printf("%u dma2cn: %d,%d %08x\n", psxRegs.cycle, cycles_sum, + // cycles_last_cmd, HW_DMA2_MADR); return; } if (HW_DMA2_CHCR & SWAP32(0x01000000)) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index ed8d9f2b..8a3f2f9a 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -322,12 +322,13 @@ void renderer_notify_scanout_change(int x, int y) #include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; unsigned int cmd = 0, len; uint32_t *list_start = list; uint32_t *list_end = list + list_len; - u32 cpu_cycles = 0; for (; list < list_end; list += 1 + len) { @@ -357,7 +358,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd while(1) { - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); if(list_position >= list_end) { cmd = -1; @@ -382,7 +383,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd while(1) { - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); if(list_position >= list_end) { cmd = -1; @@ -403,8 +404,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd #ifdef TEST case 0xA0: // sys -> vid { - u32 load_width = LE2HOST32(slist[4]); - u32 load_height = LE2HOST32(slist[5]); + u32 load_width = LE2HOST16(slist[4]); + u32 load_height = LE2HOST16(slist[5]); u32 load_size = load_width * load_height; len += load_size / 2; @@ -414,32 +415,32 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd // timing case 0x02: - cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff, - LE2HOST32(slist[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); break; - case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break; - case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break; - case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break; - case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break; - case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break; - case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break; - case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break; - case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break; - case 0x40 ... 0x47: cpu_cycles += gput_line(0); break; - case 0x50 ... 0x57: cpu_cycles += gput_line(0); break; + case 0x20 ... 0x23: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; + case 0x24 ... 0x27: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; + case 0x28 ... 0x2B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; + case 0x2C ... 0x2F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; + case 0x30 ... 0x33: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; + case 0x34 ... 0x37: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; + case 0x38 ... 0x3B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; + case 0x3C ... 0x3F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; + case 0x40 ... 0x47: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; + case 0x50 ... 0x57: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; case 0x60 ... 0x63: - cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff, - LE2HOST32(slist[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); break; case 0x64 ... 0x67: - cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff, - LE2HOST32(slist[7]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[6]) & 0x3ff, LE2HOST16(slist[7]) & 0x1ff)); break; - case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break; + case 0x68 ... 0x6B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; case 0x70 ... 0x73: - case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break; + case 0x74 ... 0x77: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8)); break; case 0x78 ... 0x7B: - case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break; + case 0x7C ... 0x7F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16)); break; } } @@ -447,7 +448,8 @@ breakloop: gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff; - *cpu_cycles_out += cpu_cycles; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 561b7cf8..ab95c641 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -521,7 +521,8 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops -int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { unsigned int cmd, len; unsigned int *list_start = list; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 764c2e70..88f3df51 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -256,7 +256,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu); void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles, u32 *last_command); + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command); void triangle_benchmark(psx_gpu_struct *psx_gpu); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index af26fa37..d401522a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -252,10 +252,11 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #endif u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles_out, u32 *last_command) + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length, cpu_cycles = 0; + u32 current_command = 0, command_length; + u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -284,7 +285,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 color = list[0] & 0xFFFFFF; do_fill(psx_gpu, x, y, width, height, color); - cpu_cycles += gput_fill(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height)); break; } @@ -297,7 +298,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; } @@ -312,7 +313,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; } @@ -327,7 +328,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; } @@ -344,7 +345,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; } @@ -355,7 +356,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(2, 8); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; } @@ -369,7 +370,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; } @@ -382,7 +383,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; } @@ -398,7 +399,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; } @@ -410,7 +411,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -432,7 +433,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position++; num_vertexes++; @@ -467,7 +468,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -498,7 +499,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position += 2; num_vertexes++; @@ -527,7 +528,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -543,7 +544,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -555,7 +556,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; } @@ -567,7 +568,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -582,7 +583,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -594,7 +595,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -609,7 +610,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -789,7 +790,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, } breakloop: - *cpu_cycles_out += cpu_cycles; + *cpu_cycles_sum_out += cpu_cycles_sum; + *cpu_cycles_last = cpu_cycles; *last_command = current_command; return list - list_start; } @@ -1202,10 +1204,11 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, #endif u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles_out, u32 *last_command) + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length, cpu_cycles = 0; + u32 current_command = 0, command_length; + u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -1244,7 +1247,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, x &= ~0xF; width = ((width + 0xF) & ~0xF); - cpu_cycles += gput_fill(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height)); if (width == 0 || height == 0) break; @@ -1275,7 +1278,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; } @@ -1290,7 +1293,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; } @@ -1304,7 +1307,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(3, 8); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; } @@ -1321,7 +1324,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; } @@ -1332,7 +1335,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(2, 8); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; } @@ -1346,7 +1349,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; } @@ -1358,7 +1361,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(3, 12); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; } @@ -1374,7 +1377,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; } @@ -1388,7 +1391,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -1413,7 +1416,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position++; num_vertexes++; @@ -1450,7 +1453,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -1484,7 +1487,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position += 2; num_vertexes++; @@ -1516,7 +1519,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1536,7 +1539,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1551,7 +1554,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 1)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; } @@ -1566,7 +1569,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1585,7 +1588,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1600,7 +1603,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1619,7 +1622,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1773,7 +1776,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); breakloop: - *cpu_cycles_out += cpu_cycles; + *cpu_cycles_sum_out += cpu_cycles_sum; + *cpu_cycles_last = cpu_cycles; *last_command = current_command; return list - list_start; } diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 3f45b0e6..313f1f01 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -38,7 +38,8 @@ sync_enhancement_buffers(int x, int y, int w, int h); static psx_gpu_struct egpu __attribute__((aligned(256))); -int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) +int do_cmd_list(uint32_t *list, int count, + int *cycles_sum, int *cycles_last, int *last_cmd) { int ret; @@ -48,9 +49,11 @@ int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) #endif if (gpu.state.enhancement_active) - ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd); + ret = gpu_parse_enhanced(&egpu, list, count * 4, + cycles_sum, cycles_last, (u32 *)last_cmd); else - ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd); + ret = gpu_parse(&egpu, list, count * 4, + cycles_sum, cycles_last, (u32 *)last_cmd); #if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) __asm__ __volatile__("":::"q4","q5","q6","q7"); @@ -156,7 +159,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) { s32 dummy0 = 0; u32 dummy1 = 0; - gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1); + gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy0, &dummy1); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 45c73a73..6816e2bd 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -393,13 +393,14 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) #include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) +int do_cmd_list(u32 *list_, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; u32 cmd = 0, len, i; - le32_t *list = (le32_t *)_list; + le32_t *list = (le32_t *)list_; le32_t *list_start = list; le32_t *list_end = list + list_len; - u32 cpu_cycles = 0; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below. @@ -432,8 +433,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) { case 0x02: gpuClearImage(packet); - cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, - le16_to_s16(packet.U2[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, le16_to_s16(packet.U2[5]) & 0x1ff)); break; case 0x20: @@ -446,7 +447,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, false); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); } break; case 0x24: @@ -471,7 +472,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, false); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); } break; case 0x28: @@ -484,7 +485,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); } break; case 0x2C: @@ -509,7 +510,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); } break; case 0x30: @@ -527,7 +528,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); } break; case 0x34: @@ -543,7 +544,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); } break; case 0x38: @@ -558,7 +559,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); } break; case 0x3C: @@ -574,7 +575,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); } break; case 0x40: @@ -585,7 +586,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); } break; case 0x48 ... 0x4F: { // Monochrome line strip @@ -602,7 +603,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; gpu_unai.PacketBuffer.U4[2] = *list_position++; gpuDrawLineF(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { @@ -626,7 +627,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); } break; case 0x58 ... 0x5F: { // Gouraud-shaded line strip @@ -647,7 +648,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = *list_position++; gpu_unai.PacketBuffer.U4[3] = *list_position++; gpuDrawLineG(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { @@ -668,7 +669,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x64: @@ -697,7 +698,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x68: @@ -708,7 +709,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); } break; case 0x70: @@ -719,7 +720,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x74: @@ -738,7 +739,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x78: @@ -749,7 +750,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x7C: @@ -760,7 +761,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) s32 w = 0, h = 0; gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); break; } // fallthrough @@ -778,7 +779,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; #ifdef TEST @@ -815,7 +816,8 @@ breakloop: gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; - *cpu_cycles_out += cpu_cycles; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -823,7 +825,7 @@ breakloop: void renderer_sync_ecmds(u32 *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index e3943a25..fdaf388d 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -36,7 +36,8 @@ struct psx_gpu gpu; -static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles); +static noinline int do_cmd_buffer(uint32_t *data, int count, + int *cycles_sum, int *cycles_last); static void finish_vram_transfer(int is_read); static noinline void do_cmd_reset(void) @@ -44,7 +45,7 @@ static noinline void do_cmd_reset(void) int dummy = 0; renderer_sync(); if (unlikely(gpu.cmd_len > 0)) - do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); + do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy); gpu.cmd_len = 0; if (unlikely(gpu.dma.h > 0)) @@ -182,7 +183,7 @@ static noinline void decide_frameskip(void) if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) { int dummy = 0; - do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy); + do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy); gpu.frameskip.pending_fill[0] = 0; } } @@ -580,7 +581,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x02: if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h) // clearing something large, don't skip - do_cmd_list(list, 3, &dummy, &dummy); + do_cmd_list(list, 3, &dummy, &dummy, &dummy); else memcpy(gpu.frameskip.pending_fill, list, 3 * 4); break; @@ -630,7 +631,8 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) return pos; } -static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) +static noinline int do_cmd_buffer(uint32_t *data, int count, + int *cycles_sum, int *cycles_last) { int cmd, pos; uint32_t old_e3 = gpu.ex_regs[3]; @@ -664,7 +666,9 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) cmd = -1; // incomplete cmd, can't consume yet break; } - do_vram_copy(data + pos + 1, cpu_cycles); + *cycles_sum += *cycles_last; + *cycles_last = 0; + do_vram_copy(data + pos + 1, cycles_last); vram_dirty = 1; pos += 4; continue; @@ -679,7 +683,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) pos += do_cmd_list_skip(data + pos, count - pos, &cmd); else { - pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd); + pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd); vram_dirty = 1; } @@ -703,7 +707,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) static noinline void flush_cmd_buffer(void) { int dummy = 0, left; - left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); + left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy); if (left > 0) memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4); if (left != gpu.cmd_len) { @@ -722,7 +726,7 @@ void GPUwriteDataMem(uint32_t *mem, int count) if (unlikely(gpu.cmd_len > 0)) flush_cmd_buffer(); - left = do_cmd_buffer(mem, count, &dummy); + left = do_cmd_buffer(mem, count, &dummy, &dummy); if (left) log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count); } @@ -735,11 +739,13 @@ void GPUwriteData(uint32_t data) flush_cmd_buffer(); } -long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr) +long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, + uint32_t *progress_addr, int32_t *cycles_last_cmd) { uint32_t addr, *list, ld_addr = 0; int len, left, count; - int cpu_cycles = 0; + int cpu_cycles_sum = 0; + int cpu_cycles_last = 0; preload(rambase + (start_addr & 0x1fffff) / 4); @@ -755,12 +761,12 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr addr = LE32TOH(list[0]) & 0xffffff; preload(rambase + (addr & 0x1fffff) / 4); - cpu_cycles += 10; + cpu_cycles_sum += 10; if (len > 0) - cpu_cycles += 5 + len; + cpu_cycles_sum += 5 + len; - log_io(".chain %08lx #%d+%d %u\n", - (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles); + log_io(".chain %08lx #%d+%d %u+%u\n", + (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last); if (unlikely(gpu.cmd_len > 0)) { if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) { log_anomaly("cmd_buffer overflow, likely garbage commands\n"); @@ -773,7 +779,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } if (len) { - left = do_cmd_buffer(list + 1, len, &cpu_cycles); + left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last); if (left) { memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4); gpu.cmd_len = left; @@ -810,12 +816,14 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } } + //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last); gpu.state.last_list.frame = *gpu.state.frame_count; gpu.state.last_list.hcnt = *gpu.state.hcnt; - gpu.state.last_list.cycles = cpu_cycles; + gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last; gpu.state.last_list.addr = start_addr; - return cpu_cycles; + *cycles_last_cmd = cpu_cycles_last; + return cpu_cycles_sum; } void GPUreadDataMem(uint32_t *mem, int count) diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 201b5a46..7625c412 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -122,7 +122,8 @@ extern struct psx_gpu gpu; extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd); +int do_cmd_list(uint32_t *list, int count, + int *cycles_sum, int *cycles_last, int *last_cmd); struct rearmed_cbs; @@ -150,7 +151,8 @@ struct GPUFreeze; long GPUinit(void); long GPUshutdown(void); void GPUwriteDataMem(uint32_t *mem, int count); -long GPUdmaChain(uint32_t *rambase, uint32_t addr, uint32_t *progress_addr); +long GPUdmaChain(uint32_t *rambase, uint32_t addr, + uint32_t *progress_addr, int32_t *cycles_last_cmd); void GPUwriteData(uint32_t data); void GPUreadDataMem(uint32_t *mem, int count); uint32_t GPUreadData(void); diff --git a/plugins/gpulib/gpu_timing.h b/plugins/gpulib/gpu_timing.h index 363e608d..9991fd80 100644 --- a/plugins/gpulib/gpu_timing.h +++ b/plugins/gpulib/gpu_timing.h @@ -1,6 +1,6 @@ // very conservative and wrong -#define gput_fill(w, h) (23 + (4 + (w) / 32u) * (h)) +#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h)) #define gput_copy(w, h) ((w) * (h)) #define gput_poly_base() (23) #define gput_poly_base_t() (gput_poly_base() + 90) @@ -13,3 +13,7 @@ #define gput_line(k) (8 + (k)) #define gput_sprite(w, h) (8 + ((w) / 2u) * (h)) +// sort of a workaround for lack of proper fifo emulation +#define gput_sum(sum, cnt, new_cycles) do { \ + sum += cnt; cnt = new_cycles; \ +} while (0) -- 2.39.5