From 90ac6fed274c1d573a971c66f8a1338e8918f066 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 28 Nov 2023 00:23:03 +0200 Subject: [PATCH] gpu: start doing some basic gpu timing minimum only for now, mostly based on Mednafen libretro/pcsx_rearmed#573 libretro/pcsx_rearmed#783 --- plugins/dfxvideo/gpulib_if.c | 42 +++- plugins/gpu-gles/gpulib_if.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 3 +- plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 252 +++++++++++------------ plugins/gpu_neon/psx_gpu_if.c | 10 +- plugins/gpu_unai/gpulib_if.cpp | 31 ++- plugins/gpu_unai_old/gpulib_if.cpp | 4 +- plugins/gpulib/gpu.c | 34 +-- plugins/gpulib/gpu.h | 2 +- plugins/gpulib/gpu_timing.h | 15 ++ plugins/gpulib/test.c | 4 +- 12 files changed, 244 insertions(+), 161 deletions(-) create mode 100644 plugins/gpulib/gpu_timing.h diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index d08ca67e..20383ab5 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -303,16 +303,19 @@ void renderer_notify_scanout_change(int x, int y) { } +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd) { unsigned int cmd = 0, len; uint32_t *list_start = list; uint32_t *list_end = list + list_len; + u32 cpu_cycles = 0; for (; list < list_end; list += 1 + len) { + short *slist = (void *)list; cmd = GETLE32(list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { @@ -338,6 +341,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) while(1) { + cpu_cycles += gput_line(0); + if(list_position >= list_end) { cmd = -1; goto breakloop; @@ -361,6 +366,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) while(1) { + cpu_cycles += gput_line(0); + if(list_position >= list_end) { cmd = -1; goto breakloop; @@ -380,7 +387,6 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) #ifdef TEST case 0xA0: // sys -> vid { - short *slist = (void *)list; u32 load_width = LE2HOST32(slist[4]); u32 load_height = LE2HOST32(slist[5]); u32 load_size = load_width * load_height; @@ -389,6 +395,35 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) break; } #endif + + // timing + case 0x02: + cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff, + LE2HOST32(slist[5]) & 0x1ff); + break; + case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break; + case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break; + case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break; + case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break; + case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break; + case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break; + case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break; + case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break; + case 0x40 ... 0x47: cpu_cycles += gput_line(0); break; + case 0x50 ... 0x57: cpu_cycles += gput_line(0); break; + case 0x60 ... 0x63: + cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff, + LE2HOST32(slist[5]) & 0x1ff); + break; + case 0x64 ... 0x67: + cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff, + LE2HOST32(slist[7]) & 0x1ff); + break; + case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break; + case 0x70 ... 0x73: + case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break; + case 0x78 ... 0x7B: + case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break; } } @@ -396,6 +431,7 @@ breakloop: gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff; + *cpu_cycles_out += cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -440,3 +476,5 @@ void renderer_set_config(const struct rearmed_cbs *cbs) cbs->pl_set_gpu_caps(0); set_vram(gpu.vram); } + +// vim:ts=2:shiftwidth=2:expandtab diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index a3a0c43b..d440fdb1 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -521,7 +521,7 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) { unsigned int cmd, len; unsigned int *list_start = list; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index da9e3426..06514b95 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -254,7 +254,8 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu); void flush_render_block_buffer(psx_gpu_struct *psx_gpu); void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); -u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command); +u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + s32 *cpu_cycles, u32 *last_command); void triangle_benchmark(psx_gpu_struct *psx_gpu); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index c7ce0ee4..435c51a2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -135,6 +135,8 @@ int main(int argc, char *argv[]) FILE *state_file; FILE *list_file; u32 no_display = 0; + s32 dummy0 = 0; + u32 dummy1 = 0; if((argc != 3) && (argc != 4)) { @@ -213,7 +215,7 @@ int main(int argc, char *argv[]) init_counter(); #endif - gpu_parse(psx_gpu, list, size, NULL); + gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); flush_render_block_buffer(psx_gpu); clear_stats(); @@ -222,7 +224,7 @@ int main(int argc, char *argv[]) u32 cycles = get_counter(); #endif - gpu_parse(psx_gpu, list, size, NULL); + gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); flush_render_block_buffer(psx_gpu); #ifdef NEON_BUILD diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 5f69919e..b0254aff 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -15,6 +15,7 @@ #include #include "common.h" +#include "../../gpulib/gpu_timing.h" #ifndef command_lengths const u8 command_lengths[256] = @@ -250,30 +251,31 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #define SET_Ex(r, v) #endif -u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) +u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + s32 *cpu_cycles_out, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length; + u32 current_command = 0, command_length, cpu_cycles = 0; u32 *list_start = list; u32 *list_end = list + (size / 4); for(; list < list_end; list += 1 + command_length) { - s16 *list_s16 = (void *)list; - current_command = *list >> 24; - command_length = command_lengths[current_command]; - if (list + 1 + command_length > list_end) { - current_command = (u32)-1; - break; - } - - switch(current_command) - { - case 0x00: - break; - - case 0x02: + s16 *list_s16 = (void *)list; + current_command = *list >> 24; + command_length = command_lengths[current_command]; + if (list + 1 + command_length > list_end) { + current_command = (u32)-1; + break; + } + + switch(current_command) + { + case 0x00: + break; + + case 0x02: { u32 x = list_s16[2] & 0x3FF; u32 y = list_s16[3] & 0x1FF; @@ -282,10 +284,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 color = list[0] & 0xFFFFFF; do_fill(psx_gpu, x, y, width, height, color); - break; + cpu_cycles += gput_fill(width, height); + break; } - - case 0x20 ... 0x23: + + case 0x20 ... 0x23: { set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); @@ -294,10 +297,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy(2, 6); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base(); + break; } - case 0x24 ... 0x27: + case 0x24 ... 0x27: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[9]); @@ -308,10 +312,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy_uv(2, 10); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_t(); + break; } - case 0x28 ... 0x2B: + case 0x28 ... 0x2B: { set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); @@ -322,10 +327,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base(); + break; } - case 0x2C ... 0x2F: + case 0x2C ... 0x2F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[9]); @@ -338,23 +344,22 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_t(); + break; } - case 0x30 ... 0x33: + case 0x30 ... 0x33: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); get_vertex_data_xy_rgb(2, 8); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_g(); + break; } - case 0x34: - case 0x35: - case 0x36: - case 0x37: + case 0x34 ... 0x37: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -364,13 +369,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy_uv_rgb(2, 12); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_gt(); + break; } - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: + case 0x38 ... 0x3B: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); @@ -379,13 +382,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_g(); + break; } - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: + case 0x3C ... 0x3F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -397,10 +398,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_gt(); + break; } - case 0x40 ... 0x47: + case 0x40 ... 0x47: { vertexes[0].x = list_s16[2] + psx_gpu->offset_x; vertexes[0].y = list_s16[3] + psx_gpu->offset_y; @@ -408,10 +410,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - break; + cpu_cycles += gput_line(0); + break; } - case 0x48 ... 0x4F: + case 0x48 ... 0x4F: { u32 num_vertexes = 1; u32 *list_position = &(list[2]); @@ -429,6 +432,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); + cpu_cycles += gput_line(0); list_position++; num_vertexes++; @@ -448,7 +452,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x50 ... 0x57: + case 0x50 ... 0x57: { vertexes[0].r = list[0] & 0xFF; vertexes[0].g = (list[0] >> 8) & 0xFF; @@ -463,7 +467,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - break; + cpu_cycles += gput_line(0); + break; } case 0x58 ... 0x5F: @@ -493,6 +498,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); + cpu_cycles += gput_line(0); list_position += 2; num_vertexes++; @@ -512,7 +518,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x60 ... 0x63: + case 0x60 ... 0x63: { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -520,10 +526,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); - break; + cpu_cycles += gput_sprite(width, height); + break; } - case 0x64 ... 0x67: + case 0x64 ... 0x67: { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -535,37 +542,31 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, current_command, list[0]); - break; + cpu_cycles += gput_sprite(width, height); + break; } - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: + case 0x68 ... 0x6B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); - break; + cpu_cycles += gput_sprite(1, 1); + break; } - case 0x70: - case 0x71: - case 0x72: - case 0x73: + case 0x70 ... 0x73: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - break; + cpu_cycles += gput_sprite(8, 8); + break; } - case 0x74: - case 0x75: - case 0x76: - case 0x77: + case 0x74 ... 0x77: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -575,25 +576,21 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, current_command, list[0]); - break; + cpu_cycles += gput_sprite(8, 8); + break; } - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: + case 0x78 ... 0x7B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - break; + cpu_cycles += gput_sprite(16, 16); + break; } - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: + case 0x7C ... 0x7F: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -603,7 +600,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, current_command, list[0]); - break; + cpu_cycles += gput_sprite(16, 16); + break; } #ifdef PCSX @@ -643,14 +641,14 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y, load_width, load_height, load_width); - break; + break; } case 0xC0 ... 0xDF: // vid -> sys break; #endif - case 0xE1: + case 0xE1: set_texture(psx_gpu, list[0]); if(list[0] & (1 << 9)) @@ -659,10 +657,10 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1; - SET_Ex(1, list[0]); - break; + SET_Ex(1, list[0]); + break; - case 0xE2: + case 0xE2: { // TODO: Clean u32 texture_window_settings = list[0]; @@ -751,11 +749,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->offset_x = offset_x >> 21; psx_gpu->offset_y = offset_y >> 21; - SET_Ex(5, list[0]); - break; - } + SET_Ex(5, list[0]); + break; + } - case 0xE6: + case 0xE6: { u32 mask_settings = list[0]; u16 mask_msb = mask_settings << 15; @@ -771,18 +769,18 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->mask_msb = mask_msb; } - SET_Ex(6, list[0]); - break; + SET_Ex(6, list[0]); + break; } - default: - break; - } + default: + break; + } } breakloop: - if (last_command != NULL) - *last_command = current_command; + *cpu_cycles_out += cpu_cycles; + *last_command = current_command; return list - list_start; } @@ -1194,10 +1192,10 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, #endif u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - u32 *last_command) + s32 *cpu_cycles_out, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length; + u32 current_command = 0, command_length, cpu_cycles = 0; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -1236,6 +1234,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, x &= ~0xF; width = ((width + 0xF) & ~0xF); + cpu_cycles += gput_fill(width, height); if (width == 0 || height == 0) break; @@ -1266,6 +1265,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base(); break; } @@ -1280,6 +1280,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_t(); break; } @@ -1293,6 +1294,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(3, 8); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base(); break; } @@ -1309,6 +1311,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_t(); break; } @@ -1319,13 +1322,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(2, 8); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_g(); break; } - case 0x34: - case 0x35: - case 0x36: - case 0x37: + case 0x34 ... 0x37: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -1335,13 +1336,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_gt(); break; } - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: + case 0x38 ... 0x3B: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); @@ -1349,13 +1348,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(3, 12); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_g(); break; } - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: + case 0x3C ... 0x3F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -1367,6 +1364,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_gt(); break; } @@ -1380,6 +1378,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); + cpu_cycles += gput_line(0); break; } @@ -1404,6 +1403,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); + cpu_cycles += gput_line(0); list_position++; num_vertexes++; @@ -1440,6 +1440,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); + cpu_cycles += gput_line(0); break; } @@ -1473,6 +1474,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); + cpu_cycles += gput_line(0); list_position += 2; num_vertexes++; @@ -1503,6 +1505,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1522,13 +1525,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: + case 0x68 ... 0x6B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1537,13 +1538,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 1)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + cpu_cycles += gput_sprite(1, 1); break; } - case 0x70: - case 0x71: - case 0x72: - case 0x73: + case 0x70 ... 0x73: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1552,13 +1551,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + cpu_cycles += gput_sprite(8, 8); break; } - case 0x74: - case 0x75: - case 0x76: - case 0x77: + case 0x74 ... 0x77: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1572,13 +1569,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); + cpu_cycles += gput_sprite(8, 8); break; } - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: + case 0x78 ... 0x7B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1587,13 +1582,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + cpu_cycles += gput_sprite(16, 16); break; } - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: + case 0x7C ... 0x7F: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1606,6 +1599,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); + cpu_cycles += gput_sprite(16, 16); break; } @@ -1759,8 +1753,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); breakloop: - if (last_command != NULL) - *last_command = current_command; + *cpu_cycles_out += cpu_cycles; + *last_command = current_command; return list - list_start; } diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index a1476f48..570cc5d2 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -39,7 +39,7 @@ sync_enhancement_buffers(int x, int y, int w, int h); static psx_gpu_struct egpu __attribute__((aligned(256))); -int do_cmd_list(uint32_t *list, int count, int *last_cmd) +int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) { int ret; @@ -49,9 +49,9 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) #endif if (gpu.state.enhancement_active) - ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd); + ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd); else - ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd); + ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd); #if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) __asm__ __volatile__("":::"q4","q5","q6","q7"); @@ -153,7 +153,9 @@ sync_enhancement_buffers(int x, int y, int w, int h) void renderer_sync_ecmds(uint32_t *ecmds) { - gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); + s32 dummy0 = 0; + u32 dummy1 = 0; + gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 20794316..191108b8 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -390,14 +390,16 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) } #endif +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *_list, int list_len, int *last_cmd) +int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) { u32 cmd = 0, len, i; le32_t *list = (le32_t *)_list; le32_t *list_start = list; le32_t *list_end = list + list_len; + u32 cpu_cycles = 0; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below. @@ -430,6 +432,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { case 0x02: gpuClearImage(packet); + cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, + le16_to_s16(packet.U2[5]) & 0x1ff); break; case 0x20: @@ -442,6 +446,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, false); + cpu_cycles += gput_poly_base(); } break; case 0x24: @@ -466,6 +471,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, false); + cpu_cycles += gput_poly_base_t(); } break; case 0x28: @@ -478,6 +484,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base(); } break; case 0x2C: @@ -502,6 +509,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_t(); } break; case 0x30: @@ -519,6 +527,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); + cpu_cycles += gput_poly_base_g(); } break; case 0x34: @@ -534,6 +543,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); + cpu_cycles += gput_poly_base_gt(); } break; case 0x38: @@ -548,6 +558,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_g(); } break; case 0x3C: @@ -563,6 +574,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_gt(); } break; case 0x40: @@ -573,6 +585,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); + cpu_cycles += gput_line(0); } break; case 0x48 ... 0x4F: { // Monochrome line strip @@ -589,6 +602,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; gpu_unai.PacketBuffer.U4[2] = *list_position++; gpuDrawLineF(packet, driver); + cpu_cycles += gput_line(0); num_vertexes++; if(list_position >= list_end) { @@ -612,6 +626,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); + cpu_cycles += gput_line(0); } break; case 0x58 ... 0x5F: { // Gouraud-shaded line strip @@ -632,6 +647,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = *list_position++; gpu_unai.PacketBuffer.U4[3] = *list_position++; gpuDrawLineG(packet, driver); + cpu_cycles += gput_line(0); num_vertexes++; if(list_position >= list_end) { @@ -651,6 +667,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) case 0x63: { // Monochrome rectangle (variable size) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(le16_to_u16(packet.U2[4]) & 0x3ff, + le16_to_u16(packet.U2[5]) & 0x1ff); } break; case 0x64: @@ -678,6 +696,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(le16_to_u16(packet.U2[6]) & 0x3ff, + le16_to_u16(packet.U2[7]) & 0x1ff); } break; case 0x68: @@ -687,6 +707,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(1, 1); } break; case 0x70: @@ -696,6 +717,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(8, 8); } break; case 0x74: @@ -713,6 +735,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(8, 8); } break; case 0x78: @@ -722,6 +745,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(16, 16); } break; case 0x7C: @@ -731,6 +755,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); + cpu_cycles += gput_sprite(16, 16); break; } // fallthrough @@ -747,6 +772,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(16, 16); } break; #ifdef TEST @@ -782,6 +808,7 @@ breakloop: gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; + *cpu_cycles_out += cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -789,7 +816,7 @@ breakloop: void renderer_sync_ecmds(u32 *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai_old/gpulib_if.cpp b/plugins/gpu_unai_old/gpulib_if.cpp index cc328029..ee694d35 100644 --- a/plugins/gpu_unai_old/gpulib_if.cpp +++ b/plugins/gpu_unai_old/gpulib_if.cpp @@ -169,7 +169,7 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) { unsigned int cmd = 0, len, i; unsigned int *list_start = list; @@ -523,7 +523,7 @@ breakloop: void renderer_sync_ecmds(uint32_t *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 2ac36c1b..7d40938f 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -12,6 +12,7 @@ #include #include #include "gpu.h" +#include "gpu_timing.h" #include "../../libpcsxcore/gpu.h" // meh #include "../../frontend/plugin_lib.h" @@ -33,13 +34,14 @@ struct psx_gpu gpu; -static noinline int do_cmd_buffer(uint32_t *data, int count); +static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles); static void finish_vram_transfer(int is_read); static noinline void do_cmd_reset(void) { + int dummy = 0; if (unlikely(gpu.cmd_len > 0)) - do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len); + do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); gpu.cmd_len = 0; if (unlikely(gpu.dma.h > 0)) @@ -172,8 +174,8 @@ static noinline void decide_frameskip(void) gpu.frameskip.active = 0; if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) { - int dummy; - do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy); + int dummy = 0; + do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy); gpu.frameskip.pending_fill[0] = 0; } } @@ -472,7 +474,7 @@ static void finish_vram_transfer(int is_read) gpu.gpu_state_change(PGS_VRAM_TRANSFER_END); } -static void do_vram_copy(const uint32_t *params) +static void do_vram_copy(const uint32_t *params, int *cpu_cycles) { const uint32_t sx = LE32TOH(params[0]) & 0x3FF; const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF; @@ -484,6 +486,7 @@ static void do_vram_copy(const uint32_t *params) uint16_t lbuf[128]; uint32_t x, y; + *cpu_cycles += gput_copy(w, h); if (sx == dx && sy == dy && msb == 0) return; @@ -519,7 +522,7 @@ static void do_vram_copy(const uint32_t *params) static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) { - int cmd = 0, pos = 0, len, dummy, v; + int cmd = 0, pos = 0, len, dummy = 0, v; int skip = 1; gpu.frameskip.pending_fill[0] = 0; @@ -533,7 +536,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x02: if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h) // clearing something large, don't skip - do_cmd_list(list, 3, &dummy); + do_cmd_list(list, 3, &dummy, &dummy); else memcpy(gpu.frameskip.pending_fill, list, 3 * 4); break; @@ -583,7 +586,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) return pos; } -static noinline int do_cmd_buffer(uint32_t *data, int count) +static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) { int cmd, pos; uint32_t old_e3 = gpu.ex_regs[3]; @@ -617,7 +620,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) cmd = -1; // incomplete cmd, can't consume yet break; } - do_vram_copy(data + pos + 1); + do_vram_copy(data + pos + 1, cpu_cycles); vram_dirty = 1; pos += 4; continue; @@ -627,7 +630,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) pos += do_cmd_list_skip(data + pos, count - pos, &cmd); else { - pos += do_cmd_list(data + pos, count - pos, &cmd); + pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd); vram_dirty = 1; } @@ -650,7 +653,8 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) static noinline void flush_cmd_buffer(void) { - int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len); + int dummy = 0, left; + left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); if (left > 0) memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4); if (left != gpu.cmd_len) { @@ -662,14 +666,14 @@ static noinline void flush_cmd_buffer(void) void GPUwriteDataMem(uint32_t *mem, int count) { - int left; + int dummy = 0, left; log_io("gpu_dma_write %p %d\n", mem, count); if (unlikely(gpu.cmd_len > 0)) flush_cmd_buffer(); - left = do_cmd_buffer(mem, count); + left = do_cmd_buffer(mem, count, &dummy); if (left) log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count); } @@ -686,7 +690,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr { uint32_t addr, *list, ld_addr = 0; int len, left, count; - long cpu_cycles = 0; + int cpu_cycles = 0; preload(rambase + (start_addr & 0x1fffff) / 4); @@ -720,7 +724,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } if (len) { - left = do_cmd_buffer(list + 1, len); + left = do_cmd_buffer(list + 1, len, &cpu_cycles); if (left) { memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4); gpu.cmd_len = left; diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 886bb1f5..13e73c5a 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -119,7 +119,7 @@ extern struct psx_gpu gpu; extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int count, int *last_cmd); +int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd); struct rearmed_cbs; diff --git a/plugins/gpulib/gpu_timing.h b/plugins/gpulib/gpu_timing.h new file mode 100644 index 00000000..0dfe0d68 --- /dev/null +++ b/plugins/gpulib/gpu_timing.h @@ -0,0 +1,15 @@ + +// very conservative and wrong +#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h)) +#define gput_copy(w, h) ((w) * (h)) +#define gput_poly_base() (23) +#define gput_poly_base_t() (gput_poly_base() + 90) +#define gput_poly_base_g() (gput_poly_base() + 144) +#define gput_poly_base_gt() (gput_poly_base() + 225) +#define gput_quad_base() gput_poly_base() +#define gput_quad_base_t() gput_poly_base_t() +#define gput_quad_base_g() gput_poly_base_g() +#define gput_quad_base_gt() gput_poly_base_gt() +#define gput_line(k) (8 + (k)) +#define gput_sprite(w, h) (8 + ((w) / 2u) * (h)) + diff --git a/plugins/gpulib/test.c b/plugins/gpulib/test.c index 80d0e9ef..3f24cc4f 100644 --- a/plugins/gpulib/test.c +++ b/plugins/gpulib/test.c @@ -88,13 +88,13 @@ int main(int argc, char *argv[]) pcnt_init(); renderer_init(); - memcpy(gpu.vram, state.vram, sizeof(gpu.vram)); + memcpy(gpu.vram, state.vram, 1024*512*2); if ((state.gpu_register[8] & 0x24) == 0x24) renderer_set_interlace(1, !(state.status >> 31)); start_cycles = pcnt_get(); - do_cmd_list(list, size / 4, &dummy); + do_cmd_list(list, size / 4, &dummy, &dummy); renderer_flush_queues(); printf("%u\n", pcnt_get() - start_cycles); -- 2.39.5