From b243416b907e6ce366b051e77ed8a434f7668d5d Mon Sep 17 00:00:00 2001
From: notaz
Date: Fri, 24 Feb 2012 01:07:33 +0200
Subject: [PATCH] gpulib: eliminate list scan-ahead

---
NOTE(review): reviewer annotations only; the hunks below carry the same
tokens as the original patch. Indentation and blank context lines were
restored from a whitespace-collapsed copy -- verify against the tree
before applying.

Summary of what the hunks show: do_cmd_list() in each renderer now
returns the number of words it consumed and reports the last command
through *last_cmd (-1 when the trailing command is incomplete); it stops
at 0xa0/0xc0 so that gpulib's do_cmd_buffer() can start the VRAM
transfer itself. The look-ahead loop in do_cmd_buffer() is removed and a
new do_cmd_list_skip() is used while frameskip is active and allowed.

Review notes:

* gpulib/gpu.c, do_cmd_buffer(): for cmd 0xa0/0xc0 the new code reads
  data[pos + 1] and data[pos + 2] and advances pos by 3 without first
  comparing against count. The removed look-ahead loop ran its
  "pos + len > count" test before this point was reached. With fewer
  than 3 words left this looks like an over-read and a negative
  "count - pos" result -- TODO confirm whether callers always pass
  complete packets.
* gpulib/gpu.c, do_cmd_list_skip(): list[2], list[4] and list[5] are
  read before the "pos + len > count" test, so an incomplete trailing
  command is inspected (and for 0x02 may be forwarded to do_cmd_list()
  with a length of 3) before it is rejected -- TODO confirm intended.
* gpu-gles/gpulib_if.c, do_cmd_list(): "cmd" is left uninitialized here
  while the dfxvideo and gpu_unai variants now initialize it to 0; with
  list_len == 0 the final "*last_cmd = cmd" would store an indeterminate
  value. The call sites visible in this patch pass a length > 0.
* gpu_unai/gpulib_if.cpp: the relocated poly-line terminator test
  dereferences *list_position before comparing list_position against
  list_end.
* The dfxvideo, gpu-gles and gpu_unai do_cmd_list() variants store to
  *last_cmd unconditionally, whereas gpu_parse() accepts a NULL
  last_command.

 plugins/dfxvideo/gpulib_if.c             |  39 +++++---
 plugins/gpu-gles/gpulib_if.c             |  38 ++++---
 plugins/gpu_neon/psx_gpu/psx_gpu.h       |   2 +-
 plugins/gpu_neon/psx_gpu/psx_gpu_main.c  |   4 +-
 plugins/gpu_neon/psx_gpu/psx_gpu_parse.c |  40 ++++++--
 plugins/gpu_neon/psx_gpu_if.c            |  17 +++-
 plugins/gpu_unai/gpulib_if.cpp           |  40 +++++---
 plugins/gpulib/gpu.c                     | 120 +++++++++++++----------
 plugins/gpulib/gpu.h                     |   2 +-
 9 files changed, 198 insertions(+), 104 deletions(-)

diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c
index 82bc38da..12aa0a3f 100644
--- a/plugins/dfxvideo/gpulib_if.c
+++ b/plugins/dfxvideo/gpulib_if.c
@@ -296,16 +296,27 @@ int renderer_init(void)
 
 extern const unsigned char cmd_lengths[256];
 
-void do_cmd_list(unsigned int *list, int list_len)
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
 {
-  unsigned int cmd, len;
-
+  unsigned int cmd = 0, len;
+  unsigned int *list_start = list;
   unsigned int *list_end = list + list_len;
 
   for (; list < list_end; list += 1 + len)
   {
     cmd = *list >> 24;
     len = cmd_lengths[cmd];
+    if (list + 1 + len > list_end) {
+      cmd = -1;
+      break;
+    }
+
+#ifndef TEST
+    if (cmd == 0xa0 || cmd == 0xc0)
+      break; // image i/o, forward to upper layer
+    else if ((cmd & 0xf8) == 0xe0)
+      gpu.ex_regs[cmd & 7] = list[0];
+#endif
 
     primTableJ[cmd]((void *)list);
 
@@ -313,8 +324,8 @@ void do_cmd_list(unsigned int *list, int list_len)
     {
       case 0x48 ... 0x4F:
       {
-        u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        u32 num_vertexes = 2;
+        u32 *list_position = &(list[3]);
 
         while(1)
         {
@@ -325,16 +336,14 @@ void do_cmd_list(unsigned int *list, int list_len)
           num_vertexes++;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2);
-
+        len += (num_vertexes - 2);
         break;
       }
 
       case 0x58 ... 0x5F:
       {
-        u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        u32 num_vertexes = 2;
+        u32 *list_position = &(list[4]);
 
         while(1)
         {
@@ -345,9 +354,7 @@ void do_cmd_list(unsigned int *list, int list_len)
           num_vertexes++;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2) * 2;
-
+        len += (num_vertexes - 2) * 2;
         break;
       }
 
@@ -365,6 +372,12 @@ void do_cmd_list(unsigned int *list, int list_len)
 #endif
     }
   }
+
+  gpu.ex_regs[1] &= ~0x1ff;
+  gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+  *last_cmd = cmd;
+  return list - list_start;
 }
 
 void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c
index ce32aada..c669b634 100644
--- a/plugins/gpu-gles/gpulib_if.c
+++ b/plugins/gpu-gles/gpulib_if.c
@@ -502,16 +502,28 @@ int renderer_init(void)
 
 extern const unsigned char cmd_lengths[256];
 
-void do_cmd_list(unsigned int *list, int list_len)
+// XXX: mostly dupe code from soft peops
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
 {
   unsigned int cmd, len;
-
+  unsigned int *list_start = list;
   unsigned int *list_end = list + list_len;
 
   for (; list < list_end; list += 1 + len)
   {
     cmd = *list >> 24;
     len = cmd_lengths[cmd];
+    if (list + 1 + len > list_end) {
+      cmd = -1;
+      break;
+    }
+
+#ifndef TEST
+    if (cmd == 0xa0 || cmd == 0xc0)
+      break; // image i/o, forward to upper layer
+    else if ((cmd & 0xf8) == 0xe0)
+      gpu.ex_regs[cmd & 7] = list[0];
+#endif
 
     primTableJ[cmd]((void *)list);
 
@@ -519,8 +531,8 @@ void do_cmd_list(unsigned int *list, int list_len)
     {
       case 0x48 ... 0x4F:
       {
-        uint32_t num_vertexes = 1;
-        uint32_t *list_position = &(list[2]);
+        uint32_t num_vertexes = 2;
+        uint32_t *list_position = &(list[3]);
 
         while(1)
         {
@@ -531,16 +543,14 @@ void do_cmd_list(unsigned int *list, int list_len)
           num_vertexes++;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2);
-
+        len += (num_vertexes - 2);
         break;
       }
 
       case 0x58 ... 0x5F:
       {
-        uint32_t num_vertexes = 1;
-        uint32_t *list_position = &(list[2]);
+        uint32_t num_vertexes = 2;
+        uint32_t *list_position = &(list[4]);
 
         while(1)
         {
@@ -551,9 +561,7 @@ void do_cmd_list(unsigned int *list, int list_len)
           num_vertexes++;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2) * 2;
-
+        len += (num_vertexes - 2) * 2;
         break;
       }
 
@@ -571,6 +579,12 @@ void do_cmd_list(unsigned int *list, int list_len)
 #endif
     }
   }
+
+  gpu.ex_regs[1] &= ~0x1ff;
+  gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+  *last_cmd = cmd;
+  return list - list_start;
 }
 
 void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 4605c39f..53a87177 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -231,7 +231,7 @@ u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2);
 
 void flush_render_block_buffer(psx_gpu_struct *psx_gpu);
 void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size);
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command);
 
 void triangle_benchmark(psx_gpu_struct *psx_gpu);
 
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
index 6c17b0ab..8ca3ad02 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
@@ -192,7 +192,7 @@ int main(int argc, char *argv[])
   init_counter();
 #endif
 
-  gpu_parse(psx_gpu, list, size);
+  gpu_parse(psx_gpu, list, size, NULL);
   flush_render_block_buffer(psx_gpu);
 
   clear_stats();
@@ -201,7 +201,7 @@ int main(int argc, char *argv[])
   u32 cycles = get_counter();
 #endif
 
-  gpu_parse(psx_gpu, list, size);
+  gpu_parse(psx_gpu, list, size, NULL);
   flush_render_block_buffer(psx_gpu);
 
 #ifdef NEON_BUILD
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
index ac35631e..3fc040d8 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
@@ -199,12 +199,17 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
   get_vertex_data_xy(vertex_number, offset16);                                 \
   set_vertex_color_constant(vertex_number, color)                              \
 
+#ifndef SET_Ex
+#define SET_Ex(r, v)
+#endif
+
 vertex_struct vertexes[4] __attribute__((aligned(32)));
 
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
 {
-  u32 current_command, command_length;
-
+  u32 current_command = 0, command_length;
+
+  u32 *list_start = list;
   u32 *list_end = list + (size / 4);
 
   for(; list < list_end; list += 1 + command_length)
@@ -212,6 +217,10 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
     s16 *list_s16 = (void *)list;
     current_command = *list >> 24;
     command_length = command_lengths[current_command];
+    if (list + 1 + command_length > list_end) {
+      current_command = (u32)-1;
+      break;
+    }
 
     switch(current_command)
     {
@@ -590,7 +599,12 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF,
          list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, list_s16[6], list_s16[7]);
         break;
-
+
+#ifdef PCSX
+      case 0xA0: // sys -> vid
+      case 0xC0: // vid -> sys
+        goto breakloop;
+#else
       case 0xA0: // sys -> vid
       {
         u32 load_x = list_s16[2] & 0x3FF;
@@ -608,10 +622,11 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
          load_width, load_height, load_width);
         break;
       }
-
+
       case 0xC0: // vid -> sys
         break;
-
+#endif
+
       case 0xE1:
         set_texture(psx_gpu, list[0] & 0x1FF);
 
@@ -621,6 +636,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
           psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
 
         psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
+        SET_Ex(1, list[0]);
         break;
 
       case 0xE2:
@@ -653,6 +669,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
 
           update_texture_ptr(psx_gpu);
         }
+        SET_Ex(2, list[0]);
         break;
       }
 
@@ -666,6 +683,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
          psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
          psx_gpu->viewport_end_y);
 #endif
+        SET_Ex(3, list[0]);
         break;
 
       case 0xE4:
@@ -678,6 +696,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
          psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
          psx_gpu->viewport_end_y);
 #endif
+        SET_Ex(4, list[0]);
         break;
 
       case 0xE5:
@@ -687,6 +706,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
 
         psx_gpu->offset_x = offset_x >> 21;
         psx_gpu->offset_y = offset_y >> 21;
+        SET_Ex(5, list[0]);
         break;
       }
 
@@ -706,6 +726,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
           psx_gpu->mask_msb = mask_msb;
         }
 
+        SET_Ex(6, list[0]);
         break;
       }
 
@@ -713,5 +734,12 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         break;
     }
   }
+
+#ifdef PCSX
+breakloop:
+#endif
+  if (last_command != NULL)
+    *last_command = current_command;
+  return list - list_start;
 }
 
diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index 3ff6e486..ff31c273 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -13,26 +13,37 @@
 extern const unsigned char cmd_lengths[256];
 #define command_lengths cmd_lengths
 
+static unsigned int *ex_regs;
+
+#define PCSX
+#define SET_Ex(r, v) \
+  ex_regs[r] = v
+
 #include "psx_gpu/psx_gpu.c"
 #include "psx_gpu/psx_gpu_parse.c"
 #include "../gpulib/gpu.h"
 
 static psx_gpu_struct egpu __attribute__((aligned(256)));
 
-void do_cmd_list(uint32_t *list, int count)
+int do_cmd_list(uint32_t *list, int count, int *last_cmd)
 {
-  gpu_parse(&egpu, list, count * 4);
+  int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+
+  ex_regs[1] &= ~0x1ff;
+  ex_regs[1] |= egpu.texture_settings & 0x1ff;
+  return ret;
 }
 
 int renderer_init(void)
 {
   initialize_psx_gpu(&egpu, gpu.vram);
+  ex_regs = gpu.ex_regs;
   return 0;
 }
 
 void renderer_sync_ecmds(uint32_t *ecmds)
 {
-  gpu_parse(&egpu, ecmds + 1, 6 * 4);
+  gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
 }
 
 void renderer_update_caches(int x, int y, int w, int h)
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp
index 22fe31ea..4994ef4b 100644
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -162,9 +162,10 @@ int renderer_init(void)
 
 extern const unsigned char cmd_lengths[256];
 
-void do_cmd_list(unsigned int *list, int list_len)
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
 {
-  unsigned int cmd, len;
+  unsigned int cmd = 0, len;
+  unsigned int *list_start = list;
   unsigned int *list_end = list + list_len;
 
   linesInterlace = force_interlace;
@@ -176,6 +177,17 @@ void do_cmd_list(unsigned int *list, int list_len)
   {
     cmd = *list >> 24;
     len = cmd_lengths[cmd];
+    if (list + 1 + len > list_end) {
+      cmd = -1;
+      break;
+    }
+
+#ifndef TEST
+    if (cmd == 0xa0 || cmd == 0xc0)
+      break; // image i/o, forward to upper layer
+    else if ((cmd & 0xf8) == 0xe0)
+      gpu.ex_regs[cmd & 7] = list[0];
+#endif
 
     switch(cmd)
     {
@@ -190,19 +202,16 @@ void do_cmd_list(unsigned int *list, int list_len)
 
         while(1)
         {
-          if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
-            break;
-
           PacketBuffer.U4[1] = PacketBuffer.U4[2];
           PacketBuffer.U4[2] = *list_position++;
           gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
 
           num_vertexes++;
+          if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
+            break;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2);
-
+        len += (num_vertexes - 2);
         break;
       }
 
@@ -217,9 +226,6 @@ void do_cmd_list(unsigned int *list, int list_len)
 
         while(1)
         {
-          if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
-            break;
-
           PacketBuffer.U4[0] = PacketBuffer.U4[2];
           PacketBuffer.U4[1] = PacketBuffer.U4[3];
           PacketBuffer.U4[2] = *list_position++;
@@ -227,11 +233,11 @@ void do_cmd_list(unsigned int *list, int list_len)
           gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
 
           num_vertexes++;
+          if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
+            break;
         }
 
-        if(num_vertexes > 2)
-          len += (num_vertexes - 2) * 2;
-
+        len += (num_vertexes - 2) * 2;
         break;
       }
 
@@ -252,6 +258,12 @@ void do_cmd_list(unsigned int *list, int list_len)
         break;
     }
   }
+
+  gpu.ex_regs[1] &= ~0x1ff;
+  gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+
+  *last_cmd = cmd;
+  return list - list_start;
 }
 
 void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 99b8edac..df0099c6 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -94,7 +94,7 @@ static noinline void decide_frameskip(void)
     gpu.frameskip.active = 0;
 }
 
-static noinline void decide_frameskip_allow(uint32_t cmd_e3)
+static noinline int decide_frameskip_allow(uint32_t cmd_e3)
 {
   // no frameskip if it decides to draw to display area,
   // but not for interlace since it'll most likely always do that
@@ -103,6 +103,7 @@ static noinline void decide_frameskip_allow(uint32_t cmd_e3)
   gpu.frameskip.allow = gpu.status.interlace ||
     (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
     (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
+  return gpu.frameskip.allow;
 }
 
 static noinline void get_gpu_info(uint32_t data)
@@ -327,70 +328,84 @@ static void finish_vram_transfer(int is_read)
                            gpu.dma_start.w, gpu.dma_start.h);
 }
 
+static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
+{
+  int cmd = 0, pos = 0, len, dummy;
+  int skip = 1;
+
+  while (pos < count && skip) {
+    uint32_t *list = data + pos;
+    cmd = list[0] >> 24;
+    len = 1 + cmd_lengths[cmd];
+
+    if (cmd == 0x02) {
+      if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
+        // clearing something large, don't skip
+        do_cmd_list(data + pos, 3, &dummy);
+    }
+    else if ((cmd & 0xf4) == 0x24) {
+      // flat textured prim
+      gpu.ex_regs[1] &= ~0x1ff;
+      gpu.ex_regs[1] |= list[4] & 0x1ff;
+    }
+    else if ((cmd & 0xf4) == 0x34) {
+      // shaded textured prim
+      gpu.ex_regs[1] &= ~0x1ff;
+      gpu.ex_regs[1] |= list[5] & 0x1ff;
+    }
+    else if (cmd == 0xe3)
+      skip = decide_frameskip_allow(list[0]);
+
+    if ((cmd & 0xf8) == 0xe0)
+      gpu.ex_regs[cmd & 7] = list[0];
+
+    if (pos + len > count) {
+      cmd = -1;
+      break; // incomplete cmd
+    }
+    if (cmd == 0xa0 || cmd == 0xc0)
+      break; // image i/o
+    pos += len;
+  }
+
+  renderer_sync_ecmds(gpu.ex_regs);
+  *last_cmd = cmd;
+  return pos;
+}
+
 static noinline int do_cmd_buffer(uint32_t *data, int count)
 {
-  int len, cmd, start, pos;
+  int cmd, pos;
+  uint32_t old_e3 = gpu.ex_regs[3];
   int vram_dirty = 0;
 
   // process buffer
-  for (start = pos = 0; pos < count; )
+  for (pos = 0; pos < count; )
   {
-    cmd = -1;
-    len = 0;
-
-    if (gpu.dma.h) {
+    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
+      vram_dirty = 1;
       pos += do_vram_io(data + pos, count - pos, 0);
       if (pos == count)
         break;
-      start = pos;
-    }
-
-    // do look-ahead pass to detect SR changes and VRAM i/o
-    while (pos < count) {
-      uint32_t *list = data + pos;
-      cmd = list[0] >> 24;
-      len = 1 + cmd_lengths[cmd];
-
-      //printf(" %3d: %02x %d\n", pos, cmd, len);
-      if ((cmd & 0xf4) == 0x24) {
-        // flat textured prim
-        gpu.ex_regs[1] &= ~0x1ff;
-        gpu.ex_regs[1] |= list[4] & 0x1ff;
-      }
-      else if ((cmd & 0xf4) == 0x34) {
-        // shaded textured prim
-        gpu.ex_regs[1] &= ~0x1ff;
-        gpu.ex_regs[1] |= list[5] & 0x1ff;
-      }
-      else if (cmd == 0xe3)
-        decide_frameskip_allow(list[0]);
-
-      if (2 <= cmd && cmd < 0xc0)
-        vram_dirty = 1;
-      else if ((cmd & 0xf8) == 0xe0)
-        gpu.ex_regs[cmd & 7] = list[0];
-
-      if (pos + len > count) {
-        cmd = -1;
-        break; // incomplete cmd
-      }
-      if (cmd == 0xa0 || cmd == 0xc0)
-        break; // image i/o
-      pos += len;
-    }
-
-    if (pos - start > 0) {
-      if (!gpu.frameskip.active || !gpu.frameskip.allow)
-        do_cmd_list(data + start, pos - start);
-      start = pos;
     }
 
+    cmd = data[pos] >> 24;
     if (cmd == 0xa0 || cmd == 0xc0) {
       // consume vram write/read cmd
       start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
-      pos += len;
+      pos += 3;
+      continue;
     }
-    else if (cmd == -1)
+
+    if (gpu.frameskip.active && gpu.frameskip.allow)
+      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
+    else {
+      pos += do_cmd_list(data + pos, count - pos, &cmd);
+      vram_dirty = 1;
+    }
+
+    if (cmd == -1)
+      // incomplete cmd
       break;
   }
 
@@ -398,10 +413,11 @@ static noinline int do_cmd_buffer(uint32_t *data, int count)
   gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
   gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
 
-  if (gpu.frameskip.active)
-    renderer_sync_ecmds(gpu.ex_regs);
   gpu.state.fb_dirty |= vram_dirty;
 
+  if (old_e3 != gpu.ex_regs[3])
+    decide_frameskip_allow(gpu.ex_regs[3]);
+
   return count - pos;
 }
 
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index d9ad416e..11bfe467 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -91,7 +91,7 @@ extern struct psx_gpu gpu;
 
 extern const unsigned char cmd_lengths[256];
 
-void do_cmd_list(uint32_t *list, int count);
+int do_cmd_list(uint32_t *list, int count, int *last_cmd);
 
 struct rearmed_cbs;
 
-- 
2.39.2