From c296224f47ceebab4d6fbd071959bff294e80293 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 30 Nov 2023 01:58:19 +0200 Subject: [PATCH] gpu: improve timings of clipped sprites Judge Dredd has tons of them. Too lazy to do this for peops so keep it neglected for now. --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 26 +++--- plugins/gpu_neon/psx_gpu/psx_gpu.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 100 ++++++++++++++--------- plugins/gpu_unai/gpu_raster_sprite.h | 14 +++- plugins/gpu_unai/gpulib_if.cpp | 43 ++++++---- plugins/gpulib/gpu.c | 9 +- 6 files changed, 118 insertions(+), 76 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 62080f3f..b671a757 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4194,10 +4194,10 @@ render_block_handler_struct render_sprite_block_handlers[] = void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, - s32 width, s32 height, u32 flags, u32 color) + s32 *width, s32 *height, u32 flags, u32 color) { - s32 x_right = x + width - 1; - s32 y_bottom = y + height - 1; + s32 x_right = x + *width - 1; + s32 y_bottom = y + *height - 1; #ifdef PROFILE sprites++; @@ -4206,6 +4206,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right, y_bottom) == 0) { + *width = *height = 0; return; } @@ -4214,7 +4215,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, u32 clip = psx_gpu->viewport_start_x - x; x += clip; u += clip; - width -= clip; + *width -= clip; } if(y < psx_gpu->viewport_start_y) @@ -4222,21 +4223,24 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 clip = psx_gpu->viewport_start_y - y; y += clip; v += clip; - height -= clip; + *height -= clip; } if(x_right > psx_gpu->viewport_end_x) - width -= x_right - psx_gpu->viewport_end_x; + *width -= x_right - psx_gpu->viewport_end_x; if(y_bottom > psx_gpu->viewport_end_y) - height -= y_bottom - psx_gpu->viewport_end_y; + *height -= y_bottom - psx_gpu->viewport_end_y; - if((width <= 0) || (height <= 0)) + if((*width <= 0) || (*height <= 0)) + { + *width = *height = 0; return; + } #ifdef PROFILE - span_pixels += width * height; - spans += height; + span_pixels += *width * *height; + spans += *height; #endif u32 render_state = flags & @@ -4273,7 +4277,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, psx_gpu->render_block_handler = render_block_handler; ((setup_sprite_function_type *)render_block_handler->setup_blocks) - (psx_gpu, x, y, u, v, width, height, color); + (psx_gpu, x, y, u, v, *width, *height, color); } #define draw_pixel_line_mask_evaluate_yes() \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index a2c19216..764c2e70 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -245,7 +245,7 @@ void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags); void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, - s32 width, s32 height, u32 flags, u32 color); + s32 *width, s32 *height, u32 flags, u32 color); void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags, u32 color, int double_resolution); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index b0254aff..af26fa37 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -522,10 +522,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 width = list_s16[4] & 0x3FF; - u32 height = list_s16[5] & 0x1FF; + s32 width = list_s16[4] & 0x3FF; + s32 height = list_s16[5] & 0x1FF; - render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); cpu_cycles += gput_sprite(width, height); break; } @@ -535,13 +536,13 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; - u32 width = list_s16[6] & 0x3FF; - u32 height = list_s16[7] & 0x1FF; + s32 width = list_s16[6] & 0x3FF; + s32 height = list_s16[7] & 0x1FF; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, - current_command, list[0]); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, &height, current_command, list[0]); cpu_cycles += gput_sprite(width, height); break; } @@ -550,8 +551,10 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 1, height = 1; - render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); cpu_cycles += gput_sprite(1, 1); break; } @@ -560,9 +563,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 8, height = 8; - render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - cpu_cycles += gput_sprite(8, 8); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -571,12 +576,13 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; + s32 width = 8, height = 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, - current_command, list[0]); - cpu_cycles += gput_sprite(8, 8); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, &height, current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -584,9 +590,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 16, height = 16; - render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - cpu_cycles += gput_sprite(16, 16); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -595,16 +603,18 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; + s32 width = 16, height = 16; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, - current_command, list[0]); - cpu_cycles += gput_sprite(16, 16); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, &height, current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } #ifdef PCSX + case 0x1F: // irq? case 0x80 ... 0x9F: // vid -> vid case 0xA0 ... 0xBF: // sys -> vid case 0xC0 ... 0xDF: // vid -> sys @@ -1498,10 +1508,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 width = list_s16[4] & 0x3FF; - u32 height = list_s16[5] & 0x1FF; + s32 width = list_s16[4] & 0x3FF; + s32 height = list_s16[5] & 0x1FF; - render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); @@ -1515,13 +1526,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; - u32 width = list_s16[6] & 0x3FF; - u32 height = list_s16[7] & 0x1FF; + s32 width = list_s16[6] & 0x3FF; + s32 height = list_s16[7] & 0x1FF; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, width, height, - current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); @@ -1533,11 +1544,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 1, height = 1; - render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 1)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); cpu_cycles += gput_sprite(1, 1); break; } @@ -1546,12 +1559,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 8, height = 8; - render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); - cpu_cycles += gput_sprite(8, 8); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1561,15 +1576,16 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; + s32 width = 8, height = 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, 8, 8, - current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); - cpu_cycles += gput_sprite(8, 8); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1577,12 +1593,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 16, height = 16; - render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); - cpu_cycles += gput_sprite(16, 16); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1592,14 +1610,16 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; + s32 width = 16, height = 16; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); - cpu_cycles += gput_sprite(16, 16); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index ea4e82f2..6909f4f8 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -24,7 +24,7 @@ /////////////////////////////////////////////////////////////////////////////// // GPU internal sprite drawing functions -void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) +void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver, s32 *w_out, s32 *h_out) { s32 x0, x1, y0, y1; u32 u0, v0; @@ -58,6 +58,8 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) if (x1 > xmax) x1 = xmax; x1 -= x0; if (x1 <= 0) return; + *w_out = x1; + *h_out = y1 - y0; gpu_unai.r5 = packet.U1[0] >> 3; gpu_unai.g5 = packet.U1[1] >> 3; @@ -87,7 +89,7 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) #include "gpu_arm.h" /* Notaz 4bit sprites optimization */ -void gpuDrawS16(PtrUnion packet) +void gpuDrawS16(PtrUnion packet, s32 *w_out, s32 *h_out) { s32 x0, y0; s32 u0, v0; @@ -110,7 +112,7 @@ void gpuDrawS16(PtrUnion packet) ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) { // send corner cases to general handler packet.U4[3] = u32_to_le32(0x00100010); - gpuDrawS(packet, gpuSpriteSpanFn<0x20>); + gpuDrawS(packet, gpuSpriteSpanFn<0x20>, w_out, h_out); return; } @@ -123,12 +125,14 @@ void gpuDrawS16(PtrUnion packet) } else if (ymax - y0 < 16) h = ymax - y0; + *w_out = 16; + *h_out = h; draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h); } #endif // __arm__ -void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) +void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_out) { s32 x0, x1, y0, y1; @@ -153,6 +157,8 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) if (x1 > xmax) x1 = xmax; x1 -= x0; if (x1 <= 0) return; + *w_out = x1; + *h_out = y1 - y0; const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 191108b8..45c73a73 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -666,9 +666,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x62: case 0x63: { // Monochrome rectangle (variable size) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(le16_to_u16(packet.U2[4]) & 0x3ff, - le16_to_u16(packet.U2[5]) & 0x1ff); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x64: @@ -677,6 +677,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x67: { // Textured rectangle (variable size) gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: // This fixes Silent Hill running animation on loading screens: @@ -695,9 +696,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(le16_to_u16(packet.U2[6]) & 0x3ff, - le16_to_u16(packet.U2[7]) & 0x1ff); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x68: @@ -706,7 +706,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x6B: { // Monochrome rectangle (1x1 dot) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); cpu_cycles += gput_sprite(1, 1); } break; @@ -716,8 +717,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x73: { // Monochrome rectangle (8x8) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(8, 8); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x74: @@ -727,6 +729,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) @@ -734,8 +737,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(8, 8); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x78: @@ -744,8 +747,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x7B: { // Monochrome rectangle (16x16) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(16, 16); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x7C: @@ -753,9 +757,10 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) #ifdef __arm__ if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); - gpuDrawS16(packet); - cpu_cycles += gput_sprite(16, 16); + s32 w = 0, h = 0; + gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuDrawS16(packet, &w, &h); + cpu_cycles += gput_sprite(w, h); break; } // fallthrough @@ -765,14 +770,15 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(16, 16); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; #ifdef TEST @@ -792,6 +798,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0xC0: break; #else + case 0x1F: // irq? case 0x80 ... 0x9F: // vid -> vid case 0xA0 ... 0xBF: // sys -> vid case 0xC0 ... 0xDF: // vid -> sys diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 9785dbb8..e3943a25 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -669,6 +669,11 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) pos += 4; continue; } + else if (cmd == 0x1f) { + log_anomaly("irq1?\n"); + pos++; + continue; + } // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) @@ -754,8 +759,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr if (len > 0) cpu_cycles += 5 + len; - log_io(".chain %08lx #%d+%d\n", - (long)(list - rambase) * 4, len, gpu.cmd_len); + log_io(".chain %08lx #%d+%d %u\n", + (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles); if (unlikely(gpu.cmd_len > 0)) { if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) { log_anomaly("cmd_buffer overflow, likely garbage commands\n"); -- 2.39.5