From: notaz Date: Wed, 20 Nov 2024 00:41:04 +0000 (+0200) Subject: gpu_unai: avoid unneeded gouraud calculations X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6aa72faf1fb29daac5887ee74ef1e3ea46711593;p=pcsx_rearmed.git gpu_unai: avoid unneeded gouraud calculations --- diff --git a/Makefile b/Makefile index 83559310..4000d489 100644 --- a/Makefile +++ b/Makefile @@ -273,8 +273,10 @@ else CFLAGS += -DGPU_UNAI_NO_OLD endif plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 +ifneq ($(DEBUG), 1) plugins/gpu_unai/gpulib_if.o \ plugins/gpu_unai/old/if.o: CFLAGS += -O3 +endif CC_LINK = $(CXX) endif diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index 1b9e08dc..ebd52eb6 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -223,13 +223,14 @@ static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVerte /*---------------------------------------------------------------------- gpuDrawPolyF - Flat-shaded, untextured poly ----------------------------------------------------------------------*/ -void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad, + PolyType ptype = POLYTYPE_F) { // Set up bgr555 color to be used across calls in inner driver gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0])); PolyVertex vbuf[4]; - polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); + polyInitVertexBuffer(vbuf, packet, ptype, is_quad); int total_passes = is_quad ? 2 : 1; int cur_pass = 0; @@ -374,7 +375,8 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad /*---------------------------------------------------------------------- gpuDrawPolyFT - Flat-shaded, textured poly ----------------------------------------------------------------------*/ -void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad, + PolyType ptype = POLYTYPE_FT) { // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light) gpu_unai.r8 = packet.U1[0]; @@ -386,7 +388,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua gpu_unai.b5 = packet.U1[2] >> 3; PolyVertex vbuf[4]; - polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad); + polyInitVertexBuffer(vbuf, packet, ptype, is_quad); int total_passes = is_quad ? 2 : 1; int cur_pass = 0; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 0f124f80..733b255d 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -376,6 +376,12 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) #include "../gpulib/gpu_timing.h" +// Strip lower 3 bits of each color and determine if lighting should be used: +static inline bool need_lighting(u32 rgb_raw) +{ + return (rgb_raw & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080); +} + static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles) { u32 PRIM = le32_to_u32(gpu_unai.PacketBuffer.U4[0]) >> 24; @@ -395,9 +401,7 @@ static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles) // NOTE: I've changed all textured sprite draw commands here and // elsewhere to use proper behavior, but left poly commands // alone, I don't want to slow rendering down too much. (TODO) - //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) - // Strip lower 3 bits of each color and determine if lighting should be used: - if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) + if (need_lighting(le32_raw(gpu_unai.PacketBuffer.U4[0]))) driver_idx |= Lighting; PS driver = gpuSpriteDrivers[driver_idx]; PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; @@ -539,13 +543,22 @@ int do_cmd_list(u32 *list_, int list_len, // this is an untextured poly, so CF_LIGHT (texture blend) // shouldn't apply. Until the original array of template // instantiation ptrs is fixed, we're stuck with this. (TODO) + u8 gouraud = 129; + u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]); + for (i = 1; i < 3; i++) + xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]); + if ((xor_ & HTOLE32(0xf8f8f8)) == 0) + gouraud = 0; PP driver = gpuPolySpanDrivers[ //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB ]; - gpuDrawPolyG(packet, driver, false); + if (gouraud) + gpuDrawPolyG(packet, driver, false); + else + gpuDrawPolyF(packet, driver, false, POLYTYPE_G); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); } break; @@ -555,13 +568,28 @@ int do_cmd_list(u32 *list_, int list_len, case 0x37: { // Gouraud-shaded, textured 3-pt poly gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); + u8 lighting = Lighting; + u8 gouraud = lighting ? (1<<7) : 0; + if (lighting) { + u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]); + for (i = 1; i < 3; i++) + xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]); + if ((xor_ & HTOLE32(0xf8f8f8)) == 0) { + gouraud = 0; + if (!need_lighting(rgb0)) + lighting = 0; + } + } PP driver = gpuPolySpanDrivers[ //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | - gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB ]; - gpuDrawPolyGT(packet, driver, false); + if (gouraud) + gpuDrawPolyGT(packet, driver, false); // is_quad = true + else + gpuDrawPolyFT(packet, driver, false, POLYTYPE_GT); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); } break; @@ -570,13 +598,22 @@ int do_cmd_list(u32 *list_, int list_len, case 0x3A: case 0x3B: { // Gouraud-shaded 4-pt poly // See notes regarding '129' for 0x30..0x33 further above -senquack + u8 gouraud = 129; + u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]); + for (i = 1; i < 4; i++) + xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]); + if ((xor_ & HTOLE32(0xf8f8f8)) == 0) + gouraud = 0; PP driver = gpuPolySpanDrivers[ //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB ]; - gpuDrawPolyG(packet, driver, true); // is_quad = true + if (gouraud) + gpuDrawPolyG(packet, driver, true); // is_quad = true + else + gpuDrawPolyF(packet, driver, true, POLYTYPE_G); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); } break; @@ -598,13 +635,28 @@ int do_cmd_list(u32 *list_, int list_len, break; } gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u8 lighting = Lighting; + u8 gouraud = lighting ? (1<<7) : 0; + if (lighting) { + u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]); + for (i = 1; i < 4; i++) + xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]); + if ((xor_ & HTOLE32(0xf8f8f8)) == 0) { + gouraud = 0; + if (!need_lighting(rgb0)) + lighting = 0; + } + } PP driver = gpuPolySpanDrivers[ //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | - gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB ]; - gpuDrawPolyGT(packet, driver, true); // is_quad = true + if (gouraud) + gpuDrawPolyGT(packet, driver, true); // is_quad = true + else + gpuDrawPolyFT(packet, driver, true, POLYTYPE_GT); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); } break;