From: notaz Date: Sat, 23 Nov 2024 23:32:12 +0000 (+0200) Subject: gpu_unai: more asm X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2682f6edee3b043c511f399f627379af8b543527;p=pcsx_rearmed.git gpu_unai: more asm --- diff --git a/plugins/gpu_unai/gpu_arm.S b/plugins/gpu_unai/gpu_arm.S index 93269932..3b68acea 100644 --- a/plugins/gpu_unai/gpu_arm.S +++ b/plugins/gpu_unai/gpu_arm.S @@ -207,4 +207,76 @@ sprite_driver_8bpp_asm: .cfi_endproc +.global poly_4bpp_asm @ (void *d, const struct gpu_unai_inner_t *inn, int count) +poly_4bpp_asm: + .cfi_startproc + add r12, r1, #4 + stmfd sp!, {r4-r7,lr} + .cfi_def_cfa_offset 4*5 + .cfi_rel_offset lr, 4*4 + ldmia r12, {r3, r4, r7, r12, lr} @ clut, u, v, u_msk, v_msk + ldr r5, [r1, #0x18] @ u_inc + mov r6, r12 + ldr r12,[r1, #0x1c] @ v_inc + and r4, r4, r6 + and lr, lr, r7 @ v_msk & v + and lr, lr, #0xff<<10 + tst r12,r12 + bne poly_4bpp_asm_v + ldr r1, [r1] @ src + mov r7, r4, lsr #13 + add r1, r1, lr, lsl #1 + add r12,r1, r7, lsl #2 + pld_ r12,#2048 +0: + ldr lr, [r1, r7, lsl #2] + lsr r12,r4, #8 + and r12,r12,#0x1c + sub r12,r12,#1 + mov r12,lr, ror r12 + add r4, r4, r5 + and r12,r12,#0x1e + and r4, r4, r6 + ldrh r12,[r3, r12] + add r0, r0, #2 + mov r7, r4, lsr #13 + tst r12,r12 + strneh r12,[r0, #-2] + subs r2, r2, #1 + bgt 0b + + ldmfd sp!, {r4-r7,pc} + +poly_4bpp_asm_v: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked + stmfd sp!, {r8-r9} + ldr r9, [r1, #0x14] @ v_msk + ldr r1, [r1] @ src + mov r8, r12 @ v_inc + mov r12,r4, lsr #13 + add lr, r1, lr, lsl #1 + and r9, r9, #0xff<<10 @ v_msk_final +0: + ldr lr, [lr, r12, lsl #2] + lsr r12,r4, #8 + and r12,r12,#0x1c + sub r12,r12,#1 + mov r12,lr, ror r12 + add r4, r4, r5 + and r12,r12,#0x1e + and r4, r4, r6 + ldrh r12,[r3, r12] + add r0, r0, #2 + add r7, r7, r8 + and lr, r7, r9 + tst r12,r12 + add lr, r1, lr, lsl #1 + strneh r12,[r0, #-2] + mov r12,r4, lsr #13 + subs r2, r2, #1 + bgt 0b + + ldmfd sp!, {r8-r9} + ldmfd sp!, {r4-r7,pc} + .cfi_endproc + @ vim:filetype=armasm diff --git a/plugins/gpu_unai/gpu_arm.h b/plugins/gpu_unai/gpu_arm.h index 2329c46c..287846e4 100644 --- a/plugins/gpu_unai/gpu_arm.h +++ b/plugins/gpu_unai/gpu_arm.h @@ -5,6 +5,7 @@ extern "C" { #endif +struct gpu_unai_inner_t; struct spriteDriverArg; void sprite_driver_4bpp_asm(void *pPixel, const u8 *pTxt_base, @@ -13,6 +14,8 @@ void sprite_driver_8bpp_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct spriteDriverArg *arg); void sprite_4bpp_x16_asm(void *d, const void *s, void *pal, int lines); +void poly_4bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count); + #ifdef __cplusplus } #endif diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h index cf6b62b4..adede2b5 100644 --- a/plugins/gpu_unai/gpu_command.h +++ b/plugins/gpu_unai/gpu_command.h @@ -45,13 +45,13 @@ void gpuSetTexture(u16 tpage) gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3; gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one - gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)]; + gpu_unai.inn.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)]; } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSetCLUT(u16 clut) { - gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4]; + gpu_unai.inn.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4]; } #ifdef ENABLE_GPU_NULL_SUPPORT diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index a80c3a3a..4f2b1156 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -385,9 +385,9 @@ static void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base, u8 r5, g5, b5; if (CF_LIGHT) { - r5 = gpu_unai.r5; - g5 = gpu_unai.g5; - b5 = gpu_unai.b5; + r5 = gpu_unai.inn.r5; + g5 = gpu_unai.inn.g5; + b5 = gpu_unai.inn.b5; } if (CF_TEXTMODE==3) { @@ -531,6 +531,8 @@ const PS gpuSpriteDrivers[256] = { #undef TI #undef TN #undef TIBLOCK +#undef TA4 +#undef TA8 /////////////////////////////////////////////////////////////////////////////// // GPU Polygon innerloops generator @@ -569,7 +571,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) if (!CF_GOURAUD) { // UNTEXTURED, NO GOURAUD - const u16 pix15 = gpu_unai.PixelData; + const u16 pix15 = gpu_unai.inn.PixelData; do { uint_fast16_t uSrc, uDst; @@ -596,8 +598,8 @@ endpolynotextnogou: else { // UNTEXTURED, GOURAUD - gcol_t l_gCol = gpu_unai.gCol; - gcol_t l_gInc = gpu_unai.gInc; + gcol_t l_gCol = gpu_unai.inn.gCol; + gcol_t l_gInc = gpu_unai.inn.gInc; do { uint_fast16_t uDst, uSrc; @@ -643,12 +645,15 @@ endpolynotextgou: //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into // one 32-bit unsigned int, but this proved to lose too much accuracy // (pixel drouputs noticeable in NFS3 sky), so now are separate vars. - u32 l_u_msk = gpu_unai.u_msk; u32 l_v_msk = gpu_unai.v_msk; - u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk; - s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc; + u32 l_u_msk = gpu_unai.inn.u_msk; u32 l_v_msk = gpu_unai.inn.v_msk; + u32 l_u = gpu_unai.inn.u & l_u_msk; u32 l_v = gpu_unai.inn.v & l_v_msk; + s32 l_u_inc = gpu_unai.inn.u_inc; s32 l_v_inc = gpu_unai.inn.v_inc; + l_v <<= 1; + l_v_inc <<= 1; + l_v_msk = (l_v_msk & (0xff<<10)) << 1; - const le16_t* TBA_ = gpu_unai.TBA; - const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; + const le16_t* TBA_ = gpu_unai.inn.TBA; + const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.inn.CBA; u8 r5, g5, b5; u8 r8, g8, b8; @@ -657,17 +662,17 @@ endpolynotextgou: if (CF_LIGHT) { if (CF_GOURAUD) { - l_gInc = gpu_unai.gInc; - l_gCol = gpu_unai.gCol; + l_gInc = gpu_unai.inn.gInc; + l_gCol = gpu_unai.inn.gCol; } else { if (CF_DITHER) { - r8 = gpu_unai.r8; - g8 = gpu_unai.g8; - b8 = gpu_unai.b8; + r8 = gpu_unai.inn.r8; + g8 = gpu_unai.inn.g8; + b8 = gpu_unai.inn.b8; } else { - r5 = gpu_unai.r5; - g5 = gpu_unai.g5; - b5 = gpu_unai.b5; + r5 = gpu_unai.inn.r5; + g5 = gpu_unai.inn.g5; + b5 = gpu_unai.inn.b5; } } } @@ -682,17 +687,19 @@ endpolynotextgou: // (UNAI originally used 16.16) if (CF_TEXTMODE==1) { // 4bpp (CLUT) u32 tu=(l_u>>10); - u32 tv=(l_v<<1)&(0xff<<11); + u32 tv=l_v&l_v_msk; u8 rgb=((u8*)TBA_)[tv+(tu>>1)]; uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]); if (!uSrc) goto endpolytext; } if (CF_TEXTMODE==2) { // 8bpp (CLUT) - uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]); + u32 tv=l_v&l_v_msk; + uSrc = le16_to_u16(CBA_[((u8*)TBA_)[tv+(l_u>>10)]]); if (!uSrc) goto endpolytext; } if (CF_TEXTMODE==3) { // 16bpp - uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]); + u32 tv=(l_v&l_v_msk)>>1; + uSrc = le16_to_u16(TBA_[tv+(l_u>>10)]); if (!uSrc) goto endpolytext; } @@ -736,7 +743,7 @@ endpolynotextgou: endpolytext: pDst++; l_u = (l_u + l_u_inc) & l_u_msk; - l_v = (l_v + l_v_inc) & l_v_msk; + l_v += l_v_inc; if (CF_LIGHT && CF_GOURAUD) l_gCol.raw += l_gInc.raw; } @@ -744,6 +751,13 @@ endpolytext: } } +#ifdef __arm__ +static void PolySpan4bppAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) +{ + poly_4bpp_asm(pDst, &gpu_unai.inn, count); +} +#endif + static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { #ifdef ENABLE_GPU_LOG_SUPPORT @@ -758,12 +772,17 @@ typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); // Template instantiation helper macros #define TI(cf) gpuPolySpanFn<(cf)> #define TN PolyNULL +#ifdef __arm__ +#define TA4(cf) PolySpan4bppAsm +#else +#define TA4(cf) TI(cf) +#endif #define TIBLOCK(ub) \ TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \ TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \ TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \ TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \ - TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TA4((ub)|0x20),TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ @@ -800,5 +819,7 @@ const PP gpuPolySpanDrivers[2048] = { #undef TI #undef TN #undef TIBLOCK +#undef TA4 +#undef TA8 #endif /* __GPU_UNAI_GPU_INNER_H__ */ diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index ebd52eb6..1457afde 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -227,7 +227,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad PolyType ptype = POLYTYPE_F) { // Set up bgr555 color to be used across calls in inner driver - gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0])); + gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0])); PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, ptype, is_quad); @@ -379,13 +379,13 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua PolyType ptype = POLYTYPE_FT) { // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light) - gpu_unai.r8 = packet.U1[0]; - gpu_unai.g8 = packet.U1[1]; - gpu_unai.b8 = packet.U1[2]; + gpu_unai.inn.r8 = packet.U1[0]; + gpu_unai.inn.g8 = packet.U1[1]; + gpu_unai.inn.b8 = packet.U1[2]; // r5/g5/b5 used if just texture-blending is applied (15-bit light) - gpu_unai.r5 = packet.U1[0] >> 3; - gpu_unai.g5 = packet.U1[1] >> 3; - gpu_unai.b5 = packet.U1[2] >> 3; + gpu_unai.inn.r5 = packet.U1[0] >> 3; + gpu_unai.inn.g5 = packet.U1[1] >> 3; + gpu_unai.inn.b5 = packet.U1[2] >> 3; PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, ptype, is_quad); @@ -462,8 +462,8 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua #endif #endif // Set u,v increments for inner driver - gpu_unai.u_inc = du4; - gpu_unai.v_inc = dv4; + gpu_unai.inn.u_inc = du4; + gpu_unai.inn.v_inc = dv4; //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here? // (SAME ISSUE ELSEWHERE) @@ -695,8 +695,8 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } // Set u,v coords for inner driver - gpu_unai.u = u4; - gpu_unai.v = v4; + gpu_unai.inn.u = u4; + gpu_unai.inn.v = v4; if (xb > xmax) xb = xmax; if ((xb - xa) > 0) @@ -792,7 +792,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad #endif #endif // Setup packed Gouraud increment for inner driver - gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4); for (s32 loop0 = 2; loop0; loop0--) { if (loop0 == 2) { @@ -1044,7 +1044,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad } // Setup packed Gouraud color for inner driver - gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); + gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4); if (xb > xmax) xb = xmax; if ((xb - xa) > 0) @@ -1158,9 +1158,9 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua #endif #endif // Set u,v increments and packed Gouraud increment for inner driver - gpu_unai.u_inc = du4; - gpu_unai.v_inc = dv4; - gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + gpu_unai.inn.u_inc = du4; + gpu_unai.inn.v_inc = dv4; + gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4); for (s32 loop0 = 2; loop0; loop0--) { if (loop0 == 2) { @@ -1448,9 +1448,9 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } // Set packed Gouraud color and u,v coords for inner driver - gpu_unai.u = u4; - gpu_unai.v = v4; - gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); + gpu_unai.inn.u = u4; + gpu_unai.inn.v = v4; + gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4); if (xb > xmax) xb = xmax; if ((xb - xa) > 0) diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index 2564e7f0..13d783e6 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -61,22 +61,22 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteDriver, s32 *w_out, s32 *h_out) *w_out = x1; *h_out = y1 - y0; - gpu_unai.r5 = packet.U1[0] >> 3; - gpu_unai.g5 = packet.U1[1] >> 3; - gpu_unai.b5 = packet.U1[2] >> 3; + gpu_unai.inn.r5 = packet.U1[0] >> 3; + gpu_unai.inn.g5 = packet.U1[1] >> 3; + gpu_unai.inn.b5 = packet.U1[2] >> 3; le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; const int li=gpu_unai.ilace_mask; //const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); //const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); unsigned int tmode = gpu_unai.TEXT_MODE >> 5; - u8* pTxt_base = (u8*)gpu_unai.TBA; + u8* pTxt_base = (u8*)gpu_unai.inn.TBA; // Texture is accessed byte-wise, so adjust idx if 16bpp if (tmode == 3) u0 <<= 1; spriteDriverArg arg; - arg.CBA = gpu_unai.CBA; + arg.CBA = gpu_unai.inn.CBA; arg.u0 = u0; arg.v0 = v0; arg.u0_mask = gpu_unai.TextureWindow[2]; diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index 844a8fd4..fff9126b 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -196,6 +196,34 @@ static inline s32 GPU_DIV(s32 rs, s32 rt) // 'Unsafe' version of above that doesn't check for div-by-zero #define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt)) +// warning: gpu_arm.S asm uses this struct, update the asm if you change this +struct gpu_unai_inner_t { + le16_t* TBA; // 00 Ptr to current texture in VRAM + le16_t* CBA; // 04 Ptr to current CLUT in VRAM + + // 22.10 Fixed-pt texture coords, mask, scanline advance + // NOTE: U,V are no longer packed together into one u32, this proved to be + // too imprecise, leading to pixel dropouts. Example: NFS3's skybox. + u32 u, v; // 08 + u32 u_msk, v_msk; // 10 + s32 u_inc, v_inc; // 18 + + // Color for Gouraud-shaded prims + // Fixed-pt 8.8 rgb triplet + // Packed fixed-pt 8.3:8.3:8.2 rgb triplet + // layout: ccccccccXXXXXXXX for c in [r, g, b] + // ^ bit 16 + gcol_t gCol; + gcol_t gInc; // Increment along scanline for gCol + + // Color for flat-shaded, texture-blended prims + u8 r5, g5, b5; // 5-bit light for undithered prims + u8 r8, g8, b8; // 8-bit light for dithered prims + + // Color for flat-shaded, untextured prims + u16 PixelData; // bgr555 color for untextured flat-shaded polys +}; + struct gpu_unai_t { u32 GPU_GP1; GPUPacket PacketBuffer; @@ -260,33 +288,11 @@ struct gpu_unai_t { s16 DrawingOffset[2]; // [0] : Drawing offset X (signed) // [1] : Drawing offset Y (signed) - le16_t* TBA; // Ptr to current texture in VRAM - le16_t* CBA; // Ptr to current CLUT in VRAM - //////////////////////////////////////////////////////////////////////////// // Inner Loop parameters - // 22.10 Fixed-pt texture coords, mask, scanline advance - // NOTE: U,V are no longer packed together into one u32, this proved to be - // too imprecise, leading to pixel dropouts. Example: NFS3's skybox. - u32 u, v; - u32 u_msk, v_msk; - s32 u_inc, v_inc; - - // Color for Gouraud-shaded prims - // Fixed-pt 8.8 rgb triplet - // Packed fixed-pt 8.3:8.3:8.2 rgb triplet - // layout: ccccccccXXXXXXXX for c in [r, g, b] - // ^ bit 16 - gcol_t gCol; - gcol_t gInc; // Increment along scanline for gCol - - // Color for flat-shaded, texture-blended prims - u8 r5, g5, b5; // 5-bit light for undithered prims - u8 r8, g8, b8; // 8-bit light for dithered prims - - // Color for flat-shaded, untextured prims - u16 PixelData; // bgr555 color for untextured flat-shaded polys + __attribute__((aligned(32))) + gpu_unai_inner_t inn; // End of inner Loop parameters //////////////////////////////////////////////////////////////////////////// @@ -319,7 +325,7 @@ struct gpu_unai_t { u32 DitherMatrix[64]; // Matrix of dither coefficients }; -static gpu_unai_t gpu_unai; +static __attribute__((aligned(32))) gpu_unai_t gpu_unai; // Global config that frontend can alter.. Values are read in GPU_init(). // TODO: if frontend menu modifies a setting, add a function that can notify diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 733b255d..53a1b1d3 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -243,8 +243,8 @@ int renderer_init(void) //senquack - new vars must be updated whenever texture window is changed: // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.inn.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.inn.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); // Configuration options gpu_unai.config = gpu_unai_config_ext; @@ -340,8 +340,8 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) // Inner loop vars must be updated whenever texture window is changed: const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.inn.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.inn.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); gpuSetTexture(gpu_unai.GPU_GP1); }