From: notaz Date: Fri, 9 Jan 2026 20:46:54 +0000 (+0200) Subject: gpu_unai: cleanup X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0cdf7aa4936cf27a674a60c1bdea0ba91ba12004;p=pcsx_rearmed.git gpu_unai: cleanup - BLITMASK removed for good since it's unused and only doubles compilation time and generates tons of useless code - gpuBlending24 removed since real hw has no such thing - real dithering table is used, now folded in gpuLighting funcs to avoid some pack/unpack steps - pass y to PolySpan to avoid useless recalc from pDst --- diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 3ac39b66..e561d95e 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -35,21 +35,6 @@ #define CF_GOURAUD ((CF>> 7)&1) // Gouraud shading #define CF_MASKSET ((CF>> 8)&1) // Mask bit set #define CF_DITHER ((CF>> 9)&1) // Dithering -#define CF_BLITMASK ((CF>>10)&1) // blit_mask check (skip rendering pixels - // that wouldn't end up displayed on - // low-res screen using simple downscaler) - -//#ifdef __arm__ -//#ifndef ENABLE_GPU_ARMV7 -/* ARMv5 */ -//#include "gpu_inner_blend_arm5.h" -//#else -/* ARMv7 optimized */ -//#include "gpu_inner_blend_arm7.h" -//#endif -//#else -//#include "gpu_inner_blend.h" -//#endif #include "gpu_inner_blend.h" #include "gpu_inner_quantization.h" @@ -61,9 +46,8 @@ #include "gpu_arm.h" #include "gpu_inner_blend_arm.h" #include "gpu_inner_light_arm.h" -#define gpuBlending gpuBlendingARM #endif -#ifndef gpuBlending +#ifndef gpuBlending // gpuBlendingARM #define gpuBlending gpuBlendingGeneric #endif #ifndef gpuLightingTXT // gpuLightingTXTARM @@ -73,10 +57,6 @@ #define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric #endif -// Non-dithering lighting and blending functions preserve uSrc -// MSB. This saves a few operations and useless load/stores. 
-#define MSB_PRESERVED (!CF_DITHER) - // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 // This is only for debugging/verification of low-precision colors in C. // Low-precision Gouraud is intended for use by SIMD-optimized inner drivers @@ -129,10 +109,6 @@ static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b) template static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { - // Blend func can save an operation if it knows uSrc MSB is - // unset. For untextured prims, this is always true. - const bool skip_uSrc_mask = true; - u16 col; struct GouraudColor * gcPtr; u32 r, g, b; @@ -168,7 +144,7 @@ static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size uint_fast16_t uSrc = col; if (CF_BLEND) - uSrc = gpuBlending(uSrc, uDst); + uSrc = gpuBlending(uSrc, uDst); if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } else { *pDst = u16_to_le16(uSrc); } @@ -194,12 +170,8 @@ static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size uint_fast16_t uSrc = col; - // Blend func can save an operation if it knows uSrc MSB is - // unset. For untextured prims, this is always true. - const bool skip_uSrc_mask = true; - if (CF_BLEND) - uSrc = gpuBlending(uSrc, uDst); + uSrc = gpuBlending(uSrc, uDst); if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } else { *pDst = u16_to_le16(uSrc); } @@ -306,10 +278,6 @@ static inline void gpuTileSpanFn(le16_t *pDst, u16 data, u32 count) } while (--count); } else { - // Blend func can save an operation if it knows uSrc MSB is - // unset. For untextured prims, this is always true. 
- const bool skip_uSrc_mask = true; - uint_fast16_t uSrc, uDst; do { @@ -319,16 +287,10 @@ static inline void gpuTileSpanFn(le16_t *pDst, u16 data, u32 count) uSrc = data; if (CF_BLEND) - uSrc = gpuBlending(uSrc, uDst); + uSrc = gpuBlending(uSrc, uDst); if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } else { *pDst = u16_to_le16(uSrc); } - - //senquack - Did not apply "Silent Hill" mask-bit fix to here. - // It is hard to tell from scarce documentation available and - // lack of comments in code, but I believe the tile-span - // functions here should not bother to preserve any source MSB, - // as they are not drawing from a texture. endtile: pDst++; } @@ -422,21 +384,13 @@ template static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base, const gpu_unai_inner_t &inn) { - // Blend func can save an operation if it knows uSrc MSB is unset. - // Untextured prims can always skip (source color always comes with MSB=0). - // For textured prims, the generic lighting funcs always return it unset. (bonus!) - const bool skip_uSrc_mask = MSB_PRESERVED ? (!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT; - - uint_fast16_t uSrc, uDst, srcMSB; + uint_fast16_t uSrc, uDst; bool should_blend; u32 u0_mask = inn.u_msk >> 10; u32 bgr0888; - if (CF_LIGHT) { - bgr0888 = (gpu_unai.inn.b8 << 16) | - (gpu_unai.inn.g8 << 8) | - gpu_unai.inn.r8; - } + if (CF_LIGHT) + bgr0888 = gpu_unai.inn.bgr0888; const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = inn.CBA; const u32 v0_mask = inn.v_msk >> 10; @@ -474,21 +428,15 @@ static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt if (!uSrc) goto endsprite; - //senquack - save source MSB, as blending or lighting macros will not - // (Silent Hill gray rectangles mask bit bug) - if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000; - if (CF_LIGHT) uSrc = gpuLightingTXT(uSrc, bgr0888); - should_blend = MSB_PRESERVED ? 
uSrc & 0x8000 : srcMSB; - + should_blend = uSrc & 0x8000; if (CF_BLEND && should_blend) - uSrc = gpuBlending(uSrc, uDst); + uSrc = gpuBlending(uSrc, uDst) | 0x8000; - if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } - else { *pDst = u16_to_le16(uSrc); } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endsprite: u0 += (CF_TEXTMODE==3) ? 2 : 1; @@ -591,6 +539,10 @@ const PS gpuSpriteDrivers[256] = { #undef TA #undef TA6 +// this tries to avoid pointer shifting +#define DITHER_LKUP(lut, dst) \ + *(s16 *)((char *)(lut) + ((uintptr_t)(pDst) & 6)) + /////////////////////////////////////////////////////////////////////////////// // GPU Polygon innerloops generator @@ -613,15 +565,13 @@ const PS gpuSpriteDrivers[256] = { // relevant blend/light headers. // (see README_senquack.txt) template -static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) +static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count, s32 y) { - // Blend func can save an operation if it knows uSrc MSB is unset. - // Untextured prims can always skip this (src color MSB is always 0). - // For textured prims, the generic lighting funcs always return it unset. (bonus!) - const bool skip_uSrc_mask = MSB_PRESERVED ? (!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT; bool should_blend; + s16 DitherLut16[4]; - u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.inn.blit_mask; + if (CF_DITHER) + memcpy(DitherLut16, &gpu_unai.DitherLut16[y & 3][0], sizeof(DitherLut16)); if (!CF_TEXTMODE) { @@ -632,18 +582,13 @@ static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 do { uint_fast16_t uSrc, uDst; - // NOTE: Don't enable CF_BLITMASK pixel skipping (speed hack) - // on untextured polys. It seems to do more harm than good: see - // gravestone text at end of Medieval intro sequence. 
-senquack - //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } } - if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } } uSrc = pix15; if (CF_BLEND) - uSrc = gpuBlending(uSrc, uDst); + uSrc = gpuBlending(uSrc, uDst); if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } else { *pDst = u16_to_le16(uSrc); } @@ -661,27 +606,19 @@ endpolynotextnogou: do { uint_fast16_t uDst, uSrc; - // See note in above loop regarding CF_BLITMASK - //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; } - if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; } if (CF_DITHER) { // GOURAUD, DITHER - - u32 uSrc24 = gpuLightingRGB24(l_gCol); - if (CF_BLEND) - uSrc24 = gpuBlending24(uSrc24, uDst); - uSrc = gpuColorQuantization24(uSrc24, pDst); + int_fast16_t dv = DITHER_LKUP(DitherLut16, pDst); + uSrc = gpuLightingRGBDither(l_gCol, dv); } else { // GOURAUD, NO DITHER - uSrc = gpuLightingRGB(l_gCol); - - if (CF_BLEND) - uSrc = gpuBlending(uSrc, uDst); } + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } else { *pDst = u16_to_le16(uSrc); } @@ -697,7 +634,7 @@ endpolynotextgou: { // TEXTURED - uint_fast16_t uDst, uSrc, srcMSB; + uint_fast16_t uDst, uSrc; //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into // one 32-bit unsigned int, but this proved to lose too much accuracy @@ -725,17 +662,13 @@ endpolynotextgou: l_gInc.set_counter(-1); l_gCol.set_counter(pcounter); } else { - // keep this packed, otherwise gcc runs out of regs - bgr0888 = (gpu_unai.inn.b8 << 16) | - (gpu_unai.inn.g8 << 8) | - gpu_unai.inn.r8; - // XXX pre-pack + // keep this packed, otherwise gcc spills too much + bgr0888 = gpu_unai.inn.bgr0888; } } do { - if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; } if 
(CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; } @@ -759,9 +692,6 @@ endpolynotextgou: if (!uSrc) goto endpolytext; } - // Save source MSB, as blending or lighting will not (Silent Hill) - if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000; - // When textured, only dither when LIGHT (texture blend) is enabled // LIGHT && BLEND => dither // LIGHT && !BLEND => dither @@ -769,33 +699,28 @@ endpolynotextgou: //!LIGHT && !BLEND => no dither if (CF_DITHER && CF_LIGHT) { - u32 uSrc24; - if ( CF_GOURAUD) - uSrc24 = gpuLightingTXT24Gouraud(uSrc, l_gCol); - if (!CF_GOURAUD) - uSrc24 = gpuLightingTXT24(uSrc, bgr0888); - - if (CF_BLEND && srcMSB) - uSrc24 = gpuBlending24(uSrc24, uDst); - - uSrc = gpuColorQuantization24(uSrc24, pDst); - } else + int_fast16_t dv = DITHER_LKUP(DitherLut16, pDst); + if (CF_GOURAUD) + uSrc = gpuLightingTXTGouraudDither(uSrc, l_gCol, dv); + else + uSrc = gpuLightingTXTDither(uSrc, bgr0888, dv); + } + else { if (CF_LIGHT) { - if ( CF_GOURAUD) + if (CF_GOURAUD) uSrc = gpuLightingTXTGouraud(uSrc, l_gCol); - if (!CF_GOURAUD) + else uSrc = gpuLightingTXT(uSrc, bgr0888); } - should_blend = MSB_PRESERVED ? 
uSrc & 0x8000 : srcMSB; - if (CF_BLEND && should_blend) - uSrc = gpuBlending(uSrc, uDst); } + should_blend = uSrc & 0x8000; + if (CF_BLEND && should_blend) + uSrc = gpuBlending(uSrc, uDst) | 0x8000; - if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } - else { *pDst = u16_to_le16(uSrc); } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endpolytext: pDst++; l_u = (l_u + l_u_inc) & l_u_msk; @@ -812,7 +737,7 @@ endpolytext: #ifdef __arm__ template -static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) +static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count, s32 y) { switch (CF) { case 0x02: poly_untex_st0_asm (pDst, &gpu_unai.inn, count); break; @@ -829,12 +754,12 @@ static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count case 0x41: poly_8bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break; case 0x43: poly_8bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break; #endif - default: gpuPolySpanFn(gpu_unai, pDst, count); + default: gpuPolySpanFn(gpu_unai, pDst, count, y); } } #endif -static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) +static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count, s32 y) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"PolyNULL()\n"); @@ -843,7 +768,7 @@ static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) /////////////////////////////////////////////////////////////////////////////// // Polygon innerloops driver -typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); +typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count, s32 y); // Template instantiation helper macros #define TI(cf) gpuPolySpanFn<(cf)> @@ -892,9 +817,8 @@ typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); TN, TN, TN, TI((ub)|0xf3), TN, TN, TN, 
TI((ub)|0xf7), \ TN, TN, TN, TI((ub)|0xfb), TN, TN, TN, TI((ub)|0xff) -const PP gpuPolySpanDrivers[2048] = { - TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8), - TIBLOCK(4<<8), TIBLOCK(5<<8), TIBLOCK(6<<8), TIBLOCK(7<<8) +const PP gpuPolySpanDrivers[1024] = { + TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8) }; #undef TI diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h index febc7ede..c0af0721 100644 --- a/plugins/gpu_unai/gpu_inner_blend.h +++ b/plugins/gpu_unai/gpu_inner_blend.h @@ -120,69 +120,4 @@ GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc) | ((uSrc & 0x001F)<< 4); } - -//////////////////////////////////////////////////////////////////////////////// -// Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24' -// (foreground color) with bgr555 color in 'uDst' (background color), -// returning the resulting u32 5.4:5.4:5.4 color. -// -// INPUT: -// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// 'uDst' input: -bbbbbgggggrrrrr -// ^ bit 16 -// RETURNS: -// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care -//////////////////////////////////////////////////////////////////////////////// -template -GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst) -{ - // These use techniques adapted from Blargg's techniques mentioned in - // in gpuBlending() comments above. Not as much bitwise trickery is - // necessary because of presence of 0 padding in uSrc24 format. 
- - u32 uDst24 = gpuGetRGB24(uDst); - u32 mix; - - // 0.5 x Back + 0.5 x Forward - if (BLENDMODE==0) { - const u32 uMsk = 0x1FE7F9FE; - // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already - mix = (uDst24 + (uSrc24 & uMsk)) >> 1; - } - - // 1.0 x Back + 1.0 x Forward - if (BLENDMODE==1) { - u32 sum = uSrc24 + uDst24; - u32 carries = sum & 0x20080200; - u32 modulo = sum - carries; - u32 clamp = carries - (carries >> 9); - mix = modulo | clamp; - } - - // 1.0 x Back - 1.0 x Forward - if (BLENDMODE==2) { - // Insert ones in 0-padded borrow slot of color to be subtracted from - uDst24 |= 0x20080200; - u32 diff = uDst24 - uSrc24; - u32 borrows = diff & 0x20080200; - u32 clamp = borrows - (borrows >> 9); - mix = diff & clamp; - } - - // 1.0 x Back + 0.25 x Forward - if (BLENDMODE==3) { - uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2; - u32 sum = uSrc24 + uDst24; - u32 carries = sum & 0x20080200; - u32 modulo = sum - carries; - u32 clamp = carries - (carries >> 9); - mix = modulo | clamp; - } - - return mix; -} - #endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_blend_arm.h b/plugins/gpu_unai/gpu_inner_blend_arm.h index f887374c..f53a5ee9 100644 --- a/plugins/gpu_unai/gpu_inner_blend_arm.h +++ b/plugins/gpu_unai/gpu_inner_blend_arm.h @@ -95,13 +95,8 @@ GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); } - // There's not a case where we can get into this function, - // SKIP_USRC_MSB_MASK is false, and the msb of uSrc is unset. 
- if (!SKIP_USRC_MSB_MASK) { - asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); - } - return mix; } +#define gpuBlending gpuBlendingARM #endif //_OP_BLEND_ARM_H_ diff --git a/plugins/gpu_unai/gpu_inner_blend_arm5.h b/plugins/gpu_unai/gpu_inner_blend_arm5.h deleted file mode 100644 index 0e9b74f1..00000000 --- a/plugins/gpu_unai/gpu_inner_blend_arm5.h +++ /dev/null @@ -1,100 +0,0 @@ -/*************************************************************************** -* Copyright (C) 2010 PCSX4ALL Team * -* Copyright (C) 2010 Unai * -* * -* This program is free software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with this program; if not, write to the * -* Free Software Foundation, Inc., * -* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* -***************************************************************************/ - -#ifndef _OP_BLEND_H_ -#define _OP_BLEND_H_ - -// GPU Blending operations functions - -#define gpuBlending00(uSrc,uDst) \ -{ \ - asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \ - asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \ - asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \ - asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ -} - -// 1.0 x Back + 1.0 x Forward -#define gpuBlending01(uSrc,uDst) \ -{ \ - u16 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ - asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ - asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ - asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ - asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ - asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \ - asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ - asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ - asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ - asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ - asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ -} - -// 1.0 x Back - 1.0 x Forward */ -#define 
gpuBlending02(uSrc,uDst) \ -{ \ - u16 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \ - asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \ - asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ - asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ - asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ - asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ - asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \ -} - -// 1.0 x Back + 0.25 x Forward */ -#define gpuBlending03(uSrc,uDst) \ -{ \ - u16 st,dt,out; \ - asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ - asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ - asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ - asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ - asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ - asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ - asm ("movhi %[dt], #0x03E0 " : 
[dt] "=r" (dt) : "0" (dt) ); \ - asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ - asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ - asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ - asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ - asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ - asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ - asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ -} - -#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_blend_arm7.h b/plugins/gpu_unai/gpu_inner_blend_arm7.h deleted file mode 100644 index 083e62d8..00000000 --- a/plugins/gpu_unai/gpu_inner_blend_arm7.h +++ /dev/null @@ -1,107 +0,0 @@ -/*************************************************************************** -* Copyright (C) 2010 PCSX4ALL Team * -* Copyright (C) 2010 Unai * -* * -* This program is free software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with this program; if not, write to the * -* Free Software Foundation, Inc., * -* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* -***************************************************************************/ - -#ifndef _OP_BLEND_H_ -#define _OP_BLEND_H_ - -// GPU Blending operations functions - -#define gpuBlending00(uSrc,uDst) \ -{ \ - asm ("and %[src], %[src], %[msk]\n" \ - "and %[dst], %[dst], %[msk]\n" \ - "add %[src], %[dst], %[src]\n" \ - "mov %[src], %[src], lsr #1\n" \ - : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \ -} - -// 1.0 x Back + 1.0 x Forward -#define gpuBlending01(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x7C00\n" \ - "add %[out], %[dt], %[st] \n" \ - "cmp %[out], #0x7C00 \n" \ - "movhi %[out], #0x7C00 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x03E0\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x03E0 \n" \ - "movhi %[dt], #0x03E0 \n" \ - "orr %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x001F\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x001F \n" \ - "movhi %[dt], #0x001F \n" \ - "orr %[src], %[out], %[dt] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ -} - -// 1.0 x Back - 1.0 x Forward */ -#define gpuBlending02(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x7C00\n" \ - "subs %[out], %[dt], %[st] \n" \ - "movmi %[out], #0x0000 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x03E0\n" \ - "subs %[dt], %[dt], %[st] \n" \ - "orrpl %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x001F\n" \ - "subs %[dt], %[dt], %[st] \n" \ - "orrpl %[out], %[out], %[dt] \n" \ - "mov %[src], %[out] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ -} - -// 1.0 x Back + 0.25 x Forward */ -#define gpuBlending03(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm 
("mov %[src], %[src], lsr #2 \n" \ - "and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x1C00\n" \ - "add %[out], %[dt], %[st] \n" \ - "cmp %[out], #0x7C00 \n" \ - "movhi %[out], #0x7C00 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x00E0\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x03E0 \n" \ - "movhi %[dt], #0x03E0 \n" \ - "orr %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x0007\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x001F \n" \ - "movhi %[dt], #0x001F \n" \ - "orr %[src], %[out], %[dt] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ -} - -#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h index f4ec2134..643c6e06 100644 --- a/plugins/gpu_unai/gpu_inner_light.h +++ b/plugins/gpu_unai/gpu_inner_light.h @@ -70,6 +70,12 @@ static void SetupLightLUT() } } +// gcc5+ and clang13+ understand this on ARM +GPU_INLINE s32 clamp_c(s32 x) { + if (x < 0) return 0; + if (x > 31) return 31; + return x; +} //////////////////////////////////////////////////////////////////////////////// // Create packed Gouraud fixed-pt 8.8 rgb triplet @@ -111,11 +117,12 @@ GPU_INLINE gcol_t gpuPackGouraudCol(u32 r, u32 g, u32 b) //////////////////////////////////////////////////////////////////////////////// GPU_INLINE gcol_t gpuPackGouraudColInc(s32 dr, s32 dg, s32 db) { - return (gcol_t){ + return (gcol_t){{ (u16)((dr >> 2) + (dr < 0)), (u16)((dg >> 2) + (dg < 0)), (u16)((db >> 2) + (db < 0)), - }; + 0 + }}; } //////////////////////////////////////////////////////////////////////////////// @@ -136,41 +143,29 @@ GPU_INLINE uint_fast16_t gpuLightingRGB(gcol_t gCol) ((gCol.c.b >> 1) & 0x7c00); } -//////////////////////////////////////////////////////////////////////////////// -// Convert packed Gouraud u32 fixed-pt 8.8 rgb triplet in 'gCol' -// to padded u32 5.4 bgr
fixed-pt triplet, suitable for use -// with HQ 24-bit lighting/quantization. -// -// INPUT: -// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] -// ^ bit 16 -// RETURNS: -// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care -//////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuLightingRGB24(gcol_t gCol) +GPU_INLINE uint_fast16_t gpuLightingRGBDither(gcol_t gCol, int_fast16_t dt) { - return (gCol.c.r >> 7) - | ((gCol.c.g >> 7) << 10) - | ((gCol.c.b >> 7) << 20); + dt <<= 4; + return clamp_c(((s32)gCol.c.r + dt) >> 11) | + (clamp_c(((s32)gCol.c.g + dt) >> 11) << 5) | + (clamp_c(((s32)gCol.c.b + dt) >> 11) << 10); } //////////////////////////////////////////////////////////////////////////////// // Apply fast (low-precision) 5-bit lighting to bgr555 texture color: // // INPUT: -// 'r5','g5','b5' are unsigned 5-bit color values, value of 15 +// 'r8','g8','b8' are unsigned 8-bit color values, value of 127 // is midpoint that doesn't modify that component of texture -// 'uSrc' input: -bbbbbgggggrrrrr +// 'uSrc' input: mbbbbbgggggrrrrr // ^ bit 16 // RETURNS: -// u16 output: 0bbbbbgggggrrrrr -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +// u16 output: mbbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, 'm' is the MSB to preserve //////////////////////////////////////////////////////////////////////////////// GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u32 bgr0888) { - // gcc can move this out of the loop if it wants to + // the compiler can move this out of the loop if it wants to uint_fast32_t b5 = (bgr0888 >> 19); uint_fast32_t g5 = (bgr0888 >> 11) & 0x1f; uint_fast32_t r5 = (bgr0888 >> 3) & 0x1f; @@ -189,11 +184,11 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u32 bgr0888) // 'gCol' is a Gouraud fixed-pt 8.8 rgb triplet // 'gCol' input: ccccccccXXXXXXXX for c in [r, 
g, b] // ^ bit 16 -// 'uSrc' input: -bbbbbgggggrrrrr +// 'uSrc' input: mbbbbbgggggrrrrr // ^ bit 16 // RETURNS: -// u16 output: 0bbbbbgggggrrrrr -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +// u16 output: mbbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, 'm' is the MSB to preserve //////////////////////////////////////////////////////////////////////////////// GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, gcol_t gCol) { @@ -205,72 +200,41 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, gcol_t //////////////////////////////////////////////////////////////////////////////// // Apply high-precision 8-bit lighting to bgr555 texture color, -// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet -// suitable for use with HQ 24-bit lighting/quantization. // // INPUT: -// 'r8','g8','b8' are unsigned 8-bit color component values, value of +// 'r','g','b' are unsigned 8-bit color component values, value of // 127 is midpoint that doesn't modify that component of texture // -// uSrc input: -bbbbbgggggrrrrr +// uSrc input: mbbbbbgggggrrrrr // ^ bit 16 // RETURNS: -// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +// u16 output: mbbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, 'm' is the MSB to preserve //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuLightingTXT24(uint_fast16_t uSrc, u32 bgr0888) +GPU_INLINE uint_fast16_t gpuLightingTXTDitherRGB(uint_fast16_t uSrc, + uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, int_fast16_t dv) { - uint_fast16_t r1 = uSrc&0x001F; - uint_fast16_t g1 = uSrc&0x03E0; - uint_fast16_t b1 = uSrc&0x7C00; - - uint_fast16_t r2 = bgr0888 & 0x0000ff; - uint_fast32_t g2 = bgr0888 & 0x00ff00; - uint_fast16_t b2 = bgr0888 >> 16; - - u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; - u32 g3 = g1 * g2; if (g3 & 
0xFE000000) g3 = ~0xFE000000; - u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; - - return ((r3>> 3) ) | - ((g3>>16)<<10) | - ((b3>>13)<<20); + uint_fast16_t rs = uSrc & 0x001F; + uint_fast16_t gs = uSrc & 0x03E0; + uint_fast16_t bs = uSrc & 0x7C00; + s32 r3 = rs * r + dv; + s32 g3 = gs * g + (dv << 5); + s32 b3 = bs * b + (dv << 10); + return clamp_c(r3 >> 7) | + (clamp_c(g3 >> 12) << 5) | + (clamp_c(b3 >> 17) << 10) | + (uSrc & 0x8000); } - -//////////////////////////////////////////////////////////////////////////////// -// Apply high-precision 8-bit lighting to bgr555 texture color in 'uSrc', -// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet -// suitable for use with HQ 24-bit lighting/quantization. -// -// INPUT: -// 'uSrc' input: -bbbbbgggggrrrrr -// ^ bit 16 -// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] -// ^ bit 16 -// RETURNS: -// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care -//////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, gcol_t gCol) +GPU_INLINE uint_fast16_t gpuLightingTXTDither(uint_fast16_t uSrc, u32 bgr0888, int_fast16_t dv) { - uint_fast16_t r1 = uSrc&0x001F; - uint_fast16_t g1 = uSrc&0x03E0; - uint_fast16_t b1 = uSrc&0x7C00; - - uint_fast16_t r2 = gCol.c.r >> 8; - uint_fast16_t g2 = gCol.c.g >> 8; - uint_fast16_t b2 = gCol.c.b >> 8; - - u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; - u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; - u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; + return gpuLightingTXTDitherRGB(uSrc, bgr0888 & 0xff, + (bgr0888 >> 8) & 0xff, bgr0888 >> 16, dv); +} - return ((r3>> 3) ) | - ((g3>> 8)<<10) | - ((b3>>13)<<20); +GPU_INLINE uint_fast16_t gpuLightingTXTGouraudDither(uint_fast16_t uSrc, gcol_t gCol, int_fast8_t dv) +{ + return gpuLightingTXTDitherRGB(uSrc, gCol.c.r >> 8, gCol.c.g >> 8, 
gCol.c.b >> 8, dv); } #endif //_OP_LIGHT_H_ diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h index 8a4e9354..5abcd2d3 100644 --- a/plugins/gpu_unai/gpu_inner_quantization.h +++ b/plugins/gpu_unai/gpu_inner_quantization.h @@ -22,87 +22,17 @@ static void SetupDitheringConstants() { - // Initialize Dithering Constants - // The screen is divided into 8x8 chunks and sub-unitary noise is applied - // using the following matrix. This ensures that data lost in color - // quantization will be added back to the image 'by chance' in predictable - // patterns that are naturally 'smoothed' by your sight when viewed from a - // certain distance. - // - // http://caca.zoy.org/study/index.html - // - // Shading colors are encoded in 4.5, and then are quantitized to 5.0, - // DitherMatrix constants reflect that. - - static const u8 DitherMatrix[] = { - 0, 32, 8, 40, 2, 34, 10, 42, - 48, 16, 56, 24, 50, 18, 58, 26, - 12, 44, 4, 36, 14, 46, 6, 38, - 60, 28, 52, 20, 62, 30, 54, 22, - 3, 35, 11, 43, 1, 33, 9, 41, - 51, 19, 59, 27, 49, 17, 57, 25, - 15, 47, 7, 39, 13, 45, 5, 37, - 63, 31, 55, 23, 61, 29, 53, 21 + static const s8 DitherMatrix[4][4] = { + { -4, 0, -3, 1 }, + { 2, -2, 3, -1 }, + { -3, 1, -4, 0 }, + { 3, -1, 2, -2 } }; int i, j; - for (i = 0; i < 8; i++) - { - for (j = 0; j < 8; j++) - { - u16 offset = (i << 3) | j; - - u32 component = ((DitherMatrix[offset] + 1) << 4) / 65; //[5.5] -> [5] - - // XXX - senquack - hack Dec 2016 - // Until JohnnyF gets the time to work further on dithering, - // force lower bit of component to 0. This fixes grid pattern - // affecting quality of dithered image, as well as loss of - // detail in dark areas. With lower bit unset like this, existing - // 27-bit accuracy of dithering math is unneeded, could be 24-bit. - // Is 8x8 matrix overkill as a result, can we use 4x4? 
- component &= ~1; - - gpu_unai.DitherMatrix[offset] = (component) - | (component << 10) - | (component << 20); - } - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Convert padded u32 5.4:5.4:5.4 bgr fixed-pt triplet to final bgr555 color, -// applying dithering if specified by template parameter. -// -// INPUT: -// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX -// ^ bit 31 -// 'pDst' is a pointer to destination framebuffer pixel, used -// to determine which DitherMatrix[] entry to apply. -// RETURNS: -// u16 output: 0bbbbbgggggrrrrr -// ^ bit 16 -// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care -//////////////////////////////////////////////////////////////////////////////// -template -GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const le16_t *pDst) -{ - if (DITHER) - { - uintptr_t fbpos = pDst - gpu_unai.vram; - u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); - - //clean overflow flags and add - uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_unai.DitherMatrix[offset]; - - if (uSrc24 & (1<< 9)) uSrc24 |= (0x1FF ); - if (uSrc24 & (1<<19)) uSrc24 |= (0x1FF<<10); - if (uSrc24 & (1<<29)) uSrc24 |= (0x1FF<<20); - } - - return ((uSrc24>> 4) & (0x1F )) - | ((uSrc24>> 9) & (0x1F<<5 )) - | ((uSrc24>>14) & (0x1F<<10)); + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + gpu_unai.DitherLut16[i][j] = (u16)DitherMatrix[i][j] << 4; } #endif //_OP_DITHER_H_ diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index 9b259bb1..988e721f 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -371,7 +371,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if ((xmin - xa) > 0) xa = xmin; if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya); } } } while (++cur_pass < total_passes); 
@@ -387,10 +387,6 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua gpu_unai.inn.r8 = packet.U1[0]; gpu_unai.inn.g8 = packet.U1[1]; gpu_unai.inn.b8 = packet.U1[2]; - // r5/g5/b5 used if just texture-blending is applied (15-bit light) - gpu_unai.inn.r5 = packet.U1[0] >> 3; - gpu_unai.inn.g5 = packet.U1[1] >> 3; - gpu_unai.inn.b5 = packet.U1[2] >> 3; PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, ptype, is_quad); @@ -706,7 +702,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya); } } } while (++cur_pass < total_passes); @@ -1055,7 +1051,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya); } } } while (++cur_pass < total_passes); @@ -1462,7 +1458,7 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya); } } } while (++cur_pass < total_passes); diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index 2e30a283..ec0e7151 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -255,12 +255,20 @@ struct gpu_unai_inner_t { }; // Color for flat-shaded, texture-blended prims - u8 r5, g5, b5, pad5; // 20 5-bit light for undithered prims - u8 r8, g8, b8, pad8; // 24 8-bit light for dithered prims + u8 r5, g5, b5, pad5; // 20 5-bit light for sprite asm + union { + u32 bgr0888; // 24 8-bit light for dithered prims + struct { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u8 pad8, b8, g8, r8; +#else + u8 r8, g8, b8, pad8; 
+#endif + }; + }; // Color for Gouraud-shaded prims // Fixed-pt 8.8 rgb triplet - // Packed fixed-pt 8.3:8.3:8.2 rgb triplet // layout: ccccccccXXXXXXXX for c in [r, g, b] // ^ bit 16 gcol_t gCol; // 28 @@ -269,10 +277,7 @@ struct gpu_unai_inner_t { // Color for flat-shaded, untextured prims u16 PixelData; // 38 bgr555 color for untextured flat-shaded polys - u8 blit_mask; // Determines what pixels to skip when rendering. - // Only useful on low-resolution devices using - // a simple pixel-dropping downscaler for PS1 - // high-res modes. See 'pixel_skip' option. + u8 unused2; u8 ilace_mask; // Determines what lines to skip when rendering. // Normally 0 when PS1 240 vertical res is in @@ -356,6 +361,8 @@ struct gpu_unai_t { // End of inner Loop parameters //////////////////////////////////////////////////////////////////////////// + s16 DitherLut16[4][4]; // shifted up by 4 and s16 to simplify lookup asm + bool prog_ilace_flag; // Tracks successive frames for 'prog_ilace' option u8 BLEND_MODE; @@ -367,7 +374,6 @@ struct gpu_unai_t { gpu_unai_config_t config; u8 LightLUT[32*32]; // 5-bit lighting LUT (gpu_inner_light.h) - u32 DitherMatrix[64]; // Matrix of dither coefficients }; static __attribute__((aligned(32))) gpu_unai_t gpu_unai; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 71c92728..2a8ae75e 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -274,7 +274,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, case 0x22: case 0x23: { // Monochrome 3-pt poly PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Blending_Mode | gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; @@ -290,7 +289,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | 
gpu_unai.PixelMSB; @@ -312,7 +310,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, case 0x2A: case 0x2B: { // Monochrome 4-pt poly PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Blending_Mode | gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; @@ -340,7 +337,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | gpu_unai.PixelMSB; @@ -372,7 +368,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, if ((xor_ & HTOLE32(0xf8f8f8)) == 0) gouraud = 0; PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB @@ -403,7 +398,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, } } PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB @@ -427,7 +421,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, if ((xor_ & HTOLE32(0xf8f8f8)) == 0) gouraud = 0; PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB @@ -470,7 +463,6 @@ int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs, } } PP driver = gpuPolySpanDrivers[ - //(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB