unai: Use 8.8 RGB triplet format for gouraud shading

author Paul Cercueil <paul@crapouillou.net>

Tue, 31 Oct 2023 17:11:39 +0000 (18:11 +0100)

committer Paul Cercueil <paul@crapouillou.net>

Thu, 2 Nov 2023 09:50:59 +0000 (10:50 +0100)
author Paul Cercueil <paul@crapouillou.net>
Tue, 31 Oct 2023 17:11:39 +0000 (18:11 +0100)
committer Paul Cercueil <paul@crapouillou.net>
Thu, 2 Nov 2023 09:50:59 +0000 (10:50 +0100)
diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h

index 9f18735..1a93a39 100644 (file)
--- a/plugins/gpu_unai/gpu_inner.h
+++ b/plugins/gpu_unai/gpu_inner.h
@@ -59,14 +59,10 @@
  #include "gpu_inner_blend_arm.h"
  #include "gpu_inner_light_arm.h"
  #define gpuBlending gpuBlendingARM
  #include "gpu_inner_blend_arm.h"
  #include "gpu_inner_light_arm.h"
  #define gpuBlending gpuBlendingARM
-#define gpuLightingRGB gpuLightingRGBARM
  #define gpuLightingTXT gpuLightingTXTARM
  #define gpuLightingTXT gpuLightingTXTARM
-#define gpuLightingTXTGouraud gpuLightingTXTGouraudARM
  #else
  #define gpuBlending gpuBlendingGeneric
  #else
  #define gpuBlending gpuBlendingGeneric
-#define gpuLightingRGB gpuLightingRGBGeneric
  #define gpuLightingTXT gpuLightingTXTGeneric
  #define gpuLightingTXT gpuLightingTXTGeneric
-#define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric
  #endif
  
  // Non-dithering lighting and blending functions preserve uSrc
  #endif
  
  // Non-dithering lighting and blending functions preserve uSrc
@@ -537,8 +533,8 @@ endpolynotextnogou:
                 else
                 {
                         // UNTEXTURED, GOURAUD
                 else
                 {
                         // UNTEXTURED, GOURAUD
-                       u32 l_gCol = gpu_unai.gCol;
-                       u32 l_gInc = gpu_unai.gInc;
+                       gcol_t l_gCol = gpu_unai.gCol;
+                       gcol_t l_gInc = gpu_unai.gInc;
  
                         do {
                                 uint_fast16_t uDst, uSrc;
  
                         do {
                                 uint_fast16_t uDst, uSrc;
@@ -570,7 +566,7 @@ endpolynotextnogou:
  
  endpolynotextgou:
                                 pDst++;
  
  endpolynotextgou:
                                 pDst++;
-                               l_gCol += l_gInc;
+                               l_gCol.raw += l_gInc.raw;
                         }
                         while (--count);
                 }
                         }
                         while (--count);
                 }
@@ -594,7 +590,7 @@ endpolynotextgou:
                 u8 r5, g5, b5;
                 u8 r8, g8, b8;
  
                 u8 r5, g5, b5;
                 u8 r8, g8, b8;
  
-               u32 l_gInc, l_gCol;
+               gcol_t l_gInc, l_gCol;
  
                 if (CF_LIGHT) {
                         if (CF_GOURAUD) {
  
                 if (CF_LIGHT) {
                         if (CF_GOURAUD) {
@@ -678,7 +674,8 @@ endpolytext:
                         pDst++;
                         l_u = (l_u + l_u_inc) & l_u_msk;
                         l_v = (l_v + l_v_inc) & l_v_msk;
                         pDst++;
                         l_u = (l_u + l_u_inc) & l_u_msk;
                         l_v = (l_v + l_v_inc) & l_v_msk;
-                       if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc;
+                       if (CF_LIGHT && CF_GOURAUD)
+                               l_gCol.raw += l_gInc.raw;
                 }
                 while (--count);
         }
                 }
                 while (--count);
         }
diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h

index f4f685b..44fecdc 100644 (file)
--- a/plugins/gpu_unai/gpu_inner_light.h
+++ b/plugins/gpu_unai/gpu_inner_light.h
@@ -72,90 +72,89 @@ static void SetupLightLUT()
  
  
  ////////////////////////////////////////////////////////////////////////////////
  
  
  ////////////////////////////////////////////////////////////////////////////////
-// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
+// Create packed Gouraud fixed-pt 8.8 rgb triplet
  //
  // INPUT:
  // 'r','g','b' are 8.10 fixed-pt color components (r shown here)
  //     'r' input:  --------------rrrrrrrrXXXXXXXXXX
  //                 ^ bit 31
  // RETURNS:
  //
  // INPUT:
  // 'r','g','b' are 8.10 fixed-pt color components (r shown here)
  //     'r' input:  --------------rrrrrrrrXXXXXXXXXX
  //                 ^ bit 31
  // RETURNS:
-//    u32 output:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-//                 ^ bit 31
+//    gcol_t output:  ccccccccXXXXXXXX for c in [r, g, b]
+//                    ^ bit 16
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care
  ////////////////////////////////////////////////////////////////////////////////
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b)
+GPU_INLINE gcol_t gpuPackGouraudCol(u32 r, u32 g, u32 b)
  {
  {
-       return ((u32)(b>> 8)&(0x03ff    ))
-            | ((u32)(g<< 3)&(0x07ff<<10))
-            | ((u32)(r<<14)&(0x07ff<<21));
+       return (gcol_t){
+               (u16)(r >> 2),
+               (u16)(g >> 2),
+               (u16)(b >> 2),
+       };
  }
  
  }
  
-
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
-// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
+// Create packed increment for Gouraud fixed-pt 8.8 rgb triplet
  //
  // INPUT:
  //  Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown)
  //   'dr' input:  ssssssssssssssrrrrrrrrXXXXXXXXXX
  //                ^ bit 31
  // RETURNS:
  //
  // INPUT:
  //  Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown)
  //   'dr' input:  ssssssssssssssrrrrrrrrXXXXXXXXXX
  //                ^ bit 31
  // RETURNS:
-//   u32 output:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-//                ^ bit 31
+//   gcol_t output:  ccccccccXXXXXXXX for c in [r, g, b]
+//                   ^ bit 16
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits
  //
  // NOTE: The correctness of this code/method has not been fully verified,
  //       having been merely factored out from original code in
  //       poly-drawing functions. Feel free to check/improve it -senquack
  ////////////////////////////////////////////////////////////////////////////////
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits
  //
  // NOTE: The correctness of this code/method has not been fully verified,
  //       having been merely factored out from original code in
  //       poly-drawing functions. Feel free to check/improve it -senquack
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db)
+GPU_INLINE gcol_t gpuPackGouraudColInc(s32 dr, s32 dg, s32 db)
  {
  {
-       u32 dr_tmp = (u32)(dr << 14)&(0xffffffff<<21);  if (dr < 0) dr_tmp += 1<<21;
-       u32 dg_tmp = (u32)(dg <<  3)&(0xffffffff<<10);  if (dg < 0) dg_tmp += 1<<10;
-       u32 db_tmp = (u32)(db >>  8)&(0xffffffff    );  if (db < 0) db_tmp += 1<< 0;
-       return db_tmp + dg_tmp + dr_tmp;
+       return (gcol_t){
+               (u16)((dr >> 2) + (dr < 0)),
+               (u16)((dg >> 2) + (dg < 0)),
+               (u16)((db >> 2) + (db < 0)),
+       };
  }
  
  }
  
-
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
-// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet
+// Extract bgr555 color from Gouraud gcol_t fixed-pt 8.8 rgb triplet
  //
  // INPUT:
  //
  // INPUT:
-//  'gCol' input:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-//                 ^ bit 31
+//  'gCol' input:  ccccccccXXXXXXXX for c in [r, g, b]
+//                 ^ bit 16
  // RETURNS:
  //    u16 output:  0bbbbbgggggrrrrr
  //                 ^ bit 16
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero
  ////////////////////////////////////////////////////////////////////////////////
  // RETURNS:
  //    u16 output:  0bbbbbgggggrrrrr
  //                 ^ bit 16
  // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE uint_fast16_t gpuLightingRGBGeneric(u32 gCol)
+GPU_INLINE uint_fast16_t gpuLightingRGB(gcol_t gCol)
  {
  {
-       return ((gCol<< 5)&0x7C00) |
-              ((gCol>>11)&0x03E0) |
-               (gCol>>27);
+       return (gCol.c.r >> 11) |
+               ((gCol.c.g >> 6) & 0x3e0) |
+               ((gCol.c.b >> 1) & 0x7c00);
  }
  
  }
  
-
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
-// Convert packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet in 'gCol'
-//  to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use
+// Convert Gouraud gcol_t fixed-pt 8.8 rgb triplet in 'gCol'
+//  to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use
  //  with HQ 24-bit lighting/quantization.
  //
  // INPUT:
  //  with HQ 24-bit lighting/quantization.
  //
  // INPUT:
-//       'gCol' input:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-//                      ^ bit 31
+//       'gCol' input:  ccccccccXXXXXXXX for c in [r, g, b]
+//                      ^ bit 16
  // RETURNS:
  //         u32 output:  000bbbbbXXXX0gggggXXXX0rrrrrXXXX
  //                      ^ bit 31
  //  Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
  // RETURNS:
  //         u32 output:  000bbbbbXXXX0gggggXXXX0rrrrrXXXX
  //                      ^ bit 31
  //  Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE u32 gpuLightingRGB24(u32 gCol)
+GPU_INLINE u32 gpuLightingRGB24(gcol_t gCol)
  {
  {
-       return ((gCol<<19) & (0x1FF<<20)) |
-              ((gCol>> 2) & (0x1FF<<10)) |
-               (gCol>>23);
+       return (gCol.c.r >> 7)
+               | ((gCol.c.g >> 7) << 10)
+               | ((gCol.c.b >> 7) << 20);
  }
  
  }
  
-
  ////////////////////////////////////////////////////////////////////////////////
  // Apply fast (low-precision) 5-bit lighting to bgr555 texture color:
  //
  ////////////////////////////////////////////////////////////////////////////////
  // Apply fast (low-precision) 5-bit lighting to bgr555 texture color:
  //
@@ -181,25 +180,23 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5,
  // Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color:
  //
  // INPUT:
  // Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color:
  //
  // INPUT:
-//  'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of
-//     15.0 is midpoint that does not modify color of texture
-//         gCol input :  rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX
-//                       ^ bit 31
+//  'gCol' is a Gouraud fixed-pt 8.8 rgb triplet
+//        'gCol' input:  ccccccccXXXXXXXX for c in [r, g, b]
+//                       ^ bit 16
  //        'uSrc' input:  -bbbbbgggggrrrrr
  //                       ^ bit 16
  // RETURNS:
  //          u16 output:  0bbbbbgggggrrrrr
  // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
  //        'uSrc' input:  -bbbbbgggggrrrrr
  //                       ^ bit 16
  // RETURNS:
  //          u16 output:  0bbbbbgggggrrrrr
  // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gCol)
+GPU_INLINE uint_fast16_t gpuLightingTXTGouraud(uint_fast16_t uSrc, gcol_t gCol)
  {
  {
-       return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) |
-              (gpu_unai.LightLUT[ (uSrc&0x03E0)     | ((gCol>>16)&0x1F)]<< 5) |
-              (gpu_unai.LightLUT[((uSrc&0x001F)<<5) |  (gCol>>27)      ]) |
+       return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | (gCol.c.b >> 11)] << 10) |
+              (gpu_unai.LightLUT[ (uSrc&0x03E0)     | (gCol.c.g >> 11)] << 5) |
+              (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol.c.r >> 11)]) |
                (uSrc & 0x8000);
  }
  
                (uSrc & 0x8000);
  }
  
-
  ////////////////////////////////////////////////////////////////////////////////
  // Apply high-precision 8-bit lighting to bgr555 texture color,
  //  returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet
  ////////////////////////////////////////////////////////////////////////////////
  // Apply high-precision 8-bit lighting to bgr555 texture color,
  //  returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet
@@ -244,22 +241,22 @@ GPU_INLINE u32 gpuLightingTXT24(uint_fast16_t uSrc, u8 r8, u8 g8, u8 b8)
  // INPUT:
  //       'uSrc' input: -bbbbbgggggrrrrr
  //                     ^ bit 16
  // INPUT:
  //       'uSrc' input: -bbbbbgggggrrrrr
  //                     ^ bit 16
-//       'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-//                     ^ bit 31
+//       'gCol' input: ccccccccXXXXXXXX for c in [r, g, b]
+//                     ^ bit 16
  // RETURNS:
  //         u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
  //                     ^ bit 31
  // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
  // RETURNS:
  //         u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
  //                     ^ bit 31
  // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, u32 gCol)
+GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, gcol_t gCol)
  {
         uint_fast16_t r1 = uSrc&0x001F;
         uint_fast16_t g1 = uSrc&0x03E0;
         uint_fast16_t b1 = uSrc&0x7C00;
  
  {
         uint_fast16_t r1 = uSrc&0x001F;
         uint_fast16_t g1 = uSrc&0x03E0;
         uint_fast16_t b1 = uSrc&0x7C00;
  
-       uint_fast16_t r2 = (gCol>>24) & 0xFF;
-       uint_fast16_t g2 = (gCol>>13) & 0xFF;
-       uint_fast16_t b2 = (gCol>> 2) & 0xFF;
+       uint_fast16_t r2 = gCol.c.r >> 8;
+       uint_fast16_t g2 = gCol.c.g >> 8;
+       uint_fast16_t b2 = gCol.c.b >> 8;
  
         u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000;
         u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000;
  
         u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000;
         u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000;
diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h

index 4ab5a52..3306202 100644 (file)
--- a/plugins/gpu_unai/gpu_unai.h
+++ b/plugins/gpu_unai/gpu_unai.h
@@ -53,6 +53,14 @@
  #define u32 uint32_t
  #define s32 int32_t
  #define s64 int64_t
  #define u32 uint32_t
  #define s32 int32_t
  #define s64 int64_t
+#define u64 uint64_t
+
+typedef union {
+       struct {
+               u16 r, g, b;
+       } c;
+       u64 raw;
+} gcol_t;
  
  typedef struct {
          u32 v;
  
  typedef struct {
          u32 v;
@@ -253,11 +261,12 @@ struct gpu_unai_t {
         s32 u_inc, v_inc;
  
         // Color for Gouraud-shaded prims
         s32 u_inc, v_inc;
  
         // Color for Gouraud-shaded prims
+       // Fixed-pt 8.8 rgb triplet
         // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
         // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
-       //  layout:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
-       //           ^ bit 31                       ^ bit 0
-       u32 gCol;
-       u32 gInc;          // Increment along scanline for gCol
+       //  layout:  ccccccccXXXXXXXX for c in [r, g, b]
+       //           ^ bit 16
+       gcol_t gCol;
+       gcol_t gInc;       // Increment along scanline for gCol
  
         // Color for flat-shaded, texture-blended prims
         u8  r5, g5, b5;    // 5-bit light for undithered prims
  
         // Color for flat-shaded, texture-blended prims
         u8  r5, g5, b5;    // 5-bit light for undithered prims
author	Paul Cercueil <paul@crapouillou.net>
	Tue, 31 Oct 2023 17:11:39 +0000 (18:11 +0100)
committer	Paul Cercueil <paul@crapouillou.net>
	Thu, 2 Nov 2023 09:50:59 +0000 (10:50 +0100)
plugins/gpu_unai/gpu_inner.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_inner_light.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_unai.h		patch \| blob \| blame \| history