+
+
+////////////////////////////////////////////////////////////////////////////////
+// Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24'
+// (foreground color) with bgr555 color in 'uDst' (background color),
+// returning the resulting u32 5.4:5.4:5.4 color.
+//
+// INPUT:
+// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// 'uDst' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+template <int BLENDMODE>
+GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst)
+{
+ // These blend modes use techniques adapted from Blargg's, as described
+ // in the gpuBlending() comments above. Not as much bitwise trickery is
+ // necessary because of the presence of 0 padding in the uSrc24 format.
+
+ u32 uDst24 = gpuGetRGB24(uDst);
+ u32 mix;
+
+ // 0.5 x Back + 0.5 x Forward
+ if (BLENDMODE==0) {
+ const u32 uMsk = 0x1FE7F9FE;
+ // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already
+ mix = (uDst24 + (uSrc24 & uMsk)) >> 1;
+ }
+
+ // 1.0 x Back + 1.0 x Forward
+ if (BLENDMODE==1) {
+ u32 sum = uSrc24 + uDst24;
+ u32 carries = sum & 0x20080200;
+ u32 modulo = sum - carries;
+ u32 clamp = carries - (carries >> 9);
+ mix = modulo | clamp;
+ }
+
+ // 1.0 x Back - 1.0 x Forward
+ if (BLENDMODE==2) {
+ // Insert ones in 0-padded borrow slot of color to be subtracted from
+ uDst24 |= 0x20080200;
+ u32 diff = uDst24 - uSrc24;
+ u32 borrows = diff & 0x20080200;
+ u32 clamp = borrows - (borrows >> 9);
+ mix = diff & clamp;
+ }
+
+ // 1.0 x Back + 0.25 x Forward
+ if (BLENDMODE==3) {
+ uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2;
+ u32 sum = uSrc24 + uDst24;
+ u32 carries = sum & 0x20080200;
+ u32 modulo = sum - carries;
+ u32 clamp = carries - (carries >> 9);
+ mix = modulo | clamp;
+ }
+
+ return mix;