| 1 | /*************************************************************************** |
| 2 | * Copyright (C) 2010 PCSX4ALL Team * |
| 3 | * Copyright (C) 2010 Unai * |
| 4 | * * |
| 5 | * This program is free software; you can redistribute it and/or modify * |
| 6 | * it under the terms of the GNU General Public License as published by * |
| 7 | * the Free Software Foundation; either version 2 of the License, or * |
| 8 | * (at your option) any later version. * |
| 9 | * * |
| 10 | * This program is distributed in the hope that it will be useful, * |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 13 | * GNU General Public License for more details. * |
| 14 | * * |
| 15 | * You should have received a copy of the GNU General Public License * |
| 16 | * along with this program; if not, write to the * |
| 17 | * Free Software Foundation, Inc., * |
| 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * |
| 19 | ***************************************************************************/ |
| 20 | |
| 21 | #ifndef _OP_BLEND_H_ |
| 22 | #define _OP_BLEND_H_ |
| 23 | |
| 24 | // GPU Blending operations functions |
| 25 | |
| 26 | //////////////////////////////////////////////////////////////////////////////// |
| 27 | // Blend bgr555 color in 'uSrc' (foreground) with bgr555 color |
| 28 | // in 'uDst' (background), returning resulting color. |
| 29 | // |
| 30 | // INPUT: |
| 31 | // 'uSrc','uDst' input: -bbbbbgggggrrrrr |
| 32 | // ^ bit 15 |
| 33 | // RETURNS: |
| 34 | // u16 output: 0bbbbbgggggrrrrr |
| 35 | // ^ bit 15 |
| 36 | // |
| 37 | // Where '0' is zero-padding, and '-' is don't care |
| 38 | //////////////////////////////////////////////////////////////////////////////// |
| 39 | template <int BLENDMODE, bool SKIP_USRC_MSB_MASK> |
| 40 | GPU_INLINE uint_fast16_t gpuBlendingGeneric(uint_fast16_t uSrc, uint_fast16_t uDst) |
| 41 | { |
| 42 | // These use Blargg's bitwise modulo-clamping: |
| 43 | // http://blargg.8bitalley.com/info/rgb_mixing.html |
| 44 | // http://blargg.8bitalley.com/info/rgb_clamped_add.html |
| 45 | // http://blargg.8bitalley.com/info/rgb_clamped_sub.html |
| 46 | |
| 47 | uint_fast16_t mix; |
| 48 | |
| 49 | // 0.5 x Back + 0.5 x Forward |
| 50 | if (BLENDMODE==0) { |
| 51 | #ifdef GPU_UNAI_USE_ACCURATE_BLENDING |
| 52 | // Slower, but more accurate (doesn't lose LSB data) |
| 53 | uDst &= 0x7fff; |
| 54 | if (!SKIP_USRC_MSB_MASK) |
| 55 | uSrc &= 0x7fff; |
| 56 | mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; |
| 57 | #else |
| 58 | mix = ((uDst & 0x7bde) + (uSrc & 0x7bde)) >> 1; |
| 59 | #endif |
| 60 | } |
| 61 | |
| 62 | // 1.0 x Back + 1.0 x Forward |
| 63 | if (BLENDMODE==1) { |
| 64 | uDst &= 0x7fff; |
| 65 | if (!SKIP_USRC_MSB_MASK) |
| 66 | uSrc &= 0x7fff; |
| 67 | u32 sum = uSrc + uDst; |
| 68 | u32 low_bits = (uSrc ^ uDst) & 0x0421; |
| 69 | u32 carries = (sum - low_bits) & 0x8420; |
| 70 | u32 modulo = sum - carries; |
| 71 | u32 clamp = carries - (carries >> 5); |
| 72 | mix = modulo | clamp; |
| 73 | } |
| 74 | |
| 75 | // 1.0 x Back - 1.0 x Forward |
| 76 | if (BLENDMODE==2) { |
| 77 | uDst &= 0x7fff; |
| 78 | if (!SKIP_USRC_MSB_MASK) |
| 79 | uSrc &= 0x7fff; |
| 80 | u32 diff = uDst - uSrc + 0x8420; |
| 81 | u32 low_bits = (uDst ^ uSrc) & 0x8420; |
| 82 | u32 borrows = (diff - low_bits) & 0x8420; |
| 83 | u32 modulo = diff - borrows; |
| 84 | u32 clamp = borrows - (borrows >> 5); |
| 85 | mix = modulo & clamp; |
| 86 | } |
| 87 | |
| 88 | // 1.0 x Back + 0.25 x Forward |
| 89 | if (BLENDMODE==3) { |
| 90 | uDst &= 0x7fff; |
| 91 | uSrc = ((uSrc >> 2) & 0x1ce7); |
| 92 | u32 sum = uSrc + uDst; |
| 93 | u32 low_bits = (uSrc ^ uDst) & 0x0421; |
| 94 | u32 carries = (sum - low_bits) & 0x8420; |
| 95 | u32 modulo = sum - carries; |
| 96 | u32 clamp = carries - (carries >> 5); |
| 97 | mix = modulo | clamp; |
| 98 | } |
| 99 | |
| 100 | return mix; |
| 101 | } |
| 102 | |
| 103 | |
| 104 | //////////////////////////////////////////////////////////////////////////////// |
| 105 | // Convert bgr555 color in uSrc to padded u32 5.4:5.4:5.4 bgr fixed-pt |
| 106 | // color triplet suitable for use with HQ 24-bit quantization. |
| 107 | // |
| 108 | // INPUT: |
| 109 | // 'uSrc' input: -bbbbbgggggrrrrr |
| 110 | // ^ bit 15 |
| 111 | // RETURNS: |
| 112 | // u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX |
| 113 | // ^ bit 31 |
| 114 | // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care |
| 115 | //////////////////////////////////////////////////////////////////////////////// |
| 116 | GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc) |
| 117 | { |
| 118 | return ((uSrc & 0x7C00)<<14) |
| 119 | | ((uSrc & 0x03E0)<< 9) |
| 120 | | ((uSrc & 0x001F)<< 4); |
| 121 | } |
| 122 | |
| 123 | |
| 124 | //////////////////////////////////////////////////////////////////////////////// |
| 125 | // Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24' |
| 126 | // (foreground color) with bgr555 color in 'uDst' (background color), |
| 127 | // returning the resulting u32 5.4:5.4:5.4 color. |
| 128 | // |
| 129 | // INPUT: |
| 130 | // 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX |
| 131 | // ^ bit 31 |
| 132 | // 'uDst' input: -bbbbbgggggrrrrr |
| 133 | // ^ bit 15 |
| 134 | // RETURNS: |
| 135 | // u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX |
| 136 | // ^ bit 31 |
| 137 | // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care |
| 138 | //////////////////////////////////////////////////////////////////////////////// |
| 139 | template <int BLENDMODE> |
| 140 | GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst) |
| 141 | { |
| 142 | // These use techniques adapted from Blargg's techniques mentioned in |
| 143 | // in gpuBlending() comments above. Not as much bitwise trickery is |
| 144 | // necessary because of presence of 0 padding in uSrc24 format. |
| 145 | |
| 146 | u32 uDst24 = gpuGetRGB24(uDst); |
| 147 | u32 mix; |
| 148 | |
| 149 | // 0.5 x Back + 0.5 x Forward |
| 150 | if (BLENDMODE==0) { |
| 151 | const u32 uMsk = 0x1FE7F9FE; |
| 152 | // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already |
| 153 | mix = (uDst24 + (uSrc24 & uMsk)) >> 1; |
| 154 | } |
| 155 | |
| 156 | // 1.0 x Back + 1.0 x Forward |
| 157 | if (BLENDMODE==1) { |
| 158 | u32 sum = uSrc24 + uDst24; |
| 159 | u32 carries = sum & 0x20080200; |
| 160 | u32 modulo = sum - carries; |
| 161 | u32 clamp = carries - (carries >> 9); |
| 162 | mix = modulo | clamp; |
| 163 | } |
| 164 | |
| 165 | // 1.0 x Back - 1.0 x Forward |
| 166 | if (BLENDMODE==2) { |
| 167 | // Insert ones in 0-padded borrow slot of color to be subtracted from |
| 168 | uDst24 |= 0x20080200; |
| 169 | u32 diff = uDst24 - uSrc24; |
| 170 | u32 borrows = diff & 0x20080200; |
| 171 | u32 clamp = borrows - (borrows >> 9); |
| 172 | mix = diff & clamp; |
| 173 | } |
| 174 | |
| 175 | // 1.0 x Back + 0.25 x Forward |
| 176 | if (BLENDMODE==3) { |
| 177 | uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2; |
| 178 | u32 sum = uSrc24 + uDst24; |
| 179 | u32 carries = sum & 0x20080200; |
| 180 | u32 modulo = sum - carries; |
| 181 | u32 clamp = carries - (carries >> 9); |
| 182 | mix = modulo | clamp; |
| 183 | } |
| 184 | |
| 185 | return mix; |
| 186 | } |
| 187 | |
| 188 | #endif //_OP_BLEND_H_ |