| 1 | #ifndef _OP_BLEND_ARM_H_ |
| 2 | #define _OP_BLEND_ARM_H_ |
| 3 | |
| 4 | //////////////////////////////////////////////////////////////////////////////// |
| 5 | // Blend bgr555 color in 'uSrc' (foreground) with bgr555 color |
| 6 | // in 'uDst' (background), returning resulting color. |
| 7 | // |
| 8 | // INPUT: |
| 9 | // 'uSrc','uDst' input: -bbbbbgggggrrrrr |
| 10 | // ^ bit 16 |
| 11 | // OUTPUT: |
| 12 | // u16 output: 0bbbbbgggggrrrrr |
| 13 | // ^ bit 16 |
| 14 | // RETURNS: |
| 15 | // Where '0' is zero-padding, and '-' is don't care |
| 16 | //////////////////////////////////////////////////////////////////////////////// |
| 17 | template <int BLENDMODE, bool SKIP_USRC_MSB_MASK> |
| 18 | GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) |
| 19 | { |
| 20 | // These use Blargg's bitwise modulo-clamping: |
| 21 | // http://blargg.8bitalley.com/info/rgb_mixing.html |
| 22 | // http://blargg.8bitalley.com/info/rgb_clamped_add.html |
| 23 | // http://blargg.8bitalley.com/info/rgb_clamped_sub.html |
| 24 | |
| 25 | uint_fast16_t mix; |
| 26 | |
| 27 | // Clear preserved msb |
| 28 | asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); |
| 29 | |
| 30 | if (BLENDMODE == 3) { |
| 31 | // Prepare uSrc for blending ((0.25 * uSrc) & (0.25 * mask)) |
| 32 | asm ("and %[uSrc], %[mask], %[uSrc], lsr #0x2" : [uSrc] "+r" (uSrc) : [mask] "r" (0x1ce7)); |
| 33 | } else if (!SKIP_USRC_MSB_MASK) { |
| 34 | asm ("bic %[uSrc], %[uSrc], #0x8000" : [uSrc] "+r" (uSrc)); |
| 35 | } |
| 36 | |
| 37 | |
| 38 | // 0.5 x Back + 0.5 x Forward |
| 39 | if (BLENDMODE==0) { |
| 40 | // mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; |
| 41 | asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst |
| 42 | "and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421 |
| 43 | "sub %[mix], %[uDst], %[mix]\n\t" // uDst - ... |
| 44 | "add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ... |
| 45 | "mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1 |
| 46 | : [mix] "=&r" (mix) |
| 47 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
| 48 | } |
| 49 | |
| 50 | if (BLENDMODE == 1 || BLENDMODE == 3) { |
| 51 | // u32 sum = uSrc + uDst; |
| 52 | // u32 low_bits = (uSrc ^ uDst) & 0x0421; |
| 53 | // u32 carries = (sum - low_bits) & 0x8420; |
| 54 | // u32 modulo = sum - carries; |
| 55 | // u32 clamp = carries - (carries >> 5); |
| 56 | // mix = modulo | clamp; |
| 57 | |
| 58 | u32 sum; |
| 59 | |
| 60 | asm ("add %[sum], %[uSrc], %[uDst]\n\t" // sum = uSrc + uDst |
| 61 | "eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst |
| 62 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = (... & 0x0421) |
| 63 | "sub %[mix], %[sum], %[mix]\n\t" // sum - low_bits |
| 64 | "and %[mix], %[mix], %[mask], lsl #0x05\n\t" // carries = ... & 0x8420 |
| 65 | "sub %[sum], %[sum], %[mix] \n\t" // modulo = sum - carries |
| 66 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = carries - (carries >> 5) |
| 67 | "orr %[mix], %[sum], %[mix]" // mix = modulo | clamp |
| 68 | : [sum] "=&r" (sum), [mix] "=&r" (mix) |
| 69 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
| 70 | } |
| 71 | |
| 72 | // 1.0 x Back - 1.0 x Forward |
| 73 | if (BLENDMODE==2) { |
| 74 | u32 diff; |
| 75 | // u32 diff = uDst - uSrc + 0x8420; |
| 76 | // u32 low_bits = (uDst ^ uSrc) & 0x8420; |
| 77 | // u32 borrows = (diff - low_bits) & 0x8420; |
| 78 | // u32 modulo = diff - borrows; |
| 79 | // u32 clamp = borrows - (borrows >> 5); |
| 80 | // mix = modulo & clamp; |
| 81 | asm ("sub %[diff], %[uDst], %[uSrc]\n\t" // uDst - uSrc |
| 82 | "add %[diff], %[diff], %[mask]\n\t" // diff = ... + 0x8420 |
| 83 | "eor %[mix], %[uDst], %[uSrc]\n\t" // uDst ^ uSrc |
| 84 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = ... & 0x8420 |
| 85 | "sub %[mix], %[diff], %[mix]\n\t" // diff - low_bits |
| 86 | "and %[mix], %[mix], %[mask]\n\t" // borrows = ... & 0x8420 |
| 87 | "sub %[diff], %[diff], %[mix]\n\t" // modulo = diff - borrows |
| 88 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = borrows - (borrows >> 5) |
| 89 | "and %[mix], %[diff], %[mix]" // mix = modulo & clamp |
| 90 | : [diff] "=&r" (diff), [mix] "=&r" (mix) |
| 91 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); |
| 92 | } |
| 93 | |
| 94 | // There's not a case where we can get into this function, |
| 95 | // SKIP_USRC_MSB_MASK is false, and the msb of uSrc is unset. |
| 96 | if (!SKIP_USRC_MSB_MASK) { |
| 97 | asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); |
| 98 | } |
| 99 | |
| 100 | return mix; |
| 101 | } |
| 102 | |
| 103 | #endif //_OP_BLEND_ARM_H_ |