| 1 | #ifndef _OP_BLEND_ARM_H_ |
| 2 | #define _OP_BLEND_ARM_H_ |
| 3 | |
| 4 | //////////////////////////////////////////////////////////////////////////////// |
| 5 | // Blend bgr555 color in 'uSrc' (foreground) with bgr555 color |
| 6 | // in 'uDst' (background), returning resulting color. |
| 7 | // |
| 8 | // INPUT: |
| 9 | // 'uSrc','uDst' input: -bbbbbgggggrrrrr |
| 10 | // ^ bit 16 |
| 11 | // OUTPUT: |
| 12 | // u16 output: 0bbbbbgggggrrrrr |
| 13 | // ^ bit 16 |
| 14 | // RETURNS: |
| 15 | // Where '0' is zero-padding, and '-' is don't care |
| 16 | //////////////////////////////////////////////////////////////////////////////// |
| 17 | template <int BLENDMODE, bool SKIP_USRC_MSB_MASK> |
| 18 | GPU_INLINE u16 gpuBlendingARM(u16 uSrc, u16 uDst) |
| 19 | { |
| 20 | // These use Blargg's bitwise modulo-clamping: |
| 21 | // http://blargg.8bitalley.com/info/rgb_mixing.html |
| 22 | // http://blargg.8bitalley.com/info/rgb_clamped_add.html |
| 23 | // http://blargg.8bitalley.com/info/rgb_clamped_sub.html |
| 24 | |
| 25 | |
| 26 | u16 mix; |
| 27 | |
| 28 | asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); |
| 29 | |
| 30 | if (BLENDMODE == 3) { |
| 31 | asm ("and %[uSrc], %[mask], %[uSrc], lsr #0x2" : [uSrc] "+r" (uSrc) : [mask] "r" (0x1ce7)); |
| 32 | } else if (!SKIP_USRC_MSB_MASK) { |
| 33 | asm ("bic %[uSrc], %[uSrc], #0x8000" : [uSrc] "+r" (uSrc)); |
| 34 | } |
| 35 | |
| 36 | |
| 37 | // 0.5 x Back + 0.5 x Forward |
| 38 | if (BLENDMODE==0) { |
| 39 | // mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; |
| 40 | asm ("eor %[mix], %[uSrc], %[uDst]\n\t" |
| 41 | "and %[mix], %[mix], %[mask]\n\t" |
| 42 | "sub %[mix], %[uDst], %[mix]\n\t" |
| 43 | "add %[mix], %[uSrc], %[mix]\n\t" |
| 44 | "mov %[mix], %[mix], lsr #0x1\n\t" |
| 45 | : [mix] "=&r" (mix) |
| 46 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
| 47 | } |
| 48 | |
| 49 | if (BLENDMODE == 1 || BLENDMODE == 3) { |
| 50 | // u32 sum = uSrc + uDst; |
| 51 | // u32 low_bits = (uSrc ^ uDst) & 0x0421; |
| 52 | // u32 carries = (sum - low_bits) & 0x8420; |
| 53 | // u32 modulo = sum - carries; |
| 54 | // u32 clamp = carries - (carries >> 5); |
| 55 | // mix = modulo | clamp; |
| 56 | |
| 57 | u32 sum; |
| 58 | |
| 59 | asm ("add %[sum], %[uSrc], %[uDst]\n\t" |
| 60 | "eor %[mix], %[uSrc], %[uDst]\n\t" |
| 61 | "and %[mix], %[mix], %[mask]\n\t" |
| 62 | "sub %[mix], %[sum], %[mix]\n\t" |
| 63 | "and %[mix], %[mix], %[mask], lsl #0x05\n\t" |
| 64 | "sub %[sum], %[sum], %[mix] \n\t" |
| 65 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" |
| 66 | "orr %[mix], %[sum], %[mix]" |
| 67 | : [sum] "=&r" (sum), [mix] "=&r" (mix) |
| 68 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
| 69 | } |
| 70 | |
| 71 | // 1.0 x Back - 1.0 x Forward |
| 72 | if (BLENDMODE==2) { |
| 73 | u32 diff; |
| 74 | // u32 diff = uDst - uSrc + 0x8420; |
| 75 | // u32 low_bits = (uDst ^ uSrc) & 0x8420; |
| 76 | // u32 borrows = (diff - low_bits) & 0x8420; |
| 77 | // u32 modulo = diff - borrows; |
| 78 | // u32 clamp = borrows - (borrows >> 5); |
| 79 | // mix = modulo & clamp; |
| 80 | asm ("sub %[diff], %[uDst], %[uSrc]\n\t" |
| 81 | "add %[diff], %[diff], %[mask]\n\t" |
| 82 | "eor %[mix], %[uDst], %[uSrc]\n\t" |
| 83 | "and %[mix], %[mix], %[mask]\n\t" |
| 84 | "sub %[mix], %[diff], %[mix]\n\t" |
| 85 | "and %[mix], %[mix], %[mask]\n\t" |
| 86 | "sub %[diff], %[diff], %[mix]\n\t" |
| 87 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" |
| 88 | "and %[mix], %[diff], %[mix]" |
| 89 | : [diff] "=&r" (diff), [mix] "=&r" (mix) |
| 90 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); |
| 91 | } |
| 92 | |
| 93 | return mix; |
| 94 | } |
| 95 | |
| 96 | #endif //_OP_BLEND_ARM_H_ |