Commit | Line | Data |
---|---|---|
788f5e89 JW |
1 | #ifndef _OP_BLEND_ARM_H_ |
2 | #define _OP_BLEND_ARM_H_ | |
3 | ||
4 | //////////////////////////////////////////////////////////////////////////////// | |
5 | // Blend bgr555 color in 'uSrc' (foreground) with bgr555 color | |
6 | // in 'uDst' (background), returning resulting color. | |
7 | // | |
8 | // INPUT: | |
9 | // 'uSrc','uDst' input: -bbbbbgggggrrrrr | |
10 | // ^ bit 16 | |
11 | // OUTPUT: | |
12 | // u16 output: 0bbbbbgggggrrrrr | |
13 | // ^ bit 16 | |
14 | // RETURNS: | |
15 | // Where '0' is zero-padding, and '-' is don't care | |
16 | //////////////////////////////////////////////////////////////////////////////// | |
17 | template <int BLENDMODE, bool SKIP_USRC_MSB_MASK> | |
92eab56a | 18 | GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) |
788f5e89 JW |
19 | { |
20 | // These use Blargg's bitwise modulo-clamping: | |
21 | // http://blargg.8bitalley.com/info/rgb_mixing.html | |
22 | // http://blargg.8bitalley.com/info/rgb_clamped_add.html | |
23 | // http://blargg.8bitalley.com/info/rgb_clamped_sub.html | |
24 | ||
92eab56a | 25 | uint_fast16_t mix; |
788f5e89 | 26 | |
335c3831 JW |
27 | // Clear preserved msb |
28 | asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); | |
29 | ||
30 | if (BLENDMODE == 3) { | |
31 | // Prepare uSrc for blending ((0.25 * uSrc) & (0.25 * mask)) | |
32 | asm ("and %[uSrc], %[mask], %[uSrc], lsr #0x2" : [uSrc] "+r" (uSrc) : [mask] "r" (0x1ce7)); | |
33 | } else if (!SKIP_USRC_MSB_MASK) { | |
34 | asm ("bic %[uSrc], %[uSrc], #0x8000" : [uSrc] "+r" (uSrc)); | |
35 | } | |
788f5e89 | 36 | |
788f5e89 | 37 | |
788f5e89 JW |
38 | // 0.5 x Back + 0.5 x Forward |
39 | if (BLENDMODE==0) { | |
335c3831 JW |
40 | // mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; |
41 | asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst | |
42 | "and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421 | |
43 | "sub %[mix], %[uDst], %[mix]\n\t" // uDst - ... | |
44 | "add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ... | |
45 | "mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1 | |
46 | : [mix] "=&r" (mix) | |
47 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); | |
48 | } | |
788f5e89 | 49 | |
335c3831 JW |
50 | if (BLENDMODE == 1 || BLENDMODE == 3) { |
51 | // u32 sum = uSrc + uDst; | |
788f5e89 JW |
52 | // u32 low_bits = (uSrc ^ uDst) & 0x0421; |
53 | // u32 carries = (sum - low_bits) & 0x8420; | |
54 | // u32 modulo = sum - carries; | |
55 | // u32 clamp = carries - (carries >> 5); | |
56 | // mix = modulo | clamp; | |
57 | ||
335c3831 | 58 | u32 sum; |
788f5e89 | 59 | |
335c3831 JW |
60 | asm ("add %[sum], %[uSrc], %[uDst]\n\t" // sum = uSrc + uDst |
61 | "eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst | |
62 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = (... & 0x0421) | |
63 | "sub %[mix], %[sum], %[mix]\n\t" // sum - low_bits | |
64 | "and %[mix], %[mix], %[mask], lsl #0x05\n\t" // carries = ... & 0x8420 | |
65 | "sub %[sum], %[sum], %[mix] \n\t" // modulo = sum - carries | |
66 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = carries - (carries >> 5) | |
67 | "orr %[mix], %[sum], %[mix]" // mix = modulo | clamp | |
68 | : [sum] "=&r" (sum), [mix] "=&r" (mix) | |
69 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); | |
70 | } | |
788f5e89 JW |
71 | |
72 | // 1.0 x Back - 1.0 x Forward | |
73 | if (BLENDMODE==2) { | |
335c3831 JW |
74 | u32 diff; |
75 | // u32 diff = uDst - uSrc + 0x8420; | |
788f5e89 JW |
76 | // u32 low_bits = (uDst ^ uSrc) & 0x8420; |
77 | // u32 borrows = (diff - low_bits) & 0x8420; | |
78 | // u32 modulo = diff - borrows; | |
79 | // u32 clamp = borrows - (borrows >> 5); | |
80 | // mix = modulo & clamp; | |
335c3831 JW |
81 | asm ("sub %[diff], %[uDst], %[uSrc]\n\t" // uDst - uSrc |
82 | "add %[diff], %[diff], %[mask]\n\t" // diff = ... + 0x8420 | |
83 | "eor %[mix], %[uDst], %[uSrc]\n\t" // uDst ^ uSrc | |
84 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = ... & 0x8420 | |
85 | "sub %[mix], %[diff], %[mix]\n\t" // diff - low_bits | |
86 | "and %[mix], %[mix], %[mask]\n\t" // borrows = ... & 0x8420 | |
87 | "sub %[diff], %[diff], %[mix]\n\t" // modulo = diff - borrows | |
88 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = borrows - (borrows >> 5) | |
89 | "and %[mix], %[diff], %[mix]" // mix = modulo & clamp | |
90 | : [diff] "=&r" (diff), [mix] "=&r" (mix) | |
91 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); | |
788f5e89 | 92 | } |
92eab56a | 93 | |
335c3831 JW |
94 | // There's not a case where we can get into this function, |
95 | // SKIP_USRC_MSB_MASK is false, and the msb of uSrc is unset. | |
96 | if (!SKIP_USRC_MSB_MASK) { | |
97 | asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); | |
98 | } | |
788f5e89 JW |
99 | |
100 | return mix; | |
101 | } | |
102 | ||
103 | #endif //_OP_BLEND_ARM_H_ |