0bfe8d59 |
1 | #ifndef _OP_BLEND_ARM_H_ |
2 | #define _OP_BLEND_ARM_H_ |
3 | |
4 | //////////////////////////////////////////////////////////////////////////////// |
5 | // Blend bgr555 color in 'uSrc' (foreground) with bgr555 color |
6 | // in 'uDst' (background), returning resulting color. |
7 | // |
// TEMPLATE PARAMETERS:
//   BLENDMODE           0: 0.5 x Back + 0.5 x Forward
//                       1: 1.0 x Back + 1.0 x Forward  (clamped)
//                       2: 1.0 x Back - 1.0 x Forward  (clamped)
//                       3: 1.0 x Back + 0.25 x Forward (clamped)
//                       No other value assigns the result ('mix' stays
//                       uninitialized), so only 0..3 are meaningful.
//   SKIP_USRC_MSB_MASK  true:  the msb of 'uSrc' is NOT cleared here (modes
//                              0-2) and the msb of the result is NOT set.
//                              Presumably the caller has already cleared the
//                              msb of 'uSrc' -- TODO confirm against callers.
//                       false: the msb of 'uSrc' is cleared before blending
//                              and the msb of the result is forced to 1.
//
8 | // INPUT: |
9 | // 'uSrc','uDst' input: -bbbbbgggggrrrrr |
10 | // ^ bit 16 (counting from 1: the msb, mask 0x8000) |
11 | // OUTPUT: |
12 | // u16 output: 0bbbbbgggggrrrrr |
13 | // ^ bit 16 (this msb is 1 instead of 0 when SKIP_USRC_MSB_MASK is false) |
14 | // RETURNS: the blended color, laid out as shown under OUTPUT. |
15 | // Where '0' is zero-padding, and '-' is don't care (for 'uSrc' with SKIP_USRC_MSB_MASK true and BLENDMODE != 3 the msb is used as-is) |
16 | //////////////////////////////////////////////////////////////////////////////// |
17 | template <int BLENDMODE, bool SKIP_USRC_MSB_MASK> |
18 | GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) |
19 | { |
20 | // These use Blargg's bitwise modulo-clamping: |
21 | // http://blargg.8bitalley.com/info/rgb_mixing.html |
22 | // http://blargg.8bitalley.com/info/rgb_clamped_add.html |
23 | // http://blargg.8bitalley.com/info/rgb_clamped_sub.html |
//
// Constants used below:
//   0x0421 - the lowest bit of each 5-bit channel (bits 0, 5, 10).
//   0x8420 - the bit directly above each channel (bits 5, 10, 15); this is
//            where a per-channel carry/borrow is collected.
//
// In the multi-instruction asm blocks the outputs use "=&r" (early-clobber)
// because they are written while uSrc/uDst/mask are still to be read, so
// they must not be allocated to the same register as any input.
24 | |
25 | uint_fast16_t mix; |
26 | |
27 | // Clear preserved msb (0x8000) of uDst before blending |
28 | asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); |
29 | |
30 | if (BLENDMODE == 3) { |
31 | // Prepare uSrc for blending ((0.25 * uSrc) & (0.25 * mask)) |
// 0x1ce7 keeps only the low 3 bits of each 5-bit channel after the shift
// right by 2, discarding bits that were shifted in from the neighbouring
// channel. It also leaves nothing above bit 12, so the msb of uSrc is
// gone without a separate 'bic'.
32 | asm ("and %[uSrc], %[mask], %[uSrc], lsr #0x2" : [uSrc] "+r" (uSrc) : [mask] "r" (0x1ce7)); |
33 | } else if (!SKIP_USRC_MSB_MASK) { |
// Modes 0-2: clear the msb of uSrc here unless the template says to skip it
34 | asm ("bic %[uSrc], %[uSrc], #0x8000" : [uSrc] "+r" (uSrc)); |
35 | } |
36 | |
37 | |
38 | // 0.5 x Back + 0.5 x Forward |
39 | if (BLENDMODE==0) { |
40 | // mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; |
41 | asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst |
42 | "and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421 |
43 | "sub %[mix], %[uDst], %[mix]\n\t" // uDst - ... |
44 | "add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ... |
45 | "mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1 |
46 | : [mix] "=&r" (mix) |
47 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
48 | } |
49 | |
// 1.0 x Back + 1.0 x Forward (mode 1), or
// 1.0 x Back + 0.25 x Forward (mode 3; uSrc was already scaled by 0.25 above)
50 | if (BLENDMODE == 1 || BLENDMODE == 3) { |
51 | // u32 sum = uSrc + uDst; |
52 | // u32 low_bits = (uSrc ^ uDst) & 0x0421; |
53 | // u32 carries = (sum - low_bits) & 0x8420; |
54 | // u32 modulo = sum - carries; |
55 | // u32 clamp = carries - (carries >> 5); |
56 | // mix = modulo | clamp; |
57 | |
58 | u32 sum; |
59 | |
60 | asm ("add %[sum], %[uSrc], %[uDst]\n\t" // sum = uSrc + uDst |
61 | "eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst |
62 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = (... & 0x0421) |
63 | "sub %[mix], %[sum], %[mix]\n\t" // sum - low_bits |
64 | "and %[mix], %[mix], %[mask], lsl #0x05\n\t" // carries = ... & 0x8420 (i.e. mask 0x0421 << 5) |
65 | "sub %[sum], %[sum], %[mix] \n\t" // modulo = sum - carries |
66 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = carries - (carries >> 5) |
67 | "orr %[mix], %[sum], %[mix]" // mix = modulo | clamp |
68 | : [sum] "=&r" (sum), [mix] "=&r" (mix) |
69 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); |
70 | } |
71 | |
72 | // 1.0 x Back - 1.0 x Forward |
73 | if (BLENDMODE==2) { |
74 | u32 diff; |
75 | // u32 diff = uDst - uSrc + 0x8420; |
76 | // u32 low_bits = (uDst ^ uSrc) & 0x8420; |
77 | // u32 borrows = (diff - low_bits) & 0x8420; |
78 | // u32 modulo = diff - borrows; |
79 | // u32 clamp = borrows - (borrows >> 5); |
80 | // mix = modulo & clamp; |
81 | asm ("sub %[diff], %[uDst], %[uSrc]\n\t" // uDst - uSrc |
82 | "add %[diff], %[diff], %[mask]\n\t" // diff = ... + 0x8420 |
83 | "eor %[mix], %[uDst], %[uSrc]\n\t" // uDst ^ uSrc |
84 | "and %[mix], %[mix], %[mask]\n\t" // low_bits = ... & 0x8420 |
85 | "sub %[mix], %[diff], %[mix]\n\t" // diff - low_bits |
86 | "and %[mix], %[mix], %[mask]\n\t" // borrows = ... & 0x8420 |
87 | "sub %[diff], %[diff], %[mix]\n\t" // modulo = diff - borrows |
88 | "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = borrows - (borrows >> 5) |
89 | "and %[mix], %[diff], %[mix]" // mix = modulo & clamp |
90 | : [diff] "=&r" (diff), [mix] "=&r" (mix) |
91 | : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); |
92 | } |
93 | |
94 | // Author's note: there is no case where we get into this function with |
95 | // SKIP_USRC_MSB_MASK false and the msb of uSrc unset -- so the msb is simply set on the result here. |
96 | if (!SKIP_USRC_MSB_MASK) { |
97 | asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); |
98 | } |
99 | |
100 | return mix; |
101 | } |
102 | |
103 | #endif //_OP_BLEND_ARM_H_ |