Commit | Line | Data |
---|---|---|
c9099d02 | 1 | /* |
4f949f3c | 2 | * (C) Gražvydas "notaz" Ignotas, 2011,2012,2022 |
c9099d02 | 3 | * |
4 | * This work is licensed under the terms of any of these licenses | |
5 | * (at your option): | |
6 | * - GNU GPL, version 2 or later. | |
7 | * - GNU LGPL, version 2.1 or later. | |
8 | * See the COPYING file in the top-level directory. | |
9 | */ | |
10 | ||
49e4b976 | 11 | #include <stdint.h> |
a80ae4a0 | 12 | #include "cspace.h" |
13 | ||
4ea7de6a | 14 | /* |
15 | * note: these are intended for testing and should be avoided | |
16 | * in favor of NEON version or platform-specific conversion | |
17 | */ | |
18 | ||
1318072a PC |
/*
 * LE16TOHx2(x): x holds two little-endian 16-bit values packed in one
 * 32-bit word; convert both halves to host byte order.
 * On little-endian hosts this is a no-op.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define SWAP16(x) __builtin_bswap16(x)
/* widen before shifting: the swapped halfword is promoted to int, and
 * shifting a value >= 0x8000 left by 16 would overflow a signed int */
#define LE16TOHx2(x) (((uint32_t)SWAP16((x) >> 16) << 16) | SWAP16(x))
#else
#define LE16TOHx2(x) (x)
#endif
25 | ||
4f949f3c | 26 | #if defined(HAVE_bgr555_to_rgb565) |
27 | ||
28 | /* have bgr555_to_rgb565 somewhere else */ | |
29 | ||
30 | #elif ((defined(__clang_major__) && __clang_major__ >= 4) \ | |
31 | || (defined(__GNUC__) && __GNUC__ >= 5)) \ | |
32 | && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | |
33 | ||
4f949f3c | 34 | #include <assert.h> |
35 | ||
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
/* shift-and-insert: map straight onto the NEON VSLI/VSRI intrinsics */
#define gsli(d_, s_, n_) d_ = vsliq_n_u16(d_, s_, n_)
#define gsri(d_, s_, n_) d_ = vsriq_n_u16(d_, s_, n_)
#else
/* generic vector fallback: a plain OR is enough because do_one_simd()
 * only inserts into bit ranges it has already masked to zero */
#define gsli(d_, s_, n_) d_ |= (s_) << (n_)
#define gsri(d_, s_, n_) d_ |= (s_) >> (n_)
#endif

/* 8 x uint16_t vector; gvu16u is the same vector with only 2-byte alignment */
typedef uint16_t gvu16 __attribute__((vector_size(16),aligned(16)));
typedef uint16_t gvu16u __attribute__((vector_size(16),aligned(2)));
#define gdup(v_) {v_, v_, v_, v_, v_, v_, v_, v_}
/* scalar conversion of one pixel: swap the two outer 5-bit fields and
 * move the middle field up by one bit (bit 15 of the source is dropped) */
#define do_one(s) ({ \
  uint16_t d_ = (s) << 1; d_ = (d_ & 0x07c0) | (d_ << 10) | (d_ >> 11); d_; \
})
/* same conversion for 8 pixels at once; d_ is fully overwritten */
#define do_one_simd(d_, s_, c0x07c0_) do { \
  gvu16 s1 = s_ << 1; \
  d_ = s1 & c0x07c0_; \
  gsli(d_, s_, 11); \
  gsri(d_, s1, 11); \
} while (0)

/*
 * Convert 16-bit BGR555 pixels to RGB565.
 *
 * dst_  - output buffer, must not overlap src_
 * src_  - input pixels
 * bytes - size of the input in bytes
 *
 * Both pointers and the byte count must be even (checked with assert()).
 */
void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
  const uint16_t * __restrict__ src = src_;
  uint16_t * __restrict__ dst = dst_;
  gvu16 c0x07c0 = gdup(0x07c0);

  assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1));

  // align the destination
  // A whole 16-byte vector is loaded and stored here, so this is only
  // safe when at least one full vector remains; shorter buffers are
  // left to the scalar loop at the end.
  if (((uintptr_t)dst & 0x0e) && bytes >= 16)
  {
    int left = 0x10 - (int)((uintptr_t)dst & 0x0e);
    gvu16 d, s = *(const gvu16u *)src;
    do_one_simd(d, s, c0x07c0);
    *(gvu16u *)dst = d;
    // only advance up to the alignment boundary; the pixels past it
    // are simply converted again by the code below
    dst += left / 2;
    src += left / 2;
    bytes -= left;
  }
  // go: aligned stores, possibly unaligned loads
  for (; bytes >= 16; dst += 8, src += 8, bytes -= 16)
  {
    gvu16 d, s = *(const gvu16u *)src;
    do_one_simd(d, s, c0x07c0);
    *(gvu16 *)dst = d;
    __builtin_prefetch(src + 128/2);
  }
  // finish it: remaining pixels one at a time
  for (; bytes > 0; dst++, src++, bytes -= 2)
    *dst = do_one(*src);
}
#undef do_one
#undef do_one_simd
91 | ||
92 | #else | |
c9099d02 | 93 | |
/*
 * Portable fallback: convert 16-bit BGR555 pixels to RGB565, two pixels
 * per 32-bit word.
 *
 * dst_  - output buffer, written through an unsigned int pointer
 * src_  - input pixels (little-endian 16-bit values)
 * bytes - size of the input in bytes; only whole 32-bit words are
 *         converted, so a trailing odd pixel is left untouched.
 *         Zero or negative sizes convert nothing.
 */
void bgr555_to_rgb565(void *dst_, const void *src_, int bytes)
{
	// source can be misaligned, but it's very rare, so just force
	// (the low two address bits are masked off)
	const unsigned int *src = (const void *)((intptr_t)src_ & ~3);
	unsigned int *dst = dst_;
	unsigned int p, r, g, b;
	// signed on purpose: with an unsigned index a negative byte count
	// would be converted into a huge loop bound
	int x, words = bytes / 4;

	for (x = 0; x < words; x++) {
		p = LE16TOHx2(src[x]);

		// the masks cover both packed pixels at once
		r = (p & 0x001f001f) << 11;
		g = (p & 0x03e003e0) << 1;
		b = (p & 0x7c007c00) >> 10;

		dst[x] = r | g | b;
	}
}
111 | ||
d57557c0 | 112 | #endif |
113 | ||
047899a4 | 114 | #ifndef HAVE_bgr888_to_x |
d57557c0 | 115 | |
/*
 * Convert packed 24-bit pixels to 16-bit 5:6:5 pixels in host byte order.
 *
 * For every 3-byte source pixel, byte 0 ends up in bits 15..11, byte 1
 * in bits 10..5 and byte 2 in bits 4..0 of the output pixel.
 *
 * dst_  - output buffer, one uint16_t per converted pixel
 * src_  - input pixels, 3 bytes each
 * bytes - size of the input in bytes; every complete pixel is converted
 *         and a trailing partial pixel is ignored
 *
 * Pixels are stored one at a time as 16-bit values, which gives the same
 * memory layout on little- and big-endian hosts and only needs dst_ to
 * be 2-byte aligned.
 */
void bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
{
	const unsigned char *src = src_;
	uint16_t *dst = dst_;
	unsigned int r, g, b;

	for (; bytes >= 3; bytes -= 3, src += 3, dst++) {
		r = src[0] & 0xf8;
		g = src[1] & 0xfc;
		b = src[2] & 0xf8;
		*dst = (uint16_t)((r << 8) | (g << 3) | (b >> 3));
	}
}
138 | ||
// TODO? not implemented: both functions below are empty placeholders
// that return without reading src or writing dst.
void rgb888_to_rgb565(void *dst, const void *src, int bytes)
{
	(void)dst;
	(void)src;
	(void)bytes;
}

void bgr888_to_rgb888(void *dst, const void *src, int bytes)
{
	(void)dst;
	(void)src;
	(void)bytes;
}
a80ae4a0 | 142 | |
c9099d02 | 143 | #endif // HAVE_bgr888_to_x |
144 | ||
145 | /* YUV stuff */ | |
/* lookup tables used by the *_to_uyvy converters, filled in by
 * bgr_to_uyvy_init(): 16.16 fixed-point luma weights per 5-bit
 * component, and chroma values indexed by (difference + 32) */
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];

/*
 * Fill the YUV lookup tables. Takes no arguments and returns nothing;
 * it only writes the static tables above.
 */
void bgr_to_uyvy_init(void)
{
	int idx, diff, chroma;

	/* the tables implement:
	   y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
	   y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
	   u = (int)(8 * 0.565f * (b0 - y0)) + 128;
	   v = (int)(8 * 0.713f * (r0 - y0)) + 128;
	 */

	/* luma weights, scaled by 65536 and rounded */
	idx = 0;
	while (idx < 32) {
		yuv_ry[idx] = (int)(0.299f * idx * 65536.0f + 0.5f);
		yuv_gy[idx] = (int)(0.587f * idx * 65536.0f + 0.5f);
		yuv_by[idx] = (int)(0.114f * idx * 65536.0f + 0.5f);
		idx++;
	}

	/* chroma for differences -32..31, clamped to 0..255 */
	for (idx = 0; idx < 32 * 2; idx++) {
		diff = idx - 32;

		chroma = (int)(8 * 0.565f * diff) + 128;
		yuv_u[idx] = chroma < 0 ? 0 : (chroma > 255 ? 255 : chroma);

		chroma = (int)(8 * 0.713f * diff) + 128;
		yuv_v[idx] = chroma < 0 ? 0 : (chroma > 255 ? 255 : chroma);
	}
}
179 | ||
5b9aa749 | 180 | void rgb565_to_uyvy(void *d, const void *s, int pixels) |
181 | { | |
182 | unsigned int *dst = d; | |
183 | const unsigned short *src = s; | |
184 | const unsigned char *yu = yuv_u + 32; | |
185 | const unsigned char *yv = yuv_v + 32; | |
186 | int r0, g0, b0, r1, g1, b1; | |
187 | int y0, y1, u, v; | |
188 | ||
189 | for (; pixels > 0; src += 2, dst++, pixels -= 2) | |
190 | { | |
191 | r0 = (src[0] >> 11) & 0x1f; | |
192 | g0 = (src[0] >> 6) & 0x1f; | |
193 | b0 = src[0] & 0x1f; | |
194 | r1 = (src[1] >> 11) & 0x1f; | |
195 | g1 = (src[1] >> 6) & 0x1f; | |
196 | b1 = src[1] & 0x1f; | |
197 | y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; | |
198 | y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; | |
199 | u = yu[b0 - y0]; | |
200 | v = yv[r0 - y0]; | |
201 | // valid Y range seems to be 16..235 | |
202 | y0 = 16 + 219 * y0 / 31; | |
203 | y1 = 16 + 219 * y1 / 31; | |
204 | ||
205 | *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; | |
206 | } | |
207 | } | |
208 | ||
c9099d02 | 209 | void bgr555_to_uyvy(void *d, const void *s, int pixels) |
210 | { | |
211 | unsigned int *dst = d; | |
212 | const unsigned short *src = s; | |
213 | const unsigned char *yu = yuv_u + 32; | |
214 | const unsigned char *yv = yuv_v + 32; | |
215 | int r0, g0, b0, r1, g1, b1; | |
216 | int y0, y1, u, v; | |
217 | ||
5bbe183f | 218 | for (; pixels > 1; src += 2, dst++, pixels -= 2) |
c9099d02 | 219 | { |
220 | b0 = (src[0] >> 10) & 0x1f; | |
221 | g0 = (src[0] >> 5) & 0x1f; | |
222 | r0 = src[0] & 0x1f; | |
223 | b1 = (src[1] >> 10) & 0x1f; | |
224 | g1 = (src[1] >> 5) & 0x1f; | |
225 | r1 = src[1] & 0x1f; | |
226 | y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; | |
227 | y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; | |
228 | u = yu[b0 - y0]; | |
229 | v = yv[r0 - y0]; | |
c9099d02 | 230 | y0 = 16 + 219 * y0 / 31; |
231 | y1 = 16 + 219 * y1 / 31; | |
232 | ||
233 | *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; | |
234 | } | |
235 | } | |
236 | ||
237 | void bgr888_to_uyvy(void *d, const void *s, int pixels) | |
238 | { | |
239 | unsigned int *dst = d; | |
240 | const unsigned char *src8 = s; | |
241 | const unsigned char *yu = yuv_u + 32; | |
242 | const unsigned char *yv = yuv_v + 32; | |
243 | int r0, g0, b0, r1, g1, b1; | |
244 | int y0, y1, u, v; | |
245 | ||
246 | for (; pixels > 0; src8 += 3*2, dst++, pixels -= 2) | |
247 | { | |
248 | r0 = src8[0], g0 = src8[1], b0 = src8[2]; | |
249 | r1 = src8[3], g1 = src8[4], b1 = src8[5]; | |
250 | y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16; | |
251 | y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16; | |
252 | u = yu[(b0 - y0) / 8]; | |
253 | v = yv[(r0 - y0) / 8]; | |
254 | y0 = 16 + 219 * y0 / 255; | |
255 | y1 = 16 + 219 * y1 / 255; | |
256 | ||
257 | *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; | |
258 | } | |
259 | } |